From e347fa2f459f2841869f937b8cd07f4f25d1bae5 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Aug 2022 02:34:10 +0000 Subject: [PATCH 001/266] add fuzzer for table definition --- docker/test/fuzzer/allow-nullable-key.xml | 6 + docker/test/fuzzer/run-fuzzer.sh | 1 + programs/client/Client.cpp | 188 +++++++++++------ programs/client/Client.h | 1 + src/Client/ClientBase.h | 1 + src/Client/QueryFuzzer.cpp | 245 ++++++++++++++++++++++ src/Client/QueryFuzzer.h | 14 ++ 7 files changed, 390 insertions(+), 66 deletions(-) create mode 100644 docker/test/fuzzer/allow-nullable-key.xml diff --git a/docker/test/fuzzer/allow-nullable-key.xml b/docker/test/fuzzer/allow-nullable-key.xml new file mode 100644 index 00000000000..5a0c2c20e1c --- /dev/null +++ b/docker/test/fuzzer/allow-nullable-key.xml @@ -0,0 +1,6 @@ + + + + 1 + + \ No newline at end of file diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index f74760e3339..393c980afba 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -85,6 +85,7 @@ function configure # TODO figure out which ones are needed cp -av --dereference "$repo_dir"/tests/config/config.d/listen.xml db/config.d cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d + cp -av --dereference "$script_dir"/allow-nullable-key.xml db/config.d } function watchdog diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 584806951cf..6ef0db93669 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -12,6 +12,7 @@ #include #include "Client.h" #include "Core/Protocol.h" +#include "Parsers/formatAST.h" #include @@ -513,6 +514,66 @@ static bool queryHasWithClause(const IAST & ast) return false; } +std::optional Client::processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query) +{ + processParsedSingleQuery(query_to_execute, query_to_execute, parsed_query); + + const auto * exception = server_exception ? 
server_exception.get() : client_exception.get(); + // Sometimes you may get TOO_DEEP_RECURSION from the server, + // and TOO_DEEP_RECURSION should not fail the fuzzer check. + if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION) + { + have_error = false; + server_exception.reset(); + client_exception.reset(); + return true; + } + + if (have_error) + { + fmt::print(stderr, "Error on processing query '{}': {}\n", parsed_query->formatForErrorMessage(), exception->message()); + + // Try to reconnect after errors, for two reasons: + // 1. We might not have realized that the server died, e.g. if + // it sent us a trace and closed connection properly. + // 2. The connection might have gotten into a wrong state and + // the next query will get false positive about + // "Unknown packet from server". + try + { + connection->forceConnected(connection_parameters.timeouts); + } + catch (...) + { + // Just report it, we'll terminate below. + fmt::print(stderr, + "Error while reconnecting to the server: {}\n", + getCurrentExceptionMessage(true)); + + // The reconnection might fail, but we'll still be connected + // in the sense of `connection->isConnected() = true`, + // in case when the requested database doesn't exist. + // Disconnect manually now, so that the following code doesn't + // have any doubts, and the connection state is predictable. + connection->disconnect(); + } + } + + if (!connection->isConnected()) + { + // Probably the server is dead because we found an assertion + // failure. Fail fast. + fmt::print(stderr, "Lost connection to the server.\n"); + + // Print the changed settings because they might be needed to + // reproduce the error. + printChangedSettings(); + + return false; + } + + return std::nullopt; +} /// Returns false when server is not available. 
bool Client::processWithFuzzing(const String & full_query) @@ -557,18 +618,28 @@ bool Client::processWithFuzzing(const String & full_query) // - SET -- The time to fuzz the settings has not yet come // (see comments in Client/QueryFuzzer.cpp) size_t this_query_runs = query_fuzzer_runs; - if (orig_ast->as() || - orig_ast->as() || - orig_ast->as() || - orig_ast->as()) + ASTs inserts_for_fuzzed_tables; + + if (orig_ast->as() || orig_ast->as()) { this_query_runs = 1; } + else if (const auto * create = orig_ast->as()) + { + if (create->columns_list) + this_query_runs = create_query_fuzzer_runs; + else + this_query_runs = 1; + } + else if (const auto * insert = orig_ast->as()) + { + this_query_runs = 1; + inserts_for_fuzzed_tables = fuzzer.getInsertQueriesForFuzzedTables(full_query); + } String query_to_execute; - ASTPtr parsed_query; - ASTPtr fuzz_base = orig_ast; + for (size_t fuzz_step = 0; fuzz_step < this_query_runs; ++fuzz_step) { fmt::print(stderr, "Fuzzing step {} out of {}\n", fuzz_step, this_query_runs); @@ -629,9 +700,9 @@ bool Client::processWithFuzzing(const String & full_query) continue; } - parsed_query = ast_to_process; - query_to_execute = parsed_query->formatForErrorMessage(); - processParsedSingleQuery(full_query, query_to_execute, parsed_query); + query_to_execute = ast_to_process->formatForErrorMessage(); + if (auto res = processFuzzingStep(query_to_execute, ast_to_process)) + return *res; } catch (...) { @@ -644,60 +715,6 @@ bool Client::processWithFuzzing(const String & full_query) have_error = true; } - const auto * exception = server_exception ? server_exception.get() : client_exception.get(); - // Sometimes you may get TOO_DEEP_RECURSION from the server, - // and TOO_DEEP_RECURSION should not fail the fuzzer check. 
- if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION) - { - have_error = false; - server_exception.reset(); - client_exception.reset(); - return true; - } - - if (have_error) - { - fmt::print(stderr, "Error on processing query '{}': {}\n", ast_to_process->formatForErrorMessage(), exception->message()); - - // Try to reconnect after errors, for two reasons: - // 1. We might not have realized that the server died, e.g. if - // it sent us a trace and closed connection properly. - // 2. The connection might have gotten into a wrong state and - // the next query will get false positive about - // "Unknown packet from server". - try - { - connection->forceConnected(connection_parameters.timeouts); - } - catch (...) - { - // Just report it, we'll terminate below. - fmt::print(stderr, - "Error while reconnecting to the server: {}\n", - getCurrentExceptionMessage(true)); - - // The reconnection might fail, but we'll still be connected - // in the sense of `connection->isConnected() = true`, - // in case when the requested database doesn't exist. - // Disconnect manually now, so that the following code doesn't - // have any doubts, and the connection state is predictable. - connection->disconnect(); - } - } - - if (!connection->isConnected()) - { - // Probably the server is dead because we found an assertion - // failure. Fail fast. - fmt::print(stderr, "Lost connection to the server.\n"); - - // Print the changed settings because they might be needed to - // reproduce the error. - printChangedSettings(); - - return false; - } - // Check that after the query is formatted, we can parse it back, // format again and get the same result. Unfortunately, we can't // compare the ASTs, which would be more sensitive to errors. This @@ -728,13 +745,12 @@ bool Client::processWithFuzzing(const String & full_query) // query, but second and third. // If you have to add any more workarounds to this check, just remove // it altogether, it's not so useful. 
- if (parsed_query && !have_error && !queryHasWithClause(*parsed_query)) + if (ast_to_process && !have_error && !queryHasWithClause(*ast_to_process)) { ASTPtr ast_2; try { const auto * tmp_pos = query_to_execute.c_str(); - ast_2 = parseQuery(tmp_pos, tmp_pos + query_to_execute.size(), false /* allow_multi_statements */); } catch (Exception & e) @@ -761,7 +777,7 @@ bool Client::processWithFuzzing(const String & full_query) "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n", text_3, text_2); fmt::print(stderr, "In more detail:\n"); - fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", parsed_query->dumpTree()); + fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", ast_to_process->dumpTree()); fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_execute); fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", ast_2->dumpTree()); fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", text_2); @@ -799,6 +815,34 @@ bool Client::processWithFuzzing(const String & full_query) } } + for (const auto & insert_query : inserts_for_fuzzed_tables) + { + std::cout << std::endl; + WriteBufferFromOStream ast_buf(std::cout, 4096); + formatAST(*insert_query, ast_buf, false /*highlight*/); + ast_buf.next(); + std::cout << std::endl << std::endl; + + try + { + query_to_execute = insert_query->formatForErrorMessage(); + if (auto res = processFuzzingStep(query_to_execute, insert_query)) + return *res; + } + catch (...) 
+ { + client_exception = std::make_unique(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode()); + have_error = true; + } + + if (have_error) + { + server_exception.reset(); + client_exception.reset(); + have_error = false; + } + } + return true; } @@ -833,6 +877,7 @@ void Client::addOptions(OptionsDescription & options_description) ("compression", po::value(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).") ("query-fuzzer-runs", po::value()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.") + ("create-query-fuzzer-runs", po::value()->default_value(0), "") ("interleave-queries-file", po::value>()->multitoken(), "file path with queries to execute before every file from 'queries-file'; multiple files can be specified (--queries-file file1 file2...); this is needed to enable more aggressive fuzzing of newly added tests (see 'query-fuzzer-runs' option)") @@ -985,6 +1030,17 @@ void Client::processOptions(const OptionsDescription & options_description, ignore_error = true; } + if ((create_query_fuzzer_runs = options["create-query-fuzzer-runs"].as())) + { + // Fuzzer implies multiquery. + config().setBool("multiquery", true); + // Ignore errors in parsing queries. 
+ config().setBool("ignore-error", true); + + global_context->setSetting("allow_suspicious_low_cardinality_types", true); + ignore_error = true; + } + if (options.count("opentelemetry-traceparent")) { String traceparent = options["opentelemetry-traceparent"].as(); diff --git a/programs/client/Client.h b/programs/client/Client.h index 1fec282be51..63f28ca96a2 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -17,6 +17,7 @@ public: protected: bool processWithFuzzing(const String & full_query) override; + std::optional processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query); void connect() override; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 6b19c1b8e02..212c9745a14 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -247,6 +247,7 @@ protected: QueryFuzzer fuzzer; int query_fuzzer_runs = 0; + int create_query_fuzzer_runs = 0; struct { diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 787fad5990a..be265e13caa 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -1,4 +1,21 @@ #include "QueryFuzzer.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include @@ -35,6 +52,7 @@ namespace DB namespace ErrorCodes { extern const int TOO_DEEP_RECURSION; + extern const int LOGICAL_ERROR; } Field QueryFuzzer::getRandomField(int type) @@ -398,6 +416,228 @@ void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def) } } +void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create) +{ + if (create.columns_list && create.columns_list->columns) + { + for (auto & ast : create.columns_list->columns->children) + { + if (auto * column = ast->as()) + { + fuzzColumnDeclaration(*column); + } + } + } + + if (create.storage && create.storage->engine) + { + auto & engine_name = create.storage->engine->name; + if (startsWith(engine_name, 
"Replicated")) + engine_name = engine_name.substr(strlen("Replicated")); + } + + auto full_name = create.getTable(); + auto original_name = full_name.substr(0, full_name.find("__fuzz_")); + + size_t index = index_of_fuzzed_table[original_name]++; + auto new_name = original_name + "__fuzz_" + toString(index); + + create.setTable(new_name); + + SipHash sip_hash; + sip_hash.update(original_name); + if (create.columns_list) + create.columns_list->updateTreeHash(sip_hash); + if (create.storage) + create.columns_list->updateTreeHash(sip_hash); + + IAST::Hash hash; + sip_hash.get128(hash); + if (created_tables_hashes.insert(hash).second) + original_table_name_to_fuzzed[original_name].push_back(new_name); +} + +void QueryFuzzer::fuzzColumnDeclaration(ASTColumnDeclaration & column) +{ + if (column.type) + { + auto data_type = fuzzDataType(DataTypeFactory::instance().get(column.type)); + + ParserDataType parser; + column.type = parseQuery(parser, data_type->getName(), DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); + } +} + +DataTypePtr QueryFuzzer::fuzzDataType(DataTypePtr type) +{ + /// Do not replace Array with not Array to often. + const auto * type_array = typeid_cast(type.get()); + if (type_array && fuzz_rand() % 5 != 0) + return std::make_shared(fuzzDataType(type_array->getNestedType())); + + const auto * type_tuple = typeid_cast(type.get()); + if (type_tuple && fuzz_rand() % 5 != 0) + { + DataTypes elements; + for (const auto & element : type_tuple->getElements()) + elements.push_back(fuzzDataType(element)); + + return type_tuple->haveExplicitNames() + ? 
std::make_shared(elements, type_tuple->getElementNames()) + : std::make_shared(elements); + } + + const auto * type_map = typeid_cast(type.get()); + if (type_map && fuzz_rand() % 5 != 0) + { + auto key_type = fuzzDataType(type_map->getKeyType()); + auto value_type = fuzzDataType(type_map->getValueType()); + if (!DataTypeMap::checkKeyType(key_type)) + key_type = type_map->getKeyType(); + + return std::make_shared(key_type, value_type); + } + + const auto * type_nullable = typeid_cast(type.get()); + if (type_nullable) + { + size_t tmp = fuzz_rand() % 3; + if (tmp == 0) + return type_nullable->getNestedType(); + + if (tmp == 1) + { + auto nested_type = fuzzDataType(type_nullable->getNestedType()); + if (nested_type->canBeInsideNullable()) + return std::make_shared(nested_type); + } + } + + const auto * type_low_cardinality = typeid_cast(type.get()); + if (type_low_cardinality) + { + size_t tmp = fuzz_rand() % 3; + if (tmp == 0) + return type_low_cardinality->getDictionaryType(); + + if (tmp == 1) + { + auto nested_type = fuzzDataType(type_low_cardinality->getDictionaryType()); + if (nested_type->canBeInsideLowCardinality()) + return std::make_shared(nested_type); + } + } + + size_t tmp = fuzz_rand() % 10; + if (tmp <= 1 && type->canBeInsideNullable()) + return std::make_shared(type); + + if (tmp <= 3 && type->canBeInsideLowCardinality()) + return std::make_shared(type); + + if (tmp == 4) + return getRandomType(); + + return type; +} + +DataTypePtr QueryFuzzer::getRandomType() +{ + auto type_id = static_cast(fuzz_rand() % static_cast(TypeIndex::Tuple) + 1); + + if (type_id == TypeIndex::Tuple) + { + size_t tuple_size = fuzz_rand() % 6 + 1; + DataTypes elements; + for (size_t i = 0; i < tuple_size; ++i) + elements.push_back(getRandomType()); + return std::make_shared(elements); + } + + if (type_id == TypeIndex::Array) + return std::make_shared(getRandomType()); + +#define DISPATCH(DECIMAL) \ + if (type_id == TypeIndex::DECIMAL) \ + return std::make_shared>( \ + 
DataTypeDecimal::maxPrecision(), DataTypeDecimal::maxPrecision()); // NOLINT + + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Decimal256) +#undef DISPATCH + + if (type_id == TypeIndex::FixedString) + return std::make_shared(fuzz_rand() % 20); + + if (type_id == TypeIndex::Enum8) + return std::make_shared(); + + if (type_id == TypeIndex::Enum16) + return std::make_shared(); + + return DataTypeFactory::instance().get(String(magic_enum::enum_name(type_id))); +} + +void QueryFuzzer::fuzzTableName(ASTTableExpression & table) +{ + if (!table.database_and_table_name || fuzz_rand() % 3 == 0) + return; + + const auto * identifier = table.database_and_table_name->as(); + if (!identifier) + return; + + auto table_id = identifier->getTableId(); + if (table_id.empty()) + return; + + auto it = original_table_name_to_fuzzed.find(table_id.getTableName()); + if (it != original_table_name_to_fuzzed.end() && !it->second.empty()) + { + const auto & new_table_name = it->second[fuzz_rand() % it->second.size()]; + StorageID new_table_id(table_id.database_name, new_table_name); + table.database_and_table_name = std::make_shared(new_table_id); + } +} + +static ASTPtr tryParseInsertQuery(const String & full_query) +{ + const char * pos = full_query.data(); + const char * end = full_query.data() + full_query.size(); + + ParserInsertQuery parser(end, false); + String message; + + return tryParseQuery(parser, pos, end, message, false, "", false, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); +} + +ASTs QueryFuzzer::getInsertQueriesForFuzzedTables(const String & full_query) +{ + auto parsed_query = tryParseInsertQuery(full_query); + if (!parsed_query) + return {}; + + const auto & insert = *parsed_query->as(); + if (!insert.table) + return {}; + + auto table_name = insert.getTable(); + auto it = original_table_name_to_fuzzed.find(table_name); + if (it == original_table_name_to_fuzzed.end()) + return {}; + + ASTs queries; + for (const auto & 
fuzzed_name : it->second) + { + auto & query = queries.emplace_back(tryParseInsertQuery(full_query)); + query->as()->setTable(fuzzed_name); + } + + return queries; +} + void QueryFuzzer::fuzz(ASTs & asts) { for (auto & ast : asts) @@ -465,6 +705,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast) } else if (auto * table_expr = typeid_cast(ast.get())) { + fuzzTableName(*table_expr); fuzz(table_expr->children); } else if (auto * expr_list = typeid_cast(ast.get())) @@ -531,6 +772,10 @@ void QueryFuzzer::fuzz(ASTPtr & ast) literal->value = fuzzField(literal->value); } } + else if (auto * create_query = typeid_cast(ast.get())) + { + fuzzCreateQuery(*create_query); + } else { fuzz(ast->children); diff --git a/src/Client/QueryFuzzer.h b/src/Client/QueryFuzzer.h index 25bd7f0c88d..40c203b6351 100644 --- a/src/Client/QueryFuzzer.h +++ b/src/Client/QueryFuzzer.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -16,6 +17,10 @@ namespace DB class ASTExpressionList; class ASTOrderByElement; +class ASTCreateQuery; +class ASTInsertQuery; +class ASTColumnDeclaration; +struct ASTTableExpression; struct ASTWindowDefinition; /* @@ -54,6 +59,9 @@ struct QueryFuzzer std::unordered_set debug_visited_nodes; ASTPtr * debug_top_ast = nullptr; + std::unordered_map> original_table_name_to_fuzzed; + std::unordered_map index_of_fuzzed_table; + std::set created_tables_hashes; // This is the only function you have to call -- it will modify the passed // ASTPtr to point to new AST with some random changes. 
@@ -63,12 +71,18 @@ struct QueryFuzzer Field getRandomField(int type); Field fuzzField(Field field); ASTPtr getRandomColumnLike(); + DataTypePtr fuzzDataType(DataTypePtr type); + DataTypePtr getRandomType(); + ASTs getInsertQueriesForFuzzedTables(const String & full_query); void replaceWithColumnLike(ASTPtr & ast); void replaceWithTableLike(ASTPtr & ast); void fuzzOrderByElement(ASTOrderByElement * elem); void fuzzOrderByList(IAST * ast); void fuzzColumnLikeExpressionList(IAST * ast); void fuzzWindowFrame(ASTWindowDefinition & def); + void fuzzCreateQuery(ASTCreateQuery & create); + void fuzzColumnDeclaration(ASTColumnDeclaration & column); + void fuzzTableName(ASTTableExpression & table); void fuzz(ASTs & asts); void fuzz(ASTPtr & ast); void collectFuzzInfoMain(ASTPtr ast); From 00144bb6a93dec87f76d13891b2e17b0e2fc6734 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Aug 2022 02:43:47 +0000 Subject: [PATCH 002/266] fix style check --- src/Client/QueryFuzzer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index be265e13caa..e1b72b0f830 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -52,7 +52,6 @@ namespace DB namespace ErrorCodes { extern const int TOO_DEEP_RECURSION; - extern const int LOGICAL_ERROR; } Field QueryFuzzer::getRandomField(int type) From 0ba886f763ee7f74a95031db92605c4c3465b254 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Aug 2022 12:24:15 +0000 Subject: [PATCH 003/266] enable fuzzing of table definitions --- docker/test/fuzzer/run-fuzzer.sh | 1 + src/Client/QueryFuzzer.cpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 393c980afba..d8ce62348d2 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -220,6 +220,7 @@ quit --receive_data_timeout_ms=10000 \ --stacktrace \ --query-fuzzer-runs=1000 \ + 
--create-query-fuzzer-runs=30 \ --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \ $NEW_TESTS_OPT \ > >(tail -n 100000 > fuzzer.log) \ diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index e1b72b0f830..995b4b455b0 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -448,7 +448,7 @@ void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create) if (create.columns_list) create.columns_list->updateTreeHash(sip_hash); if (create.storage) - create.columns_list->updateTreeHash(sip_hash); + create.storage->updateTreeHash(sip_hash); IAST::Hash hash; sip_hash.get128(hash); @@ -558,7 +558,7 @@ DataTypePtr QueryFuzzer::getRandomType() #define DISPATCH(DECIMAL) \ if (type_id == TypeIndex::DECIMAL) \ - return std::make_shared>( \ + return std::make_shared>( \ // NOLINT DataTypeDecimal::maxPrecision(), DataTypeDecimal::maxPrecision()); // NOLINT DISPATCH(Decimal32) From 0d74ae33632a6ec1a2d2ce41bbe42bf8bddb376a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Aug 2022 13:39:15 +0000 Subject: [PATCH 004/266] fix build --- src/Client/QueryFuzzer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 995b4b455b0..73637a8b4c2 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -556,10 +556,12 @@ DataTypePtr QueryFuzzer::getRandomType() if (type_id == TypeIndex::Array) return std::make_shared(getRandomType()); +/// NOLINTNEXTLINE #define DISPATCH(DECIMAL) \ if (type_id == TypeIndex::DECIMAL) \ - return std::make_shared>( \ // NOLINT - DataTypeDecimal::maxPrecision(), DataTypeDecimal::maxPrecision()); // NOLINT + return std::make_shared>( \ + DataTypeDecimal::maxPrecision(), \ + fuzz_rand() % DataTypeDecimal::maxPrecision() + 1); DISPATCH(Decimal32) DISPATCH(Decimal64) From 038325038331577597845eb514f10ea8e5265729 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 12 Aug 2022 00:32:21 +0000 Subject: [PATCH 
005/266] execute DROP queries for fuzzed tables --- programs/client/Client.cpp | 13 +++++++++---- src/Client/QueryFuzzer.cpp | 24 ++++++++++++++++++++++++ src/Client/QueryFuzzer.h | 2 ++ 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 6ef0db93669..a0eccae957b 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -618,9 +618,9 @@ bool Client::processWithFuzzing(const String & full_query) // - SET -- The time to fuzz the settings has not yet come // (see comments in Client/QueryFuzzer.cpp) size_t this_query_runs = query_fuzzer_runs; - ASTs inserts_for_fuzzed_tables; + ASTs queries_for_fuzzed_tables; - if (orig_ast->as() || orig_ast->as()) + if (orig_ast->as()) { this_query_runs = 1; } @@ -634,7 +634,12 @@ bool Client::processWithFuzzing(const String & full_query) else if (const auto * insert = orig_ast->as()) { this_query_runs = 1; - inserts_for_fuzzed_tables = fuzzer.getInsertQueriesForFuzzedTables(full_query); + queries_for_fuzzed_tables = fuzzer.getInsertQueriesForFuzzedTables(full_query); + } + else if (const auto * drop = orig_ast->as()) + { + this_query_runs = 1; + queries_for_fuzzed_tables = fuzzer.getDropQueriesForFuzzedTables(*drop); } String query_to_execute; @@ -815,7 +820,7 @@ bool Client::processWithFuzzing(const String & full_query) } } - for (const auto & insert_query : inserts_for_fuzzed_tables) + for (const auto & insert_query : queries_for_fuzzed_tables) { std::cout << std::endl; WriteBufferFromOStream ast_buf(std::cout, 4096); diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 73637a8b4c2..0035bd7e990 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -632,6 +633,9 @@ ASTs QueryFuzzer::getInsertQueriesForFuzzedTables(const String & full_query) ASTs queries; for (const auto & fuzzed_name : it->second) { + /// Parse query from scratch for 
each table instead of clone, + /// to store proper pointers to inlined data, + /// which are not copies during clone. auto & query = queries.emplace_back(tryParseInsertQuery(full_query)); query->as()->setTable(fuzzed_name); } @@ -639,6 +643,26 @@ ASTs QueryFuzzer::getInsertQueriesForFuzzedTables(const String & full_query) return queries; } +ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query) +{ + if (drop_query.kind != ASTDropQuery::Drop) + return {}; + + auto table_name = drop_query.getTable(); + auto it = original_table_name_to_fuzzed.find(table_name); + if (it == original_table_name_to_fuzzed.end()) + return {}; + + ASTs queries; + for (const auto & fuzzed_name : it->second) + { + auto & query = queries.emplace_back(drop_query.clone()); + query->as()->setTable(fuzzed_name); + } + + return queries; +} + void QueryFuzzer::fuzz(ASTs & asts) { for (auto & ast : asts) diff --git a/src/Client/QueryFuzzer.h b/src/Client/QueryFuzzer.h index 40c203b6351..ffe9fc7c91e 100644 --- a/src/Client/QueryFuzzer.h +++ b/src/Client/QueryFuzzer.h @@ -20,6 +20,7 @@ class ASTOrderByElement; class ASTCreateQuery; class ASTInsertQuery; class ASTColumnDeclaration; +class ASTDropQuery; struct ASTTableExpression; struct ASTWindowDefinition; @@ -74,6 +75,7 @@ struct QueryFuzzer DataTypePtr fuzzDataType(DataTypePtr type); DataTypePtr getRandomType(); ASTs getInsertQueriesForFuzzedTables(const String & full_query); + ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); void replaceWithColumnLike(ASTPtr & ast); void replaceWithTableLike(ASTPtr & ast); void fuzzOrderByElement(ASTOrderByElement * elem); From e7959d74bceed8a6c753e91b7ab7b29ed0ecd107 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 1 Sep 2022 22:06:54 +0000 Subject: [PATCH 006/266] add composable factory and protocol --- src/Server/TCPProtocolStack.h | 87 +++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 src/Server/TCPProtocolStack.h diff 
--git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h new file mode 100644 index 00000000000..c7c3e73acab --- /dev/null +++ b/src/Server/TCPProtocolStack.h @@ -0,0 +1,87 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "base/types.h" + + +namespace DB +{ + +class TCPProtocolStack : public Poco::Net::TCPServerConnection +{ + using StreamSocket = Poco::Net::StreamSocket; + using TCPServerConnection = Poco::Net::TCPServerConnection; +private: + TCPServer & tcp_server; + +public: + TCPProtocolStack(TCPServer & tcp_server_, const StreamSocket & socket) : TCPServerConnection(socket), tcp_server(tcp_server_) {} + + void append(std::unique_ptr factory) + { + stack.emplace_back(std::move(factory)); + } + + void run() override + { + for (auto & factory : stack) + { + std::unique_ptr connection(factory->createConnection(socket(), tcp_server)); + connection->run(); + } + } + +private: + std::list> stack; +}; + + +class TCPProtocolStackFactory : public TCPServerConnectionFactory +{ +private: + IServer & server; + Poco::Logger * log; + std::string server_display_name; + + class DummyTCPHandler : public Poco::Net::TCPServerConnection + { + public: + using Poco::Net::TCPServerConnection::TCPServerConnection; + void run() override {} + }; + +public: + explicit TCPProtocolStackFactory(IServer & server_) : + server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")) + { + server_display_name = server.config().getString("display_name", getFQDNOrHostName()); + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override + { + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + return new TCPProtocolStack(tcp_server, socket); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. 
Client is not connected (most likely RST packet was sent)."); + return new DummyTCPHandler(socket); + } + } +}; + + +} From b6e3680d60a3a94b377ae1c65cda01feffcd30e1 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 2 Sep 2022 04:47:04 +0000 Subject: [PATCH 007/266] test with TLS --- programs/server/Server.cpp | 22 ++++++++- src/Server/TCPProtocolStack.h | 92 ++++++++++++++++++++++++++++++----- 2 files changed, 99 insertions(+), 15 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b621a897035..1633dec6865 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -88,6 +88,8 @@ #include #include +#include + #include "config_core.h" #include "Common/config_version.h" @@ -1952,19 +1954,35 @@ void Server::createServers( createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter { #if USE_SSL - Poco::Net::SecureServerSocket socket; + //Poco::Net::SecureServerSocket socket; + Poco::Net::ServerSocket socket; auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); + + TCPProtocolStackFactory *stack = new TCPProtocolStackFactory(*this); + stack->append(new TLSHandlerFactory(*this)); + stack->append(new TCPHandlerFactory(*this, false, false)); return ProtocolServerAdapter( listen_host, port_name, "secure native protocol (tcp_secure): " + address.toString(), std::make_unique( - new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), + stack, server_pool, socket, new Poco::Net::TCPServerParams)); +/* + return ProtocolServerAdapter( + listen_host, + port_name, + "secure native protocol (tcp_secure): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, true, false), + server_pool, + socket, + new Poco::Net::TCPServerParams)); +*/ #else UNUSED(port); throw Exception{"SSL support for 
TCP protocol is disabled because Poco library was built without NetSSL support.", diff --git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h index c7c3e73acab..e1c39fbf8eb 100644 --- a/src/Server/TCPProtocolStack.h +++ b/src/Server/TCPProtocolStack.h @@ -11,6 +11,9 @@ #include #include #include +#include + +#include "Poco/Net/SSLManager.h" #include "base/types.h" @@ -18,20 +21,25 @@ namespace DB { +class TCPConnectionAccessor : public Poco::Net::TCPServerConnection +{ +public: + using Poco::Net::TCPServerConnection::socket; + explicit TCPConnectionAccessor(const Poco::Net::StreamSocket & socket) : Poco::Net::TCPServerConnection(socket) {} +}; + class TCPProtocolStack : public Poco::Net::TCPServerConnection { using StreamSocket = Poco::Net::StreamSocket; using TCPServerConnection = Poco::Net::TCPServerConnection; private: TCPServer & tcp_server; + std::list stack; public: - TCPProtocolStack(TCPServer & tcp_server_, const StreamSocket & socket) : TCPServerConnection(socket), tcp_server(tcp_server_) {} - - void append(std::unique_ptr factory) - { - stack.emplace_back(std::move(factory)); - } + TCPProtocolStack(TCPServer & tcp_server_, const StreamSocket & socket, const std::list & stack_) + : TCPServerConnection(socket), tcp_server(tcp_server_), stack(stack_) + {} void run() override { @@ -39,16 +47,74 @@ public: { std::unique_ptr connection(factory->createConnection(socket(), tcp_server)); connection->run(); + if (auto * accessor = dynamic_cast(connection.get()); accessor) + socket() = accessor->socket(); } } - -private: - std::list> stack; }; class TCPProtocolStackFactory : public TCPServerConnectionFactory { +private: + IServer & server; + Poco::Logger * log; + std::string server_display_name; + std::list stack; + + class DummyTCPHandler : public Poco::Net::TCPServerConnection + { + public: + using Poco::Net::TCPServerConnection::TCPServerConnection; + void run() override {} + }; + +public: + explicit TCPProtocolStackFactory(IServer & server_) + : 
server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")) + { + server_display_name = server.config().getString("display_name", getFQDNOrHostName()); + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override + { + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + return new TCPProtocolStack(tcp_server, socket, stack); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent)."); + return new DummyTCPHandler(socket); + } + } + + void append(TCPServerConnectionFactory::Ptr factory) + { + stack.push_back(factory); + } +}; + + + +class TLSHandler : public TCPConnectionAccessor +{ + using StreamSocket = Poco::Net::StreamSocket; + using SecureStreamSocket = Poco::Net::SecureStreamSocket; + using TCPServerConnection = Poco::Net::TCPServerConnection; +public: + explicit TLSHandler(const StreamSocket & socket) : TCPConnectionAccessor(socket) {} + + void run() override + { + socket() = SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext()); + } +}; + + +class TLSHandlerFactory : public TCPServerConnectionFactory +{ private: IServer & server; Poco::Logger * log; @@ -62,18 +128,18 @@ private: }; public: - explicit TCPProtocolStackFactory(IServer & server_) : - server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")) + explicit TLSHandlerFactory(IServer & server_) + : server(server_), log(&Poco::Logger::get("TLSHandlerFactory")) { server_display_name = server.config().getString("display_name", getFQDNOrHostName()); } - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/) override { try { LOG_TRACE(log, "TCP Request. 
Address: {}", socket.peerAddress().toString()); - return new TCPProtocolStack(tcp_server, socket); + return new TLSHandler(socket); } catch (const Poco::Net::NetException &) { From 5727517713455d28ae554e2cc44ecff3f5144eb3 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 2 Sep 2022 16:22:57 +0000 Subject: [PATCH 008/266] add variadic constructor --- programs/server/Server.cpp | 14 ++++++++++++++ src/Server/TCPProtocolStack.h | 6 +++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 1633dec6865..4b74d724b85 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1960,9 +1960,11 @@ void Server::createServers( socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); +/* TCPProtocolStackFactory *stack = new TCPProtocolStackFactory(*this); stack->append(new TLSHandlerFactory(*this)); stack->append(new TCPHandlerFactory(*this, false, false)); + return ProtocolServerAdapter( listen_host, port_name, @@ -1972,6 +1974,18 @@ void Server::createServers( server_pool, socket, new Poco::Net::TCPServerParams)); +*/ + return ProtocolServerAdapter( + listen_host, + port_name, + "secure native protocol (tcp_secure): " + address.toString(), + std::make_unique( + new TCPProtocolStackFactory(*this, new TLSHandlerFactory(*this), new TCPHandlerFactory(*this, false, false)), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + + /* return ProtocolServerAdapter( listen_host, diff --git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h index e1c39fbf8eb..c72dfd98f53 100644 --- a/src/Server/TCPProtocolStack.h +++ b/src/Server/TCPProtocolStack.h @@ -70,8 +70,9 @@ private: }; public: - explicit TCPProtocolStackFactory(IServer & server_) - : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")) + template + explicit TCPProtocolStackFactory(IServer & server_, T... 
factory) + : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), stack({factory...}) { server_display_name = server.config().getString("display_name", getFQDNOrHostName()); } @@ -102,7 +103,6 @@ class TLSHandler : public TCPConnectionAccessor { using StreamSocket = Poco::Net::StreamSocket; using SecureStreamSocket = Poco::Net::SecureStreamSocket; - using TCPServerConnection = Poco::Net::TCPServerConnection; public: explicit TLSHandler(const StreamSocket & socket) : TCPConnectionAccessor(socket) {} From 8a7fe2888a9c6526c24437e2de628e40864cf7ae Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 8 Sep 2022 06:12:33 +0000 Subject: [PATCH 009/266] protocols configuration processing --- programs/server/Server.cpp | 109 +++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 4b74d724b85..df6b40cd347 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -1870,6 +1871,114 @@ void Server::createServers( http_params->setTimeout(settings.http_receive_timeout); http_params->setKeepAliveTimeout(keep_alive_timeout); + + + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + + auto createFactory = [&](const std::string & type) -> Poco::SharedPtr //TCPServerConnectionFactory::Ptr + { + if (type == "tcp") + return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false)); + if (type == "tls") + return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this)); + if (type == "mysql") + return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this)); + if (type == "postgres") + return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this)); + if (type == "http") + return TCPServerConnectionFactory::Ptr( + new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, 
async_metrics, "HTTPHandler-factory")) + ); + if (type == "prometheus") + return TCPServerConnectionFactory::Ptr( + new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory")) + ); + if (type == "interserver") + return TCPServerConnectionFactory::Ptr( + new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory")) + ); + + + throw Exception("LOGICAL ERROR: Unknown protocol name.", ErrorCodes::LOGICAL_ERROR); + }; + + for (const auto & protocol : protocols) + { + std::string prefix = protocol + "."; + + if (config.has(prefix + "host") && config.has(prefix + "port")) + { + + std::string port_name = prefix + "port"; + std::string listen_host = prefix + "host"; + bool is_secure = false; + auto stack = std::make_unique(*this); + while (true) + { + if (!config.has(prefix + "type")) + { + // misconfigured - lack of "type" + stack.reset(); + break; + } + + std::string type = config.getString(prefix + "type"); + if (type == "tls") + { + if (is_secure) + { + // misconfigured - only one tls layer is allowed + stack.reset(); + break; + } + is_secure = true; + } + + TCPServerConnectionFactory::Ptr factory = createFactory(type); + if (!factory) + { + // misconfigured - protocol "type" doesn't exist + stack.reset(); + break; + } + + stack->append(factory); + + if (!config.has(prefix + "impl")) + { + stack->append(createFactory("tcp")); + break; + } + prefix = "protocols." 
+ config.getString(prefix + "impl"); + } + + if (!stack) + continue; + + createServer(config, listen_host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, is_secure); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + + return ProtocolServerAdapter( + listen_host, + port_name.c_str(), + "secure native protocol (tcp_secure): " + address.toString(), + std::make_unique( + stack.release(), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } + } + + + + for (const auto & listen_host : listen_hosts) { /// HTTP From 772bf050da081aeef04814ea5420bf055299fe1a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 10 Sep 2022 20:21:37 +0000 Subject: [PATCH 010/266] add PROXYv1 handler, add stack exchange data block, tuneup protocols config --- programs/server/Server.cpp | 56 ++++--- src/Server/TCPHandler.cpp | 10 ++ src/Server/TCPHandler.h | 2 + src/Server/TCPHandlerFactory.h | 16 ++ src/Server/TCPProtocolStack.h | 202 +++++++++++++++++++++++- src/Server/TCPProtocolStackData.h | 15 ++ src/Server/TCPServerConnectionFactory.h | 5 + 7 files changed, 274 insertions(+), 32 deletions(-) create mode 100644 src/Server/TCPProtocolStackData.h diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index df6b40cd347..c86a33ba60c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -71,6 +71,7 @@ #include #include #include +#include #include #include "MetricsTransmitter.h" #include @@ -88,6 +89,7 @@ #include #include #include +#include #include @@ -1882,6 +1884,8 @@ void Server::createServers( return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false)); if (type == "tls") return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this)); + if (type == "proxy1") + return 
TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this)); if (type == "mysql") return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this)); if (type == "postgres") @@ -1906,51 +1910,53 @@ void Server::createServers( for (const auto & protocol : protocols) { std::string prefix = protocol + "."; + std::unordered_set pset {prefix}; if (config.has(prefix + "host") && config.has(prefix + "port")) { - std::string port_name = prefix + "port"; std::string listen_host = prefix + "host"; bool is_secure = false; auto stack = std::make_unique(*this); while (true) { - if (!config.has(prefix + "type")) + // if there is no "type" - it's a reference to another protocol and this is just another endpoint + if (config.has(prefix + "type")) { - // misconfigured - lack of "type" - stack.reset(); - break; - } - - std::string type = config.getString(prefix + "type"); - if (type == "tls") - { - if (is_secure) + std::string type = config.getString(prefix + "type"); + if (type == "tls") { - // misconfigured - only one tls layer is allowed + if (is_secure) + { + // misconfigured - only one tls layer is allowed + stack.reset(); + break; + } + is_secure = true; + } + + TCPServerConnectionFactory::Ptr factory = createFactory(type); + if (!factory) + { + // misconfigured - protocol type doesn't exist stack.reset(); break; } - is_secure = true; + + stack->append(factory); + + if (!config.has(prefix + "impl")) + break; } - TCPServerConnectionFactory::Ptr factory = createFactory(type); - if (!factory) + prefix = "protocols." + config.getString(prefix + "impl") + "."; + + if (!pset.insert(prefix).second) { - // misconfigured - protocol "type" doesn't exist + // misconfigured - loop is detected stack.reset(); break; } - - stack->append(factory); - - if (!config.has(prefix + "impl")) - { - stack->append(createFactory("tcp")); - break; - } - prefix = "protocols." 
+ config.getString(prefix + "impl"); } if (!stack) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 1fc88168b35..44b6cfdd628 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -109,6 +109,16 @@ TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::N { } +TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, TCPProtocolStackData & stack_data, std::string server_display_name_) +: Poco::Net::TCPServerConnection(socket_) + , server(server_) + , tcp_server(tcp_server_) + , log(&Poco::Logger::get("TCPHandler")) + , forwarded_for(stack_data.forwarded_for) + , server_display_name(std::move(server_display_name_)) +{ +} + TCPHandler::~TCPHandler() { try diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index ea5fb2f9fe0..13c3c5f70c1 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -22,6 +22,7 @@ #include #include "IServer.h" +#include "Server/TCPProtocolStackData.h" #include "base/types.h" @@ -137,6 +138,7 @@ public: * Proxy-forwarded (original client) IP address is used for quota accounting if quota is keyed by forwarded IP. 
*/ TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_); + TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, TCPProtocolStackData & stack_data, std::string server_display_name_); ~TCPHandler() override; void run() override; diff --git a/src/Server/TCPHandlerFactory.h b/src/Server/TCPHandlerFactory.h index 354c886f4c0..fde04c6e0ab 100644 --- a/src/Server/TCPHandlerFactory.h +++ b/src/Server/TCPHandlerFactory.h @@ -3,6 +3,7 @@ #include #include #include +#include "Server/TCPProtocolStackData.h" #include #include #include @@ -53,6 +54,21 @@ public: return new DummyTCPHandler(socket); } } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server, TCPProtocolStackData & stack_data) override + { + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + + return new TCPHandler(server, tcp_server, socket, stack_data, server_display_name); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. 
Client is not connected (most likely RST packet was sent)."); + return new DummyTCPHandler(socket); + } + } }; } diff --git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h index c72dfd98f53..21687898d45 100644 --- a/src/Server/TCPProtocolStack.h +++ b/src/Server/TCPProtocolStack.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -14,13 +15,24 @@ #include #include "Poco/Net/SSLManager.h" +#include +#include "Interpreters/Context.h" +#include "Server/TCPProtocolStackData.h" #include "base/types.h" namespace DB { +namespace ErrorCodes +{ + extern const int NETWORK_ERROR; + extern const int SOCKET_TIMEOUT; + extern const int CANNOT_READ_FROM_SOCKET; + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; +} + class TCPConnectionAccessor : public Poco::Net::TCPServerConnection { public: @@ -43,12 +55,16 @@ public: void run() override { + TCPProtocolStackData stack_data; + stack_data.socket = socket(); for (auto & factory : stack) { - std::unique_ptr connection(factory->createConnection(socket(), tcp_server)); + std::unique_ptr connection(factory->createConnection(socket(), tcp_server, stack_data)); connection->run(); - if (auto * accessor = dynamic_cast(connection.get()); accessor) - socket() = accessor->socket(); + if (stack_data.socket != socket()) + socket() = stack_data.socket; +// if (auto * accessor = dynamic_cast(connection.get()); accessor) + // socket() = accessor->socket(); } } }; @@ -99,17 +115,23 @@ public: -class TLSHandler : public TCPConnectionAccessor +class TLSHandler : public Poco::Net::TCPServerConnection //TCPConnectionAccessor { using StreamSocket = Poco::Net::StreamSocket; using SecureStreamSocket = Poco::Net::SecureStreamSocket; public: - explicit TLSHandler(const StreamSocket & socket) : TCPConnectionAccessor(socket) {} + explicit TLSHandler(const StreamSocket & socket, TCPProtocolStackData & stack_data_) + : Poco::Net::TCPServerConnection(socket) //TCPConnectionAccessor(socket) + , stack_data(stack_data_) + {} void 
run() override { socket() = SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext()); + stack_data.socket = socket(); } +private: + TCPProtocolStackData & stack_data; }; @@ -134,12 +156,18 @@ public: server_display_name = server.config().getString("display_name", getFQDNOrHostName()); } - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/) override + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override + { + TCPProtocolStackData stack_data; + return createConnection(socket, tcp_server, stack_data); + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/, TCPProtocolStackData & stack_data) override { try { LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new TLSHandler(socket); + return new TLSHandler(socket, stack_data); } catch (const Poco::Net::NetException &) { @@ -150,4 +178,164 @@ public: }; +class ProxyV1Handler : public Poco::Net::TCPServerConnection +{ + using StreamSocket = Poco::Net::StreamSocket; +public: + explicit ProxyV1Handler(const StreamSocket & socket, IServer & server_, TCPProtocolStackData & stack_data_) + : Poco::Net::TCPServerConnection(socket), server(server_), stack_data(stack_data_) {} + + void run() override + { + const auto & settings = server.context()->getSettingsRef(); + socket().setReceiveTimeout(settings.receive_timeout); + + std::string word; + bool eol; + + // Read PROXYv1 protocol header + // http://www.haproxy.org/download/1.8/doc/proxy-protocol.txt + + // read "PROXY" + if (!readWord(5, word, eol) || word != "PROXY" || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read "TCP4" or "TCP6" or "UNKNOWN" + if (!readWord(7, word, eol)) + throw ParsingException("PROXY protocol violation", 
ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + if (word != "TCP4" && word != "TCP6" && word != "UNKNOWN") + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + if (word == "UNKNOWN" && eol) + return; + + if (eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read address + if (!readWord(39, word, eol) || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + stack_data.forwarded_for = std::move(word); + + // read address + if (!readWord(39, word, eol) || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read port + if (!readWord(5, word, eol) || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read port and "\r\n" + if (!readWord(5, word, eol) || !eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + } + +protected: + bool readWord(int max_len, std::string & word, bool & eol) + { + word.clear(); + eol = false; + + char ch = 0; + int n = 0; + bool is_cr = false; + try + { + for (++max_len; max_len > 0 || is_cr; --max_len) + { + n = socket().receiveBytes(&ch, 1); + if (n == 0) + { + socket().shutdown(); + return false; + } + if (n < 0) + break; + + if (is_cr) + return ch == 0x0A; + + if (ch == 0x0D) + { + is_cr = true; + eol = true; + continue; + } + + if (ch == ' ') + return true; + + word.push_back(ch); + } + } + catch (const Poco::Net::NetException & e) + { + throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); + } + catch (const Poco::TimeoutException &) + { + throw NetException(fmt::format("Timeout exceeded while reading from socket ({}, {} ms)", + socket().peerAddress().toString(), + 
socket().getReceiveTimeout().totalMilliseconds()), ErrorCodes::SOCKET_TIMEOUT); + } + catch (const Poco::IOException & e) + { + throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); + } + + if (n < 0) + throw NetException("Cannot read from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET); + + return false; + } + +private: + IServer & server; + TCPProtocolStackData & stack_data; +}; + +class ProxyV1HandlerFactory : public TCPServerConnectionFactory +{ +private: + IServer & server; + Poco::Logger * log; + std::string server_display_name; + + class DummyTCPHandler : public Poco::Net::TCPServerConnection + { + public: + using Poco::Net::TCPServerConnection::TCPServerConnection; + void run() override {} + }; + +public: + explicit ProxyV1HandlerFactory(IServer & server_) + : server(server_), log(&Poco::Logger::get("ProxyV1HandlerFactory")) + { + server_display_name = server.config().getString("display_name", getFQDNOrHostName()); + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override + { + TCPProtocolStackData stack_data; + return createConnection(socket, tcp_server, stack_data); + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/, TCPProtocolStackData & stack_data) override + { + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + return new ProxyV1Handler(socket, server, stack_data); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. 
Client is not connected (most likely RST packet was sent)."); + return new DummyTCPHandler(socket); + } + } +}; + } diff --git a/src/Server/TCPProtocolStackData.h b/src/Server/TCPProtocolStackData.h new file mode 100644 index 00000000000..bc90de8c678 --- /dev/null +++ b/src/Server/TCPProtocolStackData.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + +namespace DB +{ + +struct TCPProtocolStackData +{ + Poco::Net::StreamSocket socket; + std::string forwarded_for; +}; + +} diff --git a/src/Server/TCPServerConnectionFactory.h b/src/Server/TCPServerConnectionFactory.h index 613f98352bd..ab9b0848ed7 100644 --- a/src/Server/TCPServerConnectionFactory.h +++ b/src/Server/TCPServerConnectionFactory.h @@ -1,6 +1,7 @@ #pragma once #include +#include "Server/TCPProtocolStackData.h" namespace Poco { @@ -23,5 +24,9 @@ public: /// Same as Poco::Net::TCPServerConnectionFactory except we can pass the TCPServer virtual Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) = 0; + virtual Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server, TCPProtocolStackData &/* stack_data */) + { + return createConnection(socket, tcp_server); + } }; } From d001baec873b1bb13d633cee0b5f6204918efb70 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Sep 2022 00:40:40 +0000 Subject: [PATCH 011/266] pass section config key to a factory --- programs/server/Server.cpp | 19 ++++++++------- src/Server/TCPProtocolStack.h | 45 ++++++++++++++++++----------------- 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index c86a33ba60c..205e30a2c65 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1878,14 +1878,14 @@ void Server::createServers( Poco::Util::AbstractConfiguration::Keys protocols; config.keys("protocols", protocols); - auto createFactory = [&](const std::string & type) 
-> Poco::SharedPtr //TCPServerConnectionFactory::Ptr + auto createFactory = [&](const std::string & type, const std::string & conf_name) -> Poco::SharedPtr //TCPServerConnectionFactory::Ptr { if (type == "tcp") return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false)); if (type == "tls") - return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this)); + return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this, conf_name)); if (type == "proxy1") - return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this)); + return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this, conf_name)); if (type == "mysql") return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this)); if (type == "postgres") @@ -1903,12 +1903,12 @@ void Server::createServers( new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory")) ); - throw Exception("LOGICAL ERROR: Unknown protocol name.", ErrorCodes::LOGICAL_ERROR); }; for (const auto & protocol : protocols) { + std::string conf_name = protocol; std::string prefix = protocol + "."; std::unordered_set pset {prefix}; @@ -1917,10 +1917,10 @@ void Server::createServers( std::string port_name = prefix + "port"; std::string listen_host = prefix + "host"; bool is_secure = false; - auto stack = std::make_unique(*this); + auto stack = std::make_unique(*this, conf_name); while (true) { - // if there is no "type" - it's a reference to another protocol and this is just another endpoint + // if there is no "type" - it's a reference to another protocol and this is just an endpoint if (config.has(prefix + "type")) { std::string type = config.getString(prefix + "type"); @@ -1935,7 +1935,7 @@ void Server::createServers( is_secure = true; } - TCPServerConnectionFactory::Ptr factory = createFactory(type); + TCPServerConnectionFactory::Ptr factory = createFactory(type, conf_name); if (!factory) { // misconfigured - 
protocol type doesn't exist @@ -1949,7 +1949,8 @@ void Server::createServers( break; } - prefix = "protocols." + config.getString(prefix + "impl") + "."; + conf_name = "protocols." + config.getString(prefix + "impl"); + prefix = conf_name + "."; if (!pset.insert(prefix).second) { @@ -2095,7 +2096,7 @@ void Server::createServers( port_name, "secure native protocol (tcp_secure): " + address.toString(), std::make_unique( - new TCPProtocolStackFactory(*this, new TLSHandlerFactory(*this), new TCPHandlerFactory(*this, false, false)), + new TCPProtocolStackFactory(*this, "", new TLSHandlerFactory(*this, ""), new TCPHandlerFactory(*this, false, false)), server_pool, socket, new Poco::Net::TCPServerParams)); diff --git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h index 21687898d45..0804d78336b 100644 --- a/src/Server/TCPProtocolStack.h +++ b/src/Server/TCPProtocolStack.h @@ -47,10 +47,11 @@ class TCPProtocolStack : public Poco::Net::TCPServerConnection private: TCPServer & tcp_server; std::list stack; + std::string conf_name; public: - TCPProtocolStack(TCPServer & tcp_server_, const StreamSocket & socket, const std::list & stack_) - : TCPServerConnection(socket), tcp_server(tcp_server_), stack(stack_) + TCPProtocolStack(TCPServer & tcp_server_, const StreamSocket & socket, const std::list & stack_, const std::string & conf_name_) + : TCPServerConnection(socket), tcp_server(tcp_server_), stack(stack_), conf_name(conf_name_) {} void run() override @@ -73,9 +74,9 @@ public: class TCPProtocolStackFactory : public TCPServerConnectionFactory { private: - IServer & server; + IServer & server [[maybe_unused]]; Poco::Logger * log; - std::string server_display_name; + std::string conf_name; std::list stack; class DummyTCPHandler : public Poco::Net::TCPServerConnection @@ -87,10 +88,9 @@ private: public: template - explicit TCPProtocolStackFactory(IServer & server_, T... 
factory) - : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), stack({factory...}) + explicit TCPProtocolStackFactory(IServer & server_, const std::string & conf_name_, T... factory) + : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), conf_name(conf_name_), stack({factory...}) { - server_display_name = server.config().getString("display_name", getFQDNOrHostName()); } Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override @@ -98,7 +98,7 @@ public: try { LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new TCPProtocolStack(tcp_server, socket, stack); + return new TCPProtocolStack(tcp_server, socket, stack, conf_name); } catch (const Poco::Net::NetException &) { @@ -120,8 +120,9 @@ class TLSHandler : public Poco::Net::TCPServerConnection //TCPConnectionAccessor using StreamSocket = Poco::Net::StreamSocket; using SecureStreamSocket = Poco::Net::SecureStreamSocket; public: - explicit TLSHandler(const StreamSocket & socket, TCPProtocolStackData & stack_data_) + explicit TLSHandler(const StreamSocket & socket, const std::string & conf_name_, TCPProtocolStackData & stack_data_) : Poco::Net::TCPServerConnection(socket) //TCPConnectionAccessor(socket) + , conf_name(conf_name_) , stack_data(stack_data_) {} @@ -131,6 +132,7 @@ public: stack_data.socket = socket(); } private: + std::string conf_name; TCPProtocolStackData & stack_data; }; @@ -138,9 +140,9 @@ private: class TLSHandlerFactory : public TCPServerConnectionFactory { private: - IServer & server; + IServer & server [[maybe_unused]]; Poco::Logger * log; - std::string server_display_name; + std::string conf_name; class DummyTCPHandler : public Poco::Net::TCPServerConnection { @@ -150,10 +152,9 @@ private: }; public: - explicit TLSHandlerFactory(IServer & server_) - : server(server_), log(&Poco::Logger::get("TLSHandlerFactory")) + explicit TLSHandlerFactory(IServer & server_, 
const std::string & conf_name_) + : server(server_), log(&Poco::Logger::get("TLSHandlerFactory")), conf_name(conf_name_) { - server_display_name = server.config().getString("display_name", getFQDNOrHostName()); } Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override @@ -167,7 +168,7 @@ public: try { LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new TLSHandler(socket, stack_data); + return new TLSHandler(socket, conf_name, stack_data); } catch (const Poco::Net::NetException &) { @@ -182,8 +183,8 @@ class ProxyV1Handler : public Poco::Net::TCPServerConnection { using StreamSocket = Poco::Net::StreamSocket; public: - explicit ProxyV1Handler(const StreamSocket & socket, IServer & server_, TCPProtocolStackData & stack_data_) - : Poco::Net::TCPServerConnection(socket), server(server_), stack_data(stack_data_) {} + explicit ProxyV1Handler(const StreamSocket & socket, IServer & server_, const std::string & conf_name_, TCPProtocolStackData & stack_data_) + : Poco::Net::TCPServerConnection(socket), server(server_), conf_name(conf_name_), stack_data(stack_data_) {} void run() override { @@ -293,6 +294,7 @@ protected: private: IServer & server; + std::string conf_name; TCPProtocolStackData & stack_data; }; @@ -301,7 +303,7 @@ class ProxyV1HandlerFactory : public TCPServerConnectionFactory private: IServer & server; Poco::Logger * log; - std::string server_display_name; + std::string conf_name; class DummyTCPHandler : public Poco::Net::TCPServerConnection { @@ -311,10 +313,9 @@ private: }; public: - explicit ProxyV1HandlerFactory(IServer & server_) - : server(server_), log(&Poco::Logger::get("ProxyV1HandlerFactory")) + explicit ProxyV1HandlerFactory(IServer & server_, const std::string & conf_name_) + : server(server_), log(&Poco::Logger::get("ProxyV1HandlerFactory")), conf_name(conf_name_) { - server_display_name = server.config().getString("display_name", 
getFQDNOrHostName()); } Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override @@ -328,7 +329,7 @@ public: try { LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new ProxyV1Handler(socket, server, stack_data); + return new ProxyV1Handler(socket, server, conf_name, stack_data); } catch (const Poco::Net::NetException &) { From c3ac0c434bb3b967df5fb567ada6486ac9dcbda3 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Sep 2022 04:40:58 +0000 Subject: [PATCH 012/266] some refactoring --- programs/server/Server.cpp | 31 +++++++++---------------------- src/Server/TCPProtocolStack.h | 2 ++ 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 205e30a2c65..28f0e34eb73 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1903,7 +1903,7 @@ void Server::createServers( new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory")) ); - throw Exception("LOGICAL ERROR: Unknown protocol name.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); }; for (const auto & protocol : protocols) @@ -1927,40 +1927,27 @@ void Server::createServers( if (type == "tls") { if (is_secure) - { - // misconfigured - only one tls layer is allowed - stack.reset(); - break; - } + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); is_secure = true; } - TCPServerConnectionFactory::Ptr factory = createFactory(type, conf_name); - if (!factory) - { - // misconfigured - protocol type doesn't exist - stack.reset(); - break; - } - - stack->append(factory); - - if (!config.has(prefix + "impl")) - break; + stack->append(createFactory(type, conf_name)); } + if 
(!config.has(prefix + "impl")) + break; + conf_name = "protocols." + config.getString(prefix + "impl"); prefix = conf_name + "."; - if (!pset.insert(prefix).second) + if (!pset.insert(conf_name).second) { // misconfigured - loop is detected - stack.reset(); - break; + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); } } - if (!stack) + if (!stack || stack->size() == 0) continue; createServer(config, listen_host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter diff --git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h index 0804d78336b..85896f7f416 100644 --- a/src/Server/TCPProtocolStack.h +++ b/src/Server/TCPProtocolStack.h @@ -111,6 +111,8 @@ public: { stack.push_back(factory); } + + size_t size() { return stack.size(); } }; From 745e759146a03237412909a824653227e3fe3460 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Sep 2022 14:32:37 +0000 Subject: [PATCH 013/266] bugs fixed, cleanup, working state --- programs/server/Server.cpp | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 28f0e34eb73..ff5750946ca 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1873,12 +1873,10 @@ void Server::createServers( http_params->setTimeout(settings.http_receive_timeout); http_params->setKeepAliveTimeout(keep_alive_timeout); - - Poco::Util::AbstractConfiguration::Keys protocols; config.keys("protocols", protocols); - auto createFactory = [&](const std::string & type, const std::string & conf_name) -> Poco::SharedPtr //TCPServerConnectionFactory::Ptr + auto createFactory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr { if (type == "tcp") return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false)); @@ -1908,16 +1906,20 @@ 
void Server::createServers( for (const auto & protocol : protocols) { - std::string conf_name = protocol; - std::string prefix = protocol + "."; + std::string conf_name = "protocols." + protocol; + std::string prefix = conf_name + "."; std::unordered_set pset {prefix}; if (config.has(prefix + "host") && config.has(prefix + "port")) { + std::string description {" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); std::string port_name = prefix + "port"; - std::string listen_host = prefix + "host"; + std::string listen_host = config.getString(prefix + "host"); bool is_secure = false; auto stack = std::make_unique(*this, conf_name); + while (true) { // if there is no "type" - it's a reference to another protocol and this is just an endpoint @@ -1941,14 +1943,11 @@ void Server::createServers( prefix = conf_name + "."; if (!pset.insert(conf_name).second) - { - // misconfigured - loop is detected throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); - } } if (!stack || stack->size() == 0) - continue; + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); createServer(config, listen_host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter { @@ -1960,7 +1959,7 @@ void Server::createServers( return ProtocolServerAdapter( listen_host, port_name.c_str(), - "secure native protocol (tcp_secure): " + address.toString(), + description + ": " + address.toString(), std::make_unique( stack.release(), server_pool, @@ -1969,9 +1968,6 @@ void Server::createServers( }); } } - - - for (const auto & listen_host : listen_hosts) { From 0c62b5acfcb431793726a8e4f49d68497b6f4bd7 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Sep 2022 19:00:40 +0000 Subject: [PATCH 014/266] split into diferent files --- programs/server/Server.cpp | 4 +- 
src/Server/ProxyV1Handler.cpp | 126 ++++++++++ src/Server/ProxyV1Handler.h | 29 +++ src/Server/ProxyV1HandlerFactory.h | 56 +++++ src/Server/TCPProtocolStack.h | 344 --------------------------- src/Server/TCPProtocolStackFactory.h | 57 +++++ src/Server/TCPProtocolStackHandler.h | 42 ++++ src/Server/TLSHandler.h | 35 +++ src/Server/TLSHandlerFactory.h | 59 +++++ 9 files changed, 407 insertions(+), 345 deletions(-) create mode 100644 src/Server/ProxyV1Handler.cpp create mode 100644 src/Server/ProxyV1Handler.h create mode 100644 src/Server/ProxyV1HandlerFactory.h delete mode 100644 src/Server/TCPProtocolStack.h create mode 100644 src/Server/TCPProtocolStackFactory.h create mode 100644 src/Server/TCPProtocolStackHandler.h create mode 100644 src/Server/TLSHandler.h create mode 100644 src/Server/TLSHandlerFactory.h diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index ff5750946ca..e5f62f2f885 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -84,6 +84,8 @@ #include #include #include +#include +#include #include #include #include @@ -91,7 +93,7 @@ #include #include -#include +#include #include "config_core.h" #include "Common/config_version.h" diff --git a/src/Server/ProxyV1Handler.cpp b/src/Server/ProxyV1Handler.cpp new file mode 100644 index 00000000000..b3ed8b7bd60 --- /dev/null +++ b/src/Server/ProxyV1Handler.cpp @@ -0,0 +1,126 @@ +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NETWORK_ERROR; + extern const int SOCKET_TIMEOUT; + extern const int CANNOT_READ_FROM_SOCKET; + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; +} + + +void ProxyV1Handler::run() +{ + const auto & settings = server.context()->getSettingsRef(); + socket().setReceiveTimeout(settings.receive_timeout); + + std::string word; + bool eol; + + // Read PROXYv1 protocol header + // http://www.haproxy.org/download/1.8/doc/proxy-protocol.txt + + // read "PROXY" + if (!readWord(5, word, eol) || 
word != "PROXY" || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read "TCP4" or "TCP6" or "UNKNOWN" + if (!readWord(7, word, eol)) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + if (word != "TCP4" && word != "TCP6" && word != "UNKNOWN") + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + if (word == "UNKNOWN" && eol) + return; + + if (eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read address + if (!readWord(39, word, eol) || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + stack_data.forwarded_for = std::move(word); + + // read address + if (!readWord(39, word, eol) || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read port + if (!readWord(5, word, eol) || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read port and "\r\n" + if (!readWord(5, word, eol) || !eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); +} + +bool ProxyV1Handler::readWord(int max_len, std::string & word, bool & eol) +{ + word.clear(); + eol = false; + + char ch = 0; + int n = 0; + bool is_cr = false; + try + { + for (++max_len; max_len > 0 || is_cr; --max_len) + { + n = socket().receiveBytes(&ch, 1); + if (n == 0) + { + socket().shutdown(); + return false; + } + if (n < 0) + break; + + if (is_cr) + return ch == 0x0A; + + if (ch == 0x0D) + { + is_cr = true; + eol = true; + continue; + } + + if (ch == ' ') + return true; + + word.push_back(ch); + } + } + catch (const Poco::Net::NetException & e) + { + throw NetException(e.displayText() + ", while reading from socket (" + 
socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); + } + catch (const Poco::TimeoutException &) + { + throw NetException(fmt::format("Timeout exceeded while reading from socket ({}, {} ms)", + socket().peerAddress().toString(), + socket().getReceiveTimeout().totalMilliseconds()), ErrorCodes::SOCKET_TIMEOUT); + } + catch (const Poco::IOException & e) + { + throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); + } + + if (n < 0) + throw NetException("Cannot read from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET); + + return false; +} + + + +} diff --git a/src/Server/ProxyV1Handler.h b/src/Server/ProxyV1Handler.h new file mode 100644 index 00000000000..062cc0e291a --- /dev/null +++ b/src/Server/ProxyV1Handler.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class ProxyV1Handler : public Poco::Net::TCPServerConnection +{ + using StreamSocket = Poco::Net::StreamSocket; +public: + explicit ProxyV1Handler(const StreamSocket & socket, IServer & server_, const std::string & conf_name_, TCPProtocolStackData & stack_data_) + : Poco::Net::TCPServerConnection(socket), server(server_), conf_name(conf_name_), stack_data(stack_data_) {} + + void run() override; + +protected: + bool readWord(int max_len, std::string & word, bool & eol); + +private: + IServer & server; + std::string conf_name; + TCPProtocolStackData & stack_data; +}; + +} diff --git a/src/Server/ProxyV1HandlerFactory.h b/src/Server/ProxyV1HandlerFactory.h new file mode 100644 index 00000000000..028596d745d --- /dev/null +++ b/src/Server/ProxyV1HandlerFactory.h @@ -0,0 +1,56 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +class ProxyV1HandlerFactory : public TCPServerConnectionFactory +{ +private: + IServer & server; + Poco::Logger * log; + std::string conf_name; + + 
class DummyTCPHandler : public Poco::Net::TCPServerConnection + { + public: + using Poco::Net::TCPServerConnection::TCPServerConnection; + void run() override {} + }; + +public: + explicit ProxyV1HandlerFactory(IServer & server_, const std::string & conf_name_) + : server(server_), log(&Poco::Logger::get("ProxyV1HandlerFactory")), conf_name(conf_name_) + { + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override + { + TCPProtocolStackData stack_data; + return createConnection(socket, tcp_server, stack_data); + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/, TCPProtocolStackData & stack_data) override + { + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + return new ProxyV1Handler(socket, server, conf_name, stack_data); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent)."); + return new DummyTCPHandler(socket); + } + } +}; + +} diff --git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h deleted file mode 100644 index 85896f7f416..00000000000 --- a/src/Server/TCPProtocolStack.h +++ /dev/null @@ -1,344 +0,0 @@ -#pragma once - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Poco/Net/SSLManager.h" -#include - -#include "Interpreters/Context.h" -#include "Server/TCPProtocolStackData.h" -#include "base/types.h" - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NETWORK_ERROR; - extern const int SOCKET_TIMEOUT; - extern const int CANNOT_READ_FROM_SOCKET; - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; -} - -class TCPConnectionAccessor : public Poco::Net::TCPServerConnection -{ -public: - using Poco::Net::TCPServerConnection::socket; - explicit TCPConnectionAccessor(const 
Poco::Net::StreamSocket & socket) : Poco::Net::TCPServerConnection(socket) {} -}; - -class TCPProtocolStack : public Poco::Net::TCPServerConnection -{ - using StreamSocket = Poco::Net::StreamSocket; - using TCPServerConnection = Poco::Net::TCPServerConnection; -private: - TCPServer & tcp_server; - std::list stack; - std::string conf_name; - -public: - TCPProtocolStack(TCPServer & tcp_server_, const StreamSocket & socket, const std::list & stack_, const std::string & conf_name_) - : TCPServerConnection(socket), tcp_server(tcp_server_), stack(stack_), conf_name(conf_name_) - {} - - void run() override - { - TCPProtocolStackData stack_data; - stack_data.socket = socket(); - for (auto & factory : stack) - { - std::unique_ptr connection(factory->createConnection(socket(), tcp_server, stack_data)); - connection->run(); - if (stack_data.socket != socket()) - socket() = stack_data.socket; -// if (auto * accessor = dynamic_cast(connection.get()); accessor) - // socket() = accessor->socket(); - } - } -}; - - -class TCPProtocolStackFactory : public TCPServerConnectionFactory -{ -private: - IServer & server [[maybe_unused]]; - Poco::Logger * log; - std::string conf_name; - std::list stack; - - class DummyTCPHandler : public Poco::Net::TCPServerConnection - { - public: - using Poco::Net::TCPServerConnection::TCPServerConnection; - void run() override {} - }; - -public: - template - explicit TCPProtocolStackFactory(IServer & server_, const std::string & conf_name_, T... factory) - : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), conf_name(conf_name_), stack({factory...}) - { - } - - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override - { - try - { - LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new TCPProtocolStack(tcp_server, socket, stack, conf_name); - } - catch (const Poco::Net::NetException &) - { - LOG_TRACE(log, "TCP Request. 
Client is not connected (most likely RST packet was sent)."); - return new DummyTCPHandler(socket); - } - } - - void append(TCPServerConnectionFactory::Ptr factory) - { - stack.push_back(factory); - } - - size_t size() { return stack.size(); } -}; - - - -class TLSHandler : public Poco::Net::TCPServerConnection //TCPConnectionAccessor -{ - using StreamSocket = Poco::Net::StreamSocket; - using SecureStreamSocket = Poco::Net::SecureStreamSocket; -public: - explicit TLSHandler(const StreamSocket & socket, const std::string & conf_name_, TCPProtocolStackData & stack_data_) - : Poco::Net::TCPServerConnection(socket) //TCPConnectionAccessor(socket) - , conf_name(conf_name_) - , stack_data(stack_data_) - {} - - void run() override - { - socket() = SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext()); - stack_data.socket = socket(); - } -private: - std::string conf_name; - TCPProtocolStackData & stack_data; -}; - - -class TLSHandlerFactory : public TCPServerConnectionFactory -{ -private: - IServer & server [[maybe_unused]]; - Poco::Logger * log; - std::string conf_name; - - class DummyTCPHandler : public Poco::Net::TCPServerConnection - { - public: - using Poco::Net::TCPServerConnection::TCPServerConnection; - void run() override {} - }; - -public: - explicit TLSHandlerFactory(IServer & server_, const std::string & conf_name_) - : server(server_), log(&Poco::Logger::get("TLSHandlerFactory")), conf_name(conf_name_) - { - } - - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override - { - TCPProtocolStackData stack_data; - return createConnection(socket, tcp_server, stack_data); - } - - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/, TCPProtocolStackData & stack_data) override - { - try - { - LOG_TRACE(log, "TCP Request. 
Address: {}", socket.peerAddress().toString()); - return new TLSHandler(socket, conf_name, stack_data); - } - catch (const Poco::Net::NetException &) - { - LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent)."); - return new DummyTCPHandler(socket); - } - } -}; - - -class ProxyV1Handler : public Poco::Net::TCPServerConnection -{ - using StreamSocket = Poco::Net::StreamSocket; -public: - explicit ProxyV1Handler(const StreamSocket & socket, IServer & server_, const std::string & conf_name_, TCPProtocolStackData & stack_data_) - : Poco::Net::TCPServerConnection(socket), server(server_), conf_name(conf_name_), stack_data(stack_data_) {} - - void run() override - { - const auto & settings = server.context()->getSettingsRef(); - socket().setReceiveTimeout(settings.receive_timeout); - - std::string word; - bool eol; - - // Read PROXYv1 protocol header - // http://www.haproxy.org/download/1.8/doc/proxy-protocol.txt - - // read "PROXY" - if (!readWord(5, word, eol) || word != "PROXY" || eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - // read "TCP4" or "TCP6" or "UNKNOWN" - if (!readWord(7, word, eol)) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - if (word != "TCP4" && word != "TCP6" && word != "UNKNOWN") - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - if (word == "UNKNOWN" && eol) - return; - - if (eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - // read address - if (!readWord(39, word, eol) || eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - stack_data.forwarded_for = std::move(word); - - // read address - if (!readWord(39, word, eol) || eol) - throw ParsingException("PROXY protocol violation", 
ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - // read port - if (!readWord(5, word, eol) || eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - // read port and "\r\n" - if (!readWord(5, word, eol) || !eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - } - -protected: - bool readWord(int max_len, std::string & word, bool & eol) - { - word.clear(); - eol = false; - - char ch = 0; - int n = 0; - bool is_cr = false; - try - { - for (++max_len; max_len > 0 || is_cr; --max_len) - { - n = socket().receiveBytes(&ch, 1); - if (n == 0) - { - socket().shutdown(); - return false; - } - if (n < 0) - break; - - if (is_cr) - return ch == 0x0A; - - if (ch == 0x0D) - { - is_cr = true; - eol = true; - continue; - } - - if (ch == ' ') - return true; - - word.push_back(ch); - } - } - catch (const Poco::Net::NetException & e) - { - throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); - } - catch (const Poco::TimeoutException &) - { - throw NetException(fmt::format("Timeout exceeded while reading from socket ({}, {} ms)", - socket().peerAddress().toString(), - socket().getReceiveTimeout().totalMilliseconds()), ErrorCodes::SOCKET_TIMEOUT); - } - catch (const Poco::IOException & e) - { - throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); - } - - if (n < 0) - throw NetException("Cannot read from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET); - - return false; - } - -private: - IServer & server; - std::string conf_name; - TCPProtocolStackData & stack_data; -}; - -class ProxyV1HandlerFactory : public TCPServerConnectionFactory -{ -private: - IServer & server; - Poco::Logger * log; - std::string conf_name; - - class DummyTCPHandler : public 
Poco::Net::TCPServerConnection - { - public: - using Poco::Net::TCPServerConnection::TCPServerConnection; - void run() override {} - }; - -public: - explicit ProxyV1HandlerFactory(IServer & server_, const std::string & conf_name_) - : server(server_), log(&Poco::Logger::get("ProxyV1HandlerFactory")), conf_name(conf_name_) - { - } - - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override - { - TCPProtocolStackData stack_data; - return createConnection(socket, tcp_server, stack_data); - } - - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/, TCPProtocolStackData & stack_data) override - { - try - { - LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new ProxyV1Handler(socket, server, conf_name, stack_data); - } - catch (const Poco::Net::NetException &) - { - LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent)."); - return new DummyTCPHandler(socket); - } - } -}; - -} diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h new file mode 100644 index 00000000000..87d5dba350f --- /dev/null +++ b/src/Server/TCPProtocolStackFactory.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + + +class TCPProtocolStackFactory : public TCPServerConnectionFactory +{ +private: + IServer & server [[maybe_unused]]; + Poco::Logger * log; + std::string conf_name; + std::list stack; + + class DummyTCPHandler : public Poco::Net::TCPServerConnection + { + public: + using Poco::Net::TCPServerConnection::TCPServerConnection; + void run() override {} + }; + +public: + template + explicit TCPProtocolStackFactory(IServer & server_, const std::string & conf_name_, T... 
factory) + : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), conf_name(conf_name_), stack({factory...}) + { + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override + { + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + return new TCPProtocolStackHandler(tcp_server, socket, stack, conf_name); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent)."); + return new DummyTCPHandler(socket); + } + } + + void append(TCPServerConnectionFactory::Ptr factory) + { + stack.push_back(factory); + } + + size_t size() { return stack.size(); } +}; + + +} diff --git a/src/Server/TCPProtocolStackHandler.h b/src/Server/TCPProtocolStackHandler.h new file mode 100644 index 00000000000..7b513298022 --- /dev/null +++ b/src/Server/TCPProtocolStackHandler.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include +#include "Server/TCPProtocolStackData.h" + + +namespace DB +{ + + +class TCPProtocolStackHandler : public Poco::Net::TCPServerConnection +{ + using StreamSocket = Poco::Net::StreamSocket; + using TCPServerConnection = Poco::Net::TCPServerConnection; +private: + TCPServer & tcp_server; + std::list stack; + std::string conf_name; + +public: + TCPProtocolStackHandler(TCPServer & tcp_server_, const StreamSocket & socket, const std::list & stack_, const std::string & conf_name_) + : TCPServerConnection(socket), tcp_server(tcp_server_), stack(stack_), conf_name(conf_name_) + {} + + void run() override + { + TCPProtocolStackData stack_data; + stack_data.socket = socket(); + for (auto & factory : stack) + { + std::unique_ptr connection(factory->createConnection(socket(), tcp_server, stack_data)); + connection->run(); + if (stack_data.socket != socket()) + socket() = stack_data.socket; + } + } +}; + + +} diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h 
new file mode 100644 index 00000000000..623a9999475 --- /dev/null +++ b/src/Server/TLSHandler.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include +#include +#include "Server/TCPProtocolStackData.h" + + +namespace DB +{ + + +class TLSHandler : public Poco::Net::TCPServerConnection +{ + using StreamSocket = Poco::Net::StreamSocket; + using SecureStreamSocket = Poco::Net::SecureStreamSocket; +public: + explicit TLSHandler(const StreamSocket & socket, const std::string & conf_name_, TCPProtocolStackData & stack_data_) + : Poco::Net::TCPServerConnection(socket) + , conf_name(conf_name_) + , stack_data(stack_data_) + {} + + void run() override + { + socket() = SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext()); + stack_data.socket = socket(); + } +private: + std::string conf_name; + TCPProtocolStackData & stack_data; +}; + + +} diff --git a/src/Server/TLSHandlerFactory.h b/src/Server/TLSHandlerFactory.h new file mode 100644 index 00000000000..283e96252c3 --- /dev/null +++ b/src/Server/TLSHandlerFactory.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + + +class TLSHandlerFactory : public TCPServerConnectionFactory +{ +private: + IServer & server [[maybe_unused]]; + Poco::Logger * log; + std::string conf_name; + + class DummyTCPHandler : public Poco::Net::TCPServerConnection + { + public: + using Poco::Net::TCPServerConnection::TCPServerConnection; + void run() override {} + }; + +public: + explicit TLSHandlerFactory(IServer & server_, const std::string & conf_name_) + : server(server_), log(&Poco::Logger::get("TLSHandlerFactory")), conf_name(conf_name_) + { + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override + { + TCPProtocolStackData stack_data; + return createConnection(socket, tcp_server, stack_data); + } + + Poco::Net::TCPServerConnection * 
createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/, TCPProtocolStackData & stack_data) override + { + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + return new TLSHandler(socket, conf_name, stack_data); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent)."); + return new DummyTCPHandler(socket); + } + } +}; + + +} From 7985f4ba16cdc5e396ab5336ce5702c264158dff Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Sep 2022 21:35:38 +0000 Subject: [PATCH 015/266] cleanup, respect USE_SSL --- programs/server/Server.cpp | 38 ++++++++------------------------------ src/Server/TLSHandler.h | 18 ++++++++++++++---- 2 files changed, 22 insertions(+), 34 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index e5f62f2f885..9e434c19fc6 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1882,8 +1882,15 @@ void Server::createServers( { if (type == "tcp") return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false)); + if (type == "tls") +#if USE_SSL return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this, conf_name)); +#else + throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + if (type == "proxy1") return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this, conf_name)); if (type == "mysql") @@ -2055,39 +2062,11 @@ void Server::createServers( createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter { #if USE_SSL - //Poco::Net::SecureServerSocket socket; - Poco::Net::ServerSocket socket; + Poco::Net::SecureServerSocket socket; auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); 
socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); -/* - TCPProtocolStackFactory *stack = new TCPProtocolStackFactory(*this); - stack->append(new TLSHandlerFactory(*this)); - stack->append(new TCPHandlerFactory(*this, false, false)); - - return ProtocolServerAdapter( - listen_host, - port_name, - "secure native protocol (tcp_secure): " + address.toString(), - std::make_unique( - stack, - server_pool, - socket, - new Poco::Net::TCPServerParams)); -*/ - return ProtocolServerAdapter( - listen_host, - port_name, - "secure native protocol (tcp_secure): " + address.toString(), - std::make_unique( - new TCPProtocolStackFactory(*this, "", new TLSHandlerFactory(*this, ""), new TCPHandlerFactory(*this, false, false)), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - - -/* return ProtocolServerAdapter( listen_host, port_name, @@ -2097,7 +2076,6 @@ void Server::createServers( server_pool, socket, new Poco::Net::TCPServerParams)); -*/ #else UNUSED(port); throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index 623a9999475..4fea43523cd 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -1,19 +1,24 @@ #pragma once #include -#include -#include #include "Server/TCPProtocolStackData.h" +#if USE_SSL +# include +# include +#endif namespace DB { +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; +} class TLSHandler : public Poco::Net::TCPServerConnection { using StreamSocket = Poco::Net::StreamSocket; - using SecureStreamSocket = Poco::Net::SecureStreamSocket; public: explicit TLSHandler(const StreamSocket & socket, const std::string & conf_name_, TCPProtocolStackData & stack_data_) : Poco::Net::TCPServerConnection(socket) @@ -23,8 +28,13 @@ public: void run() override { - socket() = SecureStreamSocket::attach(socket(), 
Poco::Net::SSLManager::instance().defaultServerContext()); +#if USE_SSL + socket() = Poco::Net::SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext()); stack_data.socket = socket(); +#else + throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif } private: std::string conf_name; From ffa7d3b121fa1910afdf64c278c70f8cd69caca0 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Sep 2022 21:51:25 +0000 Subject: [PATCH 016/266] cleanup --- programs/server/Server.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 9e434c19fc6..b122fcbfed3 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -71,7 +70,6 @@ #include #include #include -#include #include #include "MetricsTransmitter.h" #include @@ -82,10 +80,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -93,8 +93,6 @@ #include #include -#include - #include "config_core.h" #include "Common/config_version.h" @@ -2066,13 +2064,12 @@ void Server::createServers( auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( listen_host, port_name, "secure native protocol (tcp_secure): " + address.toString(), std::make_unique( - new TCPHandlerFactory(*this, true, false), + new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), server_pool, socket, new Poco::Net::TCPServerParams)); From 7c855c9da246f64828dd3c658c85443911f279c3 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Sep 2022 22:44:27 +0000 Subject: [PATCH 017/266] style fix 
--- programs/server/Server.cpp | 4 ++-- src/Server/ProxyV1Handler.cpp | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b122fcbfed3..368971b3a34 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1882,7 +1882,7 @@ void Server::createServers( return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false)); if (type == "tls") -#if USE_SSL +#if USE_SSL return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this, conf_name)); #else throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", @@ -1975,7 +1975,7 @@ void Server::createServers( }); } } - + for (const auto & listen_host : listen_hosts) { /// HTTP diff --git a/src/Server/ProxyV1Handler.cpp b/src/Server/ProxyV1Handler.cpp index b3ed8b7bd60..838a1de1c04 100644 --- a/src/Server/ProxyV1Handler.cpp +++ b/src/Server/ProxyV1Handler.cpp @@ -15,12 +15,11 @@ namespace ErrorCodes extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; } - void ProxyV1Handler::run() { const auto & settings = server.context()->getSettingsRef(); socket().setReceiveTimeout(settings.receive_timeout); - + std::string word; bool eol; @@ -57,7 +56,7 @@ void ProxyV1Handler::run() // read port if (!readWord(5, word, eol) || eol) throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - + // read port and "\r\n" if (!readWord(5, word, eol) || !eol) throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); @@ -121,6 +120,4 @@ bool ProxyV1Handler::readWord(int max_len, std::string & word, bool & eol) return false; } - - } From 0d89bdbbb922fc21060e2c639a91ff1455197b54 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Sun, 11 Sep 2022 19:25:33 -0400 Subject: [PATCH 018/266] maybe_unused --- src/Server/TLSHandler.h | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index 4fea43523cd..f8cb94eb004 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -38,7 +38,7 @@ public: } private: std::string conf_name; - TCPProtocolStackData & stack_data; + TCPProtocolStackData & stack_data [[maybe_unused]]; }; From d901c4dca5ceb942315e8281c283db269d9c4bba Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Mon, 12 Sep 2022 18:45:43 +0800 Subject: [PATCH 019/266] add option to return NULL when decrypt fail --- src/Core/Settings.h | 1 + src/Functions/FunctionsAES.cpp | 7 +++ src/Functions/FunctionsAES.h | 62 ++++++++++++++----- .../0_stateless/01318_decrypt.reference | 1 + tests/queries/0_stateless/01318_decrypt.sql | 15 +++++ 5 files changed, 72 insertions(+), 14 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 1a2b9e42a25..9dd99dd3c79 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -627,6 +627,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Map, additional_table_filters, "", "Additional filter expression which would be applied after reading from specified table. 
Syntax: {'table1': 'expression', 'database.table2': 'expression'}", 0) \ M(String, additional_result_filter, "", "Additional filter expression which would be applied to query result", 0) \ \ + M(Bool, aes_decryption_use_null_on_fail, false, "Decrypt a string with incorrect key will return NULL value", 0) \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ diff --git a/src/Functions/FunctionsAES.cpp b/src/Functions/FunctionsAES.cpp index 9ef07e2747d..38dc8eaac7a 100644 --- a/src/Functions/FunctionsAES.cpp +++ b/src/Functions/FunctionsAES.cpp @@ -1,4 +1,5 @@ #include +#include #if USE_SSL @@ -9,6 +10,12 @@ #include + +bool getParamsFromContext(DB::ContextPtr context) +{ + return context->getSettingsRef().aes_decryption_use_null_on_fail; +} + namespace DB { namespace ErrorCodes diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 0d8e5a5546a..044f4bcfaf9 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -1,6 +1,9 @@ #pragma once #include +#include +#include +#include #if USE_SSL #include @@ -21,6 +24,8 @@ #include +bool NO_INLINE getParamsFromContext(DB::ContextPtr context); + namespace DB { namespace ErrorCodes @@ -409,12 +414,14 @@ template class FunctionDecrypt : public IFunction { public: + explicit FunctionDecrypt(const ContextPtr & context) : decrypt_use_null(getParamsFromContext(context)) { } static constexpr OpenSSLDetails::CompatibilityMode compatibility_mode = Impl::compatibility_mode; static constexpr auto name = Impl::name; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } private: using CipherMode = OpenSSLDetails::CipherMode; + bool decrypt_use_null = false; String getName() const 
override { return name; } bool isVariadic() const override { return true; } @@ -445,6 +452,9 @@ private: optional_args ); + if (decrypt_use_null) + return std::make_shared(std::make_shared()); + return std::make_shared(); } @@ -468,7 +478,7 @@ private: ColumnPtr result_column; if (arguments.size() <= 3) { - result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, nullptr, nullptr); + result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, nullptr, nullptr, decrypt_use_null); } else { @@ -478,7 +488,7 @@ private: if (arguments.size() <= 4) { - result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, nullptr); + result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, nullptr, decrypt_use_null); } else { @@ -486,7 +496,7 @@ private: throw Exception("AAD can be only set for GCM-mode", ErrorCodes::BAD_ARGUMENTS); const auto aad_column = arguments[4].column; - result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column); + result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, decrypt_use_null); } } @@ -499,22 +509,23 @@ private: const ColumnPtr & input_column, const ColumnPtr & key_column, const ColumnPtr & iv_column, - const ColumnPtr & aad_column) + const ColumnPtr & aad_column, + bool decrypt_use_null = false) { if constexpr (compatibility_mode == OpenSSLDetails::CompatibilityMode::MySQL) { - return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column); + return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, decrypt_use_null); } else { const auto cipher_mode = EVP_CIPHER_mode(evp_cipher); if (cipher_mode == EVP_CIPH_GCM_MODE) { - return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column); + return doDecryptImpl(evp_cipher, 
input_rows_count, input_column, key_column, iv_column, aad_column, decrypt_use_null); } else { - return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column); + return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, decrypt_use_null); } } @@ -527,7 +538,8 @@ private: const ColumnPtr & input_column, const ColumnPtr & key_column, [[maybe_unused]] const ColumnPtr & iv_column, - [[maybe_unused]] const ColumnPtr & aad_column) + [[maybe_unused]] const ColumnPtr & aad_column, + bool decrypt_use_null = false) { using namespace OpenSSLDetails; @@ -541,6 +553,7 @@ private: static constexpr size_t tag_size = 16; // https://tools.ietf.org/html/rfc5116#section-5.1 auto decrypted_result_column = ColumnString::create(); + auto null_map = ColumnUInt8::create(); auto & decrypted_result_column_data = decrypted_result_column->getChars(); auto & decrypted_result_column_offsets = decrypted_result_column->getOffsets(); @@ -616,6 +629,7 @@ private: } } + bool decrypt_fail = false; /// Avoid extra work on empty ciphertext/plaintext. Always decrypt empty to empty. /// This makes sense for default implementation for NULLs. 
if (input_value.size > 0) @@ -662,8 +676,13 @@ private: if (EVP_DecryptUpdate(evp_ctx, reinterpret_cast(decrypted), &output_len, reinterpret_cast(input_value.data), static_cast(input_value.size)) != 1) - onError("Failed to decrypt"); - decrypted += output_len; + { + if (!decrypt_use_null) + onError("Failed to decrypt"); + decrypt_fail = true; + } + else + decrypted += output_len; // 3: optionally get tag from the ciphertext (RFC5116) and feed it to the context if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM) @@ -676,14 +695,26 @@ private: // 4: retrieve encrypted data (ciphertext) if (EVP_DecryptFinal_ex(evp_ctx, reinterpret_cast(decrypted), &output_len) != 1) - onError("Failed to decrypt"); - decrypted += output_len; + { + if (!decrypt_use_null) + onError("Failed to decrypt"); + decrypt_fail = true; + } + else + decrypted += output_len; } *decrypted = '\0'; ++decrypted; decrypted_result_column_offsets.push_back(decrypted - decrypted_result_column_data.data()); + if (decrypt_use_null) + { + if (decrypt_fail) + null_map->insertValue(1); + else + null_map->insertValue(0); + } } @@ -694,7 +725,10 @@ private: } decrypted_result_column->validate(); - return decrypted_result_column; + if (decrypt_use_null) + return ColumnNullable::create(std::move(decrypted_result_column), std::move(null_map)); + else + return decrypted_result_column; } }; diff --git a/tests/queries/0_stateless/01318_decrypt.reference b/tests/queries/0_stateless/01318_decrypt.reference index cabb7bb9b83..bbcf4d6374a 100644 --- a/tests/queries/0_stateless/01318_decrypt.reference +++ b/tests/queries/0_stateless/01318_decrypt.reference @@ -87,3 +87,4 @@ aes-256-gcm 1 aes-256-gcm 1 aes-256-gcm 1 F56E87055BC32D0EEB31B2EACC2BF2A5 1 +2022-09-02 00:00:00 2 diff --git a/tests/queries/0_stateless/01318_decrypt.sql b/tests/queries/0_stateless/01318_decrypt.sql index 565fbd02e0a..fb0cc9055f7 100644 --- a/tests/queries/0_stateless/01318_decrypt.sql +++ b/tests/queries/0_stateless/01318_decrypt.sql @@ -129,4 
+129,19 @@ SELECT hex(decrypt('aes-256-gcm', concat(ciphertext, tag), key, iv, aad)) as plaintext_actual, plaintext_actual = hex(plaintext); +-- decrypt with null when fail +CREATE TABLE decrypt_null ( + dt DateTime, + user_id UInt32, + encrypted String, + iv String +) ENGINE = Memory; + +INSERT INTO decrypt_null VALUES ('2022-08-02 00:00:00', 1, encrypt('aes-256-gcm', 'value1', 'keykeykeykeykeykeykeykeykeykey01', 'iv1'), 'iv1'), ('2022-09-02 00:00:00', 2, encrypt('aes-256-gcm', 'value2', 'keykeykeykeykeykeykeykeykeykey02', 'iv2'), 'iv2'), ('2022-09-02 00:00:01', 3, encrypt('aes-256-gcm', 'value3', 'keykeykeykeykeykeykeykeykeykey03', 'iv3'), 'iv3'); + +SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND (decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2'); --{serverError 454} + +SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND (decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2') SETTINGS aes_decryption_use_null_on_fail = 1; + + DROP TABLE encryption_test; From 9d94179217206991f0530beb65e7132dfe4ed325 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Mon, 12 Sep 2022 21:39:07 +0800 Subject: [PATCH 020/266] try make code less ugly --- src/Core/Settings.h | 2 +- src/Functions/FunctionsAES.cpp | 9 +-- src/Functions/FunctionsAES.h | 72 ++++++++++--------- .../0_stateless/01318_decrypt.reference | 3 + tests/queries/0_stateless/01318_decrypt.sql | 4 +- 5 files changed, 50 insertions(+), 40 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b8f95ed4387..491671956ec 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -627,7 +627,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Map, additional_table_filters, "", "Additional filter expression which would be applied after reading from specified table. 
Syntax: {'table1': 'expression', 'database.table2': 'expression'}", 0) \ M(String, additional_result_filter, "", "Additional filter expression which would be applied to query result", 0) \ \ - M(Bool, aes_decryption_use_null_on_fail, false, "Decrypt a string with incorrect key will return NULL value", 0) \ + M(Bool, aes_decryption_use_null_when_fail, false, "Decrypt a string with incorrect key will return NULL instead of throwing error", 0) \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ diff --git a/src/Functions/FunctionsAES.cpp b/src/Functions/FunctionsAES.cpp index 38dc8eaac7a..0e4a99a64e7 100644 --- a/src/Functions/FunctionsAES.cpp +++ b/src/Functions/FunctionsAES.cpp @@ -11,10 +11,6 @@ -bool getParamsFromContext(DB::ContextPtr context) -{ - return context->getSettingsRef().aes_decryption_use_null_on_fail; -} namespace DB { @@ -24,6 +20,11 @@ namespace ErrorCodes } } +bool AESHelpers::getParamsFromContext(DB::ContextPtr context) +{ + return context->getSettingsRef().aes_decryption_use_null_when_fail; +} + namespace OpenSSLDetails { void onError(std::string error_message) diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 044f4bcfaf9..f8cfdb9a2f3 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -23,9 +23,6 @@ #include - -bool NO_INLINE getParamsFromContext(DB::ContextPtr context); - namespace DB { namespace ErrorCodes @@ -34,6 +31,11 @@ namespace ErrorCodes } } +namespace AESHelpers +{ + bool NO_INLINE getParamsFromContext(DB::ContextPtr context); +} + namespace OpenSSLDetails { [[noreturn]] void onError(std::string error_message); @@ -414,14 +416,14 @@ template class FunctionDecrypt : public IFunction { public: - explicit FunctionDecrypt(const ContextPtr & context) : 
decrypt_use_null(getParamsFromContext(context)) { } + explicit FunctionDecrypt(const ContextPtr & context) : use_null_when_decrypt_fail(AESHelpers::getParamsFromContext(context)) { } static constexpr OpenSSLDetails::CompatibilityMode compatibility_mode = Impl::compatibility_mode; static constexpr auto name = Impl::name; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } private: using CipherMode = OpenSSLDetails::CipherMode; - bool decrypt_use_null = false; + bool use_null_when_decrypt_fail = false; String getName() const override { return name; } bool isVariadic() const override { return true; } @@ -452,7 +454,7 @@ private: optional_args ); - if (decrypt_use_null) + if (use_null_when_decrypt_fail) return std::make_shared(std::make_shared()); return std::make_shared(); @@ -478,7 +480,7 @@ private: ColumnPtr result_column; if (arguments.size() <= 3) { - result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, nullptr, nullptr, decrypt_use_null); + result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, nullptr, nullptr, use_null_when_decrypt_fail); } else { @@ -488,7 +490,7 @@ private: if (arguments.size() <= 4) { - result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, nullptr, decrypt_use_null); + result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, nullptr, use_null_when_decrypt_fail); } else { @@ -496,7 +498,7 @@ private: throw Exception("AAD can be only set for GCM-mode", ErrorCodes::BAD_ARGUMENTS); const auto aad_column = arguments[4].column; - result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, decrypt_use_null); + result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, use_null_when_decrypt_fail); } } @@ -510,22 +512,22 @@ private: const ColumnPtr & key_column, const ColumnPtr & iv_column, const 
ColumnPtr & aad_column, - bool decrypt_use_null = false) + bool use_null_when_decrypt_fail = false) { if constexpr (compatibility_mode == OpenSSLDetails::CompatibilityMode::MySQL) { - return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, decrypt_use_null); + return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, use_null_when_decrypt_fail); } else { const auto cipher_mode = EVP_CIPHER_mode(evp_cipher); if (cipher_mode == EVP_CIPH_GCM_MODE) { - return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, decrypt_use_null); + return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, use_null_when_decrypt_fail); } else { - return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, decrypt_use_null); + return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, use_null_when_decrypt_fail); } } @@ -539,7 +541,7 @@ private: const ColumnPtr & key_column, [[maybe_unused]] const ColumnPtr & iv_column, [[maybe_unused]] const ColumnPtr & aad_column, - bool decrypt_use_null = false) + bool use_null_when_decrypt_fail = false) { using namespace OpenSSLDetails; @@ -677,38 +679,40 @@ private: reinterpret_cast(decrypted), &output_len, reinterpret_cast(input_value.data), static_cast(input_value.size)) != 1) { - if (!decrypt_use_null) + if (!use_null_when_decrypt_fail) onError("Failed to decrypt"); decrypt_fail = true; } else - decrypted += output_len; - - // 3: optionally get tag from the ciphertext (RFC5116) and feed it to the context - if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM) { - void * tag = const_cast(reinterpret_cast(input_value.data + input_value.size)); - if (EVP_CIPHER_CTX_ctrl(evp_ctx, EVP_CTRL_AEAD_SET_TAG, tag_size, tag) != 1) - onError("Failed to set tag"); - } - // 4: retrieve encrypted data (ciphertext) - 
if (EVP_DecryptFinal_ex(evp_ctx, - reinterpret_cast(decrypted), &output_len) != 1) - { - if (!decrypt_use_null) - onError("Failed to decrypt"); - decrypt_fail = true; - } - else decrypted += output_len; + // 3: optionally get tag from the ciphertext (RFC5116) and feed it to the context + if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM) + { + void * tag = const_cast(reinterpret_cast(input_value.data + input_value.size)); + if (EVP_CIPHER_CTX_ctrl(evp_ctx, EVP_CTRL_AEAD_SET_TAG, tag_size, tag) != 1) + onError("Failed to set tag"); + } + + // 4: retrieve encrypted data (ciphertext) + if (!decrypt_fail && EVP_DecryptFinal_ex(evp_ctx, + reinterpret_cast(decrypted), &output_len) != 1) + { + if (!use_null_when_decrypt_fail) + onError("Failed to decrypt"); + decrypt_fail = true; + } + else + decrypted += output_len; + } } *decrypted = '\0'; ++decrypted; decrypted_result_column_offsets.push_back(decrypted - decrypted_result_column_data.data()); - if (decrypt_use_null) + if (use_null_when_decrypt_fail) { if (decrypt_fail) null_map->insertValue(1); @@ -725,7 +729,7 @@ private: } decrypted_result_column->validate(); - if (decrypt_use_null) + if (use_null_when_decrypt_fail) return ColumnNullable::create(std::move(decrypted_result_column), std::move(null_map)); else return decrypted_result_column; diff --git a/tests/queries/0_stateless/01318_decrypt.reference b/tests/queries/0_stateless/01318_decrypt.reference index bbcf4d6374a..e8584acab40 100644 --- a/tests/queries/0_stateless/01318_decrypt.reference +++ b/tests/queries/0_stateless/01318_decrypt.reference @@ -88,3 +88,6 @@ aes-256-gcm 1 aes-256-gcm 1 F56E87055BC32D0EEB31B2EACC2BF2A5 1 2022-09-02 00:00:00 2 +2022-08-02 00:00:00 1 \N +2022-09-02 00:00:00 2 value2 +2022-09-02 00:00:01 3 \N diff --git a/tests/queries/0_stateless/01318_decrypt.sql b/tests/queries/0_stateless/01318_decrypt.sql index fb0cc9055f7..f478b043432 100644 --- a/tests/queries/0_stateless/01318_decrypt.sql +++ 
b/tests/queries/0_stateless/01318_decrypt.sql @@ -141,7 +141,9 @@ INSERT INTO decrypt_null VALUES ('2022-08-02 00:00:00', 1, encrypt('aes-256-gcm' SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND (decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2'); --{serverError 454} -SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND (decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2') SETTINGS aes_decryption_use_null_on_fail = 1; +SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND (decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2') SETTINGS aes_decryption_use_null_when_fail = 1; + +SELECT dt, user_id, (decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv)) as value FROM decrypt_null ORDER BY user_id SETTINGS aes_decryption_use_null_when_fail = 1; DROP TABLE encryption_test; From 51d6611b96b1b7c24bf8252e4ff75d2611d7f08b Mon Sep 17 00:00:00 2001 From: Meena Renganathan Date: Mon, 12 Sep 2022 09:05:38 -0700 Subject: [PATCH 021/266] Committing the ClickHouse core changes and other libraries to support OpenSSL. BoringSSL is still set as default --- CMakeLists.txt | 14 ++++++++++++++ contrib/CMakeLists.txt | 6 +++++- contrib/krb5-cmake/CMakeLists.txt | 6 ++++++ contrib/libpq-cmake/CMakeLists.txt | 6 ++++++ programs/server/Server.cpp | 8 ++++++-- src/CMakeLists.txt | 8 +++++++- src/Compression/CompressionFactory.cpp | 4 ++++ src/Core/config_core.h.in | 1 + src/configure_config.cmake | 3 +++ 9 files changed, 52 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 64fb870b61b..52626c7badf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -495,6 +495,20 @@ endif () enable_testing() # Enable for tests without binary +option(ENABLE_CH_BUNDLE_BORINGSSL "Provide the user to allow building of OpenSSL library. 
By default, uses in-house ClickHouse BoringSSL" ON) + +message (STATUS "ENABLE_CH_BUNDLE_BORINGSSL: ${ENABLE_CH_BUNDLE_BORINGSSL}") +if (ENABLE_CH_BUNDLE_BORINGSSL) + message (STATUS "Uses in-house ClickHouse BoringSSL library") +else () + message (STATUS "Build and uses OpenSSL library instead of BoringSSL") +endif () + +if (NOT ENABLE_CH_BUNDLE_BORINGSSL) + set(ENABLE_SSL 1) + target_compile_options(global-group INTERFACE "-Wno-deprecated-declarations") +endif () + # when installing to /usr - place configs to /etc but for /usr/local place to /usr/local/etc if (CMAKE_INSTALL_PREFIX STREQUAL "/usr") set (CLICKHOUSE_ETC_DIR "/etc") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 08b91c1b81c..a1baca69c1c 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -74,7 +74,11 @@ add_contrib (re2-cmake re2) add_contrib (xz-cmake xz) add_contrib (brotli-cmake brotli) add_contrib (double-conversion-cmake double-conversion) -add_contrib (boringssl-cmake boringssl) +if (ENABLE_CH_BUNDLE_BORINGSSL) + add_contrib (boringssl-cmake boringssl) +else () + add_contrib (openssl-cmake openssl) +endif () add_contrib (poco-cmake poco) add_contrib (croaring-cmake croaring) add_contrib (zstd-cmake zstd) diff --git a/contrib/krb5-cmake/CMakeLists.txt b/contrib/krb5-cmake/CMakeLists.txt index 214d23bc2a9..95f8e7e0d21 100644 --- a/contrib/krb5-cmake/CMakeLists.txt +++ b/contrib/krb5-cmake/CMakeLists.txt @@ -578,6 +578,12 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") list(APPEND ALL_SRCS "${CMAKE_CURRENT_BINARY_DIR}/include_private/kcmrpc.c") endif() +if (NOT ENABLE_CH_BUNDLE_BORINGSSL) + list(REMOVE_ITEM ALL_SRCS "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/aes.c") + list(APPEND ALL_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/aes.c") +endif () + + target_sources(_krb5 PRIVATE ${ALL_SRCS} ) diff --git a/contrib/libpq-cmake/CMakeLists.txt b/contrib/libpq-cmake/CMakeLists.txt index 280c0381393..26ece28bd18 100644 --- a/contrib/libpq-cmake/CMakeLists.txt +++ 
b/contrib/libpq-cmake/CMakeLists.txt @@ -59,6 +59,12 @@ set(SRCS add_library(_libpq ${SRCS}) +if (NOT ENABLE_CH_BUNDLE_BORINGSSL) + add_definitions(-DHAVE_BIO_METH_NEW) + add_definitions(-DHAVE_HMAC_CTX_NEW) + add_definitions(-DHAVE_HMAC_CTX_FREE) +endif () + target_include_directories (_libpq SYSTEM PUBLIC ${LIBPQ_SOURCE_DIR}) target_include_directories (_libpq SYSTEM PUBLIC "${LIBPQ_SOURCE_DIR}/include") target_include_directories (_libpq SYSTEM PRIVATE "${LIBPQ_SOURCE_DIR}/configs") diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 93df877ab8e..ab55e527431 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -79,7 +79,9 @@ #include #include #include +#if USE_BORINGSSL #include +#endif #include #include #include @@ -1263,8 +1265,9 @@ int Server::main(const std::vector & /*args*/) global_context->updateStorageConfiguration(*config); global_context->updateInterserverCredentials(*config); - +#if USE_BORINGSSL CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs"); +#endif #if USE_SSL CertificateReloader::instance().tryLoad(*config); #endif @@ -1467,9 +1470,10 @@ int Server::main(const std::vector & /*args*/) global_context->getMergeTreeSettings().sanityCheck(background_pool_tasks); global_context->getReplicatedMergeTreeSettings().sanityCheck(background_pool_tasks); } - +#if USE_BORINGSSL /// try set up encryption. There are some errors in config, error will be printed and server wouldn't start. CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs"); +#endif SCOPE_EXIT({ /// Stop reloading of the main config. 
This must be done before `global_context->shutdown()` because diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3dc42746d67..c0246486110 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -247,7 +247,13 @@ add_object_library(clickhouse_access Access) add_object_library(clickhouse_backups Backups) add_object_library(clickhouse_core Core) add_object_library(clickhouse_core_mysql Core/MySQL) -add_object_library(clickhouse_compression Compression) +if (ENABLE_CH_BUNDLE_BORINGSSL) + add_object_library(clickhouse_compression Compression) +else () + add_headers_and_sources(dbms Compression) + list(REMOVE_ITEM dbms_headers Compression/CompressionCodecEncrypted.h) + list(REMOVE_ITEM dbms_sources Compression/CompressionCodecEncrypted.cpp) +endif () add_object_library(clickhouse_querypipeline QueryPipeline) add_object_library(clickhouse_datatypes DataTypes) add_object_library(clickhouse_datatypes_serializations DataTypes/Serializations) diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 7291d42f681..2d07dba6cdf 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -176,7 +176,9 @@ void registerCodecDelta(CompressionCodecFactory & factory); void registerCodecT64(CompressionCodecFactory & factory); void registerCodecDoubleDelta(CompressionCodecFactory & factory); void registerCodecGorilla(CompressionCodecFactory & factory); +#if USE_BORINGSSL void registerCodecEncrypted(CompressionCodecFactory & factory); +#endif void registerCodecFPC(CompressionCodecFactory & factory); #endif @@ -193,7 +195,9 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecT64(*this); registerCodecDoubleDelta(*this); registerCodecGorilla(*this); +#if USE_BORINGSSL registerCodecEncrypted(*this); +#endif registerCodecFPC(*this); #ifdef ENABLE_QPL_COMPRESSION registerCodecDeflateQpl(*this); diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in index 
46c77593d4e..963226d70e5 100644 --- a/src/Core/config_core.h.in +++ b/src/Core/config_core.h.in @@ -22,3 +22,4 @@ #cmakedefine01 USE_ODBC #cmakedefine01 USE_REPLXX #cmakedefine01 USE_JEMALLOC +#cmakedefine01 USE_BORINGSSL diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 3f3ddf54716..a2ab5bc82e5 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -103,3 +103,6 @@ endif() if (TARGET ch_contrib::jemalloc) set(USE_JEMALLOC 1) endif() +if (ENABLE_CH_BUNDLE_BORINGSSL) + set(USE_BORINGSSL 1) +endif () From 4c8c804e253d0d48d9a826ed25f9117d0c016194 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Tue, 13 Sep 2022 15:47:52 +0800 Subject: [PATCH 022/266] fix style --- src/Functions/FunctionsAES.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Functions/FunctionsAES.cpp b/src/Functions/FunctionsAES.cpp index 0e4a99a64e7..e514884f2d1 100644 --- a/src/Functions/FunctionsAES.cpp +++ b/src/Functions/FunctionsAES.cpp @@ -9,9 +9,6 @@ #include #include - - - namespace DB { namespace ErrorCodes From d2d8c179b683f89c96608dec78eb8fef646db838 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 13 Sep 2022 13:16:18 +0200 Subject: [PATCH 023/266] Add unreserved_space column to system.disks --- docs/en/operations/system-tables/disks.md | 1 + docs/ru/operations/system-tables/disks.md | 1 + src/Storages/System/StorageSystemDisks.cpp | 4 ++++ .../0_stateless/02117_show_create_table_system.reference | 1 + 4 files changed, 7 insertions(+) diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md index 1106562da53..f4c71eb1cd2 100644 --- a/docs/en/operations/system-tables/disks.md +++ b/docs/en/operations/system-tables/disks.md @@ -11,6 +11,7 @@ Columns: - `path` ([String](../../sql-reference/data-types/string.md)) โ€” Path to the mount point in the file system. - `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) โ€” Free space on disk in bytes. 
- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. +- `unreserved_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space which is not taken by reservations (`free_space` minus the size of reservations taken by merges, inserts, and other disk write operations currently running). - `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. **Example** diff --git a/docs/ru/operations/system-tables/disks.md b/docs/ru/operations/system-tables/disks.md index fc4c370cc1a..1d540b277d1 100644 --- a/docs/ru/operations/system-tables/disks.md +++ b/docs/ru/operations/system-tables/disks.md @@ -11,5 +11,6 @@ Cодержит информацию о дисках, заданных в [ко - `path` ([String](../../sql-reference/data-types/string.md)) — путь к точке монтирования в файловой системе. - `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — свободное место на диске в байтах. - `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — объём диска в байтах. +- `unreserved_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — не зарезервированное свободное место в байтах (`free_space` минус размер места, зарезервированного на выполняемые в данный момент фоновые слияния, вставки и другие операции записи на диск). - `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — место, которое должно остаться свободным на диске в байтах.
ะ—ะฐะดะฐั‘ั‚ัั ะทะฝะฐั‡ะตะฝะธะตะผ ะฟะฐั€ะฐะผะตั‚ั€ะฐ `keep_free_space_bytes` ะบะพะฝั„ะธะณัƒั€ะฐั†ะธะธ ะดะธัะบะพะฒ. diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index ef2c695d6b7..6b50b00dc30 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -21,6 +21,7 @@ StorageSystemDisks::StorageSystemDisks(const StorageID & table_id_) {"path", std::make_shared()}, {"free_space", std::make_shared()}, {"total_space", std::make_shared()}, + {"unreserved_space", std::make_shared()}, {"keep_free_space", std::make_shared()}, {"type", std::make_shared()}, {"is_encrypted", std::make_shared()}, @@ -44,6 +45,7 @@ Pipe StorageSystemDisks::read( MutableColumnPtr col_path = ColumnString::create(); MutableColumnPtr col_free = ColumnUInt64::create(); MutableColumnPtr col_total = ColumnUInt64::create(); + MutableColumnPtr col_unreserved = ColumnUInt64::create(); MutableColumnPtr col_keep = ColumnUInt64::create(); MutableColumnPtr col_type = ColumnString::create(); MutableColumnPtr col_is_encrypted = ColumnUInt8::create(); @@ -55,6 +57,7 @@ Pipe StorageSystemDisks::read( col_path->insert(disk_ptr->getPath()); col_free->insert(disk_ptr->getAvailableSpace()); col_total->insert(disk_ptr->getTotalSpace()); + col_unreserved->insert(disk_ptr->getUnreservedSpace()); col_keep->insert(disk_ptr->getKeepingFreeSpace()); auto data_source_description = disk_ptr->getDataSourceDescription(); col_type->insert(toString(data_source_description.type)); @@ -72,6 +75,7 @@ Pipe StorageSystemDisks::read( res_columns.emplace_back(std::move(col_path)); res_columns.emplace_back(std::move(col_free)); res_columns.emplace_back(std::move(col_total)); + res_columns.emplace_back(std::move(col_unreserved)); res_columns.emplace_back(std::move(col_keep)); res_columns.emplace_back(std::move(col_type)); res_columns.emplace_back(std::move(col_is_encrypted)); diff --git 
a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 9e2f676bb55..a7b4725ac57 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -184,6 +184,7 @@ CREATE TABLE system.disks `path` String, `free_space` UInt64, `total_space` UInt64, + `unreserved_space` UInt64, `keep_free_space` UInt64, `type` String, `is_encrypted` UInt8, From d550604e281206a14cd39eec9c975d653e37f651 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 13 Sep 2022 23:12:53 +0000 Subject: [PATCH 024/266] respect listen_host config param, fix updateServers and getListenTry --- programs/server/Server.cpp | 131 ++++++++++++++++++++++--------------- 1 file changed, 78 insertions(+), 53 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 0c2af7a0158..6336c1f795f 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -390,7 +390,16 @@ bool getListenTry(const Poco::Util::AbstractConfiguration & config) { bool listen_try = config.getBool("listen_try", false); if (!listen_try) - listen_try = DB::getMultipleValuesFromConfig(config, "", "listen_host").empty(); + { + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + listen_try = + DB::getMultipleValuesFromConfig(config, "", "listen_host").empty() && + std::none_of(protocols.begin(), protocols.end(), [&](const auto & protocol) + { + return config.has("protocols." + protocol + ".host") && config.has("protocols." 
+ protocol + ".port"); + }); + } return listen_try; } @@ -1878,7 +1887,7 @@ void Server::createServers( Poco::Util::AbstractConfiguration::Keys protocols; config.keys("protocols", protocols); - auto createFactory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr + auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr { if (type == "tcp") return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false)); @@ -1915,66 +1924,74 @@ void Server::createServers( for (const auto & protocol : protocols) { - std::string conf_name = "protocols." + protocol; - std::string prefix = conf_name + "."; - std::unordered_set pset {prefix}; - - if (config.has(prefix + "host") && config.has(prefix + "port")) + std::vector hosts; + if (config.has("protocols." + protocol + ".host")) + hosts.push_back(config.getString("protocols." + protocol + ".host")); + else + hosts = listen_hosts; + + for (const auto & host : hosts) { - std::string description {" protocol"}; - if (config.has(prefix + "description")) - description = config.getString(prefix + "description"); - std::string port_name = prefix + "port"; - std::string listen_host = config.getString(prefix + "host"); - bool is_secure = false; - auto stack = std::make_unique(*this, conf_name); + std::string conf_name = "protocols." 
+ protocol; + std::string prefix = conf_name + "."; + std::unordered_set pset {prefix}; - while (true) + if (config.has(prefix + "port")) { - // if there is no "type" - it's a reference to another protocol and this is just an endpoint - if (config.has(prefix + "type")) + std::string description {" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); + std::string port_name = prefix + "port"; + bool is_secure = false; + auto stack = std::make_unique(*this, conf_name); + + while (true) { - std::string type = config.getString(prefix + "type"); - if (type == "tls") + // if there is no "type" - it's a reference to another protocol and this is just an endpoint + if (config.has(prefix + "type")) { - if (is_secure) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); - is_secure = true; + std::string type = config.getString(prefix + "type"); + if (type == "tls") + { + if (is_secure) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); + is_secure = true; + } + + stack->append(create_factory(type, conf_name)); } - stack->append(createFactory(type, conf_name)); + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." + config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); } - if (!config.has(prefix + "impl")) - break; + if (!stack || stack->size() == 0) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); - conf_name = "protocols." 
+ config.getString(prefix + "impl"); - prefix = conf_name + "."; + createServer(config, host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, host, port, is_secure); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); - if (!pset.insert(conf_name).second) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + return ProtocolServerAdapter( + host, + port_name.c_str(), + description + ": " + address.toString(), + std::make_unique( + stack.release(), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); } - - if (!stack || stack->size() == 0) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); - - createServer(config, listen_host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, is_secure); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - - return ProtocolServerAdapter( - listen_host, - port_name.c_str(), - description + ": " + address.toString(), - std::make_unique( - stack.release(), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); } } @@ -2223,9 +2240,17 @@ void Server::updateServers( { if (!server.isStopping()) { - bool has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); - bool has_port = !config.getString(server.getPortName(), "").empty(); - if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber()) + std::string port_name = server.getPortName(); + bool has_host = false; + if (port_name.starts_with("protocols.")) + { + std::string 
protocol = port_name.substr(0, port_name.find_last_of('.')); + has_host = config.has(protocol + ".host"); + } + if (!has_host) + has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); + bool has_port = !config.getString(port_name, "").empty(); + if (!has_host || !has_port || config.getInt(port_name) != server.portNumber()) { server.stop(); LOG_INFO(log, "Stopped listening for {}", server.getDescription()); From 8f079922926df010b837c57504ece066b2b161ad Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 14 Sep 2022 06:24:44 +0000 Subject: [PATCH 025/266] allow to specify key and certificate for TLS layer --- src/Server/TCPProtocolStackFactory.h | 3 +++ src/Server/TLSHandler.h | 23 ++++++++++++++++++----- src/Server/TLSHandlerFactory.h | 7 ++++++- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index 87d5dba350f..4acbd3e5059 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -3,6 +3,9 @@ #include #include #include +#include +#include +#include namespace DB diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index f8cb94eb004..e753910e1c0 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -1,7 +1,10 @@ #pragma once +#include #include -#include "Server/TCPProtocolStackData.h" +#include +#include +#include #if USE_SSL # include @@ -18,18 +21,27 @@ namespace ErrorCodes class TLSHandler : public Poco::Net::TCPServerConnection { +#if USE_SSL + using SecureStreamSocket = Poco::Net::SecureStreamSocket; + using SSLManager = Poco::Net::SSLManager; + using Context = Poco::Net::Context; +#endif using StreamSocket = Poco::Net::StreamSocket; public: - explicit TLSHandler(const StreamSocket & socket, const std::string & conf_name_, TCPProtocolStackData & stack_data_) + explicit TLSHandler(const StreamSocket & socket, const std::string & key_, const 
std::string & certificate_, TCPProtocolStackData & stack_data_) : Poco::Net::TCPServerConnection(socket) - , conf_name(conf_name_) + , key(key_) + , certificate(certificate_) , stack_data(stack_data_) {} void run() override { #if USE_SSL - socket() = Poco::Net::SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext()); + auto ctx = SSLManager::instance().defaultServerContext(); + if (!key.empty() && !certificate.empty()) + ctx = new Context(Context::Usage::SERVER_USE, key, certificate, ctx->getCAPaths().caLocation); + socket() = SecureStreamSocket::attach(socket(), ctx); stack_data.socket = socket(); #else throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", @@ -37,7 +49,8 @@ public: #endif } private: - std::string conf_name; + std::string key [[maybe_unused]]; + std::string certificate [[maybe_unused]]; TCPProtocolStackData & stack_data [[maybe_unused]]; }; diff --git a/src/Server/TLSHandlerFactory.h b/src/Server/TLSHandlerFactory.h index 283e96252c3..8063ffa783d 100644 --- a/src/Server/TLSHandlerFactory.h +++ b/src/Server/TLSHandlerFactory.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -45,7 +46,11 @@ public: try { LOG_TRACE(log, "TCP Request. 
Address: {}", socket.peerAddress().toString()); - return new TLSHandler(socket, conf_name, stack_data); + return new TLSHandler( + socket, + server.config().getString(conf_name + ".privateKeyFile", ""), + server.config().getString(conf_name + ".certificateFile", ""), + stack_data); } catch (const Poco::Net::NetException &) { From a136465eb48443508e953437d18da0c188eb0b2d Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 14 Sep 2022 11:08:37 +0200 Subject: [PATCH 026/266] Fix test integration/test_disk_types/test.py --- tests/integration/test_disk_types/test.py | 24 +++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index 5f7b430d7ef..715a5d08ae3 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV disk_types = { "default": "local", @@ -28,18 +29,21 @@ def cluster(): def test_different_types(cluster): node = cluster.instances["node"] - response = node.query("SELECT * FROM system.disks") - disks = response.split("\n") - for disk in disks: - if disk == "": # skip empty line (after split at last position) - continue - fields = disk.split("\t") + response = TSV.toMat( node.query("SELECT * FROM system.disks FORMAT TSVWithNames") ) + + assert len(response) > len(disk_types) # at least one extra line for header + + name_col_ix = response[0].index("name") + type_col_ix = response[0].index("type") + encrypted_col_ix = response[0].index("is_encrypted") + + for fields in response[1:]: # skip header assert len(fields) >= 7 - assert disk_types.get(fields[0], "UNKNOWN") == fields[5] - if "encrypted" in fields[0]: - assert fields[6] == "1" + assert disk_types.get(fields[name_col_ix], "UNKNOWN") == fields[type_col_ix], f"Wrong type ({fields[type_col_ix]}) for disk {fields[name_col_ix]}!" 
+ if "encrypted" in fields[name_col_ix]: + assert fields[encrypted_col_ix] == "1", f"{fields[name_col_ix]} expected to be encrypted!" else: - assert fields[6] == "0" + assert fields[encrypted_col_ix] == "0", f"{fields[name_col_ix]} expected to be non-encrypted!" def test_select_by_type(cluster): From b939379da676611262b400ebba778b75c84fef2b Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 14 Sep 2022 16:29:26 +0000 Subject: [PATCH 027/266] bug fix, merge fix, style fix --- programs/server/Server.cpp | 10 +++++----- src/Server/TLSHandler.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index fda0ccf491a..624c312468e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1913,15 +1913,15 @@ void Server::createServers( return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this)); if (type == "http") return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "HTTPHandler-factory")) + new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory")) ); if (type == "prometheus") return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory")) + new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory")) ); if (type == "interserver") return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory")) + new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory")) ); throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol 
configuration error, unknown protocol name '{}'", type); @@ -1934,12 +1934,12 @@ void Server::createServers( hosts.push_back(config.getString("protocols." + protocol + ".host")); else hosts = listen_hosts; - + for (const auto & host : hosts) { std::string conf_name = "protocols." + protocol; std::string prefix = conf_name + "."; - std::unordered_set pset {prefix}; + std::unordered_set pset {conf_name}; if (config.has(prefix + "port")) { diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index e753910e1c0..fa2772cfd41 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -21,7 +21,7 @@ namespace ErrorCodes class TLSHandler : public Poco::Net::TCPServerConnection { -#if USE_SSL +#if USE_SSL using SecureStreamSocket = Poco::Net::SecureStreamSocket; using SSLManager = Poco::Net::SSLManager; using Context = Poco::Net::Context; From 910d49302cc8669899534bbf5d77f84a5f8a42ba Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 14 Sep 2022 19:05:37 +0000 Subject: [PATCH 028/266] USE_SSL fix --- src/Server/TLSHandler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index fa2772cfd41..32f0ca59776 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -1,12 +1,12 @@ #pragma once -#include #include #include #include #include #if USE_SSL +# include # include # include #endif From c66f41230090835c7bbdbcff536265c9c92148b2 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 18 Sep 2022 07:11:52 +0000 Subject: [PATCH 029/266] pass session certificate for showCertificate() --- src/Functions/FunctionShowCertificate.h | 20 +++++++++++++++++--- src/Interpreters/ClientInfo.h | 1 + src/Interpreters/Session.cpp | 3 ++- src/Interpreters/Session.h | 2 +- src/Server/TCPHandler.cpp | 3 ++- src/Server/TCPHandler.h | 1 + src/Server/TCPProtocolStackData.h | 1 + src/Server/TLSHandler.h | 1 + 8 files changed, 26 insertions(+), 6 deletions(-) diff --git 
a/src/Functions/FunctionShowCertificate.h b/src/Functions/FunctionShowCertificate.h index 0724158f66b..832f80f8b1b 100644 --- a/src/Functions/FunctionShowCertificate.h +++ b/src/Functions/FunctionShowCertificate.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include @@ -33,14 +35,18 @@ class FunctionShowCertificate : public IFunction public: static constexpr auto name = "showCertificate"; - static FunctionPtr create(ContextPtr) + static FunctionPtr create(ContextPtr ctx) { #if !defined(USE_SSL) || USE_SSL == 0 throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support is disabled"); #endif - return std::make_shared(); + return std::make_shared(ctx->getQueryContext()->getClientInfo().certificate); } + std::string certificate; + + explicit FunctionShowCertificate(const std::string & certificate_ = "") : certificate(certificate_) {} + String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } @@ -61,7 +67,15 @@ public: if (input_rows_count) { #if USE_SSL - if (const X509 * cert = SSL_CTX_get0_certificate(Poco::Net::SSLManager::instance().defaultServerContext()->sslContext())) + std::unique_ptr x509_cert; + if (!certificate.empty()) + x509_cert = std::make_unique(certificate); + + const X509 * cert = x509_cert ? 
+ x509_cert->certificate() : + SSL_CTX_get0_certificate(Poco::Net::SSLManager::instance().defaultServerContext()->sslContext()); + + if (cert) { BIO * b = BIO_new(BIO_s_mem()); SCOPE_EXIT( diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index a1096b99325..f7a172b226d 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -69,6 +69,7 @@ public: Interface interface = Interface::TCP; bool is_secure = false; + String certificate; /// For tcp String os_user; diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 52588a5f4cc..7639dec813d 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -244,7 +244,7 @@ void Session::shutdownNamedSessions() NamedSessionsStorage::instance().shutdown(); } -Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure) +Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure, const std::string & certificate) : auth_id(UUIDHelpers::generateV4()), global_context(global_context_), log(&Poco::Logger::get(String{magic_enum::enum_name(interface_)} + "-Session")) @@ -252,6 +252,7 @@ Session::Session(const ContextPtr & global_context_, ClientInfo::Interface inter prepared_client_info.emplace(); prepared_client_info->interface = interface_; prepared_client_info->is_secure = is_secure; + prepared_client_info->certificate = certificate; } Session::~Session() diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index ed4f7809dee..0f17c378915 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -32,7 +32,7 @@ public: /// Stops using named sessions. The method must be called at the server shutdown. 
static void shutdownNamedSessions(); - Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure = false); + Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure = false, const std::string & certificate = ""); ~Session(); Session(const Session &&) = delete; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index eaa4e083a1f..fe30655c19a 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -115,6 +115,7 @@ TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::N , tcp_server(tcp_server_) , log(&Poco::Logger::get("TCPHandler")) , forwarded_for(stack_data.forwarded_for) + , certificate(stack_data.certificate) , server_display_name(std::move(server_display_name_)) { } @@ -1065,7 +1066,7 @@ std::unique_ptr TCPHandler::makeSession() { auto interface = is_interserver_mode ? ClientInfo::Interface::TCP_INTERSERVER : ClientInfo::Interface::TCP; - auto res = std::make_unique(server.context(), interface, socket().secure()); + auto res = std::make_unique(server.context(), interface, socket().secure(), certificate); auto & client_info = res->getClientInfo(); client_info.forwarded_for = forwarded_for; diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 13c3c5f70c1..c36ce1e9378 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -153,6 +153,7 @@ private: Poco::Logger * log; String forwarded_for; + String certificate; String client_name; UInt64 client_version_major = 0; diff --git a/src/Server/TCPProtocolStackData.h b/src/Server/TCPProtocolStackData.h index bc90de8c678..ea5641ec775 100644 --- a/src/Server/TCPProtocolStackData.h +++ b/src/Server/TCPProtocolStackData.h @@ -10,6 +10,7 @@ struct TCPProtocolStackData { Poco::Net::StreamSocket socket; std::string forwarded_for; + std::string certificate; }; } diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index 32f0ca59776..5b7377515c1 100644 --- 
a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -43,6 +43,7 @@ public: ctx = new Context(Context::Usage::SERVER_USE, key, certificate, ctx->getCAPaths().caLocation); socket() = SecureStreamSocket::attach(socket(), ctx); stack_data.socket = socket(); + stack_data.certificate = certificate; #else throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", ErrorCodes::SUPPORT_IS_DISABLED}; From fc78af3f6974f3f3db843de31bf1303c75c0fbe5 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 19 Sep 2022 02:01:09 +0000 Subject: [PATCH 030/266] add 'default_database' endpoint config parameter --- src/Server/TCPHandler.cpp | 6 +++++- src/Server/TCPProtocolStackData.h | 1 + src/Server/TCPProtocolStackFactory.h | 2 +- src/Server/TCPProtocolStackHandler.h | 11 ++++++++--- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index fe30655c19a..9ff572c6bb5 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -116,6 +116,7 @@ TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::N , log(&Poco::Logger::get("TCPHandler")) , forwarded_for(stack_data.forwarded_for) , certificate(stack_data.certificate) + , default_database(stack_data.default_database) , server_display_name(std::move(server_display_name_)) { } @@ -1093,6 +1094,7 @@ void TCPHandler::receiveHello() UInt64 packet_type = 0; String user; String password; + String default_db; readVarUInt(packet_type, *in); if (packet_type != Protocol::Client::Hello) @@ -1114,7 +1116,9 @@ void TCPHandler::receiveHello() readVarUInt(client_version_minor, *in); // NOTE For backward compatibility of the protocol, client cannot send its version_patch. 
readVarUInt(client_tcp_protocol_version, *in); - readStringBinary(default_database, *in); + readStringBinary(default_db, *in); + if (!default_db.empty()) + default_database = default_db; readStringBinary(user, *in); readStringBinary(password, *in); diff --git a/src/Server/TCPProtocolStackData.h b/src/Server/TCPProtocolStackData.h index ea5641ec775..f2d00d8a845 100644 --- a/src/Server/TCPProtocolStackData.h +++ b/src/Server/TCPProtocolStackData.h @@ -11,6 +11,7 @@ struct TCPProtocolStackData Poco::Net::StreamSocket socket; std::string forwarded_for; std::string certificate; + std::string default_database; }; } diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index 4acbd3e5059..50c6555fe9f 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -39,7 +39,7 @@ public: try { LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new TCPProtocolStackHandler(tcp_server, socket, stack, conf_name); + return new TCPProtocolStackHandler(server, tcp_server, socket, stack, conf_name); } catch (const Poco::Net::NetException &) { diff --git a/src/Server/TCPProtocolStackHandler.h b/src/Server/TCPProtocolStackHandler.h index 7b513298022..9ca388da17b 100644 --- a/src/Server/TCPProtocolStackHandler.h +++ b/src/Server/TCPProtocolStackHandler.h @@ -3,7 +3,9 @@ #include #include #include -#include "Server/TCPProtocolStackData.h" +#include +#include +#include namespace DB @@ -15,19 +17,22 @@ class TCPProtocolStackHandler : public Poco::Net::TCPServerConnection using StreamSocket = Poco::Net::StreamSocket; using TCPServerConnection = Poco::Net::TCPServerConnection; private: + IServer & server; TCPServer & tcp_server; std::list stack; std::string conf_name; public: - TCPProtocolStackHandler(TCPServer & tcp_server_, const StreamSocket & socket, const std::list & stack_, const std::string & conf_name_) - : TCPServerConnection(socket), tcp_server(tcp_server_), stack(stack_), 
conf_name(conf_name_) + TCPProtocolStackHandler(IServer & server_, TCPServer & tcp_server_, const StreamSocket & socket, const std::list & stack_, const std::string & conf_name_) + : TCPServerConnection(socket), server(server_), tcp_server(tcp_server_), stack(stack_), conf_name(conf_name_) {} void run() override { + const auto & conf = server.config(); TCPProtocolStackData stack_data; stack_data.socket = socket(); + stack_data.default_database = conf.getString(conf_name + ".default_database", ""); for (auto & factory : stack) { std::unique_ptr connection(factory->createConnection(socket(), tcp_server, stack_data)); From 35fbac03f248cbc86b4bb9d7d392502df4856c06 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Mon, 19 Sep 2022 11:08:03 +0800 Subject: [PATCH 031/266] fix FunctionDecrypt constructor --- src/Functions/FunctionsAES.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index f8cfdb9a2f3..a52c228839c 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -416,10 +416,10 @@ template class FunctionDecrypt : public IFunction { public: - explicit FunctionDecrypt(const ContextPtr & context) : use_null_when_decrypt_fail(AESHelpers::getParamsFromContext(context)) { } + explicit FunctionDecrypt(bool use_null_when_decrypt_fail_) : use_null_when_decrypt_fail(use_null_when_decrypt_fail_) { } static constexpr OpenSSLDetails::CompatibilityMode compatibility_mode = Impl::compatibility_mode; static constexpr auto name = Impl::name; - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + static FunctionPtr create(ContextPtr context) { return std::make_shared(AESHelpers::getParamsFromContext(context)); } private: using CipherMode = OpenSSLDetails::CipherMode; From fb3ebbb2d20d5077c81cb5b2b71d8b8e63b68c29 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 20 Sep 2022 12:12:01 +0200 Subject: [PATCH 032/266] Apply style fix for 
tests/integration/test_disk_types/test.py --- tests/integration/test_disk_types/test.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index 715a5d08ae3..099821bf494 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -29,21 +29,27 @@ def cluster(): def test_different_types(cluster): node = cluster.instances["node"] - response = TSV.toMat( node.query("SELECT * FROM system.disks FORMAT TSVWithNames") ) + response = TSV.toMat(node.query("SELECT * FROM system.disks FORMAT TSVWithNames")) - assert len(response) > len(disk_types) # at least one extra line for header + assert len(response) > len(disk_types) # at least one extra line for header - name_col_ix = response[0].index("name") - type_col_ix = response[0].index("type") + name_col_ix = response[0].index("name") + type_col_ix = response[0].index("type") encrypted_col_ix = response[0].index("is_encrypted") for fields in response[1:]: # skip header assert len(fields) >= 7 - assert disk_types.get(fields[name_col_ix], "UNKNOWN") == fields[type_col_ix], f"Wrong type ({fields[type_col_ix]}) for disk {fields[name_col_ix]}!" + assert ( + disk_types.get(fields[name_col_ix], "UNKNOWN") == fields[type_col_ix] + ), f"Wrong type ({fields[type_col_ix]}) for disk {fields[name_col_ix]}!" if "encrypted" in fields[name_col_ix]: - assert fields[encrypted_col_ix] == "1", f"{fields[name_col_ix]} expected to be encrypted!" + assert ( + fields[encrypted_col_ix] == "1" + ), f"{fields[name_col_ix]} expected to be encrypted!" else: - assert fields[encrypted_col_ix] == "0", f"{fields[name_col_ix]} expected to be non-encrypted!" + assert ( + fields[encrypted_col_ix] == "0" + ), f"{fields[name_col_ix]} expected to be non-encrypted!" 
def test_select_by_type(cluster): From 4ccfbedea45a1b2b47310e8c83c250643fc34e15 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 20 Sep 2022 17:10:18 +0000 Subject: [PATCH 033/266] add allowed networks for endpoint --- src/Server/TCPProtocolStackFactory.h | 31 ++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index 50c6555fe9f..c0ec29411d4 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -6,11 +6,18 @@ #include #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int UNKNOWN_ADDRESS_PATTERN_TYPE; + extern const int IP_ADDRESS_NOT_ALLOWED; +} + class TCPProtocolStackFactory : public TCPServerConnectionFactory { @@ -19,6 +26,7 @@ private: Poco::Logger * log; std::string conf_name; std::list stack; + AllowedClientHosts allowed_client_hosts; class DummyTCPHandler : public Poco::Net::TCPServerConnection { @@ -32,10 +40,33 @@ public: explicit TCPProtocolStackFactory(IServer & server_, const std::string & conf_name_, T... factory) : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), conf_name(conf_name_), stack({factory...}) { + const auto & config = server.config(); + /// Fill list of allowed hosts. + const auto networks_config = conf_name + ".networks"; + if (config.has(networks_config)) + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(networks_config, keys); + for (const String & key : keys) + { + String value = config.getString(networks_config + "." 
+ key); + if (key.starts_with("ip")) + allowed_client_hosts.addSubnet(value); + else if (key.starts_with("host_regexp")) + allowed_client_hosts.addNameRegexp(value); + else if (key.starts_with("host")) + allowed_client_hosts.addName(value); + else + throw Exception("Unknown address pattern type: " + key, ErrorCodes::UNKNOWN_ADDRESS_PATTERN_TYPE); + } + } } Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override { + if (!allowed_client_hosts.empty() && !allowed_client_hosts.contains(socket.peerAddress().host())) + throw Exception("Connections from " + socket.peerAddress().toString() + " are not allowed", ErrorCodes::IP_ADDRESS_NOT_ALLOWED); + try { LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); From d3d06251a344e704807d23f2384233593d2cdab4 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 22 Sep 2022 16:48:54 +0000 Subject: [PATCH 034/266] Add setting to obtain object name as column value in JSONObjectEachRow format --- src/Core/Settings.h | 2 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 5 +++ .../Impl/JSONEachRowRowInputFormat.cpp | 2 +- .../Formats/Impl/JSONEachRowRowInputFormat.h | 12 ++--- .../Impl/JSONObjectEachRowRowInputFormat.cpp | 45 ++++++++++++++++--- .../Impl/JSONObjectEachRowRowInputFormat.h | 6 ++- .../Impl/JSONObjectEachRowRowOutputFormat.cpp | 35 +++++++++++++-- .../Impl/JSONObjectEachRowRowOutputFormat.h | 6 ++- ..._each_row_column_for_object_name.reference | 20 +++++++++ ...object_each_row_column_for_object_name.sql | 11 +++++ 11 files changed, 128 insertions(+), 17 deletions(-) create mode 100644 tests/queries/0_stateless/02454_json_object_each_row_column_for_object_name.reference create mode 100644 tests/queries/0_stateless/02454_json_object_each_row_column_for_object_name.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 314dccbc818..0d49f64f19f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -772,6 
+772,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, output_format_json_array_of_rows, false, "Output a JSON array of all rows in JSONEachRow(Compact) format.", 0) \ M(Bool, output_format_json_validate_utf8, false, "Validate UTF-8 sequences in JSON output formats, doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8", 0) \ \ + M(String, format_json_object_each_row_column_for_object_name, "", "The name of column that will be used as object names in JSONObjectEachRow format. Column type should be String", 0) \ + \ M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \ M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \ M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 5a327a2f31b..15799f4d6af 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -100,6 +100,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings; format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; + format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 
b6efb0bd391..8346da89f07 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -155,6 +155,11 @@ struct FormatSettings bool validate_utf8 = false; } json; + struct + { + String column_for_object_name; + } json_object_each_row; + struct { UInt64 row_group_size = 1000000; diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 0c150750e09..db5a027844b 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -214,7 +214,7 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi seen_columns.assign(num_columns, false); nested_prefix_length = 0; - readRowStart(); + readRowStart(columns); readJSONObject(columns); const auto & header = getPort().getHeader(); diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h index 59447180f77..4e2946cfea6 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h @@ -48,7 +48,7 @@ private: void readJSONObject(MutableColumns & columns); void readNestedData(const String & name, MutableColumns & columns); - virtual void readRowStart() {} + virtual void readRowStart(MutableColumns &) {} virtual bool checkEndOfData(bool is_first_row); const FormatSettings format_settings; @@ -66,10 +66,6 @@ private: /// the nested column names are 'n.i' and 'n.s' and the nested prefix is 'n.' size_t nested_prefix_length = 0; - /// Set of columns for which the values were read. The rest will be filled with default values. - std::vector read_columns; - /// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name. 
- std::vector seen_columns; /// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true /// for row like {..., "non-nullable column name" : null, ...} @@ -85,6 +81,12 @@ private: bool yield_strings; protected: + + /// Set of columns for which the values were read. The rest will be filled with default values. + std::vector read_columns; + /// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name. + std::vector seen_columns; + /// This flag is needed to know if data is in square brackets. bool data_in_square_brackets = false; }; diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp index 5ca1ba33c27..ff52d16636f 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp @@ -2,12 +2,39 @@ #include #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +std::optional getColumnIndexForJSONObjectEachRowObjectName(const Block & header, const FormatSettings & format_settings) +{ + if (format_settings.json_object_each_row.column_for_object_name.empty()) + return std::nullopt; + + if (!header.has(format_settings.json_object_each_row.column_for_object_name)) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Column name '{}' from setting format_json_object_each_row_column_for_object_name doesn't exists in header", + format_settings.json_object_each_row.column_for_object_name); + + size_t index = header.getPositionByName(format_settings.json_object_each_row.column_for_object_name); + if (!isStringOrFixedString(header.getDataTypes()[index])) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Column '{}' from setting json_object_each_row_column_for_object_name must have String type", + 
format_settings.json_object_each_row.column_for_object_name); + + return index; +} + JSONObjectEachRowInputFormat::JSONObjectEachRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_) - : JSONEachRowRowInputFormat(in_, header_, params_, format_settings_, false) + : JSONEachRowRowInputFormat(in_, header_, params_, format_settings_, false), field_index_for_object_name(getColumnIndexForJSONObjectEachRowObjectName(header_, format_settings_)) { } @@ -16,9 +43,15 @@ void JSONObjectEachRowInputFormat::readPrefix() JSONUtils::skipObjectStart(*in); } -void JSONObjectEachRowInputFormat::readRowStart() +void JSONObjectEachRowInputFormat::readRowStart(MutableColumns & columns) { - JSONUtils::readFieldName(*in); + auto object_name = JSONUtils::readFieldName(*in); + if (field_index_for_object_name) + { + columns[*field_index_for_object_name]->insertData(object_name.data(), object_name.size()); + seen_columns[*field_index_for_object_name] = true; + read_columns[*field_index_for_object_name] = true; + } } bool JSONObjectEachRowInputFormat::checkEndOfData(bool is_first_row) @@ -30,7 +63,6 @@ bool JSONObjectEachRowInputFormat::checkEndOfData(bool is_first_row) return false; } - JSONObjectEachRowSchemaReader::JSONObjectEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : IRowWithNamesSchemaReader(in_, format_settings_) { @@ -53,7 +85,10 @@ NamesAndTypesList JSONObjectEachRowSchemaReader::readRowAndGetNamesAndDataTypes( JSONUtils::skipComma(in); JSONUtils::readFieldName(in); - return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, false); + auto names_and_types = JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, false); + if (!format_settings.json_object_each_row.column_for_object_name.empty()) + names_and_types.emplace_front(format_settings.json_object_each_row.column_for_object_name, std::make_shared()); + return names_and_types; } void 
JSONObjectEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h index fd98f43649f..466c0111a03 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h @@ -27,8 +27,10 @@ public: private: void readPrefix() override; void readSuffix() override {} - void readRowStart() override; + void readRowStart(MutableColumns & columns) override; bool checkEndOfData(bool is_first_row) override; + + std::optional field_index_for_object_name; }; @@ -44,4 +46,6 @@ private: bool first_row = true; }; +std::optional getColumnIndexForJSONObjectEachRowObjectName(const Block & header, const FormatSettings & settings); + } diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp index 10c1e9beda5..fa82649d4ee 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -6,10 +7,38 @@ namespace DB { JSONObjectEachRowRowOutputFormat::JSONObjectEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_) - : JSONEachRowRowOutputFormat(out_, header_, params_, settings_) + : JSONEachRowRowOutputFormat(out_, header_, params_, settings_), field_index_for_object_name(getColumnIndexForJSONObjectEachRowObjectName(header_, settings_)) { } +void JSONObjectEachRowRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row) +{ + if (field_number == field_index_for_object_name) + { + ++field_number; + return; + } + JSONEachRowRowOutputFormat::writeField(column, serialization, row); +} + +void 
JSONObjectEachRowRowOutputFormat::write(const Columns & columns, size_t row) +{ + if (field_index_for_object_name) + object_name = columns[*field_index_for_object_name]->getDataAt(row).toString(); + else + object_name = "row_" + std::to_string(row + 1); + + IRowOutputFormat::write(columns, row); +} + +void JSONObjectEachRowRowOutputFormat::writeFieldDelimiter() +{ + /// We should not write comma before column that is used for + /// object name and also after it if it's in the first place + if (field_number != field_index_for_object_name && !(field_index_for_object_name == 0 && field_number == 1)) + JSONEachRowRowOutputFormat::writeFieldDelimiter(); +} + void JSONObjectEachRowRowOutputFormat::writePrefix() { JSONUtils::writeObjectStart(*ostr); @@ -17,9 +46,7 @@ void JSONObjectEachRowRowOutputFormat::writePrefix() void JSONObjectEachRowRowOutputFormat::writeRowStartDelimiter() { - ++row_num; - String title = "row_" + std::to_string(row_num); - JSONUtils::writeCompactObjectStart(*ostr, 1, title.c_str()); + JSONUtils::writeCompactObjectStart(*ostr, 1, object_name.c_str()); } void JSONObjectEachRowRowOutputFormat::writeRowEndDelimiter() diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h index 51db22fb606..19d9fe1aa53 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h @@ -29,6 +29,9 @@ public: String getName() const override { return "JSONObjectEachRowRowOutputFormat"; } private: + void write(const Columns & columns, size_t row) override; + void writeField(const IColumn & column, const ISerialization & serialization, size_t row) override; + void writeFieldDelimiter() override; void writeRowStartDelimiter() override; void writeRowEndDelimiter() override; void writeRowBetweenDelimiter() override; @@ -36,7 +39,8 @@ private: void writePrefix() override; void writeSuffix() override; - size_t 
row_num = 0; + std::optional field_index_for_object_name; + String object_name; }; } diff --git a/tests/queries/0_stateless/02454_json_object_each_row_column_for_object_name.reference b/tests/queries/0_stateless/02454_json_object_each_row_column_for_object_name.reference new file mode 100644 index 00000000000..8925084f2ed --- /dev/null +++ b/tests/queries/0_stateless/02454_json_object_each_row_column_for_object_name.reference @@ -0,0 +1,20 @@ +{ + "name_0": {"number":"0"}, + "name_1": {"number":"1"}, + "name_2": {"number":"2"} +} +{ + "name_0": {"number":"0","x":"1"}, + "name_1": {"number":"1","x":"2"}, + "name_2": {"number":"2","x":"3"} +} +{ + "name_0": {"number":"0"}, + "name_1": {"number":"1"}, + "name_2": {"number":"2"} +} +name String +number Nullable(Int64) +name_0 0 +name_1 1 +name_2 2 diff --git a/tests/queries/0_stateless/02454_json_object_each_row_column_for_object_name.sql b/tests/queries/0_stateless/02454_json_object_each_row_column_for_object_name.sql new file mode 100644 index 00000000000..df0f75f68f2 --- /dev/null +++ b/tests/queries/0_stateless/02454_json_object_each_row_column_for_object_name.sql @@ -0,0 +1,11 @@ +-- Tags: no-fasttest, no-parallel +set format_json_object_each_row_column_for_object_name='name'; + +select number, concat('name_', toString(number)) as name from numbers(3) format JSONObjectEachRow; +select number, concat('name_', toString(number)) as name, number + 1 as x from numbers(3) format JSONObjectEachRow; +select concat('name_', toString(number)) as name, number from numbers(3) format JSONObjectEachRow; + +insert into function file(02454_data.jsonobjecteachrow) select number, concat('name_', toString(number)) as name from numbers(3) settings engine_file_truncate_on_insert=1; +desc file(02454_data.jsonobjecteachrow); +select * from file(02454_data.jsonobjecteachrow); + From 4f32ef9bb715625cee13a562c36c8a9771f2c90f Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 22 Sep 2022 17:04:42 +0000 Subject: [PATCH 035/266] Add docs --- 
docs/en/interfaces/formats.md | 43 +++++++++++++++++++ docs/en/operations/settings/settings.md | 7 +++ .../Impl/JSONObjectEachRowRowInputFormat.cpp | 6 ++- .../Impl/JSONObjectEachRowRowOutputFormat.cpp | 1 + 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 9b8354f23a2..c6064cbcf01 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1020,6 +1020,49 @@ Example: } ``` +To use object name as column value you can use special setting [format_json_object_each_row_column_for_object_name](../operations/settings/settings.md#format_json_object_each_row_column_for_object_name). +Examples: + +For output: +```sql +insert into function file('data.json', JSONObjectEachRow) select 'obj' as object_name, number from numbers(3) settings format_json_object_each_row_column_for_object_name='object_name' +``` + +File "data.json" will contain: +```json +{ + "obj": {"number":"0"}, + "obj": {"number":"1"}, + "obj": {"number":"2"} +} +``` + +For input: +```sql +select * from file('data.json', JSONObjectEachRow, 'obj String, number UInt64') settings format_json_object_each_row_column_for_object_name='object_name' +``` + +``` +โ”Œโ”€object_nameโ”€โ”ฌโ”€numberโ”€โ” +โ”‚ obj โ”‚ 0 โ”‚ +โ”‚ obj โ”‚ 1 โ”‚ +โ”‚ obj โ”‚ 2 โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +It also works in schema inference: + +```sql +desc file('data.json', JSONObjectEachRow) settings format_json_object_each_row_column_for_object_name='object_name' +``` + +``` +โ”Œโ”€nameโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€typeโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ object_name โ”‚ String โ”‚ +โ”‚ number โ”‚ Nullable(Int64) โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + ### Inserting Data {#json-inserting-data} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 
31609fe24be..27b28d4e19f 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3906,6 +3906,13 @@ Controls validation of UTF-8 sequences in JSON output formats, doesn't impact fo Disabled by default. +### format_json_object_each_row_column_for_object_name {#format_json_object_each_row_column_for_object_name} + +The name of column that will be used for storing/writing object names in [JSONObjectEachRow](../../interfaces/formats.md#jsonobjecteachrow) format. +Column type should be String. If value is empty, default names `row_{i}`will be used for object names. + +Default value: ''. + ## TSV format settings {#tsv-format-settings} ### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default} diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp index ff52d16636f..3c4bc2fed3e 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp @@ -118,8 +118,10 @@ void registerJSONObjectEachRowSchemaReader(FormatFactory & factory) }); factory.registerAdditionalInfoForSchemaCacheGetter("JSONObjectEachRow", [](const FormatSettings & settings) { - return getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::JSON); - }); + return getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::JSON) + + fmt::format( + ", format_json_object_each_row_column_for_object_name={}", settings.json_object_each_row.column_for_object_name); + }); } } diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp index fa82649d4ee..6155efd4b63 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp @@ -79,6 +79,7 @@ void 
registerOutputFormatJSONObjectEachRow(FormatFactory & factory) return std::make_shared(buf, sample, params, settings); }); factory.markOutputFormatSupportsParallelFormatting("JSONObjectEachRow"); + factory.markFormatHasNoAppendSupport("JSONObjectEachRow"); } } From 6a1cb604c43d4a0d83693f1b1d2d4ee597470f72 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 22 Sep 2022 17:06:56 +0000 Subject: [PATCH 036/266] Style --- .../Formats/Impl/JSONObjectEachRowRowInputFormat.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp index 3c4bc2fed3e..6e6d6287840 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp @@ -119,9 +119,8 @@ void registerJSONObjectEachRowSchemaReader(FormatFactory & factory) factory.registerAdditionalInfoForSchemaCacheGetter("JSONObjectEachRow", [](const FormatSettings & settings) { return getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::JSON) - + fmt::format( - ", format_json_object_each_row_column_for_object_name={}", settings.json_object_each_row.column_for_object_name); - }); + + fmt::format(", format_json_object_each_row_column_for_object_name={}", settings.json_object_each_row.column_for_object_name); + }); } } From c23b1f4ceaa678e17562d477e734832dc1b503f1 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Fri, 23 Sep 2022 10:54:12 -0400 Subject: [PATCH 037/266] remove maybe_unused --- src/Server/TLSHandlerFactory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/TLSHandlerFactory.h b/src/Server/TLSHandlerFactory.h index 8063ffa783d..9e3002d2971 100644 --- a/src/Server/TLSHandlerFactory.h +++ b/src/Server/TLSHandlerFactory.h @@ -18,7 +18,7 @@ namespace DB class TLSHandlerFactory : public 
TCPServerConnectionFactory { private: - IServer & server [[maybe_unused]]; + IServer & server; Poco::Logger * log; std::string conf_name; From e882b4f69445c18bc6914f689ef4e293125c058f Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Sun, 25 Sep 2022 13:50:22 +0800 Subject: [PATCH 038/266] remove settings, add tryDecrypt function --- src/Core/Settings.h | 2 - src/Functions/FunctionsAES.cpp | 5 --- src/Functions/FunctionsAES.h | 42 +++++++++------------ src/Functions/aes_decrypt_mysql.cpp | 1 + src/Functions/decrypt.cpp | 9 +++++ tests/queries/0_stateless/01318_decrypt.sql | 9 ++--- 6 files changed, 30 insertions(+), 38 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8ed5d77b2ed..213582c19b5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -634,8 +634,6 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) \ M(Map, additional_table_filters, "", "Additional filter expression which would be applied after reading from specified table. Syntax: {'table1': 'expression', 'database.table2': 'expression'}", 0) \ M(String, additional_result_filter, "", "Additional filter expression which would be applied to query result", 0) \ - \ - M(Bool, aes_decryption_use_null_when_fail, false, "Decrypt a string with incorrect key will return NULL instead of throwing error", 0) \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ diff --git a/src/Functions/FunctionsAES.cpp b/src/Functions/FunctionsAES.cpp index e514884f2d1..87fa794955a 100644 --- a/src/Functions/FunctionsAES.cpp +++ b/src/Functions/FunctionsAES.cpp @@ -17,11 +17,6 @@ namespace ErrorCodes } } -bool AESHelpers::getParamsFromContext(DB::ContextPtr context) -{ - return context->getSettingsRef().aes_decryption_use_null_when_fail; -} - namespace OpenSSLDetails { void 
onError(std::string error_message) diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index a52c228839c..e073dd3a025 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -31,11 +31,6 @@ namespace ErrorCodes } } -namespace AESHelpers -{ - bool NO_INLINE getParamsFromContext(DB::ContextPtr context); -} - namespace OpenSSLDetails { [[noreturn]] void onError(std::string error_message); @@ -416,14 +411,13 @@ template class FunctionDecrypt : public IFunction { public: - explicit FunctionDecrypt(bool use_null_when_decrypt_fail_) : use_null_when_decrypt_fail(use_null_when_decrypt_fail_) { } static constexpr OpenSSLDetails::CompatibilityMode compatibility_mode = Impl::compatibility_mode; static constexpr auto name = Impl::name; - static FunctionPtr create(ContextPtr context) { return std::make_shared(AESHelpers::getParamsFromContext(context)); } + static constexpr bool use_null_when_decrypt_fail = Impl::use_null_when_decrypt_fail; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } private: using CipherMode = OpenSSLDetails::CipherMode; - bool use_null_when_decrypt_fail = false; String getName() const override { return name; } bool isVariadic() const override { return true; } @@ -454,7 +448,7 @@ private: optional_args ); - if (use_null_when_decrypt_fail) + if constexpr (use_null_when_decrypt_fail) return std::make_shared(std::make_shared()); return std::make_shared(); @@ -480,7 +474,7 @@ private: ColumnPtr result_column; if (arguments.size() <= 3) { - result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, nullptr, nullptr, use_null_when_decrypt_fail); + result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, nullptr, nullptr); } else { @@ -490,7 +484,7 @@ private: if (arguments.size() <= 4) { - result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, nullptr, use_null_when_decrypt_fail); + result_column = 
doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, nullptr); } else { @@ -498,50 +492,48 @@ private: throw Exception("AAD can be only set for GCM-mode", ErrorCodes::BAD_ARGUMENTS); const auto aad_column = arguments[4].column; - result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, use_null_when_decrypt_fail); + result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column); } } return result_column; } - + template static ColumnPtr doDecrypt( const EVP_CIPHER * evp_cipher, size_t input_rows_count, const ColumnPtr & input_column, const ColumnPtr & key_column, const ColumnPtr & iv_column, - const ColumnPtr & aad_column, - bool use_null_when_decrypt_fail = false) + const ColumnPtr & aad_column) { if constexpr (compatibility_mode == OpenSSLDetails::CompatibilityMode::MySQL) { - return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, use_null_when_decrypt_fail); + return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column); } else { const auto cipher_mode = EVP_CIPHER_mode(evp_cipher); if (cipher_mode == EVP_CIPH_GCM_MODE) { - return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, use_null_when_decrypt_fail); + return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column); } else { - return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column, use_null_when_decrypt_fail); + return doDecryptImpl(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column); } } return nullptr; } - template + template static ColumnPtr doDecryptImpl(const EVP_CIPHER * evp_cipher, size_t input_rows_count, const ColumnPtr & input_column, const ColumnPtr & key_column, [[maybe_unused]] const ColumnPtr & iv_column, - [[maybe_unused]] const ColumnPtr & 
aad_column, - bool use_null_when_decrypt_fail = false) + [[maybe_unused]] const ColumnPtr & aad_column) { using namespace OpenSSLDetails; @@ -679,7 +671,7 @@ private: reinterpret_cast(decrypted), &output_len, reinterpret_cast(input_value.data), static_cast(input_value.size)) != 1) { - if (!use_null_when_decrypt_fail) + if constexpr (!use_null_when_decrypt_fail) onError("Failed to decrypt"); decrypt_fail = true; } @@ -699,7 +691,7 @@ private: if (!decrypt_fail && EVP_DecryptFinal_ex(evp_ctx, reinterpret_cast(decrypted), &output_len) != 1) { - if (!use_null_when_decrypt_fail) + if constexpr (!use_null_when_decrypt_fail) onError("Failed to decrypt"); decrypt_fail = true; } @@ -712,7 +704,7 @@ private: ++decrypted; decrypted_result_column_offsets.push_back(decrypted - decrypted_result_column_data.data()); - if (use_null_when_decrypt_fail) + if constexpr (use_null_when_decrypt_fail) { if (decrypt_fail) null_map->insertValue(1); @@ -729,7 +721,7 @@ private: } decrypted_result_column->validate(); - if (use_null_when_decrypt_fail) + if constexpr (use_null_when_decrypt_fail) return ColumnNullable::create(std::move(decrypted_result_column), std::move(null_map)); else return decrypted_result_column; diff --git a/src/Functions/aes_decrypt_mysql.cpp b/src/Functions/aes_decrypt_mysql.cpp index 8ba7eaa8e80..6b59b07a736 100644 --- a/src/Functions/aes_decrypt_mysql.cpp +++ b/src/Functions/aes_decrypt_mysql.cpp @@ -12,6 +12,7 @@ struct DecryptMySQLModeImpl { static constexpr auto name = "aes_decrypt_mysql"; static constexpr auto compatibility_mode = OpenSSLDetails::CompatibilityMode::MySQL; + static constexpr bool use_null_when_decrypt_fail = false; }; } diff --git a/src/Functions/decrypt.cpp b/src/Functions/decrypt.cpp index da794116a41..664e071e858 100644 --- a/src/Functions/decrypt.cpp +++ b/src/Functions/decrypt.cpp @@ -12,6 +12,14 @@ struct DecryptImpl { static constexpr auto name = "decrypt"; static constexpr auto compatibility_mode = 
OpenSSLDetails::CompatibilityMode::OpenSSL; + static constexpr bool use_null_when_decrypt_fail = false; +}; + +struct TryDecryptImpl +{ + static constexpr auto name = "tryDecrypt"; + static constexpr auto compatibility_mode = OpenSSLDetails::CompatibilityMode::OpenSSL; + static constexpr bool use_null_when_decrypt_fail = true; }; } @@ -22,6 +30,7 @@ namespace DB REGISTER_FUNCTION(Decrypt) { factory.registerFunction>(); + factory.registerFunction>(); } } diff --git a/tests/queries/0_stateless/01318_decrypt.sql b/tests/queries/0_stateless/01318_decrypt.sql index f478b043432..8cd1414d11b 100644 --- a/tests/queries/0_stateless/01318_decrypt.sql +++ b/tests/queries/0_stateless/01318_decrypt.sql @@ -129,7 +129,7 @@ SELECT hex(decrypt('aes-256-gcm', concat(ciphertext, tag), key, iv, aad)) as plaintext_actual, plaintext_actual = hex(plaintext); --- decrypt with null when fail +-- tryDecrypt CREATE TABLE decrypt_null ( dt DateTime, user_id UInt32, @@ -140,10 +140,7 @@ CREATE TABLE decrypt_null ( INSERT INTO decrypt_null VALUES ('2022-08-02 00:00:00', 1, encrypt('aes-256-gcm', 'value1', 'keykeykeykeykeykeykeykeykeykey01', 'iv1'), 'iv1'), ('2022-09-02 00:00:00', 2, encrypt('aes-256-gcm', 'value2', 'keykeykeykeykeykeykeykeykeykey02', 'iv2'), 'iv2'), ('2022-09-02 00:00:01', 3, encrypt('aes-256-gcm', 'value3', 'keykeykeykeykeykeykeykeykeykey03', 'iv3'), 'iv3'); SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND (decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2'); --{serverError 454} - -SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND (decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2') SETTINGS aes_decryption_use_null_when_fail = 1; - -SELECT dt, user_id, (decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv)) as value FROM decrypt_null ORDER BY user_id SETTINGS aes_decryption_use_null_when_fail = 1; - +SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND 
(tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2'); +SELECT dt, user_id, (tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv)) as value FROM decrypt_null ORDER BY user_id; DROP TABLE encryption_test; From 3283cdb3d16f82332d69e59ede155f0ebe0d2423 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Sun, 25 Sep 2022 13:54:20 +0800 Subject: [PATCH 039/266] fix style --- src/Core/Settings.h | 1 + src/Functions/FunctionsAES.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 213582c19b5..9dd87904a56 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -634,6 +634,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) \ M(Map, additional_table_filters, "", "Additional filter expression which would be applied after reading from specified table. Syntax: {'table1': 'expression', 'database.table2': 'expression'}", 0) \ M(String, additional_result_filter, "", "Additional filter expression which would be applied to query result", 0) \ + \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index e073dd3a025..c9002079b30 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -677,7 +677,6 @@ private: } else { - decrypted += output_len; // 3: optionally get tag from the ciphertext (RFC5116) and feed it to the context if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM) From 1407f6bcda8c237eb1c09bd6a38750563d9e28e6 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 25 Sep 2022 15:46:12 +0000 Subject: [PATCH 040/266] add test --- tests/integration/helpers/client.py | 7 ++- .../test_composable_protocols/__init__.py | 0 
.../configs/client.xml | 10 +++ .../configs/config.xml | 57 +++++++++++++++++ .../configs/server.crt | 18 ++++++ .../configs/server.key | 28 +++++++++ .../configs/users.xml | 18 ++++++ .../test_composable_protocols/test.py | 63 +++++++++++++++++++ 8 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_composable_protocols/__init__.py create mode 100644 tests/integration/test_composable_protocols/configs/client.xml create mode 100644 tests/integration/test_composable_protocols/configs/config.xml create mode 100644 tests/integration/test_composable_protocols/configs/server.crt create mode 100644 tests/integration/test_composable_protocols/configs/server.key create mode 100644 tests/integration/test_composable_protocols/configs/users.xml create mode 100644 tests/integration/test_composable_protocols/test.py diff --git a/tests/integration/helpers/client.py b/tests/integration/helpers/client.py index a4407d5b442..fa7d1b379da 100644 --- a/tests/integration/helpers/client.py +++ b/tests/integration/helpers/client.py @@ -8,13 +8,18 @@ DEFAULT_QUERY_TIMEOUT = 600 class Client: - def __init__(self, host, port=9000, command="/usr/bin/clickhouse-client"): + def __init__(self, host, port=9000, command="/usr/bin/clickhouse-client", secure=False, config=None): self.host = host self.port = port self.command = [command] if os.path.basename(command) == "clickhouse": self.command.append("client") + + if secure: + self.command.append("--secure") + if config is not None: + self.command += ["--config-file", config] self.command += ["--host", self.host, "--port", str(self.port), "--stacktrace"] diff --git a/tests/integration/test_composable_protocols/__init__.py b/tests/integration/test_composable_protocols/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_composable_protocols/configs/client.xml b/tests/integration/test_composable_protocols/configs/client.xml new file mode 100644 index 
00000000000..15d83a7b1ab --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/client.xml @@ -0,0 +1,10 @@ + + + + none + + AcceptCertificateHandler + + + + diff --git a/tests/integration/test_composable_protocols/configs/config.xml b/tests/integration/test_composable_protocols/configs/config.xml new file mode 100644 index 00000000000..553128d4386 --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/config.xml @@ -0,0 +1,57 @@ + + + + + + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + none + true + true + sslv2,sslv3 + true + + + + 0.0.0.0 + + + + tcp + 0.0.0.0 + 9000 + native protocol (tcp) + + + tls + tcp + 9440 + secure native protocol (tcp_secure) + + + tcp + 0.0.0.0 + 9001 + native protocol endpoint (tcp) + + + http + 8123 + http protocol + + + tls + http + 0.0.0.0 + 8443 + https protocol + + + https + 8444 + https protocol endpoint + + + + + diff --git a/tests/integration/test_composable_protocols/configs/server.crt b/tests/integration/test_composable_protocols/configs/server.crt new file mode 100644 index 00000000000..6f4deca038f --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/server.crt @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC+zCCAeOgAwIBAgIJAIhI9ozZJ+TWMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAeFw0xOTA0MjIwNDMyNTJaFw0yMDA0MjEwNDMyNTJaMBQx +EjAQBgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAK+wVUEdqF2uXvN0MJBgnAHyXi6JTi4p/F6igsrCjSNjJWzHH0vQmK8ujfcF +CkifW88i+W5eHctuEtQqNHK+t9x9YiZtXrj6m/XkOXs20mYgENSmbbbHbriTPnZB +zZrq6UqMlwIHNNAa+I3NMORQxVRaI0ybXnGVO5elr70xHpk03xL0JWKHpEqYp4db +2aBQgF6y3Ww4khxjIYqpUYXWXGFnVIRU7FKVEAM1xyKqvQzXjQ5sVM/wyHknveEF +3b/X4ggN+KNl5KOc0cWDh1/XaatJAPaUUPqZcq76tynLbP64Xm3dxHcj+gtRkO67 +ef6MSg6l63m3XQP6Qb+MIkd06OsCAwEAAaNQME4wHQYDVR0OBBYEFDmODTO8QLDN +ykR3x0LIOnjNhrKhMB8GA1UdIwQYMBaAFDmODTO8QLDNykR3x0LIOnjNhrKhMAwG +A1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAAwaiJc7uqEpnH3aukbftDwX 
+m8GfEnj1HVdgg+9GGNq+9rvUYBF6gdPmjRCX9dO0cclLFx8jc2org0rTSq9WoOhX +E6qL4Eqrmc5SE3Y9jZM0h6GRD4oXK014FmtZ3T6ddZU3dQLj3BS2r1XrvmubTvGN +ZuTJNY8nx8Hh6H5XINmsEjUF9E5hog+PwCE03xt2adIdYL+gsbxASeNYyeUFpZv5 +zcXR3VoakBWnAaOVgCHq2qh96QAnL7ZKzFkGf/MdwV10KU3dmb+ICbQUUdf9Gc17 +aaDCIRws312F433FdXBkGs2UkB7ZZme9dfn6O1QbeTNvex2VLMqYx/CTkfFbOQA= +-----END CERTIFICATE----- diff --git a/tests/integration/test_composable_protocols/configs/server.key b/tests/integration/test_composable_protocols/configs/server.key new file mode 100644 index 00000000000..6eddb3295db --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCvsFVBHahdrl7z +dDCQYJwB8l4uiU4uKfxeooLKwo0jYyVsxx9L0JivLo33BQpIn1vPIvluXh3LbhLU +KjRyvrfcfWImbV64+pv15Dl7NtJmIBDUpm22x264kz52Qc2a6ulKjJcCBzTQGviN +zTDkUMVUWiNMm15xlTuXpa+9MR6ZNN8S9CVih6RKmKeHW9mgUIBest1sOJIcYyGK +qVGF1lxhZ1SEVOxSlRADNcciqr0M140ObFTP8Mh5J73hBd2/1+IIDfijZeSjnNHF +g4df12mrSQD2lFD6mXKu+rcpy2z+uF5t3cR3I/oLUZDuu3n+jEoOpet5t10D+kG/ +jCJHdOjrAgMBAAECggEARF66zrxb6RkSmmt8+rKeA6PuQu3sHsr4C1vyyjUr97l9 +tvdGlpp20LWtSZQMjHZ3pARYTTsTHTeY3DgQcRcHNicVKx8k3ZepWeeW9vw+pL+V +zSt3RsoVrH6gsCSrfr4sS3aqzX9AbjwQvh48CJ3mLQ1m70kHV+xbZIh1+4pB/hyP +1wKyUE18ZkOptXvO/TtoHzLQCecpkXtWzmry1Eh2isvXA+NMrAtLibGsyM1mtm7i +5ozevzHabvvCDBEe+KgZdONgVhhhvm2eOd+/s4w3rw4ETud4fI/ZAJyWXhiIKFnA +VJbElWruSAoVBW7p2bsF5PbmVzvo8vXL+VylxYD+AQKBgQDhLoRKTVhNkn/QjKxq +sdOh+QZra0LzjVpAmkQzu7wZMSHEz9qePQciDQQrYKrmRF1vNcIRCVUTqWYheJ/1 +lKRrCGa0ab6k96zkWMqLHD5u+UeJV7r1dJIx08ME9kNJ+x/XtB8klRIji16NiQUS +qc6p8z0M2AnbJzsRfWZRH8FeYwKBgQDHu8dzdtVGI7MtxfPOE/bfajiopDg8BdTC +pdug2T8XofRHRq7Q+0vYjTAZFT/slib91Pk6VvvPdo9VBZiL4omv4dAq6mOOdX/c +U14mJe1X5GCrr8ExZ8BfNJ3t/6sV1fcxyJwAw7iBguqxA2JqdM/wFk10K8XqvzVn +CD6O9yGt2QKBgFX1BMi8N538809vs41S7l9hCQNOQZNo/O+2M5yv6ECRkbtoQKKw +1x03bMUGNJaLuELweXE5Z8GGo5bZTe5X3F+DKHlr+DtO1C+ieUaa9HY2MAmMdLCn +2/qrREGLo+oEs4YKmuzC/taUp/ZNPKOAMISNdluFyFVg51pozPrgrVbTAoGBAKkE 
+LBl3O67o0t0vH8sJdeVFG8EJhlS0koBMnfgVHqC++dm+5HwPyvTrNQJkyv1HaqNt +r6FArkG3ED9gRuBIyT6+lctbIPgSUip9mbQqcBfqOCvQxGksZMur2ODncz09HLtS +CUFUXjOqNzOnq4ZuZu/Bz7U4vXiSaXxQq6+LTUKxAoGAFZU/qrI06XxnrE9A1X0W +l7DSkpZaDcu11NrZ473yONih/xOZNh4SSBpX8a7F6Pmh9BdtGqphML8NFPvQKcfP +b9H2iid2tc292uyrUEb5uTMmv61zoTwtitqLzO0+tS6PT3fXobX+eyeEWKzPBljL +HFtxG5CCXpkdnWRmaJnhTzA= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_composable_protocols/configs/users.xml b/tests/integration/test_composable_protocols/configs/users.xml new file mode 100644 index 00000000000..6f94d1696e3 --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/users.xml @@ -0,0 +1,18 @@ + + + + 10000000000 + 64999 + + + + + + + + ::/0 + + default + + + diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py new file mode 100644 index 00000000000..d4607495da7 --- /dev/null +++ b/tests/integration/test_composable_protocols/test.py @@ -0,0 +1,63 @@ +import ssl +import pytest +import os.path as p +import os +from helpers.cluster import ClickHouseCluster +from helpers.client import Client +import urllib.request, urllib.parse + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +cluster = ClickHouseCluster(__file__) +server = cluster.add_instance("server", base_config_dir="configs", main_configs=["configs/server.crt", "configs/server.key"]) + + +@pytest.fixture(scope="module", autouse=True) +def setup_nodes(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def execute_query_https(host, port, query): + url = ( + f"https://{host}:{port}/?query={urllib.parse.quote(query)}" + ) + + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + + request = urllib.request.Request(url) + response = urllib.request.urlopen(request, context=ctx).read() + return response.decode("utf-8") + + +def execute_query_http(host, port, query): + url = ( + 
f"http://{host}:{port}/?query={urllib.parse.quote(query)}" + ) + + request = urllib.request.Request(url) + response = urllib.request.urlopen(request).read() + return response.decode("utf-8") + + +def test_connections(): + + client = Client(server.ip_address, 9000, command=cluster.client_bin_path) + assert client.query("SELECT 1") == "1\n" + + client = Client(server.ip_address, 9440, command=cluster.client_bin_path, secure=True, config=f"{SCRIPT_DIR}/configs/client.xml") + assert client.query("SELECT 1") == "1\n" + + client = Client(server.ip_address, 9001, command=cluster.client_bin_path) + assert client.query("SELECT 1") == "1\n" + + assert execute_query_http(server.ip_address, 8123, "SELECT 1") == "1\n" + + assert execute_query_https(server.ip_address, 8443, "SELECT 1") == "1\n" + + assert execute_query_https(server.ip_address, 8444, "SELECT 1") == "1\n" From 7a6386c7329aeb81cf6cd6304fd78022cd31fd6e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 25 Sep 2022 16:13:17 +0000 Subject: [PATCH 041/266] Automatic style fix --- tests/integration/helpers/client.py | 11 ++++++++-- .../test_composable_protocols/test.py | 22 ++++++++++++------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/tests/integration/helpers/client.py b/tests/integration/helpers/client.py index fa7d1b379da..ab1cc65e9a9 100644 --- a/tests/integration/helpers/client.py +++ b/tests/integration/helpers/client.py @@ -8,14 +8,21 @@ DEFAULT_QUERY_TIMEOUT = 600 class Client: - def __init__(self, host, port=9000, command="/usr/bin/clickhouse-client", secure=False, config=None): + def __init__( + self, + host, + port=9000, + command="/usr/bin/clickhouse-client", + secure=False, + config=None, + ): self.host = host self.port = port self.command = [command] if os.path.basename(command) == "clickhouse": self.command.append("client") - + if secure: self.command.append("--secure") if config is not None: diff --git a/tests/integration/test_composable_protocols/test.py 
b/tests/integration/test_composable_protocols/test.py index d4607495da7..c0c0e5e0a83 100644 --- a/tests/integration/test_composable_protocols/test.py +++ b/tests/integration/test_composable_protocols/test.py @@ -9,7 +9,11 @@ import urllib.request, urllib.parse SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) cluster = ClickHouseCluster(__file__) -server = cluster.add_instance("server", base_config_dir="configs", main_configs=["configs/server.crt", "configs/server.key"]) +server = cluster.add_instance( + "server", + base_config_dir="configs", + main_configs=["configs/server.crt", "configs/server.key"], +) @pytest.fixture(scope="module", autouse=True) @@ -22,9 +26,7 @@ def setup_nodes(): def execute_query_https(host, port, query): - url = ( - f"https://{host}:{port}/?query={urllib.parse.quote(query)}" - ) + url = f"https://{host}:{port}/?query={urllib.parse.quote(query)}" ctx = ssl.create_default_context() ctx.check_hostname = False @@ -36,9 +38,7 @@ def execute_query_https(host, port, query): def execute_query_http(host, port, query): - url = ( - f"http://{host}:{port}/?query={urllib.parse.quote(query)}" - ) + url = f"http://{host}:{port}/?query={urllib.parse.quote(query)}" request = urllib.request.Request(url) response = urllib.request.urlopen(request).read() @@ -50,7 +50,13 @@ def test_connections(): client = Client(server.ip_address, 9000, command=cluster.client_bin_path) assert client.query("SELECT 1") == "1\n" - client = Client(server.ip_address, 9440, command=cluster.client_bin_path, secure=True, config=f"{SCRIPT_DIR}/configs/client.xml") + client = Client( + server.ip_address, + 9440, + command=cluster.client_bin_path, + secure=True, + config=f"{SCRIPT_DIR}/configs/client.xml", + ) assert client.query("SELECT 1") == "1\n" client = Client(server.ip_address, 9001, command=cluster.client_bin_path) From 9b35d54e8fc1f9b0e6e7d43817bc76c4568f5ef3 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 26 Aug 2022 12:21:30 +0000 Subject: [PATCH 042/266] 
First try --- programs/server/Server.cpp | 1 - src/Core/Settings.h | 2 +- src/Interpreters/AsynchronousInsertQueue.cpp | 16 ++++++++++------ src/Interpreters/AsynchronousInsertQueue.h | 7 ++++--- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 14f97923ce3..c2eedbbb99c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1417,7 +1417,6 @@ int Server::main(const std::vector & /*args*/) global_context->setAsynchronousInsertQueue(std::make_shared( global_context, settings.async_insert_threads, - settings.async_insert_max_data_size, AsynchronousInsertQueue::Timeout{.busy = settings.async_insert_busy_timeout_ms, .stale = settings.async_insert_stale_timeout_ms})); /// Size of cache for marks (index of MergeTree family of tables). diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9dd87904a56..c718f05d2d9 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -592,7 +592,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. Makes sense only for inserts via HTTP protocol. 
If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \ M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \ M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \ - M(UInt64, async_insert_max_data_size, 100000, "Maximum size in bytes of unparsed data collected per query before being inserted", 0) \ + M(UInt64, async_insert_max_data_size, 1000000, "Maximum size in bytes of unparsed data collected per query before being inserted", 0) \ M(Milliseconds, async_insert_busy_timeout_ms, 200, "Maximum time to wait before dumping collected data per query since the first data appeared", 0) \ M(Milliseconds, async_insert_stale_timeout_ms, 0, "Maximum time to wait before dumping collected data per query since the last data appeared. Zero means no timeout at all", 0) \ \ diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index cad2200c5ec..55ce518a591 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -120,9 +120,8 @@ std::exception_ptr AsynchronousInsertQueue::InsertData::Entry::getException() co } -AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, size_t max_data_size_, const Timeout & timeouts) +AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, const Timeout & timeouts) : WithContext(context_) - , max_data_size(max_data_size_) , busy_timeout(timeouts.busy) , stale_timeout(timeouts.stale) , pool(pool_size) @@ -250,7 +249,10 @@ void AsynchronousInsertQueue::pushImpl(InsertData::EntryPtr entry, QueueIterator LOG_TRACE(log, "Have {} pending inserts with total {} bytes of data for query '{}'", data->entries.size(), data->size, queryToString(it->first.query)); - if 
(data->size > max_data_size) + /// Here we check whether we hit the limit on maximum data size in the buffer. + /// And use setting from query context! + /// It works, because queries with the same set of settings are already grouped together. + if (data->size > it->first.settings.async_insert_max_data_size) scheduleDataProcessingJob(it->first, std::move(data), getContext()); CurrentMetrics::add(CurrentMetrics::PendingAsyncInsert); @@ -290,17 +292,19 @@ void AsynchronousInsertQueue::busyCheck() timeout = busy_timeout; std::shared_lock read_lock(rwlock); + const auto now = std::chrono::steady_clock::now(); + for (auto & [key, elem] : queue) { std::lock_guard data_lock(elem->mutex); if (!elem->data) continue; - auto lag = std::chrono::steady_clock::now() - elem->data->first_update; - if (lag >= busy_timeout) + const auto deadline = elem->data->first_update + std::chrono::milliseconds(key.settings.async_insert_busy_timeout_ms); + if (now >= deadline) scheduleDataProcessingJob(key, std::move(elem->data), getContext()); else - timeout = std::min(timeout, std::chrono::ceil(busy_timeout - lag)); + timeout = std::min(timeout, std::chrono::ceil(deadline - now)); } } } diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index 6d9aeb7f55d..9f99c334752 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -25,7 +25,7 @@ public: Milliseconds stale; }; - AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, size_t max_data_size, const Timeout & timeouts); + AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, const Timeout & timeouts); ~AsynchronousInsertQueue(); void push(ASTPtr query, ContextPtr query_context); @@ -109,9 +109,10 @@ private: /// grow for a long period of time and users will be able to select new data in deterministic manner. 
/// - stale_timeout: if queue is stale for too long, then we dump the data too, so that users will be able to select the last /// piece of inserted data. - /// - max_data_size: if the maximum size of data is reached, then again we dump the data. + /// + /// During processing incoming INSERT queries we can also check whether the maximum size of data in buffer is reached (async_insert_max_data_size setting) + /// If so, then again we dump the data. - const size_t max_data_size; /// in bytes const Milliseconds busy_timeout; const Milliseconds stale_timeout; From 636cedf488c76c4649726786f5890e1830cf7309 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 26 Aug 2022 20:29:26 +0000 Subject: [PATCH 043/266] Better --- programs/server/Server.cpp | 3 +- src/Core/Settings.h | 2 +- src/Interpreters/AsynchronousInsertQueue.cpp | 73 ++++++++----------- src/Interpreters/AsynchronousInsertQueue.h | 35 ++++----- .../StorageSystemAsynchronousInserts.cpp | 3 +- 5 files changed, 48 insertions(+), 68 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index c2eedbbb99c..758c8b5999d 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1416,8 +1416,7 @@ int Server::main(const std::vector & /*args*/) if (settings.async_insert_threads) global_context->setAsynchronousInsertQueue(std::make_shared( global_context, - settings.async_insert_threads, - AsynchronousInsertQueue::Timeout{.busy = settings.async_insert_busy_timeout_ms, .stale = settings.async_insert_stale_timeout_ms})); + settings.async_insert_threads)); /// Size of cache for marks (index of MergeTree family of tables). 
size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c718f05d2d9..2d47024d2b1 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -594,7 +594,6 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \ M(UInt64, async_insert_max_data_size, 1000000, "Maximum size in bytes of unparsed data collected per query before being inserted", 0) \ M(Milliseconds, async_insert_busy_timeout_ms, 200, "Maximum time to wait before dumping collected data per query since the first data appeared", 0) \ - M(Milliseconds, async_insert_stale_timeout_ms, 0, "Maximum time to wait before dumping collected data per query since the last data appeared. Zero means no timeout at all", 0) \ \ M(UInt64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \ M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \ @@ -667,6 +666,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) MAKE_OBSOLETE(M, Bool, allow_experimental_database_atomic, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_bigint_types, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_window_functions, true) \ + MAKE_OBSOLETE(M, Milliseconds, async_insert_stale_timeout_ms, 0) \ MAKE_OBSOLETE(M, HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT) \ MAKE_OBSOLETE(M, Bool, database_replicated_ddl_output, true) \ MAKE_OBSOLETE(M, UInt64, replication_alter_columns_timeout, 60) \ diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 55ce518a591..af3dadd3c1d 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -120,10 +120,8 @@ std::exception_ptr 
AsynchronousInsertQueue::InsertData::Entry::getException() co } -AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, const Timeout & timeouts) +AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size) : WithContext(context_) - , busy_timeout(timeouts.busy) - , stale_timeout(timeouts.stale) , pool(pool_size) , dump_by_first_update_thread(&AsynchronousInsertQueue::busyCheck, this) , cleanup_thread(&AsynchronousInsertQueue::cleanup, this) @@ -131,9 +129,6 @@ AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t poo using namespace std::chrono; assert(pool_size); - - if (stale_timeout > 0ms) - dump_by_last_update_thread = ThreadFromGlobalPool(&AsynchronousInsertQueue::staleCheck, this); } AsynchronousInsertQueue::~AsynchronousInsertQueue() @@ -154,9 +149,6 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue() assert(cleanup_thread.joinable()); cleanup_thread.join(); - if (dump_by_last_update_thread.joinable()) - dump_by_last_update_thread.join(); - pool.wait(); std::lock_guard lock(currently_processing_mutex); @@ -233,12 +225,15 @@ void AsynchronousInsertQueue::pushImpl(InsertData::EntryPtr entry, QueueIterator std::lock_guard data_lock(data_mutex); if (!data) - data = std::make_unique(); + { + auto now = std::chrono::steady_clock::now(); + data = std::make_unique(now); + deadline_queue.insert({now, it}); + } size_t entry_data_size = entry->bytes.size(); data->size += entry_data_size; - data->last_update = std::chrono::steady_clock::now(); data->entries.emplace_back(entry); { @@ -284,46 +279,37 @@ void AsynchronousInsertQueue::waitForProcessingQuery(const String & query_id, co void AsynchronousInsertQueue::busyCheck() { - auto timeout = busy_timeout; - - while (!waitForShutdown(timeout)) + while (!shutdown) { - /// TODO: use priority queue instead of raw unsorted queue. 
- timeout = busy_timeout; - std::shared_lock read_lock(rwlock); + std::unique_lock lock(deadline_mutex); + are_tasks_available.wait(lock, [this]() + { + if (shutdown) + return true; + + if (!deadline_queue.empty() && deadline_queue.begin()->first >= std::chrono::steady_clock::now()) + return true; + + return false; + }); const auto now = std::chrono::steady_clock::now(); - for (auto & [key, elem] : queue) + while (true) { + if (deadline_queue.empty() || deadline_queue.begin()->first < now) + break; + + + std::shared_lock read_lock(rwlock); + auto main_queue_it = deadline_queue.begin()->second; + auto & [key, elem] = *main_queue_it; + std::lock_guard data_lock(elem->mutex); if (!elem->data) continue; - const auto deadline = elem->data->first_update + std::chrono::milliseconds(key.settings.async_insert_busy_timeout_ms); - if (now >= deadline) - scheduleDataProcessingJob(key, std::move(elem->data), getContext()); - else - timeout = std::min(timeout, std::chrono::ceil(deadline - now)); - } - } -} - -void AsynchronousInsertQueue::staleCheck() -{ - while (!waitForShutdown(stale_timeout)) - { - std::shared_lock read_lock(rwlock); - - for (auto & [key, elem] : queue) - { - std::lock_guard data_lock(elem->mutex); - if (!elem->data) - continue; - - auto lag = std::chrono::steady_clock::now() - elem->data->last_update; - if (lag >= stale_timeout) - scheduleDataProcessingJob(key, std::move(elem->data), getContext()); + scheduleDataProcessingJob(key, std::move(elem->data), getContext()); } } } @@ -332,7 +318,8 @@ void AsynchronousInsertQueue::cleanup() { /// Do not run cleanup too often, /// because it holds exclusive lock. - auto timeout = busy_timeout * 5; + /// FIXME: Come up with another mechanism. 
+ auto timeout = Milliseconds(1000); while (!waitForShutdown(timeout)) { diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index 9f99c334752..cc1e8048612 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -18,14 +18,7 @@ class AsynchronousInsertQueue : public WithContext public: using Milliseconds = std::chrono::milliseconds; - /// Using structure to allow and benefit from designated initialization and not mess with a positional arguments in ctor. - struct Timeout - { - Milliseconds busy; - Milliseconds stale; - }; - - AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, const Timeout & timeouts); + AsynchronousInsertQueue(ContextPtr context_, size_t pool_size); ~AsynchronousInsertQueue(); void push(ASTPtr query, ContextPtr query_context); @@ -69,6 +62,10 @@ private: std::exception_ptr exception; }; + explicit InsertData(std::chrono::steady_clock::time_point now) + : first_update(now) + {} + using EntryPtr = std::shared_ptr; std::list entries; @@ -76,11 +73,7 @@ private: /// Timestamp of the first insert into queue, or after the last queue dump. /// Used to detect for how long the queue is active, so we can dump it by timer. - std::chrono::time_point first_update = std::chrono::steady_clock::now(); - - /// Timestamp of the last insert into queue. - /// Used to detect for how long the queue is stale, so we can dump it by another timer. 
- std::chrono::time_point last_update; + std::chrono::time_point first_update; }; using InsertDataPtr = std::unique_ptr; @@ -96,10 +89,17 @@ private: using Queue = std::unordered_map, InsertQuery::Hash>; using QueueIterator = Queue::iterator; + /// Ordered container + using DeadlineQueue = std::map; + mutable std::shared_mutex rwlock; Queue queue; + mutable std::mutex deadline_mutex; + mutable std::condition_variable are_tasks_available; + DeadlineQueue deadline_queue; + using QueryIdToEntry = std::unordered_map; mutable std::mutex currently_processing_mutex; QueryIdToEntry currently_processing_queries; @@ -113,22 +113,15 @@ private: /// During processing incoming INSERT queries we can also check whether the maximum size of data in buffer is reached (async_insert_max_data_size setting) /// If so, then again we dump the data. - const Milliseconds busy_timeout; - const Milliseconds stale_timeout; - - std::mutex shutdown_mutex; - std::condition_variable shutdown_cv; - bool shutdown{false}; + std::atomic shutdown{false}; ThreadPool pool; /// dump the data only inside this pool. ThreadFromGlobalPool dump_by_first_update_thread; /// uses busy_timeout and busyCheck() - ThreadFromGlobalPool dump_by_last_update_thread; /// uses stale_timeout and staleCheck() ThreadFromGlobalPool cleanup_thread; /// uses busy_timeout and cleanup() Poco::Logger * log = &Poco::Logger::get("AsynchronousInsertQueue"); void busyCheck(); - void staleCheck(); void cleanup(); /// Should be called with shared or exclusively locked 'rwlock'. 
diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.cpp b/src/Storages/System/StorageSystemAsynchronousInserts.cpp index 80fc070c83a..7c100a831c3 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.cpp +++ b/src/Storages/System/StorageSystemAsynchronousInserts.cpp @@ -77,7 +77,8 @@ void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, Co res_columns[i++]->insert(insert_query.format); res_columns[i++]->insert(time_in_microseconds(elem->data->first_update)); - res_columns[i++]->insert(time_in_microseconds(elem->data->last_update)); + /// FIXME: + res_columns[i++]->insert(time_in_microseconds(std::chrono::steady_clock::now())); res_columns[i++]->insert(elem->data->size); Array arr_query_id; From a5eacc2e029e596484b9e9ac5eeb250df4d65cbb Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 26 Aug 2022 20:41:34 +0000 Subject: [PATCH 044/266] Even better --- src/Interpreters/AsynchronousInsertQueue.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index af3dadd3c1d..af546855f05 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -143,6 +143,11 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue() shutdown_cv.notify_all(); } + { + std::lock_guard lock(deadline_mutex); + are_tasks_available.notify_one(); + } + assert(dump_by_first_update_thread.joinable()); dump_by_first_update_thread.join(); @@ -228,7 +233,10 @@ void AsynchronousInsertQueue::pushImpl(InsertData::EntryPtr entry, QueueIterator { auto now = std::chrono::steady_clock::now(); data = std::make_unique(now); - deadline_queue.insert({now, it}); + + std::lock_guard lock(deadline_mutex); + deadline_queue.insert({now + Milliseconds{it->first.settings.async_insert_busy_timeout_ms}, it}); + are_tasks_available.notify_one(); } size_t entry_data_size = entry->bytes.size(); @@ 
-282,7 +290,7 @@ void AsynchronousInsertQueue::busyCheck() while (!shutdown) { std::unique_lock lock(deadline_mutex); - are_tasks_available.wait(lock, [this]() + are_tasks_available.wait_for(lock, Milliseconds(getContext()->getSettingsRef().async_insert_busy_timeout_ms), [this]() { if (shutdown) return true; @@ -293,6 +301,9 @@ void AsynchronousInsertQueue::busyCheck() return false; }); + if (shutdown) + return; + const auto now = std::chrono::steady_clock::now(); while (true) @@ -302,9 +313,12 @@ void AsynchronousInsertQueue::busyCheck() std::shared_lock read_lock(rwlock); + std::unique_lock deadline_lock(deadline_mutex); auto main_queue_it = deadline_queue.begin()->second; auto & [key, elem] = *main_queue_it; + deadline_queue.erase(deadline_queue.begin()); + std::lock_guard data_lock(elem->mutex); if (!elem->data) continue; From a089ab9238caa8c5844d60e2f87190ef4d85cc9d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 29 Aug 2022 10:53:27 +0000 Subject: [PATCH 045/266] Style --- src/Storages/System/StorageSystemAsynchronousInserts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.cpp b/src/Storages/System/StorageSystemAsynchronousInserts.cpp index 7c100a831c3..0aca98b9864 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.cpp +++ b/src/Storages/System/StorageSystemAsynchronousInserts.cpp @@ -77,7 +77,7 @@ void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, Co res_columns[i++]->insert(insert_query.format); res_columns[i++]->insert(time_in_microseconds(elem->data->first_update)); - /// FIXME: + /// FIXME: res_columns[i++]->insert(time_in_microseconds(std::chrono::steady_clock::now())); res_columns[i++]->insert(elem->data->size); From fa587ce05796f4deb104ee65d2fa672b2e1acadb Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 30 Aug 2022 17:31:10 +0000 Subject: [PATCH 046/266] Better --- src/Interpreters/AsynchronousInsertQueue.cpp | 5 
++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index af546855f05..6e670efeae6 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -295,7 +295,7 @@ void AsynchronousInsertQueue::busyCheck() if (shutdown) return true; - if (!deadline_queue.empty() && deadline_queue.begin()->first >= std::chrono::steady_clock::now()) + if (!deadline_queue.empty() && deadline_queue.begin()->first < std::chrono::steady_clock::now()) return true; return false; @@ -308,12 +308,11 @@ void AsynchronousInsertQueue::busyCheck() while (true) { - if (deadline_queue.empty() || deadline_queue.begin()->first < now) + if (deadline_queue.empty() || deadline_queue.begin()->first > now) break; std::shared_lock read_lock(rwlock); - std::unique_lock deadline_lock(deadline_mutex); auto main_queue_it = deadline_queue.begin()->second; auto & [key, elem] = *main_queue_it; From 852d084950079c2704537491040464d52c93747a Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 14 Sep 2022 20:31:19 +0000 Subject: [PATCH 047/266] Save --- src/Core/Settings.h | 1 + src/Interpreters/AsynchronousInsertQueue.cpp | 30 ++++----- src/Interpreters/AsynchronousInsertQueue.h | 9 ++- .../tests/gtest_async_inserts.cpp | 63 +++++++++++++++++++ .../StorageSystemAsynchronousInserts.cpp | 3 - 5 files changed, 83 insertions(+), 23 deletions(-) create mode 100644 src/Interpreters/tests/gtest_async_inserts.cpp diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2d47024d2b1..c41dd92df61 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -594,6 +594,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \ M(UInt64, async_insert_max_data_size, 1000000, "Maximum size in bytes of unparsed 
data collected per query before being inserted", 0) \ M(Milliseconds, async_insert_busy_timeout_ms, 200, "Maximum time to wait before dumping collected data per query since the first data appeared", 0) \ + M(Milliseconds, async_insert_cleanup_timeout_ms, 1000, "Time to wait before each iteration of cleaning up buffers for INSERT queries which don't appear anymore. Only has meaning at server startup.", 0) \ \ M(UInt64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \ M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \ diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 6e670efeae6..4681a9b6bd5 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -120,8 +120,9 @@ std::exception_ptr AsynchronousInsertQueue::InsertData::Entry::getException() co } -AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size) +AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, Milliseconds cleanup_timeout_) : WithContext(context_) + , cleanup_timeout(cleanup_timeout_) , pool(pool_size) , dump_by_first_update_thread(&AsynchronousInsertQueue::busyCheck, this) , cleanup_thread(&AsynchronousInsertQueue::cleanup, this) @@ -137,14 +138,9 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue() LOG_TRACE(log, "Shutting down the asynchronous insertion queue"); - { - std::lock_guard lock(shutdown_mutex); - shutdown = true; - shutdown_cv.notify_all(); - } - { std::lock_guard lock(deadline_mutex); + shutdown = true; are_tasks_available.notify_one(); } @@ -329,13 +325,16 @@ void AsynchronousInsertQueue::busyCheck() void AsynchronousInsertQueue::cleanup() { - /// Do not run cleanup too often, - /// because it holds exclusive lock. - /// FIXME: Come up with another mechanism. 
- auto timeout = Milliseconds(1000); - - while (!waitForShutdown(timeout)) + while (true) { + { + std::unique_lock shutdown_lock(shutdown_mutex); + shutdown_cv.wait_for(shutdown_lock, Milliseconds(cleanup_timeout), [this]() { return shutdown; }); + + if (shutdown) + return; + } + std::vector keys_to_remove; { @@ -387,11 +386,6 @@ void AsynchronousInsertQueue::cleanup() } } -bool AsynchronousInsertQueue::waitForShutdown(const Milliseconds & timeout) -{ - std::unique_lock shutdown_lock(shutdown_mutex); - return shutdown_cv.wait_for(shutdown_lock, timeout, [this]() { return shutdown; }); -} // static void AsynchronousInsertQueue::processData(InsertQuery key, InsertDataPtr data, ContextPtr global_context) diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index cc1e8048612..aa5f02203fa 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -18,7 +18,7 @@ class AsynchronousInsertQueue : public WithContext public: using Milliseconds = std::chrono::milliseconds; - AsynchronousInsertQueue(ContextPtr context_, size_t pool_size); + AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, Milliseconds cleanup_timeout); ~AsynchronousInsertQueue(); void push(ASTPtr query, ContextPtr query_context); @@ -96,6 +96,9 @@ private: mutable std::shared_mutex rwlock; Queue queue; + mutable std::mutex shutdown_mutex; + mutable std::condition_variable shutdown_cv; + mutable std::mutex deadline_mutex; mutable std::condition_variable are_tasks_available; DeadlineQueue deadline_queue; @@ -113,7 +116,9 @@ private: /// During processing incoming INSERT queries we can also check whether the maximum size of data in buffer is reached (async_insert_max_data_size setting) /// If so, then again we dump the data. - std::atomic shutdown{false}; + const Milliseconds cleanup_timeout; + + bool shutdown{false}; ThreadPool pool; /// dump the data only inside this pool. 
ThreadFromGlobalPool dump_by_first_update_thread; /// uses busy_timeout and busyCheck() diff --git a/src/Interpreters/tests/gtest_async_inserts.cpp b/src/Interpreters/tests/gtest_async_inserts.cpp new file mode 100644 index 00000000000..0720592b8a7 --- /dev/null +++ b/src/Interpreters/tests/gtest_async_inserts.cpp @@ -0,0 +1,63 @@ +#include + +#include +#include +#include "Processors/Executors/PullingPipelineExecutor.h" + +#include +#include +#include +#include +#include +#include +#include +#include "Common/Exception.h" + +using namespace DB; + +static SharedContextHolder shared_context; +static ContextMutablePtr context; + +static bool initialize() +{ + try + { + shared_context = Context::createShared(); + context = Context::createGlobal(shared_context.get()); + context->makeGlobalContext(); + context->setApplicationType(Context::ApplicationType::LOCAL); + + // registerFunctions(); + // registerAggregateFunctions(); + // registerTableFunctions(); + // registerStorages(); + // registerDictionaries(); + // registerDisks(); + // registerFormats(); + + return true; + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + throw; + } +} + +[[ maybe_unused ]] static bool initialized = initialize(); + +TEST(AsyncInsertQueue, SimpleTest) +{ + try + { + auto io = executeQuery("CREATE TABLE SimpleTest ENGINE=Memory()", context, true, QueryProcessingStage::Complete); + PullingPipelineExecutor executor(io.pipeline); + Block res; + while (!res && executor.pull(res)); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + +} diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.cpp b/src/Storages/System/StorageSystemAsynchronousInserts.cpp index 0aca98b9864..5ebdb828c34 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.cpp +++ b/src/Storages/System/StorageSystemAsynchronousInserts.cpp @@ -24,7 +24,6 @@ NamesAndTypesList StorageSystemAsynchronousInserts::getNamesAndTypes() {"table", std::make_shared()}, {"format", std::make_shared()}, {"first_update", std::make_shared(TIME_SCALE)}, - {"last_update", std::make_shared(TIME_SCALE)}, {"total_bytes", std::make_shared()}, {"entries.query_id", std::make_shared(std::make_shared())}, {"entries.bytes", std::make_shared(std::make_shared())}, @@ -77,8 +76,6 @@ void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, Co res_columns[i++]->insert(insert_query.format); res_columns[i++]->insert(time_in_microseconds(elem->data->first_update)); - /// FIXME: - res_columns[i++]->insert(time_in_microseconds(std::chrono::steady_clock::now())); res_columns[i++]->insert(elem->data->size); Array arr_query_id; From 502338560c4175e8c50083db22464e15a005c1a6 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 20 Sep 2022 22:46:40 +0000 Subject: [PATCH 048/266] Added a test --- programs/server/Server.cpp | 3 +- ...51_async_insert_user_level_settings.python | 62 +++++++++++++++++++ ...async_insert_user_level_settings.reference | 1 + .../02451_async_insert_user_level_settings.sh | 9 +++ 4 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02451_async_insert_user_level_settings.python create mode 100644 tests/queries/0_stateless/02451_async_insert_user_level_settings.reference create mode 100755 tests/queries/0_stateless/02451_async_insert_user_level_settings.sh diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 758c8b5999d..8c0ebba694d 100644 --- a/programs/server/Server.cpp +++ 
b/programs/server/Server.cpp @@ -1416,7 +1416,8 @@ int Server::main(const std::vector & /*args*/) if (settings.async_insert_threads) global_context->setAsynchronousInsertQueue(std::make_shared( global_context, - settings.async_insert_threads)); + settings.async_insert_threads, + settings.async_insert_cleanup_timeout_ms)); /// Size of cache for marks (index of MergeTree family of tables). size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); diff --git a/tests/queries/0_stateless/02451_async_insert_user_level_settings.python b/tests/queries/0_stateless/02451_async_insert_user_level_settings.python new file mode 100644 index 00000000000..9fe6142edc6 --- /dev/null +++ b/tests/queries/0_stateless/02451_async_insert_user_level_settings.python @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +import os +import sys +import time + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) + +CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL") +CLICKHOUSE_TMP = os.environ.get("CLICKHOUSE_TMP") + +from pure_http_client import ClickHouseClient + +client = ClickHouseClient() + +NUM_RUNS = 20 +TIME_TO_WAIT_MS = 500 + +# The purpose of this test is to check that AsyncInsertQueue +# respects timeouts specified in the scope of query. +# Like if we execute NUM_RUNS subsequent inserts +# then we should spend at least (NUM_RUNS - 1) * TIME_TO_WAIT_MS +# Because each query corresponds to a timepoint when it been flushed +# And time period between first and last flush is exactly such +# as descibed above. 
+# Note that this doesn't include the time to process the query itself +# and this time maybe different depending on the build type (release or with sanitizer) + +gen_data_query = "SELECT number + {} AS id, toString(id) AS s, range(id) AS arr FROM numbers(10) FORMAT TSV" +insert_query = "INSERT INTO t_async_insert_user_settings FORMAT TSV" +settings = { + "async_insert": 1, + "wait_for_async_insert": 1, + "async_insert_busy_timeout_ms": TIME_TO_WAIT_MS, +} + +all_data = [] + +for i in range(NUM_RUNS): + all_data.append( + client.query(gen_data_query.format(i * 10), settings={}, binary_result=True) + ) + +client.query("DROP TABLE IF EXISTS t_async_insert_user_settings") +client.query( + "CREATE TABLE t_async_insert_user_settings (id UInt64, s String, arr Array(UInt64)) ENGINE = Memory" +) + +start_ms = time.time() * 1000.0 +for i in range(NUM_RUNS): + client.query_with_data(insert_query, all_data[i], settings=settings) +end_ms = time.time() * 1000.0 + +duration = end_ms - start_ms + +expected = (NUM_RUNS - 1) * TIME_TO_WAIT_MS +if duration >= expected: + print("Ok.") +else: + print(f"Fail. Duration: {duration}. Expected: {expected}") + +client.query("DROP TABLE IF EXISTS t_async_insert_user_settings") diff --git a/tests/queries/0_stateless/02451_async_insert_user_level_settings.reference b/tests/queries/0_stateless/02451_async_insert_user_level_settings.reference new file mode 100644 index 00000000000..587579af915 --- /dev/null +++ b/tests/queries/0_stateless/02451_async_insert_user_level_settings.reference @@ -0,0 +1 @@ +Ok. diff --git a/tests/queries/0_stateless/02451_async_insert_user_level_settings.sh b/tests/queries/0_stateless/02451_async_insert_user_level_settings.sh new file mode 100755 index 00000000000..3d627e273b9 --- /dev/null +++ b/tests/queries/0_stateless/02451_async_insert_user_level_settings.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test +python3 "$CURDIR"/02451_async_insert_user_level_settings.python From 4d1d87da2c4370fbeafbc35407e3a6b56c637e51 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 20 Sep 2022 22:53:09 +0000 Subject: [PATCH 049/266] Delete garbage --- .../tests/gtest_async_inserts.cpp | 63 ------------------- 1 file changed, 63 deletions(-) delete mode 100644 src/Interpreters/tests/gtest_async_inserts.cpp diff --git a/src/Interpreters/tests/gtest_async_inserts.cpp b/src/Interpreters/tests/gtest_async_inserts.cpp deleted file mode 100644 index 0720592b8a7..00000000000 --- a/src/Interpreters/tests/gtest_async_inserts.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include - -#include -#include -#include "Processors/Executors/PullingPipelineExecutor.h" - -#include -#include -#include -#include -#include -#include -#include -#include "Common/Exception.h" - -using namespace DB; - -static SharedContextHolder shared_context; -static ContextMutablePtr context; - -static bool initialize() -{ - try - { - shared_context = Context::createShared(); - context = Context::createGlobal(shared_context.get()); - context->makeGlobalContext(); - context->setApplicationType(Context::ApplicationType::LOCAL); - - // registerFunctions(); - // registerAggregateFunctions(); - // registerTableFunctions(); - // registerStorages(); - // registerDictionaries(); - // registerDisks(); - // registerFormats(); - - return true; - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - throw; - } -} - -[[ maybe_unused ]] static bool initialized = initialize(); - -TEST(AsyncInsertQueue, SimpleTest) -{ - try - { - auto io = executeQuery("CREATE TABLE SimpleTest ENGINE=Memory()", context, true, QueryProcessingStage::Complete); - PullingPipelineExecutor executor(io.pipeline); - Block res; - while (!res && executor.pull(res)); - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - -} From 3e6cc4421e782292812a688ebbce5960256d4e8d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 21 Sep 2022 17:27:22 +0000 Subject: [PATCH 050/266] Fix fast test --- .../queries/0_stateless/02117_show_create_table_system.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 02a0d339e3a..7653a27b34a 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -12,7 +12,6 @@ CREATE TABLE system.asynchronous_inserts `table` String, `format` String, `first_update` DateTime64(6), - `last_update` DateTime64(6), `total_bytes` UInt64, `entries.query_id` Array(String), `entries.bytes` Array(UInt64), From c99305126555919ba42aa760a10856ed6fc42217 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 26 Sep 2022 15:17:34 +0000 Subject: [PATCH 051/266] Fix deadlock and flaky test --- src/Interpreters/AsynchronousInsertQueue.cpp | 58 +++++++++++-------- src/Interpreters/AsynchronousInsertQueue.h | 8 ++- ...51_async_insert_user_level_settings.python | 11 +--- 3 files changed, 41 insertions(+), 36 deletions(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 4681a9b6bd5..7f03bc25b62 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -138,11 +138,15 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue() LOG_TRACE(log, "Shutting down the asynchronous insertion queue"); + shutdown = true; { std::lock_guard lock(deadline_mutex); - shutdown = true; are_tasks_available.notify_one(); } + { + std::lock_guard lock(cleanup_mutex); + cleanup_can_run.notify_one(); + } assert(dump_by_first_update_thread.joinable()); dump_by_first_update_thread.join(); @@ -285,35 
+289,39 @@ void AsynchronousInsertQueue::busyCheck() { while (!shutdown) { - std::unique_lock lock(deadline_mutex); - are_tasks_available.wait_for(lock, Milliseconds(getContext()->getSettingsRef().async_insert_busy_timeout_ms), [this]() + std::vector entries_to_flush; { + std::unique_lock deadline_lock(deadline_mutex); + are_tasks_available.wait_for(deadline_lock, Milliseconds(getContext()->getSettingsRef().async_insert_busy_timeout_ms), [this]() + { + if (shutdown) + return true; + + if (!deadline_queue.empty() && deadline_queue.begin()->first < std::chrono::steady_clock::now()) + return true; + + return false; + }); + if (shutdown) - return true; + return; - if (!deadline_queue.empty() && deadline_queue.begin()->first < std::chrono::steady_clock::now()) - return true; + const auto now = std::chrono::steady_clock::now(); - return false; - }); + while (true) + { + if (deadline_queue.empty() || deadline_queue.begin()->first > now) + break; - if (shutdown) - return; + entries_to_flush.emplace_back(deadline_queue.begin()->second); + deadline_queue.erase(deadline_queue.begin()); + } + } - const auto now = std::chrono::steady_clock::now(); - - while (true) + std::shared_lock read_lock(rwlock); + for (auto & entry : entries_to_flush) { - if (deadline_queue.empty() || deadline_queue.begin()->first > now) - break; - - - std::shared_lock read_lock(rwlock); - auto main_queue_it = deadline_queue.begin()->second; - auto & [key, elem] = *main_queue_it; - - deadline_queue.erase(deadline_queue.begin()); - + auto & [key, elem] = *entry; std::lock_guard data_lock(elem->mutex); if (!elem->data) continue; @@ -328,8 +336,8 @@ void AsynchronousInsertQueue::cleanup() while (true) { { - std::unique_lock shutdown_lock(shutdown_mutex); - shutdown_cv.wait_for(shutdown_lock, Milliseconds(cleanup_timeout), [this]() { return shutdown; }); + std::unique_lock cleanup_lock(cleanup_mutex); + cleanup_can_run.wait_for(cleanup_lock, Milliseconds(cleanup_timeout), [this]() -> bool { return shutdown; 
}); if (shutdown) return; diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index aa5f02203fa..93483301ee6 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -5,6 +5,7 @@ #include #include +#include #include @@ -96,8 +97,9 @@ private: mutable std::shared_mutex rwlock; Queue queue; - mutable std::mutex shutdown_mutex; - mutable std::condition_variable shutdown_cv; + /// This is needed only for using inside cleanup() function and correct signaling about shutdown + mutable std::mutex cleanup_mutex; + mutable std::condition_variable cleanup_can_run; mutable std::mutex deadline_mutex; mutable std::condition_variable are_tasks_available; @@ -118,7 +120,7 @@ private: const Milliseconds cleanup_timeout; - bool shutdown{false}; + std::atomic shutdown{false}; ThreadPool pool; /// dump the data only inside this pool. ThreadFromGlobalPool dump_by_first_update_thread; /// uses busy_timeout and busyCheck() diff --git a/tests/queries/0_stateless/02451_async_insert_user_level_settings.python b/tests/queries/0_stateless/02451_async_insert_user_level_settings.python index 9fe6142edc6..8c75f4898c4 100644 --- a/tests/queries/0_stateless/02451_async_insert_user_level_settings.python +++ b/tests/queries/0_stateless/02451_async_insert_user_level_settings.python @@ -13,7 +13,7 @@ from pure_http_client import ClickHouseClient client = ClickHouseClient() -NUM_RUNS = 20 +NUM_RUNS = 5 TIME_TO_WAIT_MS = 500 # The purpose of this test is to check that AsyncInsertQueue @@ -27,7 +27,7 @@ TIME_TO_WAIT_MS = 500 # and this time maybe different depending on the build type (release or with sanitizer) gen_data_query = "SELECT number + {} AS id, toString(id) AS s, range(id) AS arr FROM numbers(10) FORMAT TSV" -insert_query = "INSERT INTO t_async_insert_user_settings FORMAT TSV" +insert_query = "INSERT INTO t_async_insert_user_settings VALUES ({}, '{}', [{}])" settings = { "async_insert": 1, 
"wait_for_async_insert": 1, @@ -36,11 +36,6 @@ settings = { all_data = [] -for i in range(NUM_RUNS): - all_data.append( - client.query(gen_data_query.format(i * 10), settings={}, binary_result=True) - ) - client.query("DROP TABLE IF EXISTS t_async_insert_user_settings") client.query( "CREATE TABLE t_async_insert_user_settings (id UInt64, s String, arr Array(UInt64)) ENGINE = Memory" @@ -48,7 +43,7 @@ client.query( start_ms = time.time() * 1000.0 for i in range(NUM_RUNS): - client.query_with_data(insert_query, all_data[i], settings=settings) + client.query(query = insert_query.format(i,i,i), settings=settings) end_ms = time.time() * 1000.0 duration = end_ms - start_ms From d497b72f80674db2943d2afc22fb664691f81584 Mon Sep 17 00:00:00 2001 From: zhongyuankai <54787696+zhongyuankai@users.noreply.github.com> Date: Tue, 27 Sep 2022 11:00:38 +0800 Subject: [PATCH 052/266] Update DirectoryMonitor.cpp --- src/Storages/Distributed/DirectoryMonitor.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index e8d48431a9e..9dc3d773e01 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -819,10 +819,18 @@ struct StorageDistributedDirectoryMonitor::Batch } else { - std::vector files(file_index_to_path.size()); + std::vector files; for (const auto && file_info : file_index_to_path | boost::adaptors::indexed()) - files[file_info.index()] = file_info.value().second; - e.addMessage(fmt::format("While sending batch {}", fmt::join(files, "\n"))); + { + if (file_info.index() > 8) + { + files.push_back("..."); + break; + } + + files.push_back(file_info.value().second); + } + e.addMessage(fmt::format("While sending batch, nums: {}, files: {}", file_index_to_path.size(), fmt::join(files, "\n"))); throw; } From c39c138416e5a89e6bc878b2ce965e505fe9d7ef Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 27 
Sep 2022 22:00:09 +0800 Subject: [PATCH 053/266] Better index analysis with NULL literal --- src/Storages/MergeTree/KeyCondition.cpp | 14 +++++++++++++ ...index_analysis_with_null_literal.reference | 2 ++ ...02428_index_analysis_with_null_literal.sql | 21 +++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 tests/queries/0_stateless/02428_index_analysis_with_null_literal.reference create mode 100644 tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 9f5f2873b98..24b3a4a60b9 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1640,6 +1640,13 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl } else if (func.getArgumentAt(1).tryGetConstant(block_with_constants, const_value, const_type)) { + /// If the const operand is null, the atom will be always false + if (const_value.isNull()) + { + out.function = RPNElement::ALWAYS_FALSE; + return true; + } + if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(0), context, key_column_num, key_expr_type, chain)) { key_arg_pos = 0; @@ -1663,6 +1670,13 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl } else if (func.getArgumentAt(0).tryGetConstant(block_with_constants, const_value, const_type)) { + /// If the const operand is null, the atom will be always false + if (const_value.isNull()) + { + out.function = RPNElement::ALWAYS_FALSE; + return true; + } + if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(1), context, key_column_num, key_expr_type, chain)) { key_arg_pos = 1; diff --git a/tests/queries/0_stateless/02428_index_analysis_with_null_literal.reference b/tests/queries/0_stateless/02428_index_analysis_with_null_literal.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ 
b/tests/queries/0_stateless/02428_index_analysis_with_null_literal.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql b/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql new file mode 100644 index 00000000000..33b0ea4b818 --- /dev/null +++ b/tests/queries/0_stateless/02428_index_analysis_with_null_literal.sql @@ -0,0 +1,21 @@ +-- From https://github.com/ClickHouse/ClickHouse/issues/41814 +drop table if exists test; + +create table test(a UInt64, m UInt64, d DateTime) engine MergeTree partition by toYYYYMM(d) order by (a, m, d); + +insert into test select number, number, '2022-01-01 00:00:00' from numbers(1000000); + +select count() from test where a = (select toUInt64(1) where 1 = 2) settings enable_early_constant_folding = 0, force_primary_key = 1; + +drop table test; + +-- From https://github.com/ClickHouse/ClickHouse/issues/34063 +drop table if exists test_null_filter; + +create table test_null_filter(key UInt64, value UInt32) engine MergeTree order by key; + +insert into test_null_filter select number, number from numbers(10000000); + +select count() from test_null_filter where key = null and value > 0 settings force_primary_key = 1; + +drop table test_null_filter; From bb755d13345ddde95e5c7076ef86a85c3af6f891 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 28 Sep 2022 15:50:13 +0800 Subject: [PATCH 054/266] Fix wrong test result --- tests/queries/0_stateless/01710_projections.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/01710_projections.reference b/tests/queries/0_stateless/01710_projections.reference index 1e4f659c639..578f7523830 100644 --- a/tests/queries/0_stateless/01710_projections.reference +++ b/tests/queries/0_stateless/01710_projections.reference @@ -3,5 +3,4 @@ 2020-10-24 00:00:00 1.3619605237696326 0.16794469697335793 0.7637956767025532 0.8899329799574005 0.6227685185389797 0.30795997278638165 0.7637956767025532 2020-10-24 
00:00:00 19 -1.9455094931672063 0.7759802460082872 0.6 0 2020-10-24 00:00:00 852 894 -2 -1 999 From 1885bb05242af0b742e46f0c221648920a9afe3c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Sep 2022 08:11:09 +0000 Subject: [PATCH 055/266] Make comment consistent across generated files --- programs/config_tools.h.in | 4 ++-- src/Common/config.h.in | 4 ++-- src/Common/config_version.h.in | 4 ++-- src/Core/config_core.h.in | 4 ++-- src/Daemon/GitHash.cpp.in | 2 +- src/Formats/config_formats.h.in | 4 ++-- src/Functions/config_functions.h.in | 4 ++-- src/Storages/System/StorageSystemBuildOptions.cpp.in | 2 +- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/programs/config_tools.h.in b/programs/config_tools.h.in index f1787801dc4..30444e8c84e 100644 --- a/programs/config_tools.h.in +++ b/programs/config_tools.h.in @@ -1,6 +1,6 @@ -#pragma once +/// This file was autogenerated by CMake -// .h autogenerated by cmake ! +#pragma once #cmakedefine01 ENABLE_CLICKHOUSE_SERVER #cmakedefine01 ENABLE_CLICKHOUSE_CLIENT diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 27db791b954..160937d36fa 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -1,6 +1,6 @@ -#pragma once +/// This file was autogenerated by CMake -// .h autogenerated by cmake! +#pragma once #cmakedefine01 USE_CPUID #cmakedefine01 USE_BASE64 diff --git a/src/Common/config_version.h.in b/src/Common/config_version.h.in index 3b0700b8a8a..7c768bbfad9 100644 --- a/src/Common/config_version.h.in +++ b/src/Common/config_version.h.in @@ -1,6 +1,6 @@ -#pragma once +/// This file was autogenerated by CMake -// .h autogenerated by cmake! +#pragma once // NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, // only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in index 46c77593d4e..0624301d002 100644 --- a/src/Core/config_core.h.in +++ b/src/Core/config_core.h.in @@ -1,6 +1,6 @@ -#pragma once +/// This file was autogenerated by CMake -// .h autogenerated by cmake! +#pragma once #cmakedefine01 USE_ICU #cmakedefine01 USE_MYSQL diff --git a/src/Daemon/GitHash.cpp.in b/src/Daemon/GitHash.cpp.in index 4a2da793fc2..7e33682d670 100644 --- a/src/Daemon/GitHash.cpp.in +++ b/src/Daemon/GitHash.cpp.in @@ -1,4 +1,4 @@ -// File was generated by CMake +/// This file was autogenerated by CMake #include diff --git a/src/Formats/config_formats.h.in b/src/Formats/config_formats.h.in index 427abc7d1ce..a0c14981eb7 100644 --- a/src/Formats/config_formats.h.in +++ b/src/Formats/config_formats.h.in @@ -1,6 +1,6 @@ -#pragma once +/// This file was autogenerated by CMake -// .h autogenerated by cmake! +#pragma once #cmakedefine01 USE_AVRO #cmakedefine01 USE_CAPNP diff --git a/src/Functions/config_functions.h.in b/src/Functions/config_functions.h.in index 86535d65069..fc59968a05e 100644 --- a/src/Functions/config_functions.h.in +++ b/src/Functions/config_functions.h.in @@ -1,6 +1,6 @@ -#pragma once +/// This file was autogenerated by CMake -// .h autogenerated by cmake! 
+#pragma once #cmakedefine01 USE_BASE64 #cmakedefine01 USE_SIMDJSON diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index 117d97d2cfd..47a448900a4 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -1,4 +1,4 @@ -// File was generated by CMake +/// This file was autogenerated by CMake const char * auto_config_build[] { From 0c095b30b26a89d58eac80175e6022bae7598ea8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Sep 2022 08:12:15 +0000 Subject: [PATCH 056/266] Remove unused file --- src/Functions/URL/config_functions_url.h.in | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 src/Functions/URL/config_functions_url.h.in diff --git a/src/Functions/URL/config_functions_url.h.in b/src/Functions/URL/config_functions_url.h.in deleted file mode 100644 index 44cd73d1ed0..00000000000 --- a/src/Functions/URL/config_functions_url.h.in +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -// .h autogenerated by cmake! 
From 06507c40de7ddfb209752a0887c1c3c4b2e3687e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Sep 2022 08:28:47 +0000 Subject: [PATCH 057/266] ${ConfigIncludePath} --> ${CONFIG_INCLUDE_PATH} --- CMakeLists.txt | 4 ++-- programs/CMakeLists.txt | 2 +- src/Formats/CMakeLists.txt | 2 +- src/Functions/CMakeLists.txt | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c737046a5f6..c7274a35c9e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -567,8 +567,8 @@ function (add_native_target) set_property (GLOBAL APPEND PROPERTY NATIVE_BUILD_TARGETS ${ARGV}) endfunction (add_native_target) -set(ConfigIncludePath ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.") -include_directories(${ConfigIncludePath}) +set(CONFIG_INCLUDE_PATH ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.") +include_directories(${CONFIG_INCLUDE_PATH}) # Add as many warnings as possible for our own code. 
include (cmake/warnings.cmake) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 9cf7cb2b624..ef44ecd5c04 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -194,7 +194,7 @@ else() message(STATUS "ClickHouse su: OFF") endif() -configure_file (config_tools.h.in ${ConfigIncludePath}/config_tools.h) +configure_file (config_tools.h.in ${CONFIG_INCLUDE_PATH}/config_tools.h) macro(clickhouse_target_link_split_lib target name) if(NOT CLICKHOUSE_ONE_SHARED) diff --git a/src/Formats/CMakeLists.txt b/src/Formats/CMakeLists.txt index 44883c271f4..ace9e37d76b 100644 --- a/src/Formats/CMakeLists.txt +++ b/src/Formats/CMakeLists.txt @@ -1,2 +1,2 @@ include(configure_config.cmake) -configure_file(config_formats.h.in ${ConfigIncludePath}/config_formats.h) +configure_file(config_formats.h.in ${CONFIG_INCLUDE_PATH}/config_formats.h) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 0387cc86d48..f89c87d6e90 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -1,5 +1,5 @@ include(configure_config.cmake) -configure_file(config_functions.h.in ${ConfigIncludePath}/config_functions.h) +configure_file(config_functions.h.in ${CONFIG_INCLUDE_PATH}/config_functions.h) add_subdirectory(divide) From 170ca264357ce290a982e13fd166fd2e10472217 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 28 Sep 2022 17:55:20 +0800 Subject: [PATCH 058/266] add document --- .../functions/encryption-functions.md | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index 642c8643c16..a7e82e94d2c 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -294,6 +294,74 @@ Result: Notice how only a portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were 
different upon encryption. +## tryDecrypt + +Similar to `decrypt`, but returns NULL if decryption fail because of using wrong key. + +**Examples** + +Let's create a table with `user_id` is unique user id, `encrypted` is an encrypted string field, `iv` is intitial vector for decrypt/encrypt. Assume that users know their id and the key to decrypt the encrypted field: + +```sql +CREATE TABLE decrypt_null ( + dt DateTime, + user_id UInt32, + encrypted String, + iv String +) ENGINE = Memory; +``` + +Insert some data: + +```sql +INSERT INTO decrypt_null VALUES + ('2022-08-02 00:00:00', 1, encrypt('aes-256-gcm', 'value1', 'keykeykeykeykeykeykeykeykeykey01', 'iv1'), 'iv1'), + ('2022-09-02 00:00:00', 2, encrypt('aes-256-gcm', 'value2', 'keykeykeykeykeykeykeykeykeykey02', 'iv2'), 'iv2'), + ('2022-09-02 00:00:01', 3, encrypt('aes-256-gcm', 'value3', 'keykeykeykeykeykeykeykeykeykey03', 'iv3'), 'iv3'); +``` + +Query with `decrypt`: + +```sql +SELECT + dt, + user_id, + decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) AS value +FROM decrypt_null +ORDER BY user_id ASC +``` + +Result: + +``` +0 rows in set. Elapsed: 0.329 sec. + +Received exception from server (version 22.10.1): +Code: 454. DB::Exception: Received from localhost:24071. DB::Exception: Failed to decrypt. OpenSSL error code: 0: while executing 'FUNCTION decrypt('aes-256-gcm' :: 4, encrypted :: 2, 'keykeykeykeykeykeykeykeykeykey02' :: 5, iv :: 3) -> decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) String : 6'. 
(OPENSSL_ERROR) + +``` + +Query with `tryDecrypt`: + +```sql +SELECT + dt, + user_id, + tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) AS value +FROM decrypt_null +ORDER BY user_id ASC +``` + +Result: + +``` +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€dtโ”€โ”ฌโ”€user_idโ”€โ”ฌโ”€valueโ”€โ”€โ” +โ”‚ 2022-08-02 00:00:00 โ”‚ 1 โ”‚ แดบแตแดธแดธ โ”‚ +โ”‚ 2022-09-02 00:00:00 โ”‚ 2 โ”‚ value2 โ”‚ +โ”‚ 2022-09-02 00:00:01 โ”‚ 3 โ”‚ แดบแตแดธแดธ โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + ## aes_decrypt_mysql Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function. From 2036641ea856859a6ccd07d1f9b5f8f760c92121 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 28 Sep 2022 13:42:46 +0200 Subject: [PATCH 059/266] Fix 02267_file_globs_schema_inference.sql flakiness --- tests/queries/0_stateless/02267_file_globs_schema_inference.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql index 6862d6f0602..b51c0cf6fa1 100644 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql @@ -5,7 +5,7 @@ insert into function file('02267_data3.jsonl') select * from numbers(0); insert into function file('02267_data4.jsonl') select 1 as x; select * from file('02267_data*.jsonl') order by x; -insert into function file('02267_data1.jsonl', 'TSV') select 1 as x; +insert into function file('02267_data4.jsonl', 'TSV') select 1 as x; insert into function file('02267_data1.jsonl', 'TSV') select [1,2,3] as x; select * from file('02267_data*.jsonl') settings 
schema_inference_use_cache_for_file=0; --{serverError INCORRECT_DATA} From 6fd234d6520eec4aa1a7f73066cd7931970fbc80 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 28 Sep 2022 20:06:18 +0800 Subject: [PATCH 060/266] fix document typo and format --- .../functions/encryption-functions.md | 27 +++---------------- 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index a7e82e94d2c..1c123aa3db2 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -296,11 +296,11 @@ Notice how only a portion of the data was properly decrypted, and the rest is gi ## tryDecrypt -Similar to `decrypt`, but returns NULL if decryption fail because of using wrong key. +Similar to `decrypt`, but returns NULL if decryption fails because of using the wrong key. **Examples** -Let's create a table with `user_id` is unique user id, `encrypted` is an encrypted string field, `iv` is intitial vector for decrypt/encrypt. Assume that users know their id and the key to decrypt the encrypted field: +Let's create a table where `user_id` is the unique user id, `encrypted` is an encrypted string field, `iv` is an initial vector for decrypt/encrypt. Assume that users know their id and the key to decrypt the encrypted field: ```sql CREATE TABLE decrypt_null ( @@ -320,28 +320,7 @@ INSERT INTO decrypt_null VALUES ('2022-09-02 00:00:01', 3, encrypt('aes-256-gcm', 'value3', 'keykeykeykeykeykeykeykeykeykey03', 'iv3'), 'iv3'); ``` -Query with `decrypt`: - -```sql -SELECT - dt, - user_id, - decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) AS value -FROM decrypt_null -ORDER BY user_id ASC -``` - -Result: - -``` -0 rows in set. Elapsed: 0.329 sec. - -Received exception from server (version 22.10.1): -Code: 454. DB::Exception: Received from localhost:24071. 
DB::Exception: Failed to decrypt. OpenSSL error code: 0: while executing 'FUNCTION decrypt('aes-256-gcm' :: 4, encrypted :: 2, 'keykeykeykeykeykeykeykeykeykey02' :: 5, iv :: 3) -> decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) String : 6'. (OPENSSL_ERROR) - -``` - -Query with `tryDecrypt`: +Query: ```sql SELECT From bfddb91c9a1e008c519ebeba644e4c663287aa45 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 28 Sep 2022 14:15:53 +0200 Subject: [PATCH 061/266] Update docs/en/interfaces/formats.md Co-authored-by: Sergei Trifonov --- docs/en/interfaces/formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index c6064cbcf01..1086784b3f8 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1020,7 +1020,7 @@ Example: } ``` -To use object name as column value you can use special setting [format_json_object_each_row_column_for_object_name](../operations/settings/settings.md#format_json_object_each_row_column_for_object_name). +To use object name as column value you can use special setting [format_json_object_each_row_column_for_object_name](../operations/settings/settings.md#format_json_object_each_row_column_for_object_name). Value of this setting is set to the name of a column, that is used as JSON key for a row in resulting object. Examples: For output: From 78fc36ca498f37bb2199512f803852c66af6e4c1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Sep 2022 08:45:15 +0000 Subject: [PATCH 062/266] Generate config.h into ${CONFIG_INCLUDE_PATH} This makes the target location consistent with other auto-generated files like config_formats.h, config_core.h, and config_functions.h and simplifies the build of clickhouse_common. 
--- programs/odbc-bridge/ColumnInfoHandler.h | 2 +- programs/odbc-bridge/IdentifierQuoteHandler.h | 2 +- programs/odbc-bridge/MainHandler.cpp | 2 +- programs/odbc-bridge/ODBCHandlerFactory.cpp | 2 +- programs/odbc-bridge/SchemaAllowedHandler.h | 2 +- programs/odbc-bridge/getIdentifierQuote.h | 2 +- src/AggregateFunctions/AggregateFunctionAvg.h | 2 +- src/AggregateFunctions/AggregateFunctionBitwise.h | 2 +- src/AggregateFunctions/AggregateFunctionCount.h | 2 +- src/AggregateFunctions/AggregateFunctionIf.h | 2 +- src/AggregateFunctions/AggregateFunctionMinMaxAny.h | 2 +- src/AggregateFunctions/AggregateFunctionNull.h | 2 +- src/AggregateFunctions/AggregateFunctionSum.h | 2 +- src/AggregateFunctions/ThetaSketchData.h | 2 +- src/Bridge/IBridge.cpp | 2 +- src/BridgeHelper/XDBCBridgeHelper.h | 2 +- src/CMakeLists.txt | 5 ++--- src/Client/ClientBase.cpp | 2 +- src/Client/ClientBaseHelpers.h | 2 +- src/Client/Connection.cpp | 2 +- src/Client/Connection.h | 2 +- src/Common/Config/ConfigProcessor.cpp | 2 +- src/Common/Config/ConfigProcessor.h | 2 +- src/Common/Config/YAMLParser.cpp | 2 +- src/Common/Config/YAMLParser.h | 2 +- src/Common/OpenSSLHelpers.cpp | 2 +- src/Common/OpenSSLHelpers.h | 2 +- src/Common/OptimizedRegularExpression.h | 3 +-- src/Common/QueryProfiler.h | 2 +- src/Common/StackTrace.cpp | 2 +- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 2 +- src/Common/getNumberOfPhysicalCPUCores.cpp | 2 +- src/Common/memory.h | 2 +- src/Common/new_delete.cpp | 2 +- src/Common/tests/gtest_yaml_parser.cpp | 2 +- src/Compression/CompressionCodecEncrypted.cpp | 2 +- src/Coordination/KeeperDispatcher.h | 2 +- src/Daemon/SentryWriter.cpp | 2 +- src/Dictionaries/CassandraHelpers.h | 2 +- src/Dictionaries/CassandraSource.cpp | 2 +- src/Dictionaries/XDBCDictionarySource.cpp | 2 +- src/Disks/DiskEncrypted.h | 2 +- src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h | 2 +- src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp | 2 +- src/Disks/IO/ReadBufferFromAzureBlobStorage.h | 2 +- 
src/Disks/IO/ReadBufferFromRemoteFSGather.h | 2 +- src/Disks/IO/ReadIndirectBufferFromRemoteFS.h | 2 +- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 2 +- src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp | 2 +- src/Disks/IO/WriteBufferFromAzureBlobStorage.h | 2 +- src/Disks/IO/WriteIndirectBufferFromRemoteFS.h | 2 +- .../ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h | 2 +- .../ObjectStorages/AzureBlobStorage/AzureObjectStorage.h | 2 +- .../AzureBlobStorage/registerDiskAzureBlobStorage.cpp | 2 +- src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h | 2 +- src/Disks/ObjectStorages/LocalObjectStorage.h | 2 +- src/Disks/ObjectStorages/S3/ProxyConfiguration.h | 2 +- src/Disks/ObjectStorages/S3/ProxyListConfiguration.h | 2 +- src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.h | 2 +- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 2 +- src/Disks/ObjectStorages/S3/diskSettings.h | 2 +- src/Disks/ObjectStorages/S3/parseConfig.h | 2 +- src/Disks/ObjectStorages/S3/registerDiskS3.cpp | 2 +- src/Disks/ObjectStorages/Web/WebObjectStorage.h | 2 +- src/Disks/registerDisks.cpp | 2 +- src/Formats/registerFormats.cpp | 2 +- src/Functions/DivisionUtils.h | 2 +- src/Functions/FunctionHashID.h | 2 +- src/Functions/FunctionIfBase.h | 2 +- src/Functions/FunctionShowCertificate.h | 2 +- src/Functions/FunctionUnaryArithmetic.h | 2 +- src/Functions/FunctionsAES.h | 2 +- src/Functions/FunctionsEmbeddedDictionaries.h | 2 +- src/Functions/IFunction.cpp | 2 +- src/Functions/MatchImpl.h | 2 +- src/Functions/MultiMatchAllIndicesImpl.h | 2 +- src/Functions/MultiMatchAnyImpl.h | 2 +- src/Functions/Regexps.h | 2 +- src/Functions/ReplaceRegexpImpl.h | 2 +- src/Functions/UniqTheta/FunctionsUniqTheta.h | 2 +- src/Functions/aes_decrypt_mysql.cpp | 2 +- src/Functions/aes_encrypt_mysql.cpp | 2 +- src/Functions/decrypt.cpp | 2 +- src/Functions/encrypt.cpp | 2 +- src/IO/Archives/ZipArchiveReader.h | 2 +- src/IO/Archives/ZipArchiveWriter.h | 2 +- src/IO/BrotliReadBuffer.cpp | 2 +- 
src/IO/BrotliWriteBuffer.cpp | 2 +- src/IO/Bzip2ReadBuffer.cpp | 2 +- src/IO/Bzip2WriteBuffer.cpp | 2 +- src/IO/CompressionMethod.cpp | 2 +- src/IO/FileEncryptionCommon.h | 2 +- src/IO/HTTPCommon.cpp | 2 +- src/IO/HadoopSnappyReadBuffer.cpp | 2 +- src/IO/HadoopSnappyReadBuffer.h | 2 +- src/IO/ReadBufferFromEncryptedFile.h | 2 +- src/IO/ReadBufferFromS3.cpp | 2 +- src/IO/ReadBufferFromS3.h | 2 +- src/IO/ReadWriteBufferFromHTTP.h | 2 +- src/IO/S3/PocoHTTPClient.cpp | 2 +- src/IO/S3/PocoHTTPClient.h | 2 +- src/IO/S3/PocoHTTPClientFactory.cpp | 2 +- src/IO/S3/tests/gtest_aws_s3_client.cpp | 2 +- src/IO/S3Common.cpp | 2 +- src/IO/S3Common.h | 2 +- src/IO/SnappyReadBuffer.cpp | 2 +- src/IO/SnappyReadBuffer.h | 2 +- src/IO/SnappyWriteBuffer.cpp | 2 +- src/IO/SnappyWriteBuffer.h | 2 +- src/IO/UseSSL.cpp | 2 +- src/IO/WriteBufferFromEncryptedFile.h | 2 +- src/IO/WriteBufferFromS3.cpp | 2 +- src/IO/WriteBufferFromS3.h | 2 +- src/IO/tests/gtest_archive_reader_and_writer.cpp | 2 +- src/IO/tests/gtest_file_encryption.cpp | 2 +- src/IO/tests/gtest_s3_uri.cpp | 2 +- src/Parsers/Access/ParserCreateUserQuery.cpp | 2 +- src/Processors/Formats/Impl/HiveTextRowInputFormat.h | 2 +- src/Server/CertificateReloader.h | 2 +- src/Server/GRPCServer.h | 2 +- src/Server/HTTP/HTTPServerRequest.h | 2 +- src/Server/HTTPHandler.cpp | 2 +- src/Server/KeeperTCPHandler.h | 2 +- src/Server/MySQLHandler.h | 2 +- src/Server/MySQLHandlerFactory.h | 2 +- src/Server/PostgreSQLHandler.h | 2 +- src/Server/PostgreSQLHandlerFactory.h | 2 +- src/Server/ProtocolServerAdapter.h | 2 +- src/Storages/Cache/registerRemoteFileMetadatas.cpp | 2 +- src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h | 2 +- src/Storages/HDFS/HDFSCommon.h | 2 +- src/Storages/HDFS/ReadBufferFromHDFS.h | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 2 +- src/Storages/HDFS/StorageHDFS.h | 2 +- src/Storages/HDFS/StorageHDFSCluster.cpp | 2 +- src/Storages/HDFS/StorageHDFSCluster.h | 2 +- src/Storages/HDFS/WriteBufferFromHDFS.cpp | 2 +- 
src/Storages/HDFS/WriteBufferFromHDFS.h | 2 +- src/Storages/Hive/HiveCommon.h | 2 +- src/Storages/Hive/HiveFile.h | 2 +- src/Storages/Hive/HiveSettings.h | 2 +- src/Storages/Hive/StorageHive.h | 2 +- src/Storages/Hive/StorageHiveMetadata.h | 2 +- src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- src/Storages/StorageMongoDBSocketFactory.cpp | 2 +- src/Storages/StorageS3.cpp | 2 +- src/Storages/StorageS3.h | 2 +- src/Storages/StorageS3Cluster.cpp | 2 +- src/Storages/StorageS3Cluster.h | 2 +- src/Storages/System/StorageSystemCertificates.cpp | 2 +- src/Storages/registerStorages.cpp | 2 +- src/TableFunctions/Hive/TableFunctionHive.h | 2 +- src/TableFunctions/ITableFunctionXDBC.h | 2 +- src/TableFunctions/TableFunctionHDFS.cpp | 2 +- src/TableFunctions/TableFunctionHDFS.h | 2 +- src/TableFunctions/TableFunctionHDFSCluster.cpp | 2 +- src/TableFunctions/TableFunctionHDFSCluster.h | 2 +- src/TableFunctions/TableFunctionS3.cpp | 2 +- src/TableFunctions/TableFunctionS3.h | 2 +- src/TableFunctions/TableFunctionS3Cluster.cpp | 2 +- src/TableFunctions/TableFunctionS3Cluster.h | 2 +- src/TableFunctions/registerTableFunctions.h | 2 +- 162 files changed, 163 insertions(+), 165 deletions(-) diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index 76c0103d604..3ba8b182ba6 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_ODBC diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.h b/programs/odbc-bridge/IdentifierQuoteHandler.h index 23ffd84663b..d57bbc0ca8a 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.h +++ b/programs/odbc-bridge/IdentifierQuoteHandler.h @@ -2,7 +2,7 @@ #include #include -#include +#include "config.h" #include #if USE_ODBC diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index fe22d8facfd..0875cc2e9d9 100644 --- 
a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include "config.h" #include #include diff --git a/programs/odbc-bridge/ODBCHandlerFactory.cpp b/programs/odbc-bridge/ODBCHandlerFactory.cpp index 2ae533431d3..dd21358df8c 100644 --- a/programs/odbc-bridge/ODBCHandlerFactory.cpp +++ b/programs/odbc-bridge/ODBCHandlerFactory.cpp @@ -1,7 +1,7 @@ #include "ODBCHandlerFactory.h" #include "PingHandler.h" #include "ColumnInfoHandler.h" -#include +#include "config.h" #include #include #include diff --git a/programs/odbc-bridge/SchemaAllowedHandler.h b/programs/odbc-bridge/SchemaAllowedHandler.h index 7afa77ca091..cb71a6fb5a2 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.h +++ b/programs/odbc-bridge/SchemaAllowedHandler.h @@ -2,7 +2,7 @@ #include #include -#include +#include "config.h" #include #if USE_ODBC diff --git a/programs/odbc-bridge/getIdentifierQuote.h b/programs/odbc-bridge/getIdentifierQuote.h index 53ee1afd720..703586cd08e 100644 --- a/programs/odbc-bridge/getIdentifierQuote.h +++ b/programs/odbc-bridge/getIdentifierQuote.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_ODBC diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index c41a51997df..b80024b1efd 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -11,7 +11,7 @@ #include #include -#include +#include "config.h" #if USE_EMBEDDED_COMPILER # include diff --git a/src/AggregateFunctions/AggregateFunctionBitwise.h b/src/AggregateFunctions/AggregateFunctionBitwise.h index a251d4d9f5f..2b46f86bf30 100644 --- a/src/AggregateFunctions/AggregateFunctionBitwise.h +++ b/src/AggregateFunctions/AggregateFunctionBitwise.h @@ -9,7 +9,7 @@ #include -#include +#include "config.h" #if USE_EMBEDDED_COMPILER # include diff --git a/src/AggregateFunctions/AggregateFunctionCount.h 
b/src/AggregateFunctions/AggregateFunctionCount.h index a58eecf5aca..3e53190ae8c 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.h +++ b/src/AggregateFunctions/AggregateFunctionCount.h @@ -12,7 +12,7 @@ #include #include -#include +#include "config.h" #if USE_EMBEDDED_COMPILER # include diff --git a/src/AggregateFunctions/AggregateFunctionIf.h b/src/AggregateFunctions/AggregateFunctionIf.h index 6b0905d6d5e..79312dff2c7 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.h +++ b/src/AggregateFunctions/AggregateFunctionIf.h @@ -5,7 +5,7 @@ #include #include -#include +#include "config.h" #if USE_EMBEDDED_COMPILER # include diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index ad633418ec3..22011a6f757 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -14,7 +14,7 @@ #include #include -#include +#include "config.h" #if USE_EMBEDDED_COMPILER # include diff --git a/src/AggregateFunctions/AggregateFunctionNull.h b/src/AggregateFunctions/AggregateFunctionNull.h index c26f4b03b14..7fda8df8a00 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.h +++ b/src/AggregateFunctions/AggregateFunctionNull.h @@ -10,7 +10,7 @@ #include #include -#include +#include "config.h" #if USE_EMBEDDED_COMPILER # include diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 0f2357d64a8..8e24b288fff 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -13,7 +13,7 @@ #include -#include +#include "config.h" #include #if USE_EMBEDDED_COMPILER diff --git a/src/AggregateFunctions/ThetaSketchData.h b/src/AggregateFunctions/ThetaSketchData.h index cd17719a45a..f32386d945b 100644 --- a/src/AggregateFunctions/ThetaSketchData.h +++ b/src/AggregateFunctions/ThetaSketchData.h @@ -1,6 +1,6 @@ #pragma once -#include +#include 
"config.h" #if USE_DATASKETCHES diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp index 824ab23a882..04d904d0a00 100644 --- a/src/Bridge/IBridge.cpp +++ b/src/Bridge/IBridge.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include "config.h" #include #include #include diff --git a/src/BridgeHelper/XDBCBridgeHelper.h b/src/BridgeHelper/XDBCBridgeHelper.h index f2cd76d5d81..b62cb277ecb 100644 --- a/src/BridgeHelper/XDBCBridgeHelper.h +++ b/src/BridgeHelper/XDBCBridgeHelper.h @@ -16,7 +16,7 @@ #include #include -#include +#include "config.h" namespace DB diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 07d34faf112..3518ce0be6b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -19,12 +19,11 @@ endif() include(../cmake/limit_jobs.cmake) set (CONFIG_VERSION "${CMAKE_CURRENT_BINARY_DIR}/Common/config_version.h") -set (CONFIG_COMMON "${CMAKE_CURRENT_BINARY_DIR}/Common/config.h") include (../cmake/version.cmake) message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION} ${VERSION_OFFICIAL}") include (configure_config.cmake) -configure_file (Common/config.h.in ${CONFIG_COMMON}) +configure_file (Common/config.h.in ${CONFIG_INCLUDE_PATH}/config.h) configure_file (Common/config_version.h.in ${CONFIG_VERSION}) configure_file (Core/config_core.h.in "${CMAKE_CURRENT_BINARY_DIR}/Core/include/config_core.h") @@ -153,7 +152,7 @@ else() endif () list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD}) -list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON}) +list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION}) list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/FunctionsLogical.cpp Functions/indexHint.cpp) list (APPEND dbms_headers Functions/IFunction.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/FunctionsLogical.h 
Functions/indexHint.h) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f407fab68f1..f4bea93f458 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include "config.h" #include #include #include diff --git a/src/Client/ClientBaseHelpers.h b/src/Client/ClientBaseHelpers.h index 3fb2863082a..2a79332eb98 100644 --- a/src/Client/ClientBaseHelpers.h +++ b/src/Client/ClientBaseHelpers.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include "config.h" #if USE_REPLXX # include diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index a9795e75b28..f4190ef8f01 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -34,7 +34,7 @@ #include #include -#include +#include "config.h" #if USE_SSL # include diff --git a/src/Client/Connection.h b/src/Client/Connection.h index a92f78fad46..0b6fc605894 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -4,7 +4,7 @@ #include -#include +#include "config.h" #include #include diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index d3462321e83..41535fad8f7 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #include "ConfigProcessor.h" #include "YAMLParser.h" diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index ac05516e951..aa8ac71446f 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include #include diff --git a/src/Common/Config/YAMLParser.cpp b/src/Common/Config/YAMLParser.cpp index a34b539ee81..d1cda7185dd 100644 --- a/src/Common/Config/YAMLParser.cpp +++ b/src/Common/Config/YAMLParser.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_YAML_CPP #include "YAMLParser.h" diff --git 
a/src/Common/Config/YAMLParser.h b/src/Common/Config/YAMLParser.h index 5294bcc4782..b986fc2d895 100644 --- a/src/Common/Config/YAMLParser.h +++ b/src/Common/Config/YAMLParser.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include diff --git a/src/Common/OpenSSLHelpers.cpp b/src/Common/OpenSSLHelpers.cpp index 4e7848afc85..b72261090a5 100644 --- a/src/Common/OpenSSLHelpers.cpp +++ b/src/Common/OpenSSLHelpers.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_SSL #include "OpenSSLHelpers.h" diff --git a/src/Common/OpenSSLHelpers.h b/src/Common/OpenSSLHelpers.h index 41f092f0109..b7438d4be3f 100644 --- a/src/Common/OpenSSLHelpers.h +++ b/src/Common/OpenSSLHelpers.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_SSL # include diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h index dad8706a50d..0c6e32cb383 100644 --- a/src/Common/OptimizedRegularExpression.h +++ b/src/Common/OptimizedRegularExpression.h @@ -5,9 +5,8 @@ #include #include #include +#include "config.h" #include - -#include #include diff --git a/src/Common/QueryProfiler.h b/src/Common/QueryProfiler.h index e3938cbc5d6..fb2f470b6d6 100644 --- a/src/Common/QueryProfiler.h +++ b/src/Common/QueryProfiler.h @@ -5,7 +5,7 @@ #include #include -#include +#include "config.h" namespace Poco diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 37ce3a03cd8..cab673c9ab2 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -14,7 +14,7 @@ #include #include -#include +#include "config.h" #if USE_UNWIND # include diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 0647bf0f069..9a909a88535 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -13,7 +13,7 @@ #include #include -#include +#include "config.h" #if USE_SSL # include diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp 
b/src/Common/getNumberOfPhysicalCPUCores.cpp index 88542b3e705..7bb68b324b2 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -1,6 +1,6 @@ #include "getNumberOfPhysicalCPUCores.h" -#include +#include "config.h" #if defined(OS_LINUX) # include # include diff --git a/src/Common/memory.h b/src/Common/memory.h index 8a6b98933fb..4cb1c535e56 100644 --- a/src/Common/memory.h +++ b/src/Common/memory.h @@ -7,7 +7,7 @@ #include #include -#include +#include "config.h" #if USE_JEMALLOC # include diff --git a/src/Common/new_delete.cpp b/src/Common/new_delete.cpp index 7b4bff04185..871ab750907 100644 --- a/src/Common/new_delete.cpp +++ b/src/Common/new_delete.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include "config.h" #include #if defined(OS_DARWIN) && (USE_JEMALLOC) diff --git a/src/Common/tests/gtest_yaml_parser.cpp b/src/Common/tests/gtest_yaml_parser.cpp index 4ffd66ae3a1..163625c4c98 100644 --- a/src/Common/tests/gtest_yaml_parser.cpp +++ b/src/Common/tests/gtest_yaml_parser.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_YAML_CPP #include "gtest_helper_functions.h" diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp index bf36fa114fb..965bed8e755 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -1,5 +1,5 @@ #include -#include +#include "config.h" #include #include #include diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 5e2701299f4..0b443d64786 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include "config_core.h" #if USE_NURAFT diff --git a/src/Daemon/SentryWriter.cpp b/src/Daemon/SentryWriter.cpp index 51794bfdc37..928a06cbee5 100644 --- a/src/Daemon/SentryWriter.cpp +++ b/src/Daemon/SentryWriter.cpp @@ -16,7 +16,7 @@ #include 
#include "Common/config_version.h" -#include +#include "config.h" #if USE_SENTRY && !defined(KEEPER_STANDALONE_BUILD) diff --git a/src/Dictionaries/CassandraHelpers.h b/src/Dictionaries/CassandraHelpers.h index 3b90d46acdf..542cce5c939 100644 --- a/src/Dictionaries/CassandraHelpers.h +++ b/src/Dictionaries/CassandraHelpers.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_CASSANDRA #include diff --git a/src/Dictionaries/CassandraSource.cpp b/src/Dictionaries/CassandraSource.cpp index f5277e77eba..fd5982443fa 100644 --- a/src/Dictionaries/CassandraSource.cpp +++ b/src/Dictionaries/CassandraSource.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_CASSANDRA diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 0a097c4faef..dec4feb5ced 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include "config.h" namespace DB diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index d2795e01086..02b4104f36a 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_SSL #include diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h index cf7feb416b2..1ce5010af4c 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include #include #include diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index 96ae50bbbcf..42b73b56147 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_AZURE_BLOB_STORAGE diff --git 
a/src/Disks/IO/ReadBufferFromAzureBlobStorage.h b/src/Disks/IO/ReadBufferFromAzureBlobStorage.h index c7bab346618..711b4ce23f7 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AZURE_BLOB_STORAGE diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 4ed61501281..b6c626c75e6 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include #include #include diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h index 996e69296a6..35214915cd2 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include #include diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 561acc00f6f..d5d78b2a324 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include "config.h" #include #include diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 5c4debd56b6..4f1ab8de7a4 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_AZURE_BLOB_STORAGE diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 8bfd23a6379..24019f94e27 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AZURE_BLOB_STORAGE diff --git 
a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h index 38a1872bb45..f06ebe89589 100644 --- a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include #include diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h index 6ebe169af50..18e8bf159d5 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AZURE_BLOB_STORAGE diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 4fc434e691b..47ac0d6badd 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -1,5 +1,5 @@ #pragma once -#include +#include "config.h" #if USE_AZURE_BLOB_STORAGE diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index e6615316391..6a12d8ef2e8 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #include diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index bbf2f593a68..82cddfb9122 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -1,5 +1,5 @@ #pragma once -#include +#include "config.h" #if USE_HDFS diff --git a/src/Disks/ObjectStorages/LocalObjectStorage.h 
b/src/Disks/ObjectStorages/LocalObjectStorage.h index de38581e7bb..644c5249d8f 100644 --- a/src/Disks/ObjectStorages/LocalObjectStorage.h +++ b/src/Disks/ObjectStorages/LocalObjectStorage.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include diff --git a/src/Disks/ObjectStorages/S3/ProxyConfiguration.h b/src/Disks/ObjectStorages/S3/ProxyConfiguration.h index 97535ad7729..fd2761c2cba 100644 --- a/src/Disks/ObjectStorages/S3/ProxyConfiguration.h +++ b/src/Disks/ObjectStorages/S3/ProxyConfiguration.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/Disks/ObjectStorages/S3/ProxyListConfiguration.h b/src/Disks/ObjectStorages/S3/ProxyListConfiguration.h index 2ef1b5bbb16..14fac8baff5 100644 --- a/src/Disks/ObjectStorages/S3/ProxyListConfiguration.h +++ b/src/Disks/ObjectStorages/S3/ProxyListConfiguration.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.h b/src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.h index 58eda8a9a06..d6d7456a6ac 100644 --- a/src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.h +++ b/src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index ecbd8cc9aa1..ce5235d4323 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/Disks/ObjectStorages/S3/diskSettings.h b/src/Disks/ObjectStorages/S3/diskSettings.h index 9092f5e712d..05ba8819f83 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.h +++ b/src/Disks/ObjectStorages/S3/diskSettings.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AWS_S3 diff --git 
a/src/Disks/ObjectStorages/S3/parseConfig.h b/src/Disks/ObjectStorages/S3/parseConfig.h index 97fafd620f9..1defc673c2e 100644 --- a/src/Disks/ObjectStorages/S3/parseConfig.h +++ b/src/Disks/ObjectStorages/S3/parseConfig.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index 12e4df42863..62cacde3f14 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #include #include diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.h b/src/Disks/ObjectStorages/Web/WebObjectStorage.h index a0e10cd25a3..2fda5e576aa 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.h +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include diff --git a/src/Disks/registerDisks.cpp b/src/Disks/registerDisks.cpp index b43efbba78e..54ad74d47b5 100644 --- a/src/Disks/registerDisks.cpp +++ b/src/Disks/registerDisks.cpp @@ -2,7 +2,7 @@ #include "DiskFactory.h" -#include +#include "config.h" namespace DB { diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index bea63f830ef..593e4568be1 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #include diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h index c246f7fd31a..b4809580f5d 100644 --- a/src/Functions/DivisionUtils.h +++ b/src/Functions/DivisionUtils.h @@ -7,7 +7,7 @@ #include #include "config_core.h" -#include +#include "config.h" namespace DB diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h index e469381a784..7e063be6626 100644 --- a/src/Functions/FunctionHashID.h +++ b/src/Functions/FunctionHashID.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include diff 
--git a/src/Functions/FunctionIfBase.h b/src/Functions/FunctionIfBase.h index d0e69ca1ebc..2b0f8289914 100644 --- a/src/Functions/FunctionIfBase.h +++ b/src/Functions/FunctionIfBase.h @@ -3,7 +3,7 @@ #include #include -#include +#include "config.h" namespace DB { diff --git a/src/Functions/FunctionShowCertificate.h b/src/Functions/FunctionShowCertificate.h index 0724158f66b..32041e55b00 100644 --- a/src/Functions/FunctionShowCertificate.h +++ b/src/Functions/FunctionShowCertificate.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include #include diff --git a/src/Functions/FunctionUnaryArithmetic.h b/src/Functions/FunctionUnaryArithmetic.h index 445eb45fd9d..e4605e5e214 100644 --- a/src/Functions/FunctionUnaryArithmetic.h +++ b/src/Functions/FunctionUnaryArithmetic.h @@ -12,7 +12,7 @@ #include #include -#include +#include "config.h" #include #if USE_EMBEDDED_COMPILER diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 0d8e5a5546a..6b573dad2b0 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_SSL #include diff --git a/src/Functions/FunctionsEmbeddedDictionaries.h b/src/Functions/FunctionsEmbeddedDictionaries.h index aa2144d271f..af71ba45955 100644 --- a/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/src/Functions/FunctionsEmbeddedDictionaries.h @@ -18,7 +18,7 @@ #include #include -#include +#include "config.h" namespace DB diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index c1c6606e40f..105b30594a2 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -17,7 +17,7 @@ #include #include -#include +#include "config.h" #if USE_EMBEDDED_COMPILER # pragma GCC diagnostic push diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index d5465dc3498..874f4d3c5b8 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -8,7 +8,7 @@ #include "Regexps.h" #include 
"config_functions.h" -#include +#include "config.h" #include diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index e19d1691c6a..79f4d564e42 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -9,7 +9,7 @@ #include "Regexps.h" #include "config_functions.h" -#include +#include "config.h" #if USE_VECTORSCAN # include diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index a5d5a354290..508e568d232 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -8,7 +8,7 @@ #include "Regexps.h" #include "config_functions.h" -#include +#include "config.h" #if USE_VECTORSCAN # include diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index 08c819fae99..08177c0259e 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -11,7 +11,7 @@ #include #include #include -#include +#include "config.h" #include #include #include diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 549edf70dff..b1d04d6e9e2 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -6,7 +6,7 @@ #include #include "config_functions.h" -#include +#include "config.h" #include diff --git a/src/Functions/UniqTheta/FunctionsUniqTheta.h b/src/Functions/UniqTheta/FunctionsUniqTheta.h index 7cdbf587cf7..331ca92e3f8 100644 --- a/src/Functions/UniqTheta/FunctionsUniqTheta.h +++ b/src/Functions/UniqTheta/FunctionsUniqTheta.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_DATASKETCHES diff --git a/src/Functions/aes_decrypt_mysql.cpp b/src/Functions/aes_decrypt_mysql.cpp index 8ba7eaa8e80..513ce735164 100644 --- a/src/Functions/aes_decrypt_mysql.cpp +++ b/src/Functions/aes_decrypt_mysql.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_SSL diff --git a/src/Functions/aes_encrypt_mysql.cpp b/src/Functions/aes_encrypt_mysql.cpp index 
24d618b5a86..0dcb4108770 100644 --- a/src/Functions/aes_encrypt_mysql.cpp +++ b/src/Functions/aes_encrypt_mysql.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_SSL diff --git a/src/Functions/decrypt.cpp b/src/Functions/decrypt.cpp index da794116a41..6998609b41c 100644 --- a/src/Functions/decrypt.cpp +++ b/src/Functions/decrypt.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_SSL diff --git a/src/Functions/encrypt.cpp b/src/Functions/encrypt.cpp index dc68b650d74..38feafbea19 100644 --- a/src/Functions/encrypt.cpp +++ b/src/Functions/encrypt.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_SSL diff --git a/src/IO/Archives/ZipArchiveReader.h b/src/IO/Archives/ZipArchiveReader.h index 68a4e09a2f8..9d0da28b080 100644 --- a/src/IO/Archives/ZipArchiveReader.h +++ b/src/IO/Archives/ZipArchiveReader.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_MINIZIP #include diff --git a/src/IO/Archives/ZipArchiveWriter.h b/src/IO/Archives/ZipArchiveWriter.h index 3a22612d603..a54130556b3 100644 --- a/src/IO/Archives/ZipArchiveWriter.h +++ b/src/IO/Archives/ZipArchiveWriter.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_MINIZIP #include diff --git a/src/IO/BrotliReadBuffer.cpp b/src/IO/BrotliReadBuffer.cpp index c93dfbb5cac..c16b7c2b397 100644 --- a/src/IO/BrotliReadBuffer.cpp +++ b/src/IO/BrotliReadBuffer.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_BROTLI # include diff --git a/src/IO/BrotliWriteBuffer.cpp b/src/IO/BrotliWriteBuffer.cpp index 3e55c80222f..e764b58ccd4 100644 --- a/src/IO/BrotliWriteBuffer.cpp +++ b/src/IO/BrotliWriteBuffer.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_BROTLI # include diff --git a/src/IO/Bzip2ReadBuffer.cpp b/src/IO/Bzip2ReadBuffer.cpp index 4bffdbe41dc..cafd4255ada 100644 --- a/src/IO/Bzip2ReadBuffer.cpp +++ b/src/IO/Bzip2ReadBuffer.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_BZIP2 # include diff --git a/src/IO/Bzip2WriteBuffer.cpp 
b/src/IO/Bzip2WriteBuffer.cpp index d8efa0ade1a..10a1803fec8 100644 --- a/src/IO/Bzip2WriteBuffer.cpp +++ b/src/IO/Bzip2WriteBuffer.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_BZIP2 # include diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index 83db5eeaeed..c40176f9700 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -16,7 +16,7 @@ #include #include -#include +#include "config.h" #include diff --git a/src/IO/FileEncryptionCommon.h b/src/IO/FileEncryptionCommon.h index 496c9e66b20..efc0194da52 100644 --- a/src/IO/FileEncryptionCommon.h +++ b/src/IO/FileEncryptionCommon.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_SSL #include diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 8d2f7b4c39b..9fd48914f64 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -9,7 +9,7 @@ #include -#include +#include "config.h" #if USE_SSL # include diff --git a/src/IO/HadoopSnappyReadBuffer.cpp b/src/IO/HadoopSnappyReadBuffer.cpp index 408e76e19be..de04fb39b13 100644 --- a/src/IO/HadoopSnappyReadBuffer.cpp +++ b/src/IO/HadoopSnappyReadBuffer.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_SNAPPY #include diff --git a/src/IO/HadoopSnappyReadBuffer.h b/src/IO/HadoopSnappyReadBuffer.h index 4536039505a..e23971f75b3 100644 --- a/src/IO/HadoopSnappyReadBuffer.h +++ b/src/IO/HadoopSnappyReadBuffer.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_SNAPPY diff --git a/src/IO/ReadBufferFromEncryptedFile.h b/src/IO/ReadBufferFromEncryptedFile.h index 267477b3b98..3626daccb3e 100644 --- a/src/IO/ReadBufferFromEncryptedFile.h +++ b/src/IO/ReadBufferFromEncryptedFile.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_SSL #include diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index c17bf731c62..c49941b025d 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -1,4 +1,4 @@ -#include 
+#include "config.h" #include "IO/S3Common.h" #if USE_AWS_S3 diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index dbd6cf9667f..cc836bba495 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 60885da6da3..0b3794559e9 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -23,7 +23,7 @@ #include #include #include -#include +#include "config.h" #include #include diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 30373816eca..7d053bebe4a 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -1,5 +1,5 @@ #include "Common/DNSResolver.h" -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 9005f132974..57e4369e565 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/IO/S3/PocoHTTPClientFactory.cpp b/src/IO/S3/PocoHTTPClientFactory.cpp index b257f96e383..9dd52a263b0 100644 --- a/src/IO/S3/PocoHTTPClientFactory.cpp +++ b/src/IO/S3/PocoHTTPClientFactory.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index 21d421bb4f6..9b2a65d84fc 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -1,6 +1,6 @@ #include -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 41d5d63a5e1..df19748b493 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index ce469dfb2b4..5c27b32985f 100644 --- 
a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/IO/SnappyReadBuffer.cpp b/src/IO/SnappyReadBuffer.cpp index c75aee9dc3a..dbdf32a6d07 100644 --- a/src/IO/SnappyReadBuffer.cpp +++ b/src/IO/SnappyReadBuffer.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_SNAPPY #include diff --git a/src/IO/SnappyReadBuffer.h b/src/IO/SnappyReadBuffer.h index e440f2d3003..b1e2dfa876c 100644 --- a/src/IO/SnappyReadBuffer.h +++ b/src/IO/SnappyReadBuffer.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_SNAPPY diff --git a/src/IO/SnappyWriteBuffer.cpp b/src/IO/SnappyWriteBuffer.cpp index 5f3b5df4c3c..ada9afebcf5 100644 --- a/src/IO/SnappyWriteBuffer.cpp +++ b/src/IO/SnappyWriteBuffer.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_SNAPPY #include diff --git a/src/IO/SnappyWriteBuffer.h b/src/IO/SnappyWriteBuffer.h index 90fb8521c25..2ff86fb64ef 100644 --- a/src/IO/SnappyWriteBuffer.h +++ b/src/IO/SnappyWriteBuffer.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_SNAPPY #include diff --git a/src/IO/UseSSL.cpp b/src/IO/UseSSL.cpp index 9ddeb9ccdb5..1e724e54958 100644 --- a/src/IO/UseSSL.cpp +++ b/src/IO/UseSSL.cpp @@ -1,6 +1,6 @@ #include "UseSSL.h" -#include +#include "config.h" #if USE_SSL # include diff --git a/src/IO/WriteBufferFromEncryptedFile.h b/src/IO/WriteBufferFromEncryptedFile.h index 5f5897cbda0..25dd54ca9d5 100644 --- a/src/IO/WriteBufferFromEncryptedFile.h +++ b/src/IO/WriteBufferFromEncryptedFile.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include #if USE_SSL diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 3b9b042e2af..14118c3c04e 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #include #if USE_AWS_S3 diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index b655fe1d14b..782e580d8be 
100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/IO/tests/gtest_archive_reader_and_writer.cpp b/src/IO/tests/gtest_archive_reader_and_writer.cpp index e1864415e1b..3bc9d670f05 100644 --- a/src/IO/tests/gtest_archive_reader_and_writer.cpp +++ b/src/IO/tests/gtest_archive_reader_and_writer.cpp @@ -1,5 +1,5 @@ #include -#include +#include "config.h" #include #include diff --git a/src/IO/tests/gtest_file_encryption.cpp b/src/IO/tests/gtest_file_encryption.cpp index cae40afbb38..5353faa6086 100644 --- a/src/IO/tests/gtest_file_encryption.cpp +++ b/src/IO/tests/gtest_file_encryption.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_SSL #include diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 41ce102ca8a..161dc81266c 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -1,5 +1,5 @@ #include -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index a8dd6bec37a..9e32b3c4618 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -16,7 +16,7 @@ #include #include #include -#include +#include "config.h" #include #if USE_SSL # include diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h index 82d76d05673..61f5bf77b07 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_HIVE #include diff --git a/src/Server/CertificateReloader.h b/src/Server/CertificateReloader.h index a42ba64667b..9f04179b8d6 100644 --- a/src/Server/CertificateReloader.h +++ b/src/Server/CertificateReloader.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if 
USE_SSL diff --git a/src/Server/GRPCServer.h b/src/Server/GRPCServer.h index e2b48f1c16b..359a2506e95 100644 --- a/src/Server/GRPCServer.h +++ b/src/Server/GRPCServer.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_GRPC #include diff --git a/src/Server/HTTP/HTTPServerRequest.h b/src/Server/HTTP/HTTPServerRequest.h index cfaeb108095..7ddbd296280 100644 --- a/src/Server/HTTP/HTTPServerRequest.h +++ b/src/Server/HTTP/HTTPServerRequest.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include "config.h" #include diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 8886a77c9b5..6b540c3484b 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -32,7 +32,7 @@ #include #include -#include +#include "config.h" #include #include diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index ee83c4fa21b..54978268c19 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include "config_core.h" #if USE_NURAFT diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 3af5f7a0eb2..2f43d471c40 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -9,7 +9,7 @@ #include #include "IServer.h" -#include +#include "config.h" #if USE_SSL # include diff --git a/src/Server/MySQLHandlerFactory.h b/src/Server/MySQLHandlerFactory.h index 25f1af85273..38caae922ee 100644 --- a/src/Server/MySQLHandlerFactory.h +++ b/src/Server/MySQLHandlerFactory.h @@ -5,7 +5,7 @@ #include #include -#include +#include "config.h" #if USE_SSL # include diff --git a/src/Server/PostgreSQLHandler.h b/src/Server/PostgreSQLHandler.h index db19d3ae0bd..6fc128e3883 100644 --- a/src/Server/PostgreSQLHandler.h +++ b/src/Server/PostgreSQLHandler.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include "config.h" #include #include #include diff --git a/src/Server/PostgreSQLHandlerFactory.h 
b/src/Server/PostgreSQLHandlerFactory.h index e9241da6f0e..35046325386 100644 --- a/src/Server/PostgreSQLHandlerFactory.h +++ b/src/Server/PostgreSQLHandlerFactory.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include "config.h" namespace DB { diff --git a/src/Server/ProtocolServerAdapter.h b/src/Server/ProtocolServerAdapter.h index 90aec7471ee..850640ab70a 100644 --- a/src/Server/ProtocolServerAdapter.h +++ b/src/Server/ProtocolServerAdapter.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include #include diff --git a/src/Storages/Cache/registerRemoteFileMetadatas.cpp b/src/Storages/Cache/registerRemoteFileMetadatas.cpp index 39705b810b7..c76e85e7fc2 100644 --- a/src/Storages/Cache/registerRemoteFileMetadatas.cpp +++ b/src/Storages/Cache/registerRemoteFileMetadatas.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include "config.h" namespace DB { diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h index 0cb4b9b7a74..dd77fc70358 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_HDFS #include diff --git a/src/Storages/HDFS/HDFSCommon.h b/src/Storages/HDFS/HDFSCommon.h index d33a0ac97b6..4588480602a 100644 --- a/src/Storages/HDFS/HDFSCommon.h +++ b/src/Storages/HDFS/HDFSCommon.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_HDFS #include diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/HDFS/ReadBufferFromHDFS.h index c3b859f0566..6ea4fb8c106 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.h +++ b/src/Storages/HDFS/ReadBufferFromHDFS.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_HDFS #include diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 45caddb21ea..2170b4142e8 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ 
b/src/Storages/HDFS/StorageHDFS.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_HDFS diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 896371f9685..90a42d0c692 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_HDFS diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 47a6fbf5eaa..467203c58f6 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_HDFS diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 21ae73c11ea..3239a1e4076 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_HDFS diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index f8079d95f3c..a179f484652 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_HDFS diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.h b/src/Storages/HDFS/WriteBufferFromHDFS.h index 3cc11a35186..ec54348c174 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.h +++ b/src/Storages/HDFS/WriteBufferFromHDFS.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_HDFS #include diff --git a/src/Storages/Hive/HiveCommon.h b/src/Storages/Hive/HiveCommon.h index 297b79da935..e2c19fb1684 100644 --- a/src/Storages/Hive/HiveCommon.h +++ b/src/Storages/Hive/HiveCommon.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include "config.h" #if USE_HIVE diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index 26e1b844b9d..1556d6860c1 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ 
-1,5 +1,5 @@ #pragma once -#include +#include "config.h" #if USE_HIVE diff --git a/src/Storages/Hive/HiveSettings.h b/src/Storages/Hive/HiveSettings.h index ed430ba97cd..ed4869608ba 100644 --- a/src/Storages/Hive/HiveSettings.h +++ b/src/Storages/Hive/HiveSettings.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_HIVE diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index efc744c6a9f..9c02d228f97 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_HIVE diff --git a/src/Storages/Hive/StorageHiveMetadata.h b/src/Storages/Hive/StorageHiveMetadata.h index d385274588f..a3dc814652c 100644 --- a/src/Storages/Hive/StorageHiveMetadata.h +++ b/src/Storages/Hive/StorageHiveMetadata.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_HIVE diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index a7aae90fcc1..475461aa0d6 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -1,6 +1,6 @@ #include -#include +#include "config.h" #include #include diff --git a/src/Storages/StorageMongoDBSocketFactory.cpp b/src/Storages/StorageMongoDBSocketFactory.cpp index f21e8746eb9..7308c4b3ce7 100644 --- a/src/Storages/StorageMongoDBSocketFactory.cpp +++ b/src/Storages/StorageMongoDBSocketFactory.cpp @@ -2,7 +2,7 @@ #include -#include +#include "config.h" #include #include diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index e15956f78be..675dd548088 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #include #include "IO/ParallelReadBuffer.h" #include "IO/IOThreadPool.h" diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index c63508c8e6a..a983a59d98c 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h 
@@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 800bce0afde..df927069bb0 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -1,6 +1,6 @@ #include "Storages/StorageS3Cluster.h" -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index e5ca3b58123..d2cf1b917a1 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/Storages/System/StorageSystemCertificates.cpp b/src/Storages/System/StorageSystemCertificates.cpp index 4ad3d4df29c..c4d262f2f44 100644 --- a/src/Storages/System/StorageSystemCertificates.cpp +++ b/src/Storages/System/StorageSystemCertificates.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #include #include #include diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 055270be4ae..107c3d7fb56 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include "config.h" #include "config_core.h" #include "config_formats.h" diff --git a/src/TableFunctions/Hive/TableFunctionHive.h b/src/TableFunctions/Hive/TableFunctionHive.h index 20bc61c21aa..ec09a87a876 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.h +++ b/src/TableFunctions/Hive/TableFunctionHive.h @@ -1,5 +1,5 @@ #pragma once -#include +#include "config.h" #if USE_HIVE #include diff --git a/src/TableFunctions/ITableFunctionXDBC.h b/src/TableFunctions/ITableFunctionXDBC.h index 0d43e580458..42a3d30a728 100644 --- a/src/TableFunctions/ITableFunctionXDBC.h +++ b/src/TableFunctions/ITableFunctionXDBC.h @@ -5,7 +5,7 @@ #include #include -#include +#include "config.h" namespace DB { diff --git a/src/TableFunctions/TableFunctionHDFS.cpp 
b/src/TableFunctions/TableFunctionHDFS.cpp index 57f692eadad..7aab55b48c9 100644 --- a/src/TableFunctions/TableFunctionHDFS.cpp +++ b/src/TableFunctions/TableFunctionHDFS.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #include "registerTableFunctions.h" #if USE_HDFS diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h index 74139818209..a391673e04d 100644 --- a/src/TableFunctions/TableFunctionHDFS.h +++ b/src/TableFunctions/TableFunctionHDFS.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_HDFS diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp index 385d280a100..26fcb514cca 100644 --- a/src/TableFunctions/TableFunctionHDFSCluster.cpp +++ b/src/TableFunctions/TableFunctionHDFSCluster.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_HDFS diff --git a/src/TableFunctions/TableFunctionHDFSCluster.h b/src/TableFunctions/TableFunctionHDFSCluster.h index f8f86dda939..a0555a904d1 100644 --- a/src/TableFunctions/TableFunctionHDFSCluster.h +++ b/src/TableFunctions/TableFunctionHDFSCluster.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_HDFS diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 0bf33007760..b8e4fcb67fa 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index be84bc4d8ab..5c12c2a3975 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index 99c3ff85009..5823aaad876 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ 
b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -1,4 +1,4 @@ -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h index d4278980e9f..42dbfe2ec23 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ b/src/TableFunctions/TableFunctionS3Cluster.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #if USE_AWS_S3 diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index d7e38403cae..25690c29b76 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config.h" #include "config_core.h" namespace DB From 6d70b4a1f64c76c944f96a242da72e4688ade796 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Sep 2022 08:55:05 +0000 Subject: [PATCH 063/266] Generate config_version.h into ${CONFIG_INCLUDE_PATH} This makes the target location consistent with other auto-generated files like config_formats.h, config_core.h, and config_functions.h and simplifies the build of clickhouse_common. 
--- programs/client/Client.cpp | 2 +- programs/keeper/Keeper.cpp | 2 +- programs/server/Server.cpp | 2 +- src/CMakeLists.txt | 5 +---- src/Client/ClientBase.cpp | 2 +- src/Client/Connection.cpp | 2 +- src/Common/ClickHouseRevision.cpp | 2 +- src/Common/Exception.cpp | 2 +- src/Coordination/FourLetterCommand.h | 2 +- src/Daemon/BaseDaemon.cpp | 2 +- src/Daemon/SentryWriter.cpp | 2 +- src/Functions/serverConstants.cpp | 2 +- src/IO/ReadWriteBufferFromHTTP.h | 2 +- src/Interpreters/ClientInfo.cpp | 2 +- src/Interpreters/CrashLog.cpp | 2 +- src/Server/MySQLHandler.cpp | 2 +- src/Server/PostgreSQLHandler.cpp | 2 +- src/Server/TCPHandler.cpp | 2 +- src/Storages/Kafka/StorageKafka.cpp | 2 +- 19 files changed, 19 insertions(+), 22 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 5bd9d28d8e3..06f37b58163 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -15,7 +15,7 @@ #include -#include +#include "config_version.h" #include #include #include diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index fdfe0cef2b3..c4bfa92c72d 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -25,7 +25,7 @@ #include #include "config_core.h" -#include "Common/config_version.h" +#include "config_version.h" #if USE_SSL # include diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 14f97923ce3..612bf880b67 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -89,7 +89,7 @@ #include #include "config_core.h" -#include "Common/config_version.h" +#include "config_version.h" #if defined(OS_LINUX) # include diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3518ce0be6b..11283e49d8f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,13 +18,11 @@ else() endif() include(../cmake/limit_jobs.cmake) -set (CONFIG_VERSION "${CMAKE_CURRENT_BINARY_DIR}/Common/config_version.h") - include (../cmake/version.cmake) message (STATUS "Will build 
${VERSION_FULL} revision ${VERSION_REVISION} ${VERSION_OFFICIAL}") include (configure_config.cmake) configure_file (Common/config.h.in ${CONFIG_INCLUDE_PATH}/config.h) -configure_file (Common/config_version.h.in ${CONFIG_VERSION}) +configure_file (Common/config_version.h.in ${CONFIG_INCLUDE_PATH}/config_version.h) configure_file (Core/config_core.h.in "${CMAKE_CURRENT_BINARY_DIR}/Core/include/config_core.h") if (USE_DEBUG_HELPERS) @@ -152,7 +150,6 @@ else() endif () list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD}) -list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION}) list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/FunctionsLogical.cpp Functions/indexHint.cpp) list (APPEND dbms_headers Functions/IFunction.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/FunctionsLogical.h Functions/indexHint.h) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f4bea93f458..8925acdfa6d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -24,7 +24,7 @@ #include #include -#include +#include "config_version.h" #include #include #include diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index f4190ef8f01..8ddd0334396 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -33,7 +33,7 @@ #include #include -#include +#include "config_version.h" #include "config.h" #if USE_SSL diff --git a/src/Common/ClickHouseRevision.cpp b/src/Common/ClickHouseRevision.cpp index c7c27436466..9dd91159f28 100644 --- a/src/Common/ClickHouseRevision.cpp +++ b/src/Common/ClickHouseRevision.cpp @@ -1,5 +1,5 @@ #include -#include +#include "config_version.h" namespace ClickHouseRevision { diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 931f06fdb51..399ccecf000 100644 --- a/src/Common/Exception.cpp +++ 
b/src/Common/Exception.cpp @@ -18,7 +18,7 @@ #include #include -#include +#include "config_version.h" namespace fs = std::filesystem; diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 8a98b94b33a..3374687ad82 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -7,7 +7,7 @@ #include #include -#include +#include "config_version.h" namespace DB { diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 157255bba12..2dddfd6874a 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -59,7 +59,7 @@ #include #include -#include +#include "config_version.h" #if defined(OS_DARWIN) # pragma GCC diagnostic ignored "-Wunused-macros" diff --git a/src/Daemon/SentryWriter.cpp b/src/Daemon/SentryWriter.cpp index 928a06cbee5..ad8967c3977 100644 --- a/src/Daemon/SentryWriter.cpp +++ b/src/Daemon/SentryWriter.cpp @@ -15,8 +15,8 @@ #include #include -#include "Common/config_version.h" #include "config.h" +#include "config_version.h" #if USE_SENTRY && !defined(KEEPER_STANDALONE_BUILD) diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index 623382e1da3..ccefd74b85b 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -10,7 +10,7 @@ #include -#include +#include "config_version.h" namespace DB diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 0b3794559e9..de2b5654ae5 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -24,7 +24,7 @@ #include #include #include "config.h" -#include +#include "config_version.h" #include diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 8136a2dde67..abd6f226ff5 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include "config_version.h" namespace DB diff --git 
a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index 59bd00d4ac0..47a9d62fba6 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include "config_version.h" namespace DB diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index b4d94d8a78c..8e701956d29 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -23,7 +23,7 @@ #include #include -#include +#include "config_version.h" #if USE_SSL # include diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index b0a0a474fd9..b2a3935263d 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include "config_version.h" #if USE_SSL # include diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 550ae1bff31..df22afebb1d 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -54,7 +54,7 @@ #include "Core/Protocol.h" #include "TCPHandler.h" -#include +#include "config_version.h" using namespace std::literals; using namespace DB; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index d9bacffd053..28eb85ab6ad 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -35,7 +35,7 @@ #include #include #include -#include +#include "config_version.h" #include #include #include From 0753fd1c77b49a9ac4aba9ddc72aae750b6149d7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Sep 2022 12:35:02 +0000 Subject: [PATCH 064/266] Consolidate config_functions.h into config.h Less duplication, less confusion ... 
--- src/Common/JSONParsers/RapidJSONParser.h | 2 +- src/Common/JSONParsers/SimdJSONParser.h | 2 +- src/Common/config.h.in | 6 +++++- src/Functions/CMakeLists.txt | 3 --- src/Functions/FunctionBase64Conversion.h | 2 +- src/Functions/FunctionMathBinaryFloat64.h | 2 +- src/Functions/FunctionMathUnary.h | 2 +- src/Functions/FunctionSQLJSON.h | 2 +- src/Functions/FunctionsHashing.h | 2 +- src/Functions/FunctionsJSON.cpp | 2 +- .../FunctionsLanguageClassification.cpp | 2 +- src/Functions/MatchImpl.h | 2 +- src/Functions/MultiMatchAllIndicesImpl.h | 2 +- src/Functions/MultiMatchAnyImpl.h | 2 +- src/Functions/Regexps.h | 2 +- src/Functions/ReplaceRegexpImpl.h | 2 +- src/Functions/base64Encode.cpp | 2 +- src/Functions/config_functions.h.in | 12 ----------- src/Functions/configure_config.cmake | 21 ------------------- src/Functions/geoToH3.cpp | 2 +- src/Functions/geoToS2.cpp | 2 +- src/Functions/h3CellAreaM2.cpp | 2 +- src/Functions/h3CellAreaRads2.cpp | 2 +- src/Functions/h3Distance.cpp | 2 +- src/Functions/h3EdgeAngle.cpp | 2 +- src/Functions/h3EdgeLengthKm.cpp | 2 +- src/Functions/h3EdgeLengthM.cpp | 2 +- src/Functions/h3ExactEdgeLengthKm.cpp | 2 +- src/Functions/h3ExactEdgeLengthM.cpp | 2 +- src/Functions/h3ExactEdgeLengthRads.cpp | 2 +- src/Functions/h3GetBaseCell.cpp | 2 +- ...DestinationIndexFromUnidirectionalEdge.cpp | 2 +- src/Functions/h3GetFaces.cpp | 2 +- .../h3GetIndexesFromUnidirectionalEdge.cpp | 2 +- ...h3GetOriginIndexFromUnidirectionalEdge.cpp | 2 +- src/Functions/h3GetPentagonIndexes.cpp | 2 +- src/Functions/h3GetRes0Indexes.cpp | 2 +- src/Functions/h3GetResolution.cpp | 2 +- src/Functions/h3GetUnidirectionalEdge.cpp | 2 +- .../h3GetUnidirectionalEdgeBoundary.cpp | 2 +- .../h3GetUnidirectionalEdgesFromHexagon.cpp | 2 +- src/Functions/h3HexAreaKm2.cpp | 2 +- src/Functions/h3HexAreaM2.cpp | 2 +- src/Functions/h3HexRing.cpp | 2 +- src/Functions/h3IndexesAreNeighbors.cpp | 2 +- src/Functions/h3IsPentagon.cpp | 2 +- src/Functions/h3IsResClassIII.cpp | 2 +- 
src/Functions/h3IsValid.cpp | 2 +- src/Functions/h3Line.cpp | 2 +- src/Functions/h3NumHexagons.cpp | 2 +- src/Functions/h3PointDist.cpp | 2 +- src/Functions/h3ToCenterChild.cpp | 2 +- src/Functions/h3ToChildren.cpp | 2 +- src/Functions/h3ToGeoBoundary.cpp | 2 +- src/Functions/h3ToParent.cpp | 2 +- src/Functions/h3ToString.cpp | 2 +- src/Functions/h3UnidirectionalEdgeIsValid.cpp | 2 +- src/Functions/h3kRing.cpp | 2 +- src/Functions/h3toGeo.cpp | 2 +- src/Functions/s2CapContains.cpp | 2 +- src/Functions/s2CapUnion.cpp | 2 +- src/Functions/s2CellsIntersect.cpp | 2 +- src/Functions/s2GetNeighbors.cpp | 2 +- src/Functions/s2RectAdd.cpp | 2 +- src/Functions/s2RectContains.cpp | 2 +- src/Functions/s2RectIntersection.cpp | 2 +- src/Functions/s2RectUnion.cpp | 2 +- src/Functions/s2ToGeo.cpp | 2 +- src/Functions/stringToH3.cpp | 2 +- src/Storages/System/CMakeLists.txt | 2 +- src/configure_config.cmake | 12 +++++++++++ 71 files changed, 83 insertions(+), 103 deletions(-) delete mode 100644 src/Functions/config_functions.h.in delete mode 100644 src/Functions/configure_config.cmake diff --git a/src/Common/JSONParsers/RapidJSONParser.h b/src/Common/JSONParsers/RapidJSONParser.h index 77e8f6b2a74..01730bc0692 100644 --- a/src/Common/JSONParsers/RapidJSONParser.h +++ b/src/Common/JSONParsers/RapidJSONParser.h @@ -1,6 +1,6 @@ #pragma once -#include "config_functions.h" +#include "config.h" #if USE_RAPIDJSON # include diff --git a/src/Common/JSONParsers/SimdJSONParser.h b/src/Common/JSONParsers/SimdJSONParser.h index f3bbfe4dfde..14eb3cd6d78 100644 --- a/src/Common/JSONParsers/SimdJSONParser.h +++ b/src/Common/JSONParsers/SimdJSONParser.h @@ -1,6 +1,6 @@ #pragma once -#include "config_functions.h" +#include "config.h" #if USE_SIMDJSON # include diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 160937d36fa..33d2c6cd141 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -15,7 +15,6 @@ #cmakedefine01 USE_GRPC #cmakedefine01 USE_SIMDJSON #cmakedefine01 
USE_RAPIDJSON - #cmakedefine01 USE_DATASKETCHES #cmakedefine01 USE_YAML_CPP #cmakedefine01 USE_BZIP2 @@ -25,3 +24,8 @@ #cmakedefine01 USE_ODBC #cmakedefine01 USE_REPLXX #cmakedefine01 USE_JEMALLOC +#cmakedefine01 USE_H3 +#cmakedefine01 USE_S2_GEOMETRY +#cmakedefine01 USE_FASTOPS +#cmakedefine01 USE_NLP +#cmakedefine01 USE_VECTORSCAN diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index f89c87d6e90..611642ab5ca 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -1,6 +1,3 @@ -include(configure_config.cmake) -configure_file(config_functions.h.in ${CONFIG_INCLUDE_PATH}/config_functions.h) - add_subdirectory(divide) include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index 87a3309f7ef..2caff6c6e6a 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -1,5 +1,5 @@ #pragma once -#include "config_functions.h" +#include "config.h" #if USE_BASE64 # include diff --git a/src/Functions/FunctionMathBinaryFloat64.h b/src/Functions/FunctionMathBinaryFloat64.h index aec20d30271..f95279ab261 100644 --- a/src/Functions/FunctionMathBinaryFloat64.h +++ b/src/Functions/FunctionMathBinaryFloat64.h @@ -9,7 +9,7 @@ #include #include -#include "config_functions.h" +#include "config.h" namespace DB { diff --git a/src/Functions/FunctionMathUnary.h b/src/Functions/FunctionMathUnary.h index bd656db792b..6e4bff7122d 100644 --- a/src/Functions/FunctionMathUnary.h +++ b/src/Functions/FunctionMathUnary.h @@ -8,7 +8,7 @@ #include #include -#include "config_functions.h" +#include "config.h" /** FastOps is a fast vector math library from Mikhail Parakhin, https://www.linkedin.com/in/mikhail-parakhin/ * Enabled by default. 
diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index e45951e3ec5..dce953ddc6f 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -22,7 +22,7 @@ #include #include -#include "config_functions.h" +#include "config.h" namespace DB { diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 862592254c1..1375d490cc4 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -7,7 +7,7 @@ #include #include -#include "config_functions.h" +#include "config.h" #include "config_core.h" #include diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index 814f709af27..aefc82d2f5d 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -43,7 +43,7 @@ #include -#include "config_functions.h" +#include "config.h" namespace DB diff --git a/src/Functions/FunctionsLanguageClassification.cpp b/src/Functions/FunctionsLanguageClassification.cpp index 18579e674ec..ecc958a0a0c 100644 --- a/src/Functions/FunctionsLanguageClassification.cpp +++ b/src/Functions/FunctionsLanguageClassification.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_NLP diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index 874f4d3c5b8..92233790bb1 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -7,7 +7,7 @@ #include #include "Regexps.h" -#include "config_functions.h" +#include "config.h" #include "config.h" #include diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index 79f4d564e42..fa724bae005 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -8,7 +8,7 @@ #include #include "Regexps.h" -#include "config_functions.h" +#include "config.h" #include "config.h" #if USE_VECTORSCAN diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index 
508e568d232..32318a27ea1 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -7,7 +7,7 @@ #include #include "Regexps.h" -#include "config_functions.h" +#include "config.h" #include "config.h" #if USE_VECTORSCAN diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index 08177c0259e..f4e4f924678 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -16,7 +16,7 @@ #include #include -#include "config_functions.h" +#include "config.h" #if USE_VECTORSCAN # include diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index b1d04d6e9e2..72282278062 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -5,7 +5,7 @@ #include #include -#include "config_functions.h" +#include "config.h" #include "config.h" #include diff --git a/src/Functions/base64Encode.cpp b/src/Functions/base64Encode.cpp index e895230d44f..fc06935e0a1 100644 --- a/src/Functions/base64Encode.cpp +++ b/src/Functions/base64Encode.cpp @@ -1,7 +1,7 @@ #include #include -#include "config_functions.h" +#include "config.h" #if USE_BASE64 # include diff --git a/src/Functions/config_functions.h.in b/src/Functions/config_functions.h.in deleted file mode 100644 index fc59968a05e..00000000000 --- a/src/Functions/config_functions.h.in +++ /dev/null @@ -1,12 +0,0 @@ -/// This file was autogenerated by CMake - -#pragma once - -#cmakedefine01 USE_BASE64 -#cmakedefine01 USE_SIMDJSON -#cmakedefine01 USE_RAPIDJSON -#cmakedefine01 USE_H3 -#cmakedefine01 USE_S2_GEOMETRY -#cmakedefine01 USE_FASTOPS -#cmakedefine01 USE_NLP -#cmakedefine01 USE_VECTORSCAN diff --git a/src/Functions/configure_config.cmake b/src/Functions/configure_config.cmake deleted file mode 100644 index 33c36412844..00000000000 --- a/src/Functions/configure_config.cmake +++ /dev/null @@ -1,21 +0,0 @@ -if (TARGET ch_contrib::fastops) - set(USE_FASTOPS 1) -endif() -if (TARGET ch_contrib::base64) - set(USE_BASE64 1) -endif() -if (TARGET 
ch_contrib::simdjson) - set(USE_SIMDJSON 1) -endif() -if (TARGET ch_contrib::rapidjson) - set(USE_RAPIDJSON 1) -endif() -if (TARGET ch_contrib::s2) - set(USE_S2_GEOMETRY 1) -endif() -if (TARGET ch_contrib::h3) - set(USE_H3 1) -endif() -if (TARGET ch_contrib::vectorscan) - set(USE_VECTORSCAN 1) -endif() diff --git a/src/Functions/geoToH3.cpp b/src/Functions/geoToH3.cpp index 284598ee4d5..91c0e5b2361 100644 --- a/src/Functions/geoToH3.cpp +++ b/src/Functions/geoToH3.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/geoToS2.cpp b/src/Functions/geoToS2.cpp index 6cfa892b193..c1f333f63e6 100644 --- a/src/Functions/geoToS2.cpp +++ b/src/Functions/geoToS2.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_S2_GEOMETRY diff --git a/src/Functions/h3CellAreaM2.cpp b/src/Functions/h3CellAreaM2.cpp index 10fac6c9441..44d9b458e5a 100644 --- a/src/Functions/h3CellAreaM2.cpp +++ b/src/Functions/h3CellAreaM2.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3CellAreaRads2.cpp b/src/Functions/h3CellAreaRads2.cpp index c74944e4e2f..942b587fb16 100644 --- a/src/Functions/h3CellAreaRads2.cpp +++ b/src/Functions/h3CellAreaRads2.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3Distance.cpp b/src/Functions/h3Distance.cpp index d4291c30424..33328d74f9b 100644 --- a/src/Functions/h3Distance.cpp +++ b/src/Functions/h3Distance.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3EdgeAngle.cpp b/src/Functions/h3EdgeAngle.cpp index f80bfd1cdf9..bd6d5d2b47f 100644 --- a/src/Functions/h3EdgeAngle.cpp +++ b/src/Functions/h3EdgeAngle.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3EdgeLengthKm.cpp b/src/Functions/h3EdgeLengthKm.cpp index ca2b050b485..821b699e8bb 100644 
--- a/src/Functions/h3EdgeLengthKm.cpp +++ b/src/Functions/h3EdgeLengthKm.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3EdgeLengthM.cpp b/src/Functions/h3EdgeLengthM.cpp index a2786da51f1..5544f8555ff 100644 --- a/src/Functions/h3EdgeLengthM.cpp +++ b/src/Functions/h3EdgeLengthM.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3ExactEdgeLengthKm.cpp b/src/Functions/h3ExactEdgeLengthKm.cpp index 1cd43c98576..15ae38610a7 100644 --- a/src/Functions/h3ExactEdgeLengthKm.cpp +++ b/src/Functions/h3ExactEdgeLengthKm.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3ExactEdgeLengthM.cpp b/src/Functions/h3ExactEdgeLengthM.cpp index 1b930a592db..b3d1448c21f 100644 --- a/src/Functions/h3ExactEdgeLengthM.cpp +++ b/src/Functions/h3ExactEdgeLengthM.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3ExactEdgeLengthRads.cpp b/src/Functions/h3ExactEdgeLengthRads.cpp index 62b9d916cdf..d7f3af86969 100644 --- a/src/Functions/h3ExactEdgeLengthRads.cpp +++ b/src/Functions/h3ExactEdgeLengthRads.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3GetBaseCell.cpp b/src/Functions/h3GetBaseCell.cpp index a0cd75c86a9..185cc00be2d 100644 --- a/src/Functions/h3GetBaseCell.cpp +++ b/src/Functions/h3GetBaseCell.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3GetDestinationIndexFromUnidirectionalEdge.cpp b/src/Functions/h3GetDestinationIndexFromUnidirectionalEdge.cpp index 38a97ac8be0..7748de11af3 100644 --- a/src/Functions/h3GetDestinationIndexFromUnidirectionalEdge.cpp +++ b/src/Functions/h3GetDestinationIndexFromUnidirectionalEdge.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git 
a/src/Functions/h3GetFaces.cpp b/src/Functions/h3GetFaces.cpp index 42f430fb2ab..83816671e39 100644 --- a/src/Functions/h3GetFaces.cpp +++ b/src/Functions/h3GetFaces.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3GetIndexesFromUnidirectionalEdge.cpp b/src/Functions/h3GetIndexesFromUnidirectionalEdge.cpp index 3d98a6374c6..936652cba87 100644 --- a/src/Functions/h3GetIndexesFromUnidirectionalEdge.cpp +++ b/src/Functions/h3GetIndexesFromUnidirectionalEdge.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3GetOriginIndexFromUnidirectionalEdge.cpp b/src/Functions/h3GetOriginIndexFromUnidirectionalEdge.cpp index cbe69a4e887..52f8466811e 100644 --- a/src/Functions/h3GetOriginIndexFromUnidirectionalEdge.cpp +++ b/src/Functions/h3GetOriginIndexFromUnidirectionalEdge.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3GetPentagonIndexes.cpp b/src/Functions/h3GetPentagonIndexes.cpp index b24b58a2568..098a577f05c 100644 --- a/src/Functions/h3GetPentagonIndexes.cpp +++ b/src/Functions/h3GetPentagonIndexes.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3GetRes0Indexes.cpp b/src/Functions/h3GetRes0Indexes.cpp index 0db89752fa0..22659608a3d 100644 --- a/src/Functions/h3GetRes0Indexes.cpp +++ b/src/Functions/h3GetRes0Indexes.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3GetResolution.cpp b/src/Functions/h3GetResolution.cpp index 153cf883b19..f530e9b0559 100644 --- a/src/Functions/h3GetResolution.cpp +++ b/src/Functions/h3GetResolution.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3GetUnidirectionalEdge.cpp b/src/Functions/h3GetUnidirectionalEdge.cpp index eddc35bd45a..4e41cdbfef6 100644 --- 
a/src/Functions/h3GetUnidirectionalEdge.cpp +++ b/src/Functions/h3GetUnidirectionalEdge.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp b/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp index bf2e904c473..12e6f4d810a 100644 --- a/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp +++ b/src/Functions/h3GetUnidirectionalEdgeBoundary.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3GetUnidirectionalEdgesFromHexagon.cpp b/src/Functions/h3GetUnidirectionalEdgesFromHexagon.cpp index e31359c297d..0dc5a3d1a06 100644 --- a/src/Functions/h3GetUnidirectionalEdgesFromHexagon.cpp +++ b/src/Functions/h3GetUnidirectionalEdgesFromHexagon.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3HexAreaKm2.cpp b/src/Functions/h3HexAreaKm2.cpp index e29b66f7538..b6c9434077f 100644 --- a/src/Functions/h3HexAreaKm2.cpp +++ b/src/Functions/h3HexAreaKm2.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3HexAreaM2.cpp b/src/Functions/h3HexAreaM2.cpp index eb90e5daa2e..07b276fe155 100644 --- a/src/Functions/h3HexAreaM2.cpp +++ b/src/Functions/h3HexAreaM2.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3HexRing.cpp b/src/Functions/h3HexRing.cpp index 25cde81e061..633f2f6b8a9 100644 --- a/src/Functions/h3HexRing.cpp +++ b/src/Functions/h3HexRing.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3IndexesAreNeighbors.cpp b/src/Functions/h3IndexesAreNeighbors.cpp index 82a05a02f0d..ccd63e7d4e4 100644 --- a/src/Functions/h3IndexesAreNeighbors.cpp +++ b/src/Functions/h3IndexesAreNeighbors.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git 
a/src/Functions/h3IsPentagon.cpp b/src/Functions/h3IsPentagon.cpp index 048a5ca50ce..76317a68bb1 100644 --- a/src/Functions/h3IsPentagon.cpp +++ b/src/Functions/h3IsPentagon.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3IsResClassIII.cpp b/src/Functions/h3IsResClassIII.cpp index 23a11f1a544..fce109ce3d4 100644 --- a/src/Functions/h3IsResClassIII.cpp +++ b/src/Functions/h3IsResClassIII.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3IsValid.cpp b/src/Functions/h3IsValid.cpp index 6b7b47bc8d4..94231115c87 100644 --- a/src/Functions/h3IsValid.cpp +++ b/src/Functions/h3IsValid.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3Line.cpp b/src/Functions/h3Line.cpp index d596c6ec956..6767e7a93aa 100644 --- a/src/Functions/h3Line.cpp +++ b/src/Functions/h3Line.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3NumHexagons.cpp b/src/Functions/h3NumHexagons.cpp index 3a13071d6cf..5414d42b49c 100644 --- a/src/Functions/h3NumHexagons.cpp +++ b/src/Functions/h3NumHexagons.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3PointDist.cpp b/src/Functions/h3PointDist.cpp index 2d3512f0192..00b8fb0089e 100644 --- a/src/Functions/h3PointDist.cpp +++ b/src/Functions/h3PointDist.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3ToCenterChild.cpp b/src/Functions/h3ToCenterChild.cpp index 6104d179d30..daa7336bebc 100644 --- a/src/Functions/h3ToCenterChild.cpp +++ b/src/Functions/h3ToCenterChild.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3ToChildren.cpp b/src/Functions/h3ToChildren.cpp index f18d96c6a90..8a17d014f3a 100644 --- 
a/src/Functions/h3ToChildren.cpp +++ b/src/Functions/h3ToChildren.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3ToGeoBoundary.cpp b/src/Functions/h3ToGeoBoundary.cpp index dedb195f79c..185fc361442 100644 --- a/src/Functions/h3ToGeoBoundary.cpp +++ b/src/Functions/h3ToGeoBoundary.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3ToParent.cpp b/src/Functions/h3ToParent.cpp index d7678004125..2dc756f0f71 100644 --- a/src/Functions/h3ToParent.cpp +++ b/src/Functions/h3ToParent.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3ToString.cpp b/src/Functions/h3ToString.cpp index 9a6b1504af0..897329ed9ec 100644 --- a/src/Functions/h3ToString.cpp +++ b/src/Functions/h3ToString.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3UnidirectionalEdgeIsValid.cpp b/src/Functions/h3UnidirectionalEdgeIsValid.cpp index 012a14823c9..129bd6730c3 100644 --- a/src/Functions/h3UnidirectionalEdgeIsValid.cpp +++ b/src/Functions/h3UnidirectionalEdgeIsValid.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3kRing.cpp b/src/Functions/h3kRing.cpp index f2d50532e61..affd70a1d4a 100644 --- a/src/Functions/h3kRing.cpp +++ b/src/Functions/h3kRing.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/h3toGeo.cpp b/src/Functions/h3toGeo.cpp index aff55324e48..974b09e1c69 100644 --- a/src/Functions/h3toGeo.cpp +++ b/src/Functions/h3toGeo.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Functions/s2CapContains.cpp b/src/Functions/s2CapContains.cpp index f7a31120e0f..9dfbc05a6a0 100644 --- a/src/Functions/s2CapContains.cpp +++ b/src/Functions/s2CapContains.cpp @@ -1,4 +1,4 @@ -#include 
"config_functions.h" +#include "config.h" #if USE_S2_GEOMETRY diff --git a/src/Functions/s2CapUnion.cpp b/src/Functions/s2CapUnion.cpp index da329065553..06c0b4e6d83 100644 --- a/src/Functions/s2CapUnion.cpp +++ b/src/Functions/s2CapUnion.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_S2_GEOMETRY diff --git a/src/Functions/s2CellsIntersect.cpp b/src/Functions/s2CellsIntersect.cpp index 51cef79285f..1fac5fd6e60 100644 --- a/src/Functions/s2CellsIntersect.cpp +++ b/src/Functions/s2CellsIntersect.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_S2_GEOMETRY diff --git a/src/Functions/s2GetNeighbors.cpp b/src/Functions/s2GetNeighbors.cpp index 906a0e01195..b200f61315b 100644 --- a/src/Functions/s2GetNeighbors.cpp +++ b/src/Functions/s2GetNeighbors.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_S2_GEOMETRY diff --git a/src/Functions/s2RectAdd.cpp b/src/Functions/s2RectAdd.cpp index fe74f8b2507..e086fdd6b3a 100644 --- a/src/Functions/s2RectAdd.cpp +++ b/src/Functions/s2RectAdd.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_S2_GEOMETRY diff --git a/src/Functions/s2RectContains.cpp b/src/Functions/s2RectContains.cpp index c10a4e5ecae..e4d74ee2545 100644 --- a/src/Functions/s2RectContains.cpp +++ b/src/Functions/s2RectContains.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_S2_GEOMETRY diff --git a/src/Functions/s2RectIntersection.cpp b/src/Functions/s2RectIntersection.cpp index cf4f7c8aa9d..072c7147809 100644 --- a/src/Functions/s2RectIntersection.cpp +++ b/src/Functions/s2RectIntersection.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_S2_GEOMETRY diff --git a/src/Functions/s2RectUnion.cpp b/src/Functions/s2RectUnion.cpp index 845dcb982b6..bb63229b484 100644 --- a/src/Functions/s2RectUnion.cpp +++ b/src/Functions/s2RectUnion.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" 
+#include "config.h" #if USE_S2_GEOMETRY diff --git a/src/Functions/s2ToGeo.cpp b/src/Functions/s2ToGeo.cpp index 63edfc84f97..5c1dbfa0382 100644 --- a/src/Functions/s2ToGeo.cpp +++ b/src/Functions/s2ToGeo.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_S2_GEOMETRY diff --git a/src/Functions/stringToH3.cpp b/src/Functions/stringToH3.cpp index 8a90866e131..21da96b41a9 100644 --- a/src/Functions/stringToH3.cpp +++ b/src/Functions/stringToH3.cpp @@ -1,4 +1,4 @@ -#include "config_functions.h" +#include "config.h" #if USE_H3 diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index 6bc080045f8..3e38f630fad 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -7,7 +7,7 @@ get_property(TZDATA_VERSION GLOBAL PROPERTY TZDATA_VERSION_PROP) function(generate_system_build_options) include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) - include(${ClickHouse_SOURCE_DIR}/src/Functions/configure_config.cmake) + include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Formats/configure_config.cmake) configure_file(StorageSystemBuildOptions.cpp.in StorageSystemBuildOptions.generated.cpp) endfunction() diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 3f3ddf54716..c7b4c7f84cf 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -103,3 +103,15 @@ endif() if (TARGET ch_contrib::jemalloc) set(USE_JEMALLOC 1) endif() +if (TARGET ch_contrib::h3) + set(USE_H3 1) +endif() +if (TARGET ch_contrib::s2) + set(USE_S2_GEOMETRY 1) +endif() +if (TARGET ch_contrib::fastops) + set(USE_FASTOPS 1) +endif() +if (TARGET ch_contrib::vectorscan) + set(USE_VECTORSCAN 1) +endif() From 03ee7efcb985bacb710f24774fbeb62aea93516b Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 28 Sep 2022 12:48:31 +0000 Subject: [PATCH 065/266] Better example in docs --- docs/en/interfaces/formats.md | 31 
++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 1086784b3f8..999d659329b 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1024,29 +1024,42 @@ To use object name as column value you can use special setting [format_json_obje Examples: For output: + +Let's say we have table `test` with two columns: +``` +โ”Œโ”€object_nameโ”€โ”ฌโ”€numberโ”€โ” +โ”‚ first_obj โ”‚ 1 โ”‚ +โ”‚ second_obj โ”‚ 2 โ”‚ +โ”‚ trhird_obj โ”‚ 3 โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` +Let's output it in `JSONObjectEachRow` format and use `format_json_object_each_row_column_for_object_name` setting: + ```sql -insert into function file('data.json', JSONObjectEachRow) select 'obj' as object_name, number from numbers(3) settings format_json_object_each_row_column_for_object_name='object_name' +select * from test settings format_json_object_each_row_column_for_object_name='object_name' ``` -File "data.json" will contain: +The output: ```json { - "obj": {"number":"0"}, - "obj": {"number":"1"}, - "obj": {"number":"2"} + "first_obj": {"number": 1}, + "second_obj": {"number": 2}, + "trhird_obj": {"number": 3} } ``` For input: + +Let's say we stored output from previous example in a file with name `data.json`: ```sql -select * from file('data.json', JSONObjectEachRow, 'obj String, number UInt64') settings format_json_object_each_row_column_for_object_name='object_name' +select * from file('data.json', JSONObjectEachRow, 'object_name String, number UInt64') settings format_json_object_each_row_column_for_object_name='object_name' ``` ``` โ”Œโ”€object_nameโ”€โ”ฌโ”€numberโ”€โ” -โ”‚ obj โ”‚ 0 โ”‚ -โ”‚ obj โ”‚ 1 โ”‚ -โ”‚ obj โ”‚ 2 โ”‚ +โ”‚ first_obj โ”‚ 1 โ”‚ +โ”‚ second_obj โ”‚ 2 โ”‚ +โ”‚ trhird_obj โ”‚ 3 โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ ``` From 
09c62f6728e2b61eaa06de6772f514c659c74b1a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Sep 2022 12:57:31 +0000 Subject: [PATCH 066/266] Consolidate config_formats.h into config.h Less duplication, less confusion ... --- src/CMakeLists.txt | 1 - src/Common/config.h.in | 7 +++++++ src/Formats/CMakeLists.txt | 2 -- src/Formats/CapnProtoUtils.h | 2 +- src/Formats/ProtobufReader.h | 2 +- src/Formats/ProtobufSchemas.cpp | 2 +- src/Formats/ProtobufSchemas.h | 2 +- src/Formats/ProtobufSerializer.h | 2 +- src/Formats/ProtobufWriter.h | 2 +- src/Formats/config_formats.h.in | 12 ----------- src/Formats/configure_config.cmake | 20 ------------------- .../Formats/Impl/ArrowBlockInputFormat.h | 2 +- .../Formats/Impl/ArrowBlockOutputFormat.h | 2 +- .../Formats/Impl/ArrowBufferedStreams.h | 2 +- .../Formats/Impl/ArrowColumnToCHColumn.h | 2 +- .../Formats/Impl/AvroRowInputFormat.h | 2 +- .../Formats/Impl/AvroRowOutputFormat.h | 2 +- .../Formats/Impl/CHColumnToArrowColumn.h | 2 +- .../Formats/Impl/CapnProtoRowInputFormat.h | 2 +- .../Formats/Impl/CapnProtoRowOutputFormat.h | 2 +- .../Formats/Impl/MsgPackRowInputFormat.h | 2 +- .../Formats/Impl/MsgPackRowOutputFormat.h | 2 +- .../Formats/Impl/ORCBlockInputFormat.h | 2 +- .../Formats/Impl/ORCBlockOutputFormat.h | 2 +- .../Formats/Impl/ParquetBlockInputFormat.h | 2 +- .../Formats/Impl/ParquetBlockOutputFormat.h | 2 +- .../Formats/Impl/ProtobufListInputFormat.h | 2 +- .../Formats/Impl/ProtobufListOutputFormat.h | 2 +- .../Formats/Impl/ProtobufRowInputFormat.h | 2 +- .../Formats/Impl/ProtobufRowOutputFormat.h | 2 +- src/Storages/System/CMakeLists.txt | 2 -- src/Storages/registerStorages.cpp | 2 +- src/configure_config.cmake | 17 ++++++++++++++++ 33 files changed, 50 insertions(+), 63 deletions(-) delete mode 100644 src/Formats/CMakeLists.txt delete mode 100644 src/Formats/config_formats.h.in delete mode 100644 src/Formats/configure_config.cmake diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 
11283e49d8f..f726ae77415 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -72,7 +72,6 @@ add_subdirectory (AggregateFunctions) add_subdirectory (Client) add_subdirectory (TableFunctions) add_subdirectory (Processors) -add_subdirectory (Formats) add_subdirectory (Compression) add_subdirectory (Server) add_subdirectory (Coordination) diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 33d2c6cd141..601470347b0 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -29,3 +29,10 @@ #cmakedefine01 USE_FASTOPS #cmakedefine01 USE_NLP #cmakedefine01 USE_VECTORSCAN +#cmakedefine01 USE_AVRO +#cmakedefine01 USE_CAPNP +#cmakedefine01 USE_PARQUET +#cmakedefine01 USE_ORC +#cmakedefine01 USE_ARROW +#cmakedefine01 USE_PROTOBUF +#cmakedefine01 USE_MSGPACK diff --git a/src/Formats/CMakeLists.txt b/src/Formats/CMakeLists.txt deleted file mode 100644 index ace9e37d76b..00000000000 --- a/src/Formats/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -include(configure_config.cmake) -configure_file(config_formats.h.in ${CONFIG_INCLUDE_PATH}/config_formats.h) diff --git a/src/Formats/CapnProtoUtils.h b/src/Formats/CapnProtoUtils.h index 50f146a05f6..102c3a2e306 100644 --- a/src/Formats/CapnProtoUtils.h +++ b/src/Formats/CapnProtoUtils.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_CAPNP #include diff --git a/src/Formats/ProtobufReader.h b/src/Formats/ProtobufReader.h index 2e2a71a7d11..a1a1ce7b2f1 100644 --- a/src/Formats/ProtobufReader.h +++ b/src/Formats/ProtobufReader.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_PROTOBUF # include diff --git a/src/Formats/ProtobufSchemas.cpp b/src/Formats/ProtobufSchemas.cpp index 249737d1838..be7c97d40ba 100644 --- a/src/Formats/ProtobufSchemas.cpp +++ b/src/Formats/ProtobufSchemas.cpp @@ -1,4 +1,4 @@ -#include "config_formats.h" +#include "config.h" #if USE_PROTOBUF # include diff --git a/src/Formats/ProtobufSchemas.h 
b/src/Formats/ProtobufSchemas.h index 40e386b4642..6f868cd6803 100644 --- a/src/Formats/ProtobufSchemas.h +++ b/src/Formats/ProtobufSchemas.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_PROTOBUF #include diff --git a/src/Formats/ProtobufSerializer.h b/src/Formats/ProtobufSerializer.h index 7cbfe5fd42c..ebd136c1a82 100644 --- a/src/Formats/ProtobufSerializer.h +++ b/src/Formats/ProtobufSerializer.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_PROTOBUF # include diff --git a/src/Formats/ProtobufWriter.h b/src/Formats/ProtobufWriter.h index 3ede956e910..b8f649a0e3c 100644 --- a/src/Formats/ProtobufWriter.h +++ b/src/Formats/ProtobufWriter.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_PROTOBUF # include diff --git a/src/Formats/config_formats.h.in b/src/Formats/config_formats.h.in deleted file mode 100644 index a0c14981eb7..00000000000 --- a/src/Formats/config_formats.h.in +++ /dev/null @@ -1,12 +0,0 @@ -/// This file was autogenerated by CMake - -#pragma once - -#cmakedefine01 USE_AVRO -#cmakedefine01 USE_CAPNP -#cmakedefine01 USE_SNAPPY -#cmakedefine01 USE_PARQUET -#cmakedefine01 USE_ORC -#cmakedefine01 USE_ARROW -#cmakedefine01 USE_PROTOBUF -#cmakedefine01 USE_MSGPACK diff --git a/src/Formats/configure_config.cmake b/src/Formats/configure_config.cmake deleted file mode 100644 index 3a11f3c6448..00000000000 --- a/src/Formats/configure_config.cmake +++ /dev/null @@ -1,20 +0,0 @@ -if (TARGET ch_contrib::avrocpp) - set(USE_AVRO 1) -endif() -if (TARGET ch_contrib::parquet) - set(USE_PARQUET 1) - set(USE_ARROW 1) - set(USE_ORC 1) -endif() -if (TARGET ch_contrib::snappy) - set(USE_SNAPPY 1) -endif() -if (TARGET ch_contrib::protobuf) - set(USE_PROTOBUF 1) -endif() -if (TARGET ch_contrib::msgpack) - set(USE_MSGPACK 1) -endif() -if (TARGET ch_contrib::capnp) - set(USE_CAPNP 1) -endif() diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h 
b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h index ee1e2d6c5a8..02648d28048 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h @@ -1,5 +1,5 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_ARROW diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h index ab5a0e7351a..ce0bdab9bcb 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h @@ -1,5 +1,5 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_ARROW diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.h b/src/Processors/Formats/Impl/ArrowBufferedStreams.h index da038283731..dc69b5a50fa 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.h +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.h @@ -1,5 +1,5 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_ARROW || USE_ORC || USE_PARQUET diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 092ed65d61a..3540778940e 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_ARROW || USE_ORC || USE_PARQUET diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 17203925f2f..460dc4b1fb3 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #include "config_core.h" #if USE_AVRO diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.h b/src/Processors/Formats/Impl/AvroRowOutputFormat.h index a36b36286c3..4834c8948b2 100644 --- 
a/src/Processors/Formats/Impl/AvroRowOutputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.h @@ -1,5 +1,5 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_AVRO #include diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.h b/src/Processors/Formats/Impl/CHColumnToArrowColumn.h index 2896fb3642f..1db035e55a6 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.h +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.h @@ -1,5 +1,5 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_ARROW || USE_PARQUET diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h index a8aa6ccda05..cf23f22b643 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_CAPNP #include diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h index 12dc5eda2b3..d1f64838145 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_CAPNP #include diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h index 2298e35fed5..dab6d7a3d5b 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #include "config_core.h" #if USE_MSGPACK diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h index e2abbd588c4..2de8e9cdc2f 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h +++ 
b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #include "config_core.h" #if USE_MSGPACK diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index 3c363699a0b..bc2abe41cc1 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -1,5 +1,5 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_ORC #include diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h index 6467f2148f5..322778299ae 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_ORC #include diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index 76803bb5b89..25814090587 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -1,5 +1,5 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_PARQUET #include diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h index dee25ee1aa4..c0421a4d99f 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h @@ -1,5 +1,5 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_PARQUET # include diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.h b/src/Processors/Formats/Impl/ProtobufListInputFormat.h index 2f334048ad2..7c8bfb9b443 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.h @@ -1,6 +1,6 @@ #pragma once 
-#include "config_formats.h" +#include "config.h" #if USE_PROTOBUF # include diff --git a/src/Processors/Formats/Impl/ProtobufListOutputFormat.h b/src/Processors/Formats/Impl/ProtobufListOutputFormat.h index d3cade38b2a..7b3513bb7ed 100644 --- a/src/Processors/Formats/Impl/ProtobufListOutputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufListOutputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_PROTOBUF # include diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h index 3d00ee4794e..1747d090976 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_PROTOBUF # include diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h index 9f7f0b96923..01eaac288f5 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h @@ -1,6 +1,6 @@ #pragma once -#include "config_formats.h" +#include "config.h" #if USE_PROTOBUF # include diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index 3e38f630fad..1d2a3de5101 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -7,8 +7,6 @@ get_property(TZDATA_VERSION GLOBAL PROPERTY TZDATA_VERSION_PROP) function(generate_system_build_options) include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) - include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) - include(${ClickHouse_SOURCE_DIR}/src/Formats/configure_config.cmake) configure_file(StorageSystemBuildOptions.cpp.in StorageSystemBuildOptions.generated.cpp) endfunction() diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 107c3d7fb56..fb5b2efb0fc 100644 --- 
a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -3,7 +3,7 @@ #include "config.h" #include "config_core.h" -#include "config_formats.h" +#include "config.h" namespace DB { diff --git a/src/configure_config.cmake b/src/configure_config.cmake index c7b4c7f84cf..aafbddc69f5 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -115,3 +115,20 @@ endif() if (TARGET ch_contrib::vectorscan) set(USE_VECTORSCAN 1) endif() +if (TARGET ch_contrib::avrocpp) + set(USE_AVRO 1) +endif() +if (TARGET ch_contrib::parquet) + set(USE_PARQUET 1) + set(USE_ARROW 1) + set(USE_ORC 1) +endif() +if (TARGET ch_contrib::protobuf) + set(USE_PROTOBUF 1) +endif() +if (TARGET ch_contrib::msgpack) + set(USE_MSGPACK 1) +endif() +if (TARGET ch_contrib::capnp) + set(USE_CAPNP 1) +endif() From fd86829824ff2186487a1113fc7daf513a3c7d24 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Sep 2022 13:29:29 +0000 Subject: [PATCH 067/266] Consolidate config_core.h into config.h Less duplication, less confusion ... 
--- programs/keeper/Keeper.cpp | 2 +- programs/server/Server.cpp | 2 +- src/Access/GSSAcceptor.h | 2 +- src/Access/KerberosInit.h | 2 +- src/Access/LDAPClient.h | 2 +- src/AggregateFunctions/IAggregateFunction.h | 2 +- src/CMakeLists.txt | 1 - src/Columns/Collator.cpp | 2 +- src/Columns/ColumnNullable.h | 2 +- src/Columns/ColumnVector.h | 2 +- src/Columns/IColumn.h | 2 +- src/Common/MemoryTracker.cpp | 2 +- src/Common/config.h.in | 14 +++++++++++ src/Common/examples/hashes_test.cpp | 2 +- src/Compression/CompressionFactory.cpp | 2 +- src/Coordination/KeeperDispatcher.h | 2 +- src/Coordination/KeeperServer.cpp | 2 +- src/Coordination/tests/gtest_coordination.cpp | 2 +- src/Core/MySQL/Authentication.h | 2 +- src/Core/PostgreSQL/Connection.h | 2 +- src/Core/PostgreSQL/ConnectionHolder.h | 2 +- src/Core/PostgreSQL/PoolWithFailover.h | 2 +- src/Core/PostgreSQL/Utils.h | 2 +- src/Core/PostgreSQL/insertPostgreSQLValue.h | 2 +- src/Core/SortCursor.h | 2 +- src/Core/config_core.h.in | 24 ------------------- src/DataTypes/Native.h | 2 +- src/Databases/DatabaseFactory.cpp | 2 +- .../MySQL/DatabaseMaterializedMySQL.cpp | 2 +- .../MySQL/DatabaseMaterializedMySQL.h | 2 +- src/Databases/MySQL/DatabaseMySQL.cpp | 2 +- src/Databases/MySQL/DatabaseMySQL.h | 2 +- .../MySQL/FetchTablesColumnsList.cpp | 2 +- src/Databases/MySQL/FetchTablesColumnsList.h | 2 +- src/Databases/MySQL/MaterializeMetadata.h | 2 +- .../MySQL/MaterializedMySQLSyncThread.cpp | 2 +- .../MySQL/MaterializedMySQLSyncThread.h | 2 +- .../DatabaseMaterializedPostgreSQL.h | 2 +- src/Databases/PostgreSQL/DatabasePostgreSQL.h | 2 +- .../fetchPostgreSQLTableStructure.h | 2 +- src/Databases/SQLite/DatabaseSQLite.h | 2 +- src/Databases/SQLite/SQLiteUtils.h | 2 +- .../SQLite/fetchSQLiteTableStructure.h | 2 +- src/Dictionaries/MySQLDictionarySource.h | 2 +- src/Dictionaries/PostgreSQLDictionarySource.h | 2 +- src/Functions/DivisionUtils.h | 2 +- src/Functions/FunctionsHashing.h | 2 +- src/Functions/GCDLCMImpl.h | 2 +- 
src/Functions/IFunction.h | 2 +- src/Functions/convertCharset.cpp | 2 +- src/Functions/lemmatize.cpp | 2 +- src/Functions/normalizeString.cpp | 2 +- src/Functions/registerFunctions.cpp | 2 +- src/Functions/stem.cpp | 2 +- src/Functions/synonyms.cpp | 2 +- src/Interpreters/ActionsDAG.h | 2 +- src/Interpreters/AsynchronousMetrics.cpp | 2 +- src/Interpreters/Context.h | 2 +- src/Interpreters/DatabaseCatalog.cpp | 2 +- src/Interpreters/ExpressionActions.h | 2 +- src/Interpreters/ExpressionJIT.cpp | 2 +- .../ExternalDictionariesLoader.cpp | 2 +- src/Interpreters/InterpreterDropQuery.cpp | 2 +- .../InterpreterExternalDDLQuery.cpp | 2 +- src/Interpreters/InterpreterSystemQuery.cpp | 2 +- src/Interpreters/JIT/CHJIT.h | 2 +- src/Interpreters/JIT/CompileDAG.h | 2 +- .../JIT/CompiledExpressionCache.h | 2 +- src/Interpreters/JIT/compileFunction.h | 2 +- src/Interpreters/Lemmatizers.cpp | 2 +- src/Interpreters/Lemmatizers.h | 2 +- .../MySQL/tests/gtest_create_rewritten.cpp | 2 +- src/Interpreters/SynonymsExtensions.cpp | 2 +- src/Interpreters/SynonymsExtensions.h | 2 +- src/Interpreters/examples/jit_example.cpp | 2 +- src/Parsers/ASTSystemQuery.h | 2 +- src/Parsers/ParserExternalDDLQuery.cpp | 2 +- .../Formats/Impl/AvroRowInputFormat.h | 2 +- .../Formats/Impl/MsgPackRowInputFormat.h | 2 +- .../Formats/Impl/MsgPackRowOutputFormat.h | 2 +- src/Processors/Sources/MySQLSource.cpp | 2 +- src/Processors/Sources/SQLiteSource.h | 2 +- src/Processors/Transforms/PostgreSQLSource.h | 2 +- src/Server/KeeperTCPHandler.h | 2 +- .../MergeTree/MergeTreeMetadataCache.h | 2 +- .../MergeTree/PartMetadataManagerWithCache.h | 2 +- .../tests/gtest_merge_tree_metadata_cache.cpp | 2 +- src/Storages/MySQL/MySQLHelpers.h | 2 +- .../MaterializedPostgreSQLSettings.h | 2 +- .../StorageMaterializedPostgreSQL.h | 2 +- .../ReadFinalForExternalReplicaStorage.h | 2 +- src/Storages/StorageExternalDistributed.h | 2 +- src/Storages/StorageMaterializedMySQL.cpp | 2 +- src/Storages/StorageMaterializedMySQL.h | 2 +- 
src/Storages/StorageMySQL.h | 2 +- src/Storages/StoragePostgreSQL.h | 2 +- src/Storages/StorageSQLite.h | 2 +- .../StorageSystemMergeTreeMetadataCache.h | 2 +- src/Storages/System/attachSystemTables.cpp | 2 +- src/Storages/registerStorages.cpp | 2 +- src/TableFunctions/TableFunctionMySQL.cpp | 2 +- src/TableFunctions/TableFunctionMySQL.h | 2 +- src/TableFunctions/TableFunctionPostgreSQL.h | 2 +- src/TableFunctions/TableFunctionSQLite.h | 2 +- src/TableFunctions/registerTableFunctions.h | 2 +- 105 files changed, 116 insertions(+), 127 deletions(-) delete mode 100644 src/Core/config_core.h.in diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index c4bfa92c72d..5077f59b7dd 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -24,7 +24,7 @@ #include #include -#include "config_core.h" +#include "config.h" #include "config_version.h" #if USE_SSL diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 612bf880b67..c9c7599a702 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -88,7 +88,7 @@ #include #include -#include "config_core.h" +#include "config.h" #include "config_version.h" #if defined(OS_LINUX) diff --git a/src/Access/GSSAcceptor.h b/src/Access/GSSAcceptor.h index c128c78cb3a..d2c55b1290c 100644 --- a/src/Access/GSSAcceptor.h +++ b/src/Access/GSSAcceptor.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #include #include diff --git a/src/Access/KerberosInit.h b/src/Access/KerberosInit.h index 5a11a275529..79fc91d913d 100644 --- a/src/Access/KerberosInit.h +++ b/src/Access/KerberosInit.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #include diff --git a/src/Access/LDAPClient.h b/src/Access/LDAPClient.h index a975a0e1eb6..f228bac5926 100644 --- a/src/Access/LDAPClient.h +++ b/src/Access/LDAPClient.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #include diff --git 
a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 7a4feebbe0f..d272eac9d10 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -10,7 +10,7 @@ #include #include -#include "config_core.h" +#include "config.h" #include #include diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f726ae77415..429568699cc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -23,7 +23,6 @@ message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION} ${VERSI include (configure_config.cmake) configure_file (Common/config.h.in ${CONFIG_INCLUDE_PATH}/config.h) configure_file (Common/config_version.h.in ${CONFIG_INCLUDE_PATH}/config_version.h) -configure_file (Core/config_core.h.in "${CMAKE_CURRENT_BINARY_DIR}/Core/include/config_core.h") if (USE_DEBUG_HELPERS) get_target_property(MAGIC_ENUM_INCLUDE_DIR ch_contrib::magic_enum INTERFACE_INCLUDE_DIRECTORIES) diff --git a/src/Columns/Collator.cpp b/src/Columns/Collator.cpp index 953e35f40c5..00a8e3b5408 100644 --- a/src/Columns/Collator.cpp +++ b/src/Columns/Collator.cpp @@ -1,6 +1,6 @@ #include -#include "config_core.h" +#include "config.h" #if USE_ICU # include diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index bb4c881c54c..2d15442e583 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -6,7 +6,7 @@ #include #include -#include "config_core.h" +#include "config.h" class Collator; diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 70a8a9bce4b..0f388ef8ac3 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -10,7 +10,7 @@ #include #include -#include "config_core.h" +#include "config.h" namespace DB diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 380eb36f87b..19f3dea4f82 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -7,7 +7,7 @@ #include #include -#include "config_core.h" 
+#include "config.h" class SipHash; diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index e8573bcd343..8bd31681706 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -12,7 +12,7 @@ #include #include -#include "config_core.h" +#include "config.h" #if USE_JEMALLOC # include diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 601470347b0..7b2dcf5fc8b 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -36,3 +36,17 @@ #cmakedefine01 USE_ARROW #cmakedefine01 USE_PROTOBUF #cmakedefine01 USE_MSGPACK +#cmakedefine01 USE_ICU +#cmakedefine01 USE_MYSQL +#cmakedefine01 USE_RDKAFKA +#cmakedefine01 USE_AMQPCPP +#cmakedefine01 USE_NATSIO +#cmakedefine01 USE_EMBEDDED_COMPILER +#cmakedefine01 USE_LDAP +#cmakedefine01 USE_ROCKSDB +#cmakedefine01 USE_LIBPQXX +#cmakedefine01 USE_SQLITE +#cmakedefine01 USE_NURAFT +#cmakedefine01 USE_KRB5 +#cmakedefine01 USE_FILELOG +#cmakedefine01 USE_ODBC diff --git a/src/Common/examples/hashes_test.cpp b/src/Common/examples/hashes_test.cpp index e082d03c931..eccf7c9b2e6 100644 --- a/src/Common/examples/hashes_test.cpp +++ b/src/Common/examples/hashes_test.cpp @@ -5,7 +5,7 @@ #include #include #include -#include "config_core.h" +#include "config.h" #if USE_SSL # include #endif diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 7291d42f681..6869c9ccbeb 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #include #include diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 0b443d64786..e5ee4db02a4 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -1,7 +1,7 @@ #pragma once #include "config.h" -#include "config_core.h" +#include "config.h" #if USE_NURAFT diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp 
index 08092cf68f1..7a0cee746c6 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -1,7 +1,7 @@ #include #include -#include "config_core.h" +#include "config.h" #include #include diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index fa6bfca7c7a..5bb1ecc7c85 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -6,7 +6,7 @@ #include "Coordination/KeeperStorage.h" #include "Core/Defines.h" #include "IO/WriteHelpers.h" -#include "config_core.h" +#include "config.h" #if USE_NURAFT #include diff --git a/src/Core/MySQL/Authentication.h b/src/Core/MySQL/Authentication.h index 26194236aa2..ee6aaac02bc 100644 --- a/src/Core/MySQL/Authentication.h +++ b/src/Core/MySQL/Authentication.h @@ -4,7 +4,7 @@ #include #include -#include "config_core.h" +#include "config.h" #if USE_SSL # include diff --git a/src/Core/PostgreSQL/Connection.h b/src/Core/PostgreSQL/Connection.h index d39659a9953..96cc19babea 100644 --- a/src/Core/PostgreSQL/Connection.h +++ b/src/Core/PostgreSQL/Connection.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_LIBPQXX diff --git a/src/Core/PostgreSQL/ConnectionHolder.h b/src/Core/PostgreSQL/ConnectionHolder.h index 2fd8717c643..16803c823ba 100644 --- a/src/Core/PostgreSQL/ConnectionHolder.h +++ b/src/Core/PostgreSQL/ConnectionHolder.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_LIBPQXX diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index 81c94d92141..f8525684e23 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_LIBPQXX diff --git a/src/Core/PostgreSQL/Utils.h b/src/Core/PostgreSQL/Utils.h index 1aa173d7137..f179ab14c89 100644 --- 
a/src/Core/PostgreSQL/Utils.h +++ b/src/Core/PostgreSQL/Utils.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_LIBPQXX diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.h b/src/Core/PostgreSQL/insertPostgreSQLValue.h index dcdd9a4f9b8..b842d86ed47 100644 --- a/src/Core/PostgreSQL/insertPostgreSQLValue.h +++ b/src/Core/PostgreSQL/insertPostgreSQLValue.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_LIBPQXX diff --git a/src/Core/SortCursor.h b/src/Core/SortCursor.h index d3c9a99b8d8..abd3e3c85f8 100644 --- a/src/Core/SortCursor.h +++ b/src/Core/SortCursor.h @@ -25,7 +25,7 @@ #include #include -#include "config_core.h" +#include "config.h" #if USE_EMBEDDED_COMPILER #include diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in deleted file mode 100644 index 0624301d002..00000000000 --- a/src/Core/config_core.h.in +++ /dev/null @@ -1,24 +0,0 @@ -/// This file was autogenerated by CMake - -#pragma once - -#cmakedefine01 USE_ICU -#cmakedefine01 USE_MYSQL -#cmakedefine01 USE_RDKAFKA -#cmakedefine01 USE_AMQPCPP -#cmakedefine01 USE_NATSIO -#cmakedefine01 USE_EMBEDDED_COMPILER -#cmakedefine01 USE_SSL -#cmakedefine01 USE_LDAP -#cmakedefine01 USE_ROCKSDB -#cmakedefine01 USE_LIBPQXX -#cmakedefine01 USE_SQLITE -#cmakedefine01 USE_NURAFT -#cmakedefine01 USE_NLP -#cmakedefine01 USE_KRB5 -#cmakedefine01 USE_SIMDJSON -#cmakedefine01 USE_RAPIDJSON -#cmakedefine01 USE_FILELOG -#cmakedefine01 USE_ODBC -#cmakedefine01 USE_REPLXX -#cmakedefine01 USE_JEMALLOC diff --git a/src/DataTypes/Native.h b/src/DataTypes/Native.h index 1950b1407df..ab00ad0e2e0 100644 --- a/src/DataTypes/Native.h +++ b/src/DataTypes/Native.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_EMBEDDED_COMPILER # include diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 96db2a17b72..5ce1dee4702 100644 --- a/src/Databases/DatabaseFactory.cpp +++ 
b/src/Databases/DatabaseFactory.cpp @@ -18,7 +18,7 @@ #include #include -#include "config_core.h" +#include "config.h" #if USE_MYSQL # include diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp index 91dbadca409..748cca377df 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_MYSQL diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.h b/src/Databases/MySQL/DatabaseMaterializedMySQL.h index 27a7ddc8acf..3698abf5542 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.h +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_MYSQL diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 01c342c1771..80301732ff8 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_MYSQL # include diff --git a/src/Databases/MySQL/DatabaseMySQL.h b/src/Databases/MySQL/DatabaseMySQL.h index 5d0a366e5e6..a9c06074237 100644 --- a/src/Databases/MySQL/DatabaseMySQL.h +++ b/src/Databases/MySQL/DatabaseMySQL.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_MYSQL #include diff --git a/src/Databases/MySQL/FetchTablesColumnsList.cpp b/src/Databases/MySQL/FetchTablesColumnsList.cpp index 529940f713c..e78f4aa2234 100644 --- a/src/Databases/MySQL/FetchTablesColumnsList.cpp +++ b/src/Databases/MySQL/FetchTablesColumnsList.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_MYSQL #include diff --git a/src/Databases/MySQL/FetchTablesColumnsList.h b/src/Databases/MySQL/FetchTablesColumnsList.h index f039ccdbd69..736a0ffd607 100644 --- a/src/Databases/MySQL/FetchTablesColumnsList.h +++ 
b/src/Databases/MySQL/FetchTablesColumnsList.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_MYSQL #include diff --git a/src/Databases/MySQL/MaterializeMetadata.h b/src/Databases/MySQL/MaterializeMetadata.h index b828c901fbb..e78b7132c8d 100644 --- a/src/Databases/MySQL/MaterializeMetadata.h +++ b/src/Databases/MySQL/MaterializeMetadata.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_MYSQL diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 29f86a05016..604dc220fed 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_MYSQL diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.h b/src/Databases/MySQL/MaterializedMySQLSyncThread.h index 163a3732fb9..4abea5e72df 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.h +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_MYSQL diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h index 6363e8e07c4..edbef281da4 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_LIBPQXX diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.h b/src/Databases/PostgreSQL/DatabasePostgreSQL.h index d70e529e4a6..18e4c949121 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_LIBPQXX diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h 
b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h index 3be3aa79078..7cd21d353a2 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_LIBPQXX #include diff --git a/src/Databases/SQLite/DatabaseSQLite.h b/src/Databases/SQLite/DatabaseSQLite.h index 8f0c9b4d720..a89fbc32c3d 100644 --- a/src/Databases/SQLite/DatabaseSQLite.h +++ b/src/Databases/SQLite/DatabaseSQLite.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_SQLITE #include diff --git a/src/Databases/SQLite/SQLiteUtils.h b/src/Databases/SQLite/SQLiteUtils.h index 09119b3f145..446ac5fdeef 100644 --- a/src/Databases/SQLite/SQLiteUtils.h +++ b/src/Databases/SQLite/SQLiteUtils.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_SQLITE #include diff --git a/src/Databases/SQLite/fetchSQLiteTableStructure.h b/src/Databases/SQLite/fetchSQLiteTableStructure.h index dbdf35be4bb..0275db1bb3d 100644 --- a/src/Databases/SQLite/fetchSQLiteTableStructure.h +++ b/src/Databases/SQLite/fetchSQLiteTableStructure.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_SQLITE diff --git a/src/Dictionaries/MySQLDictionarySource.h b/src/Dictionaries/MySQLDictionarySource.h index 840345e3dc2..1d43ebfe2ba 100644 --- a/src/Dictionaries/MySQLDictionarySource.h +++ b/src/Dictionaries/MySQLDictionarySource.h @@ -2,7 +2,7 @@ #include -#include "config_core.h" +#include "config.h" #if USE_MYSQL # include diff --git a/src/Dictionaries/PostgreSQLDictionarySource.h b/src/Dictionaries/PostgreSQLDictionarySource.h index b6a604bc7d3..8ecf56a9430 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.h +++ b/src/Dictionaries/PostgreSQLDictionarySource.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #include "DictionaryStructure.h" #include 
"IDictionarySource.h" diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h index b4809580f5d..5ca03aca8f5 100644 --- a/src/Functions/DivisionUtils.h +++ b/src/Functions/DivisionUtils.h @@ -6,7 +6,7 @@ #include #include -#include "config_core.h" +#include "config.h" #include "config.h" diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 1375d490cc4..096de7a354f 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -8,7 +8,7 @@ #include #include "config.h" -#include "config_core.h" +#include "config.h" #include #include diff --git a/src/Functions/GCDLCMImpl.h b/src/Functions/GCDLCMImpl.h index f4988e3a585..fbe1f6a1a90 100644 --- a/src/Functions/GCDLCMImpl.h +++ b/src/Functions/GCDLCMImpl.h @@ -6,7 +6,7 @@ #include #include -#include "config_core.h" +#include "config.h" namespace DB diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 83b89b85b62..9078b58f6de 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -5,7 +5,7 @@ #include #include -#include "config_core.h" +#include "config.h" #include diff --git a/src/Functions/convertCharset.cpp b/src/Functions/convertCharset.cpp index 49faae521f7..dbe23fc4bcb 100644 --- a/src/Functions/convertCharset.cpp +++ b/src/Functions/convertCharset.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_ICU # include diff --git a/src/Functions/lemmatize.cpp b/src/Functions/lemmatize.cpp index 873a12baf40..4a44c3a2509 100644 --- a/src/Functions/lemmatize.cpp +++ b/src/Functions/lemmatize.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_NLP diff --git a/src/Functions/normalizeString.cpp b/src/Functions/normalizeString.cpp index a6bec0878f7..3f704ee0613 100644 --- a/src/Functions/normalizeString.cpp +++ b/src/Functions/normalizeString.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_ICU #include diff --git 
a/src/Functions/registerFunctions.cpp b/src/Functions/registerFunctions.cpp index 202ad1e3971..2fedf7bf098 100644 --- a/src/Functions/registerFunctions.cpp +++ b/src/Functions/registerFunctions.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #include diff --git a/src/Functions/stem.cpp b/src/Functions/stem.cpp index 50293500b35..9c7ce895fce 100644 --- a/src/Functions/stem.cpp +++ b/src/Functions/stem.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_NLP diff --git a/src/Functions/synonyms.cpp b/src/Functions/synonyms.cpp index 69310ed9680..4ebe61e4b2c 100644 --- a/src/Functions/synonyms.cpp +++ b/src/Functions/synonyms.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_NLP diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index f073dce65fb..76273463dce 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -5,7 +5,7 @@ #include #include -#include "config_core.h" +#include "config.h" namespace DB { diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index a40b1bbcbe9..23845e0424e 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -27,7 +27,7 @@ #include -#include "config_core.h" +#include "config.h" #if USE_JEMALLOC # include diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index a9984e32c1b..828c34d4e03 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -19,7 +19,7 @@ #include -#include "config_core.h" +#include "config.h" #include #include diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 67fb256b1c9..25ea31b093d 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -21,7 +21,7 @@ #include #include -#include "config_core.h" +#include "config.h" #if USE_MYSQL # include diff --git a/src/Interpreters/ExpressionActions.h 
b/src/Interpreters/ExpressionActions.h index 889617e0a22..be63b9e0d78 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -7,7 +7,7 @@ #include -#include "config_core.h" +#include "config.h" namespace DB diff --git a/src/Interpreters/ExpressionJIT.cpp b/src/Interpreters/ExpressionJIT.cpp index c37d4d5b6a2..3a2c2e333a9 100644 --- a/src/Interpreters/ExpressionJIT.cpp +++ b/src/Interpreters/ExpressionJIT.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_EMBEDDED_COMPILER diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index 4dd779e3a50..2de0ff13e2b 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -6,7 +6,7 @@ #include #include -#include "config_core.h" +#include "config.h" #if USE_MYSQL # include diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 71d65ee7fed..ebedefb082b 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -12,7 +12,7 @@ #include #include -#include "config_core.h" +#include "config.h" #if USE_MYSQL # include diff --git a/src/Interpreters/InterpreterExternalDDLQuery.cpp b/src/Interpreters/InterpreterExternalDDLQuery.cpp index adceb1fe419..61fbc34784f 100644 --- a/src/Interpreters/InterpreterExternalDDLQuery.cpp +++ b/src/Interpreters/InterpreterExternalDDLQuery.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #include #include diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 56e87d6a4fb..fd6b3de0d93 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -57,7 +57,7 @@ #include #include -#include "config_core.h" +#include "config.h" namespace DB { diff --git a/src/Interpreters/JIT/CHJIT.h b/src/Interpreters/JIT/CHJIT.h 
index 58da0fcffcc..3efbfa4daf9 100644 --- a/src/Interpreters/JIT/CHJIT.h +++ b/src/Interpreters/JIT/CHJIT.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_EMBEDDED_COMPILER diff --git a/src/Interpreters/JIT/CompileDAG.h b/src/Interpreters/JIT/CompileDAG.h index 84bfc738cfc..a05fa629561 100644 --- a/src/Interpreters/JIT/CompileDAG.h +++ b/src/Interpreters/JIT/CompileDAG.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_EMBEDDED_COMPILER diff --git a/src/Interpreters/JIT/CompiledExpressionCache.h b/src/Interpreters/JIT/CompiledExpressionCache.h index a2a8141759c..21f7c67226c 100644 --- a/src/Interpreters/JIT/CompiledExpressionCache.h +++ b/src/Interpreters/JIT/CompiledExpressionCache.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_EMBEDDED_COMPILER # include diff --git a/src/Interpreters/JIT/compileFunction.h b/src/Interpreters/JIT/compileFunction.h index bcd82ae8bab..0e0a1106698 100644 --- a/src/Interpreters/JIT/compileFunction.h +++ b/src/Interpreters/JIT/compileFunction.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_EMBEDDED_COMPILER diff --git a/src/Interpreters/Lemmatizers.cpp b/src/Interpreters/Lemmatizers.cpp index 72f0161b8fd..5044aae083c 100644 --- a/src/Interpreters/Lemmatizers.cpp +++ b/src/Interpreters/Lemmatizers.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_NLP diff --git a/src/Interpreters/Lemmatizers.h b/src/Interpreters/Lemmatizers.h index 936c796bd74..b90555840b2 100644 --- a/src/Interpreters/Lemmatizers.h +++ b/src/Interpreters/Lemmatizers.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_NLP diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index 71578bd5db7..9f6e9b930fd 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ 
b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #include diff --git a/src/Interpreters/SynonymsExtensions.cpp b/src/Interpreters/SynonymsExtensions.cpp index ddc90d04e02..7979c849975 100644 --- a/src/Interpreters/SynonymsExtensions.cpp +++ b/src/Interpreters/SynonymsExtensions.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_NLP diff --git a/src/Interpreters/SynonymsExtensions.h b/src/Interpreters/SynonymsExtensions.h index c4a70b28c60..2d461a0f091 100644 --- a/src/Interpreters/SynonymsExtensions.h +++ b/src/Interpreters/SynonymsExtensions.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_NLP diff --git a/src/Interpreters/examples/jit_example.cpp b/src/Interpreters/examples/jit_example.cpp index d4613b7fad7..c92edf5e12d 100644 --- a/src/Interpreters/examples/jit_example.cpp +++ b/src/Interpreters/examples/jit_example.cpp @@ -1,6 +1,6 @@ #include -#include "config_core.h" +#include "config.h" #if USE_EMBEDDED_COMPILER diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 33f2fcb708c..2498dfdc12b 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -3,7 +3,7 @@ #include #include -#include "config_core.h" +#include "config.h" namespace DB diff --git a/src/Parsers/ParserExternalDDLQuery.cpp b/src/Parsers/ParserExternalDDLQuery.cpp index 4839ce73614..839838c4f54 100644 --- a/src/Parsers/ParserExternalDDLQuery.cpp +++ b/src/Parsers/ParserExternalDDLQuery.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #include #include diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 460dc4b1fb3..06a1a66aa9f 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -1,7 +1,7 @@ #pragma once #include "config.h" -#include "config_core.h" +#include "config.h" 
#if USE_AVRO diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h index dab6d7a3d5b..7e3cd0adefd 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -1,7 +1,7 @@ #pragma once #include "config.h" -#include "config_core.h" +#include "config.h" #if USE_MSGPACK diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h index 2de8e9cdc2f..8f63b124f53 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h @@ -1,7 +1,7 @@ #pragma once #include "config.h" -#include "config_core.h" +#include "config.h" #if USE_MSGPACK diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index f643899d9fc..e868182f49b 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_MYSQL #include diff --git a/src/Processors/Sources/SQLiteSource.h b/src/Processors/Sources/SQLiteSource.h index a55c8204a16..d792483c70f 100644 --- a/src/Processors/Sources/SQLiteSource.h +++ b/src/Processors/Sources/SQLiteSource.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_SQLITE #include diff --git a/src/Processors/Transforms/PostgreSQLSource.h b/src/Processors/Transforms/PostgreSQLSource.h index 292cfc78d34..312e9f5fb18 100644 --- a/src/Processors/Transforms/PostgreSQLSource.h +++ b/src/Processors/Transforms/PostgreSQLSource.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_LIBPQXX #include diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index 54978268c19..75a19deb8b2 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -1,7 +1,7 @@ #pragma once #include "config.h" 
-#include "config_core.h" +#include "config.h" #if USE_NURAFT diff --git a/src/Storages/MergeTree/MergeTreeMetadataCache.h b/src/Storages/MergeTree/MergeTreeMetadataCache.h index 65c5eada200..57fb9ed88c4 100644 --- a/src/Storages/MergeTree/MergeTreeMetadataCache.h +++ b/src/Storages/MergeTree/MergeTreeMetadataCache.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_ROCKSDB #include diff --git a/src/Storages/MergeTree/PartMetadataManagerWithCache.h b/src/Storages/MergeTree/PartMetadataManagerWithCache.h index 791681ee5bb..e4505fb9462 100644 --- a/src/Storages/MergeTree/PartMetadataManagerWithCache.h +++ b/src/Storages/MergeTree/PartMetadataManagerWithCache.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_ROCKSDB #include diff --git a/src/Storages/MergeTree/tests/gtest_merge_tree_metadata_cache.cpp b/src/Storages/MergeTree/tests/gtest_merge_tree_metadata_cache.cpp index 33a82845545..d2b7561749d 100644 --- a/src/Storages/MergeTree/tests/gtest_merge_tree_metadata_cache.cpp +++ b/src/Storages/MergeTree/tests/gtest_merge_tree_metadata_cache.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_ROCKSDB #include diff --git a/src/Storages/MySQL/MySQLHelpers.h b/src/Storages/MySQL/MySQLHelpers.h index 59052be5c2a..57b564c360c 100644 --- a/src/Storages/MySQL/MySQLHelpers.h +++ b/src/Storages/MySQL/MySQLHelpers.h @@ -1,5 +1,5 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_MYSQL #include diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h index da1ca46b5b6..b878493b04d 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_LIBPQXX #include diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h 
b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index f1eea33d4b0..d8e9e98c662 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_LIBPQXX #include "PostgreSQLReplicationHandler.h" diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.h b/src/Storages/ReadFinalForExternalReplicaStorage.h index b922faa7361..178164b6643 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.h +++ b/src/Storages/ReadFinalForExternalReplicaStorage.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_MYSQL || USE_LIBPQXX diff --git a/src/Storages/StorageExternalDistributed.h b/src/Storages/StorageExternalDistributed.h index 1fb67e4e96f..52a2a7a4106 100644 --- a/src/Storages/StorageExternalDistributed.h +++ b/src/Storages/StorageExternalDistributed.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #include diff --git a/src/Storages/StorageMaterializedMySQL.cpp b/src/Storages/StorageMaterializedMySQL.cpp index a7e54960563..bb69f211a9e 100644 --- a/src/Storages/StorageMaterializedMySQL.cpp +++ b/src/Storages/StorageMaterializedMySQL.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_MYSQL diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index 18375f3915e..a66b7eba804 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_MYSQL diff --git a/src/Storages/StorageMySQL.h b/src/Storages/StorageMySQL.h index e3c59adf71c..e3c0712c179 100644 --- a/src/Storages/StorageMySQL.h +++ b/src/Storages/StorageMySQL.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_MYSQL diff --git a/src/Storages/StoragePostgreSQL.h 
b/src/Storages/StoragePostgreSQL.h index 92ae24d929b..0755e33269e 100644 --- a/src/Storages/StoragePostgreSQL.h +++ b/src/Storages/StoragePostgreSQL.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_LIBPQXX #include diff --git a/src/Storages/StorageSQLite.h b/src/Storages/StorageSQLite.h index d1181705b21..b0f209b5bc3 100644 --- a/src/Storages/StorageSQLite.h +++ b/src/Storages/StorageSQLite.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_SQLITE #include diff --git a/src/Storages/System/StorageSystemMergeTreeMetadataCache.h b/src/Storages/System/StorageSystemMergeTreeMetadataCache.h index 505ebed01ba..4603583227e 100644 --- a/src/Storages/System/StorageSystemMergeTreeMetadataCache.h +++ b/src/Storages/System/StorageSystemMergeTreeMetadataCache.h @@ -1,6 +1,6 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_ROCKSDB #include diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index ab1ffdf209a..d3b81f4d1f9 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #include #include diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index fb5b2efb0fc..526c2d72700 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -2,7 +2,7 @@ #include #include "config.h" -#include "config_core.h" +#include "config.h" #include "config.h" namespace DB diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index c67d6b3b652..ab1c23afa7a 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -1,4 +1,4 @@ -#include "config_core.h" +#include "config.h" #if USE_MYSQL #include diff --git a/src/TableFunctions/TableFunctionMySQL.h b/src/TableFunctions/TableFunctionMySQL.h index 
876dd43f598..794e8632ae2 100644 --- a/src/TableFunctions/TableFunctionMySQL.h +++ b/src/TableFunctions/TableFunctionMySQL.h @@ -1,5 +1,5 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_MYSQL #include diff --git a/src/TableFunctions/TableFunctionPostgreSQL.h b/src/TableFunctions/TableFunctionPostgreSQL.h index 7b33998a967..a5971b18d2f 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.h +++ b/src/TableFunctions/TableFunctionPostgreSQL.h @@ -1,5 +1,5 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_LIBPQXX #include diff --git a/src/TableFunctions/TableFunctionSQLite.h b/src/TableFunctions/TableFunctionSQLite.h index e80e04260bc..fded5646b39 100644 --- a/src/TableFunctions/TableFunctionSQLite.h +++ b/src/TableFunctions/TableFunctionSQLite.h @@ -1,5 +1,5 @@ #pragma once -#include "config_core.h" +#include "config.h" #if USE_SQLITE #include diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 25690c29b76..ad83c516f8e 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -1,7 +1,7 @@ #pragma once #include "config.h" -#include "config_core.h" +#include "config.h" namespace DB { From f24fab774730acac005527b3d2a04de9d186f1d4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Sep 2022 13:49:28 +0000 Subject: [PATCH 068/266] Fix some #include atrocities --- src/Client/ClientBase.cpp | 4 +++- src/Compression/CompressionCodecEncrypted.cpp | 2 +- src/Coordination/KeeperDispatcher.h | 1 - src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 2 +- src/Functions/DivisionUtils.h | 1 - src/Functions/FunctionsHashing.h | 1 - src/Functions/MatchImpl.h | 1 - src/Functions/MultiMatchAllIndicesImpl.h | 1 - src/Functions/MultiMatchAnyImpl.h | 1 - src/Functions/Regexps.h | 1 - src/Functions/ReplaceRegexpImpl.h | 1 - src/Processors/Formats/Impl/AvroRowInputFormat.h | 1 - src/Processors/Formats/Impl/MsgPackRowInputFormat.h | 1 - 
src/Processors/Formats/Impl/MsgPackRowOutputFormat.h | 1 - src/Server/KeeperTCPHandler.h | 1 - src/Storages/Kafka/StorageKafka.cpp | 3 ++- src/Storages/registerStorages.cpp | 2 -- src/TableFunctions/registerTableFunctions.h | 1 - 18 files changed, 7 insertions(+), 19 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 8925acdfa6d..c27effe20a0 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -6,6 +6,8 @@ #include #include +#include "config.h" + #include #include #include @@ -17,7 +19,6 @@ #include #include #include -#include "config.h" #include #include #include @@ -25,6 +26,7 @@ #include #include "config_version.h" + #include #include #include diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp index 965bed8e755..a9550c9e28d 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -1,5 +1,5 @@ -#include #include "config.h" +#include #include #include #include diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index e5ee4db02a4..3b524b24ed7 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -1,6 +1,5 @@ #pragma once -#include "config.h" #include "config.h" #if USE_NURAFT diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index d5d78b2a324..a9a3585feff 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -1,12 +1,12 @@ #include "ThreadPoolRemoteFSReader.h" +#include "config.h" #include #include #include #include #include #include -#include "config.h" #include #include diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h index 5ca03aca8f5..e120595c4d9 100644 --- a/src/Functions/DivisionUtils.h +++ b/src/Functions/DivisionUtils.h @@ -7,7 +7,6 @@ #include #include "config.h" -#include "config.h" 
namespace DB diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 096de7a354f..0ae6dea6506 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -7,7 +7,6 @@ #include #include -#include "config.h" #include "config.h" #include diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index 92233790bb1..6862a097d0e 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -7,7 +7,6 @@ #include #include "Regexps.h" -#include "config.h" #include "config.h" #include diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index fa724bae005..3490c854f22 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -8,7 +8,6 @@ #include #include "Regexps.h" -#include "config.h" #include "config.h" #if USE_VECTORSCAN diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index 32318a27ea1..2d4db261bb4 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -7,7 +7,6 @@ #include #include "Regexps.h" -#include "config.h" #include "config.h" #if USE_VECTORSCAN diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index f4e4f924678..1e40c845788 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -11,7 +11,6 @@ #include #include #include -#include "config.h" #include #include #include diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 72282278062..88bc48a6d8c 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -5,7 +5,6 @@ #include #include -#include "config.h" #include "config.h" #include diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 06a1a66aa9f..3a029232420 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ 
b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -1,6 +1,5 @@ #pragma once -#include "config.h" #include "config.h" #if USE_AVRO diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h index 7e3cd0adefd..64bb8b569e0 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -1,6 +1,5 @@ #pragma once -#include "config.h" #include "config.h" #if USE_MSGPACK diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h index 8f63b124f53..81943b5f73c 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h @@ -1,6 +1,5 @@ #pragma once -#include "config.h" #include "config.h" #if USE_MSGPACK diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index 75a19deb8b2..e9bd211628f 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -1,6 +1,5 @@ #pragma once -#include "config.h" #include "config.h" #if USE_NURAFT diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 28eb85ab6ad..fa52850fb39 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -35,12 +35,13 @@ #include #include #include -#include "config_version.h" #include #include #include #include +#include "config_version.h" + #include #include #if USE_KRB5 diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 526c2d72700..f9c8a735616 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -1,8 +1,6 @@ #include #include -#include "config.h" -#include "config.h" #include "config.h" namespace DB diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index ad83c516f8e..03d9c30b258 100644 --- 
a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -1,6 +1,5 @@ #pragma once -#include "config.h" #include "config.h" namespace DB From 4f427e5b805ce13b5220efb6320a1adeb11c4db5 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 28 Sep 2022 16:34:26 +0200 Subject: [PATCH 069/266] Truncate files on first insert --- .../0_stateless/02267_file_globs_schema_inference.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql index b51c0cf6fa1..b2a2997beab 100644 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql @@ -1,11 +1,11 @@ -- Tags: no-fasttest, no-parallel -insert into function file('02267_data2.jsonl') select NULL as x; -insert into function file('02267_data3.jsonl') select * from numbers(0); -insert into function file('02267_data4.jsonl') select 1 as x; +insert into function file('02267_data2.jsonl') select NULL as x SETTINGS engine_file_truncate_on_insert = 1; +insert into function file('02267_data3.jsonl') select * from numbers(0) SETTINGS engine_file_truncate_on_insert = 1; +insert into function file('02267_data4.jsonl') select 1 as x SETTINGS engine_file_truncate_on_insert = 1; select * from file('02267_data*.jsonl') order by x; insert into function file('02267_data4.jsonl', 'TSV') select 1 as x; -insert into function file('02267_data1.jsonl', 'TSV') select [1,2,3] as x; +insert into function file('02267_data1.jsonl', 'TSV') select [1,2,3] as x SETTINGS engine_file_truncate_on_insert = 1; select * from file('02267_data*.jsonl') settings schema_inference_use_cache_for_file=0; --{serverError INCORRECT_DATA} From 9c1e65458426e2c9bf99938d4def19d88c8516f0 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel 
<48961922+Avogar@users.noreply.github.com> Date: Wed, 28 Sep 2022 16:38:04 +0200 Subject: [PATCH 070/266] Fix typo --- docs/en/interfaces/formats.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 999d659329b..58e986cc2f3 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1030,7 +1030,7 @@ Let's say we have table `test` with two columns: โ”Œโ”€object_nameโ”€โ”ฌโ”€numberโ”€โ” โ”‚ first_obj โ”‚ 1 โ”‚ โ”‚ second_obj โ”‚ 2 โ”‚ -โ”‚ trhird_obj โ”‚ 3 โ”‚ +โ”‚ third_obj โ”‚ 3 โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ ``` Let's output it in `JSONObjectEachRow` format and use `format_json_object_each_row_column_for_object_name` setting: @@ -1044,7 +1044,7 @@ The output: { "first_obj": {"number": 1}, "second_obj": {"number": 2}, - "trhird_obj": {"number": 3} + "third_obj": {"number": 3} } ``` @@ -1059,7 +1059,7 @@ select * from file('data.json', JSONObjectEachRow, 'object_name String, number U โ”Œโ”€object_nameโ”€โ”ฌโ”€numberโ”€โ” โ”‚ first_obj โ”‚ 1 โ”‚ โ”‚ second_obj โ”‚ 2 โ”‚ -โ”‚ trhird_obj โ”‚ 3 โ”‚ +โ”‚ third_obj โ”‚ 3 โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ ``` From 4ac5df25400d23234fce6dbe7bad365c1350c5a3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 28 Sep 2022 14:52:10 +0000 Subject: [PATCH 071/266] drop all fuzzed tables --- docker/test/fuzzer/allow-nullable-key.xml | 2 +- docker/test/fuzzer/run-fuzzer.sh | 2 +- programs/client/Client.cpp | 10 +++---- src/Client/QueryFuzzer.cpp | 32 ++++++++++++++++------- src/Client/QueryFuzzer.h | 2 ++ 5 files changed, 32 insertions(+), 16 deletions(-) diff --git a/docker/test/fuzzer/allow-nullable-key.xml b/docker/test/fuzzer/allow-nullable-key.xml index 5a0c2c20e1c..331012a2254 100644 --- a/docker/test/fuzzer/allow-nullable-key.xml +++ b/docker/test/fuzzer/allow-nullable-key.xml @@ -3,4 +3,4 @@ 1 - \ No newline at end of 
file + diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index fd7acf88e05..7248728864e 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -241,7 +241,7 @@ quit --receive_data_timeout_ms=10000 \ --stacktrace \ --query-fuzzer-runs=1000 \ - --create-query-fuzzer-runs=30 \ + --create-query-fuzzer-runs=50 \ --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \ $NEW_TESTS_OPT \ > >(tail -n 100000 > fuzzer.log) \ diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 175b7a0fbd3..d44827d7bec 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -627,7 +627,7 @@ bool Client::processWithFuzzing(const String & full_query) } else if (const auto * create = orig_ast->as()) { - if (create->columns_list) + if (QueryFuzzer::isSuitableForFuzzing(*create)) this_query_runs = create_query_fuzzer_runs; else this_query_runs = 1; @@ -821,18 +821,18 @@ bool Client::processWithFuzzing(const String & full_query) } } - for (const auto & insert_query : queries_for_fuzzed_tables) + for (const auto & query : queries_for_fuzzed_tables) { std::cout << std::endl; WriteBufferFromOStream ast_buf(std::cout, 4096); - formatAST(*insert_query, ast_buf, false /*highlight*/); + formatAST(*query, ast_buf, false /*highlight*/); ast_buf.next(); std::cout << std::endl << std::endl; try { - query_to_execute = insert_query->formatForErrorMessage(); - if (auto res = processFuzzingStep(query_to_execute, insert_query)) + query_to_execute = query->formatForErrorMessage(); + if (auto res = processFuzzingStep(query_to_execute, query)) return *res; } catch (...) 
diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 9986d81cd7b..3f786f84e2a 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -448,6 +448,11 @@ void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def) } } +bool QueryFuzzer::isSuitableForFuzzing(const ASTCreateQuery & create) +{ + return create.columns_list && create.columns_list->columns; +} + void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create) { if (create.columns_list && create.columns_list->columns) @@ -485,6 +490,8 @@ void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create) IAST::Hash hash; sip_hash.get128(hash); + + /// Save only tables with unique definition. if (created_tables_hashes.insert(hash).second) original_table_name_to_fuzzed[original_name].push_back(new_name); } @@ -502,7 +509,7 @@ void QueryFuzzer::fuzzColumnDeclaration(ASTColumnDeclaration & column) DataTypePtr QueryFuzzer::fuzzDataType(DataTypePtr type) { - /// Do not replace Array with not Array to often. + /// Do not replace Array/Tuple/etc. with not Array/Tuple too often. 
const auto * type_array = typeid_cast(type.get()); if (type_array && fuzz_rand() % 5 != 0) return std::make_shared(fuzzDataType(type_array->getNestedType())); @@ -560,14 +567,17 @@ DataTypePtr QueryFuzzer::fuzzDataType(DataTypePtr type) } } - size_t tmp = fuzz_rand() % 10; + size_t tmp = fuzz_rand() % 8; + if (tmp == 0) + return std::make_shared(type); + if (tmp <= 1 && type->canBeInsideNullable()) return std::make_shared(type); - if (tmp <= 3 && type->canBeInsideLowCardinality()) + if (tmp <= 2 && type->canBeInsideLowCardinality()) return std::make_shared(type); - if (tmp == 4) + if (tmp <= 3) return getRandomType(); return type; @@ -594,7 +604,7 @@ DataTypePtr QueryFuzzer::getRandomType() if (type_id == TypeIndex::DECIMAL) \ return std::make_shared>( \ DataTypeDecimal::maxPrecision(), \ - fuzz_rand() % DataTypeDecimal::maxPrecision() + 1); + (fuzz_rand() % DataTypeDecimal::maxPrecision()) + 1); DISPATCH(Decimal32) DISPATCH(Decimal64) @@ -667,7 +677,7 @@ ASTs QueryFuzzer::getInsertQueriesForFuzzedTables(const String & full_query) { /// Parse query from scratch for each table instead of clone, /// to store proper pointers to inlined data, - /// which are not copies during clone. + /// which are not copied during clone. auto & query = queries.emplace_back(tryParseInsertQuery(full_query)); query->as()->setTable(fuzzed_name); } @@ -681,15 +691,19 @@ ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query) return {}; auto table_name = drop_query.getTable(); - auto it = original_table_name_to_fuzzed.find(table_name); - if (it == original_table_name_to_fuzzed.end()) + auto it = index_of_fuzzed_table.find(table_name); + if (it == index_of_fuzzed_table.end()) return {}; ASTs queries; - for (const auto & fuzzed_name : it->second) + /// Drop all created tables, not only unique ones. 
+ for (size_t i = 0; i < it->second; ++i) { + auto fuzzed_name = table_name + "__fuzz_" + toString(i); auto & query = queries.emplace_back(drop_query.clone()); query->as()->setTable(fuzzed_name); + /// Just in case add IF EXISTS to avoid exceptions. + query->as()->if_exists = true; } return queries; diff --git a/src/Client/QueryFuzzer.h b/src/Client/QueryFuzzer.h index ffe9fc7c91e..3771d2bc61a 100644 --- a/src/Client/QueryFuzzer.h +++ b/src/Client/QueryFuzzer.h @@ -91,6 +91,8 @@ struct QueryFuzzer void addTableLike(ASTPtr ast); void addColumnLike(ASTPtr ast); void collectFuzzInfoRecurse(ASTPtr ast); + + static bool isSuitableForFuzzing(const ASTCreateQuery & create); }; } From 6bb166b79b520636ca67796bb52713b5b35a5a31 Mon Sep 17 00:00:00 2001 From: Alfonso Martinez Date: Wed, 28 Sep 2022 17:41:51 +0200 Subject: [PATCH 072/266] exception replaced by nullptr --- src/Backups/BackupEntriesCollector.cpp | 11 +++------ src/Storages/IStorage.cpp | 4 ++-- src/Storages/System/StorageSystemColumns.cpp | 23 ++++--------------- .../System/StorageSystemPartsBase.cpp | 22 +++++------------- src/Storages/System/StorageSystemTables.cpp | 19 +++++---------- 5 files changed, 22 insertions(+), 57 deletions(-) diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 22245f7056a..53a1892302a 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -27,7 +27,6 @@ namespace ErrorCodes { extern const int INCONSISTENT_METADATA_FOR_BACKUP; extern const int CANNOT_BACKUP_TABLE; - extern const int TABLE_IS_DROPPED; extern const int UNKNOWN_TABLE; extern const int LOGICAL_ERROR; } @@ -526,14 +525,10 @@ void BackupEntriesCollector::lockTablesForReading() auto storage = table_info.storage; if (storage) { - try + table_info.table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); + if (table_info.table_lock == nullptr) { - table_info.table_lock = 
storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); - } - catch (Exception & e) - { - if (e.code() != ErrorCodes::TABLE_IS_DROPPED) - throw; + // Table was dropped while acquiring the lock throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} was dropped during scanning", tableNameWithTypeToString(table_name.database, table_name.table, true)); } } diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 8bbb2fa1c04..94d619a4b9b 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -55,8 +55,8 @@ TableLockHolder IStorage::lockForShare(const String & query_id, const std::chron if (is_dropped) { - auto table_id = getStorageID(); - throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", table_id.database_name, table_id.table_name); + // Table was dropped while acquiring the lock + result = nullptr; } return result; diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 81e3e6365a7..42eff5f5773 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -20,10 +20,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int TABLE_IS_DROPPED; -} StorageSystemColumns::StorageSystemColumns(const StorageID & table_id_) : IStorage(table_id_) @@ -113,21 +109,12 @@ protected: StoragePtr storage = storages.at(std::make_pair(database_name, table_name)); TableLockHolder table_lock; - try + table_lock = storage->lockForShare(query_id, lock_acquire_timeout); + + if (table_lock == nullptr) { - table_lock = storage->lockForShare(query_id, lock_acquire_timeout); - } - catch (const Exception & e) - { - /** There are case when IStorage::drop was called, - * but we still own the object. - * Then table will throw exception at attempt to lock it. - * Just skip the table. 
- */ - if (e.code() == ErrorCodes::TABLE_IS_DROPPED) - continue; - else - throw; + // Table was dropped while acquiring the lock, skipping table + continue; } auto metadata_snapshot = storage->getInMemoryMetadataPtr(); diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index b272c080262..908463dbb61 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -24,7 +24,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int TABLE_IS_DROPPED; } bool StorageSystemPartsBase::hasStateColumn(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) @@ -220,22 +219,13 @@ StoragesInfo StoragesInfoStream::next() info.storage = storages.at(std::make_pair(info.database, info.table)); - try - { - /// For table not to be dropped and set of columns to remain constant. - info.table_lock = info.storage->lockForShare(query_id, settings.lock_acquire_timeout); - } - catch (const Exception & e) - { - /** There are case when IStorage::drop was called, - * but we still own the object. - * Then table will throw exception at attempt to lock it. - * Just skip the table. - */ - if (e.code() == ErrorCodes::TABLE_IS_DROPPED) - continue; + /// For table not to be dropped and set of columns to remain constant. 
+ info.table_lock = info.storage->lockForShare(query_id, settings.lock_acquire_timeout); - throw; + if (info.table_lock == nullptr) + { + // Table was dropped while acquiring the lock, skipping table + continue; } info.engine = info.storage->getName(); diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 5eb69f3442b..fabf4a73f6e 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -24,11 +24,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int TABLE_IS_DROPPED; -} - StorageSystemTables::StorageSystemTables(const StorageID & table_id_) : IStorage(table_id_) @@ -303,15 +298,13 @@ protected: // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) continue; } - try + + lock = table->lockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); + + if (lock == nullptr) { - lock = table->lockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); - } - catch (const Exception & e) - { - if (e.code() == ErrorCodes::TABLE_IS_DROPPED) - continue; - throw; + // Table was dropped while acquiring the lock, skipping table + continue; } } From 34bc16cd5b4364c5531076eaeb3f18595582d4df Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 28 Sep 2022 15:55:39 +0000 Subject: [PATCH 073/266] avoid more useless errors --- programs/client/Client.cpp | 1 + src/Client/QueryFuzzer.cpp | 38 +++++++++++++++++-- src/Client/QueryFuzzer.h | 3 +- .../MergeTree/registerStorageMergeTree.cpp | 2 +- 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index d44827d7bec..9171f58b2e8 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -805,6 +805,7 @@ bool Client::processWithFuzzing(const String & full_query) // so that it doesn't influence the exit code. 
server_exception.reset(); client_exception.reset(); + fuzzer.notifyQueryFailed(ast_to_process); have_error = false; } else if (ast_to_process->formatForErrorMessage().size() > 500) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 3f786f84e2a..f7309695920 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -468,9 +468,21 @@ void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create) if (create.storage && create.storage->engine) { + /// Replace ReplicatedMergeTree to ordinary MergeTree + /// to avoid inconsistency of metadata in zookeeper. auto & engine_name = create.storage->engine->name; if (startsWith(engine_name, "Replicated")) + { engine_name = engine_name.substr(strlen("Replicated")); + if (auto & arguments = create.storage->engine->arguments) + { + auto & children = arguments->children; + if (children.size() <= 2) + arguments.reset(); + else + children.erase(children.begin(), children.begin() + 2); + } + } } auto full_name = create.getTable(); @@ -493,7 +505,7 @@ void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create) /// Save only tables with unique definition. 
if (created_tables_hashes.insert(hash).second) - original_table_name_to_fuzzed[original_name].push_back(new_name); + original_table_name_to_fuzzed[original_name].insert(new_name); } void QueryFuzzer::fuzzColumnDeclaration(ASTColumnDeclaration & column) @@ -640,8 +652,9 @@ void QueryFuzzer::fuzzTableName(ASTTableExpression & table) auto it = original_table_name_to_fuzzed.find(table_id.getTableName()); if (it != original_table_name_to_fuzzed.end() && !it->second.empty()) { - const auto & new_table_name = it->second[fuzz_rand() % it->second.size()]; - StorageID new_table_id(table_id.database_name, new_table_name); + auto new_table_name = it->second.begin(); + std::advance(new_table_name, fuzz_rand() % it->second.size()); + StorageID new_table_id(table_id.database_name, *new_table_name); table.database_and_table_name = std::make_shared(new_table_id); } } @@ -709,6 +722,25 @@ ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query) return queries; } +void QueryFuzzer::notifyQueryFailed(ASTPtr ast) +{ + auto remove_fuzzed_table = [this](const auto & table_name) + { + auto pos = table_name.find("__fuzz_"); + if (pos != std::string::npos) + { + auto original_name = table_name.substr(0, pos); + original_table_name_to_fuzzed[original_name].erase(table_name); + } + }; + + if (const auto * create = ast->as()) + remove_fuzzed_table(create->getTable()); + + if (const auto * insert = ast->as()) + remove_fuzzed_table(insert->getTable()); +} + void QueryFuzzer::fuzz(ASTs & asts) { for (auto & ast : asts) diff --git a/src/Client/QueryFuzzer.h b/src/Client/QueryFuzzer.h index 3771d2bc61a..9afe7867dd2 100644 --- a/src/Client/QueryFuzzer.h +++ b/src/Client/QueryFuzzer.h @@ -60,7 +60,7 @@ struct QueryFuzzer std::unordered_set debug_visited_nodes; ASTPtr * debug_top_ast = nullptr; - std::unordered_map> original_table_name_to_fuzzed; + std::unordered_map> original_table_name_to_fuzzed; std::unordered_map index_of_fuzzed_table; std::set created_tables_hashes; @@ 
-76,6 +76,7 @@ struct QueryFuzzer DataTypePtr getRandomType(); ASTs getInsertQueriesForFuzzedTables(const String & full_query); ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); + void notifyQueryFailed(ASTPtr ast); void replaceWithColumnLike(ASTPtr & ast); void replaceWithTableLike(ASTPtr & ast); void fuzzOrderByElement(ASTOrderByElement * elem); diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 6982521f76a..7e2d5e1727b 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -265,7 +265,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (max_num_params == 0) msg += "no parameters"; - if (min_num_params == max_num_params) + else if (min_num_params == max_num_params) msg += fmt::format("{} parameters: {}", min_num_params, needed_params); else msg += fmt::format("{} to {} parameters: {}", min_num_params, max_num_params, needed_params); From 65b161341ce601f1a539f0487757298f7a50d305 Mon Sep 17 00:00:00 2001 From: Alfonso Martinez Date: Wed, 28 Sep 2022 18:08:10 +0200 Subject: [PATCH 074/266] Replaced changed functions for tryLockForShare --- src/Backups/BackupEntriesCollector.cpp | 2 +- src/Storages/IStorage.cpp | 12 ++++++++++++ src/Storages/System/StorageSystemColumns.cpp | 2 +- src/Storages/System/StorageSystemPartsBase.cpp | 2 +- src/Storages/System/StorageSystemTables.cpp | 2 +- 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 53a1892302a..73f78a13765 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -525,7 +525,7 @@ void BackupEntriesCollector::lockTablesForReading() auto storage = table_info.storage; if (storage) { - table_info.table_lock = storage->lockForShare(context->getInitialQueryId(), 
context->getSettingsRef().lock_acquire_timeout); + table_info.table_lock = storage->tryLockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); if (table_info.table_lock == nullptr) { // Table was dropped while acquiring the lock diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 94d619a4b9b..e4bcd2b5f04 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -53,6 +53,18 @@ TableLockHolder IStorage::lockForShare(const String & query_id, const std::chron { TableLockHolder result = tryLockTimed(drop_lock, RWLockImpl::Read, query_id, acquire_timeout); + if (is_dropped) + { + auto table_id = getStorageID(); + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", table_id.database_name, table_id.table_name); + } + return result; +} + +TableLockHolder IStorage::tryLockForShare(const String & query_id, const std::chrono::milliseconds & acquire_timeout) +{ + TableLockHolder result = tryLockTimed(drop_lock, RWLockImpl::Read, query_id, acquire_timeout); + if (is_dropped) { // Table was dropped while acquiring the lock diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 42eff5f5773..20cab9fdc47 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -109,7 +109,7 @@ protected: StoragePtr storage = storages.at(std::make_pair(database_name, table_name)); TableLockHolder table_lock; - table_lock = storage->lockForShare(query_id, lock_acquire_timeout); + table_lock = storage->tryLockForShare(query_id, lock_acquire_timeout); if (table_lock == nullptr) { diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 908463dbb61..bcfd670ece9 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -220,7 +220,7 @@ StoragesInfo StoragesInfoStream::next() info.storage = 
storages.at(std::make_pair(info.database, info.table)); /// For table not to be dropped and set of columns to remain constant. - info.table_lock = info.storage->lockForShare(query_id, settings.lock_acquire_timeout); + info.table_lock = info.storage->tryLockForShare(query_id, settings.lock_acquire_timeout); if (info.table_lock == nullptr) { diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index fabf4a73f6e..e36b22a979e 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -299,7 +299,7 @@ protected: continue; } - lock = table->lockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); + lock = table->tryLockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); if (lock == nullptr) { From 261ab8e1d084caa2f80e1257fda993a83bb8a67e Mon Sep 17 00:00:00 2001 From: Alfonso Martinez Date: Wed, 28 Sep 2022 18:59:07 +0200 Subject: [PATCH 075/266] Fixed style --- src/Storages/IStorage.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index e4bcd2b5f04..5b12b720f1c 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -54,10 +54,10 @@ TableLockHolder IStorage::lockForShare(const String & query_id, const std::chron TableLockHolder result = tryLockTimed(drop_lock, RWLockImpl::Read, query_id, acquire_timeout); if (is_dropped) - { - auto table_id = getStorageID(); - throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", table_id.database_name, table_id.table_name); - } + { + auto table_id = getStorageID(); + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", table_id.database_name, table_id.table_name); + } return result; } From 986f00f6d9228d2d5a989d6b463ae627d32ef00f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 28 Sep 2022 21:06:50 +0000 Subject: [PATCH 
076/266] clear fuzzed tables after drop --- src/Client/QueryFuzzer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index f7309695920..1fa7de65ce9 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -719,6 +719,9 @@ ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query) query->as()->if_exists = true; } + index_of_fuzzed_table.erase(it); + original_table_name_to_fuzzed.erase(table_name); + return queries; } From 14119fc30673e7073917604b4d7cb0a660fd754a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 28 Sep 2022 21:10:41 +0000 Subject: [PATCH 077/266] more often mutations for complex type --- src/Client/QueryFuzzer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 1fa7de65ce9..a1e6565fe98 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -523,11 +523,11 @@ DataTypePtr QueryFuzzer::fuzzDataType(DataTypePtr type) { /// Do not replace Array/Tuple/etc. with not Array/Tuple too often. 
const auto * type_array = typeid_cast(type.get()); - if (type_array && fuzz_rand() % 5 != 0) + if (type_array && fuzz_rand() % 4 != 0) return std::make_shared(fuzzDataType(type_array->getNestedType())); const auto * type_tuple = typeid_cast(type.get()); - if (type_tuple && fuzz_rand() % 5 != 0) + if (type_tuple && fuzz_rand() % 4 != 0) { DataTypes elements; for (const auto & element : type_tuple->getElements()) @@ -539,7 +539,7 @@ DataTypePtr QueryFuzzer::fuzzDataType(DataTypePtr type) } const auto * type_map = typeid_cast(type.get()); - if (type_map && fuzz_rand() % 5 != 0) + if (type_map && fuzz_rand() % 4 != 0) { auto key_type = fuzzDataType(type_map->getKeyType()); auto value_type = fuzzDataType(type_map->getValueType()); From c97bec829aa5d30eca7378723bed93a506738af8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 28 Sep 2022 21:47:10 +0000 Subject: [PATCH 078/266] slightly better --- programs/client/Client.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 9171f58b2e8..cc0acfeab0b 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -846,6 +846,7 @@ bool Client::processWithFuzzing(const String & full_query) { server_exception.reset(); client_exception.reset(); + fuzzer.notifyQueryFailed(query); have_error = false; } } From 5e40f2ebcaf222ea4bc82aca4314c65551245c92 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 29 Sep 2022 00:13:40 +0000 Subject: [PATCH 079/266] review suggestions --- programs/server/Server.cpp | 92 ++++++++++++------------- src/Server/TCPProtocolStackData.h | 5 ++ src/Server/TCPProtocolStackFactory.h | 3 +- src/Server/TCPProtocolStackHandler.h | 5 +- src/Server/TCPServerConnectionFactory.h | 2 +- 5 files changed, 56 insertions(+), 51 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 04c708e3a0f..3154af81ae8 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1928,62 +1928,62 @@ 
void Server::createServers( std::string prefix = conf_name + "."; std::unordered_set pset {conf_name}; - if (config.has(prefix + "port")) + if (!config.has(prefix + "port")) + continue; + + std::string description {" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); + std::string port_name = prefix + "port"; + bool is_secure = false; + auto stack = std::make_unique(*this, conf_name); + + while (true) { - std::string description {" protocol"}; - if (config.has(prefix + "description")) - description = config.getString(prefix + "description"); - std::string port_name = prefix + "port"; - bool is_secure = false; - auto stack = std::make_unique(*this, conf_name); - - while (true) + // if there is no "type" - it's a reference to another protocol and this is just an endpoint + if (config.has(prefix + "type")) { - // if there is no "type" - it's a reference to another protocol and this is just an endpoint - if (config.has(prefix + "type")) + std::string type = config.getString(prefix + "type"); + if (type == "tls") { - std::string type = config.getString(prefix + "type"); - if (type == "tls") - { - if (is_secure) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); - is_secure = true; - } - - stack->append(create_factory(type, conf_name)); + if (is_secure) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); + is_secure = true; } - if (!config.has(prefix + "impl")) - break; - - conf_name = "protocols." 
+ config.getString(prefix + "impl"); - prefix = conf_name + "."; - - if (!pset.insert(conf_name).second) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + stack->append(create_factory(type, conf_name)); } - if (!stack || stack->size() == 0) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); + if (!config.has(prefix + "impl")) + break; - createServer(config, host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, host, port, is_secure); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); + conf_name = "protocols." + config.getString(prefix + "impl"); + prefix = conf_name + "."; - return ProtocolServerAdapter( - host, - port_name.c_str(), - description + ": " + address.toString(), - std::make_unique( - stack.release(), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); + if (!pset.insert(conf_name).second) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); } + + if (stack->empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); + + createServer(config, host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, host, port, is_secure); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + + return ProtocolServerAdapter( + host, + port_name.c_str(), + description + ": " + address.toString(), + std::make_unique( + stack.release(), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); } } diff --git 
a/src/Server/TCPProtocolStackData.h b/src/Server/TCPProtocolStackData.h index f2d00d8a845..4ad401e723f 100644 --- a/src/Server/TCPProtocolStackData.h +++ b/src/Server/TCPProtocolStackData.h @@ -6,11 +6,16 @@ namespace DB { +// Data to communicate between protocol layers struct TCPProtocolStackData { + // socket implementation can be replaced by some layer - TLS as an example Poco::Net::StreamSocket socket; + // host from PROXY layer std::string forwarded_for; + // certificate path from TLS layer to TCP layer std::string certificate; + // default database from endpoint configuration to TCP layer std::string default_database; }; diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index c0ec29411d4..448b019b849 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -25,7 +25,7 @@ private: IServer & server [[maybe_unused]]; Poco::Logger * log; std::string conf_name; - std::list stack; + std::vector stack; AllowedClientHosts allowed_client_hosts; class DummyTCPHandler : public Poco::Net::TCPServerConnection @@ -85,6 +85,7 @@ public: } size_t size() { return stack.size(); } + bool empty() { return stack.empty(); } }; diff --git a/src/Server/TCPProtocolStackHandler.h b/src/Server/TCPProtocolStackHandler.h index 9ca388da17b..e16a6b6b2ca 100644 --- a/src/Server/TCPProtocolStackHandler.h +++ b/src/Server/TCPProtocolStackHandler.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -19,11 +18,11 @@ class TCPProtocolStackHandler : public Poco::Net::TCPServerConnection private: IServer & server; TCPServer & tcp_server; - std::list stack; + std::vector stack; std::string conf_name; public: - TCPProtocolStackHandler(IServer & server_, TCPServer & tcp_server_, const StreamSocket & socket, const std::list & stack_, const std::string & conf_name_) + TCPProtocolStackHandler(IServer & server_, TCPServer & tcp_server_, const StreamSocket & socket, const std::vector & stack_, const std::string 
& conf_name_) : TCPServerConnection(socket), server(server_), tcp_server(tcp_server_), stack(stack_), conf_name(conf_name_) {} diff --git a/src/Server/TCPServerConnectionFactory.h b/src/Server/TCPServerConnectionFactory.h index ab9b0848ed7..18b30557b00 100644 --- a/src/Server/TCPServerConnectionFactory.h +++ b/src/Server/TCPServerConnectionFactory.h @@ -1,7 +1,7 @@ #pragma once #include -#include "Server/TCPProtocolStackData.h" +#include namespace Poco { From 981f2e119f4a54ccd8b18acfaa67c358b784f8b4 Mon Sep 17 00:00:00 2001 From: Alfonso Martinez Date: Thu, 29 Sep 2022 08:44:10 +0200 Subject: [PATCH 080/266] Forgot to add function to header --- src/Storages/IStorage.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index a61bfeaff57..242f17d6f20 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -256,12 +256,16 @@ protected: const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const std::chrono::milliseconds & acquire_timeout) const; public: - /// Lock table for share. This lock must be acuqired if you want to be sure, + /// Lock table for share. This lock must be acquired if you want to be sure, /// that table will be not dropped while you holding this lock. It's used in /// variety of cases starting from SELECT queries to background merges in /// MergeTree. TableLockHolder lockForShare(const String & query_id, const std::chrono::milliseconds & acquire_timeout); + /// Similar to lockForShare, but returns a nullptr if the table is dropped while + /// acquiring the lock instead of raising a TABLE_IS_DROPPED exception + TableLockHolder tryLockForShare(const String & query_id, const std::chrono::milliseconds & acquire_timeout); + /// Lock table for alter. This lock must be acuqired in ALTER queries to be /// sure, that we execute only one simultaneous alter. Doesn't affect share lock. 
using AlterLockHolder = std::unique_lock; From a3009ed9e44c9878bc30b98174a84e50dee24ca1 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 30 Sep 2022 13:54:05 +0000 Subject: [PATCH 081/266] fix clang-tidy --- src/Client/QueryFuzzer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index a1e6565fe98..77b13eb0f48 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -611,7 +611,7 @@ DataTypePtr QueryFuzzer::getRandomType() if (type_id == TypeIndex::Array) return std::make_shared(getRandomType()); -/// NOLINTNEXTLINE +/// NOLINTBEGIN(bugprone-macro-parentheses) #define DISPATCH(DECIMAL) \ if (type_id == TypeIndex::DECIMAL) \ return std::make_shared>( \ @@ -623,6 +623,7 @@ DataTypePtr QueryFuzzer::getRandomType() DISPATCH(Decimal128) DISPATCH(Decimal256) #undef DISPATCH +/// NOLINTEND(bugprone-macro-parentheses) if (type_id == TypeIndex::FixedString) return std::make_shared(fuzz_rand() % 20); From bb1771e159760365ad1bd9ff221b9ce4b9246e31 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 30 Sep 2022 10:35:49 -0700 Subject: [PATCH 082/266] Update the macro ENABLE_CH_BUNDLE_BORINGSSLL to ENABLE_EXTERNAL_OPENSSL --- CMakeLists.txt | 8 ++++---- contrib/CMakeLists.txt | 2 +- contrib/krb5-cmake/CMakeLists.txt | 2 +- contrib/libpq-cmake/CMakeLists.txt | 2 +- src/CMakeLists.txt | 2 +- src/configure_config.cmake | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 52626c7badf..47bdbafb0d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -495,16 +495,16 @@ endif () enable_testing() # Enable for tests without binary -option(ENABLE_CH_BUNDLE_BORINGSSL "Provide the user to allow building of OpenSSL library. By default, uses in-house ClickHouse BoringSSL" ON) +option(ENABLE_EXTERNAL_OPENSSL "Provide the user to allow building of OpenSSL library. By default, uses in-house ClickHouse BoringSSL. 
It is not recommended and may be insecure" OFF) -message (STATUS "ENABLE_CH_BUNDLE_BORINGSSL: ${ENABLE_CH_BUNDLE_BORINGSSL}") -if (ENABLE_CH_BUNDLE_BORINGSSL) +message (STATUS "ENABLE_EXTERNAL_OPENSSL: ${ENABLE_EXTERNAL_OPENSSL}") +if (NOT ENABLE_EXTERNAL_OPENSSL) message (STATUS "Uses in-house ClickHouse BoringSSL library") else () message (STATUS "Build and uses OpenSSL library instead of BoringSSL") endif () -if (NOT ENABLE_CH_BUNDLE_BORINGSSL) +if (ENABLE_EXTERNAL_OPENSSL) set(ENABLE_SSL 1) target_compile_options(global-group INTERFACE "-Wno-deprecated-declarations") endif () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index a1baca69c1c..06e697e9dbf 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -74,7 +74,7 @@ add_contrib (re2-cmake re2) add_contrib (xz-cmake xz) add_contrib (brotli-cmake brotli) add_contrib (double-conversion-cmake double-conversion) -if (ENABLE_CH_BUNDLE_BORINGSSL) +if (NOT ENABLE_EXTERNAL_OPENSSL) add_contrib (boringssl-cmake boringssl) else () add_contrib (openssl-cmake openssl) diff --git a/contrib/krb5-cmake/CMakeLists.txt b/contrib/krb5-cmake/CMakeLists.txt index 95f8e7e0d21..8478def3cb1 100644 --- a/contrib/krb5-cmake/CMakeLists.txt +++ b/contrib/krb5-cmake/CMakeLists.txt @@ -578,7 +578,7 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") list(APPEND ALL_SRCS "${CMAKE_CURRENT_BINARY_DIR}/include_private/kcmrpc.c") endif() -if (NOT ENABLE_CH_BUNDLE_BORINGSSL) +if (ENABLE_EXTERNAL_OPENSSL) list(REMOVE_ITEM ALL_SRCS "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/aes.c") list(APPEND ALL_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/aes.c") endif () diff --git a/contrib/libpq-cmake/CMakeLists.txt b/contrib/libpq-cmake/CMakeLists.txt index 26ece28bd18..1f02f24bb24 100644 --- a/contrib/libpq-cmake/CMakeLists.txt +++ b/contrib/libpq-cmake/CMakeLists.txt @@ -59,7 +59,7 @@ set(SRCS add_library(_libpq ${SRCS}) -if (NOT ENABLE_CH_BUNDLE_BORINGSSL) +if (ENABLE_EXTERNAL_OPENSSL) add_definitions(-DHAVE_BIO_METH_NEW) 
add_definitions(-DHAVE_HMAC_CTX_NEW) add_definitions(-DHAVE_HMAC_CTX_FREE) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c0246486110..2cf83eb0d01 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -247,7 +247,7 @@ add_object_library(clickhouse_access Access) add_object_library(clickhouse_backups Backups) add_object_library(clickhouse_core Core) add_object_library(clickhouse_core_mysql Core/MySQL) -if (ENABLE_CH_BUNDLE_BORINGSSL) +if (NOT ENABLE_EXTERNAL_OPENSSL) add_object_library(clickhouse_compression Compression) else () add_headers_and_sources(dbms Compression) diff --git a/src/configure_config.cmake b/src/configure_config.cmake index a2ab5bc82e5..9efa1127c64 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -103,6 +103,6 @@ endif() if (TARGET ch_contrib::jemalloc) set(USE_JEMALLOC 1) endif() -if (ENABLE_CH_BUNDLE_BORINGSSL) +if (NOT ENABLE_EXTERNAL_OPENSSL) set(USE_BORINGSSL 1) endif () From 1fd20ec8e2a9aa083406a2552f1e4b9d4b32c69d Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Sun, 2 Oct 2022 14:48:20 +0800 Subject: [PATCH 083/266] add tryDecrypt to 02415 tests --- .../02415_all_new_functions_must_be_documented.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index c7ac00ee18f..dea49449cf3 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -909,6 +909,7 @@ trimLeft trimRight trunc tryBase58Decode +tryDecrypt tumble tumbleEnd tumbleStart From acfc0cf0ab49f888a1e451a44d7ac856a3e9e1bb Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Mon, 3 Oct 2022 18:06:54 +0800 Subject: [PATCH 084/266] add doc for tryDecrypt --- src/Functions/decrypt.cpp | 8 ----- src/Functions/tryDecrypt.cpp | 32 +++++++++++++++++++ 
...new_functions_must_be_documented.reference | 1 - 3 files changed, 32 insertions(+), 9 deletions(-) create mode 100644 src/Functions/tryDecrypt.cpp diff --git a/src/Functions/decrypt.cpp b/src/Functions/decrypt.cpp index 664e071e858..db9e3e0eb67 100644 --- a/src/Functions/decrypt.cpp +++ b/src/Functions/decrypt.cpp @@ -15,13 +15,6 @@ struct DecryptImpl static constexpr bool use_null_when_decrypt_fail = false; }; -struct TryDecryptImpl -{ - static constexpr auto name = "tryDecrypt"; - static constexpr auto compatibility_mode = OpenSSLDetails::CompatibilityMode::OpenSSL; - static constexpr bool use_null_when_decrypt_fail = true; -}; - } namespace DB @@ -30,7 +23,6 @@ namespace DB REGISTER_FUNCTION(Decrypt) { factory.registerFunction>(); - factory.registerFunction>(); } } diff --git a/src/Functions/tryDecrypt.cpp b/src/Functions/tryDecrypt.cpp new file mode 100644 index 00000000000..51097b0e8ec --- /dev/null +++ b/src/Functions/tryDecrypt.cpp @@ -0,0 +1,32 @@ +#include +#include + +#if USE_SSL + +# include +# include + +namespace +{ + +struct TryDecryptImpl +{ + static constexpr auto name = "tryDecrypt"; + static constexpr auto compatibility_mode = OpenSSLDetails::CompatibilityMode::OpenSSL; + static constexpr bool use_null_when_decrypt_fail = true; +}; + +} + +namespace DB +{ + +REGISTER_FUNCTION(Decrypt) +{ + factory.registerFunction>( + "Similar to `decrypt`, but returns NULL if decryption fails because of using the wrong key."); +} + +} + +#endif diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index dea49449cf3..c7ac00ee18f 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -909,7 +909,6 @@ trimLeft trimRight trunc tryBase58Decode -tryDecrypt tumble tumbleEnd tumbleStart From 
528591245f950f92aeefd6ffea5e769febe0733c Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Mon, 3 Oct 2022 18:46:20 +0800 Subject: [PATCH 085/266] Fix register name --- src/Functions/tryDecrypt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/tryDecrypt.cpp b/src/Functions/tryDecrypt.cpp index 51097b0e8ec..0ef962eb98b 100644 --- a/src/Functions/tryDecrypt.cpp +++ b/src/Functions/tryDecrypt.cpp @@ -21,7 +21,7 @@ struct TryDecryptImpl namespace DB { -REGISTER_FUNCTION(Decrypt) +REGISTER_FUNCTION(TryDecrypt) { factory.registerFunction>( "Similar to `decrypt`, but returns NULL if decryption fails because of using the wrong key."); From 34f598e09b7ac23c8fae234a3a132a5a85c45efa Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Mon, 3 Oct 2022 19:55:37 +0800 Subject: [PATCH 086/266] fix rankcorr size overflow --- src/AggregateFunctions/AggregateFunctionRankCorrelation.h | 2 +- .../queries/0_stateless/02347_rank_corr_size_overflow.reference | 1 + tests/queries/0_stateless/02347_rank_corr_size_overflow.sql | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02347_rank_corr_size_overflow.reference create mode 100644 tests/queries/0_stateless/02347_rank_corr_size_overflow.sql diff --git a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h index 670dd5948f7..e05b6dc1608 100644 --- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h +++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h @@ -32,7 +32,7 @@ struct RankCorrelationData : public StatisticalSample std::tie(ranks_y, std::ignore) = computeRanksAndTieCorrection(this->y); /// Sizes can be non-equal due to skipped NaNs. 
- const auto size = std::min(this->size_x, this->size_y); + const Float32 size = static_cast(std::min(this->size_x, this->size_y)); /// Count d^2 sum Float64 answer = 0; diff --git a/tests/queries/0_stateless/02347_rank_corr_size_overflow.reference b/tests/queries/0_stateless/02347_rank_corr_size_overflow.reference new file mode 100644 index 00000000000..3a2e3f4984a --- /dev/null +++ b/tests/queries/0_stateless/02347_rank_corr_size_overflow.reference @@ -0,0 +1 @@ +-1 diff --git a/tests/queries/0_stateless/02347_rank_corr_size_overflow.sql b/tests/queries/0_stateless/02347_rank_corr_size_overflow.sql new file mode 100644 index 00000000000..3ca1ced8dd5 --- /dev/null +++ b/tests/queries/0_stateless/02347_rank_corr_size_overflow.sql @@ -0,0 +1 @@ +SELECT round(rankCorr(number, -number)) FROM numbers(5000000); From 369f0840701bd7904c8f4644c585681ab9c7dded Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Mon, 3 Oct 2022 20:15:31 +0800 Subject: [PATCH 087/266] Update src/AggregateFunctions/AggregateFunctionRankCorrelation.h Co-authored-by: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> --- src/AggregateFunctions/AggregateFunctionRankCorrelation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h index e05b6dc1608..4a81c6cda82 100644 --- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h +++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h @@ -32,7 +32,7 @@ struct RankCorrelationData : public StatisticalSample std::tie(ranks_y, std::ignore) = computeRanksAndTieCorrection(this->y); /// Sizes can be non-equal due to skipped NaNs. 
- const Float32 size = static_cast(std::min(this->size_x, this->size_y)); + const Float64 size = static_cast(std::min(this->size_x, this->size_y)); /// Count d^2 sum Float64 answer = 0; From ea06e20caecc4461ca1e92a79d4c950db0e54b68 Mon Sep 17 00:00:00 2001 From: flynn Date: Thu, 29 Sep 2022 04:06:16 +0000 Subject: [PATCH 088/266] remove unused code --- src/Columns/ColumnAggregateFunction.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index d3e2d2f94de..b51b21307f3 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -453,7 +453,7 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size /// Must create new state of aggregate function and take ownership of it, /// because ownership of states of aggregate function cannot be shared for individual rows, /// (only as a whole, see comment above). - ensureOwnership(); + /// ensureOwnership() will execute in insertDefault() insertDefault(); insertMergeFrom(from, n); } @@ -465,7 +465,7 @@ void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n) void ColumnAggregateFunction::insertFrom(ConstAggregateDataPtr place) { - ensureOwnership(); + /// ensureOwnership() will execute in insertDefault() insertDefault(); insertMergeFrom(place); } From 620b0673d0b8a4c37cbf48ff3bd22327338b6db9 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 3 Oct 2022 17:24:57 +0200 Subject: [PATCH 089/266] Update AMQP --- contrib/AMQP-CPP | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/AMQP-CPP b/contrib/AMQP-CPP index 1a6c51f4ac5..818c2d8ad96 160000 --- a/contrib/AMQP-CPP +++ b/contrib/AMQP-CPP @@ -1 +1 @@ -Subproject commit 1a6c51f4ac51ac56610fa95081bd2f349911375a +Subproject commit 818c2d8ad96a08a5d20fece7d1e1e8855a2b0860 From 3114ddcad2c04c536283f6baa3a5bb38ab772d01 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 3 Oct 2022 
19:06:12 +0200 Subject: [PATCH 090/266] Fix failure during table drop --- src/Storages/StorageReplicatedMergeTree.cpp | 34 ++++++++++++++++----- src/Storages/StorageReplicatedMergeTree.h | 2 +- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 3ce20fff239..1e31bb9fa22 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7585,6 +7585,8 @@ std::pair StorageReplicatedMergeTree::unlockSharedData(const IMer if (!settings->allow_remote_fs_zero_copy_replication) return std::make_pair(true, NameSet{}); + bool only_zero_copy = part.isStoredOnRemoteDiskWithZeroCopySupport() && getDisks().size() == 1; + if (!part.data_part_storage) LOG_WARNING(log, "Datapart storage for part {} (temp: {}) is not initialzied", part.name, part.is_temp); @@ -7630,14 +7632,15 @@ std::pair StorageReplicatedMergeTree::unlockSharedData(const IMer else zookeeper = getZooKeeper(); - return unlockSharedDataByID(part.getUniqueId(), getTableSharedID(), part.name, replica_name, part.data_part_storage->getDiskType(), zookeeper, *getSettings(), log, - zookeeper_path); + return unlockSharedDataByID( + part.getUniqueId(), getTableSharedID(), part.name, replica_name, + part.data_part_storage->getDiskType(), zookeeper, *getSettings(), log, zookeeper_path, only_zero_copy); } std::pair StorageReplicatedMergeTree::unlockSharedDataByID( String part_id, const String & table_uuid, const String & part_name, const String & replica_name_, std::string disk_type, zkutil::ZooKeeperPtr zookeeper_ptr, const MergeTreeSettings & settings, - Poco::Logger * logger, const String & zookeeper_path_old) + Poco::Logger * logger, const String & zookeeper_path_old, bool only_zero_copy) { boost::replace_all(part_id, "/", "_"); @@ -7731,8 +7734,18 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else { - LOG_TRACE(logger, "Can't remove parent zookeeper lock 
{} for part {}, because children {} ({}) exists", - zookeeper_part_node, part_name, children.size(), fmt::join(children, ", ")); + + if (only_zero_copy) + { + LOG_TRACE(logger, "Can't remove parent zookeeper lock {} for part {}, because children {} ({}) exists, will not remove blobs", + zookeeper_part_node, part_name, children.size(), fmt::join(children, ", ")); + part_has_no_more_locks = false; + } + else + { + LOG_TRACE(logger, "Can't remove parent zookeeper lock {} for part {}, because children {} ({}) exists", + zookeeper_part_node, part_name, children.size(), fmt::join(children, ", ")); + } } } @@ -8268,9 +8281,14 @@ bool StorageReplicatedMergeTree::removeSharedDetachedPart(DiskPtr disk, const St { String id = disk->getUniqueId(checksums); bool can_remove = false; - std::tie(can_remove, files_not_to_remove) = StorageReplicatedMergeTree::unlockSharedDataByID(id, table_uuid, part_name, - detached_replica_name, toString(disk->getDataSourceDescription().type), zookeeper, local_context->getReplicatedMergeTreeSettings(), &Poco::Logger::get("StorageReplicatedMergeTree"), - detached_zookeeper_path); + std::tie(can_remove, files_not_to_remove) = StorageReplicatedMergeTree::unlockSharedDataByID( + id, table_uuid, part_name, + detached_replica_name, + toString(disk->getDataSourceDescription().type), + zookeeper, local_context->getReplicatedMergeTreeSettings(), + &Poco::Logger::get("StorageReplicatedMergeTree"), + detached_zookeeper_path, + false); keep_shared = !can_remove; } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 86d78b788f1..7f672118d8b 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -280,7 +280,7 @@ public: /// Return false if data is still used by another node static std::pair unlockSharedDataByID(String part_id, const String & table_uuid, const String & part_name, const String & replica_name_, std::string disk_type, zkutil::ZooKeeperPtr 
zookeeper_, const MergeTreeSettings & settings, Poco::Logger * logger, - const String & zookeeper_path_old); + const String & zookeeper_path_old, bool only_zero_copy); /// Fetch part only if some replica has it on shared storage like S3 DataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) override; From 71e1b1df730159bf5c56589b281a30ebe6ac8a6b Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 3 Oct 2022 19:09:24 +0200 Subject: [PATCH 091/266] add comment --- src/Storages/StorageReplicatedMergeTree.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 1e31bb9fa22..886c5e7964f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7734,9 +7734,10 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else { - if (only_zero_copy) { + /// If we have only_zero_copy configuration it means, that part was actually created on the same disk. It can happen + /// in extremely rare cases when both replicas decide to merge something due to one of tables in-progress drop. 
LOG_TRACE(logger, "Can't remove parent zookeeper lock {} for part {}, because children {} ({}) exists, will not remove blobs", zookeeper_part_node, part_name, children.size(), fmt::join(children, ", ")); part_has_no_more_locks = false; From 6028643e049ae1f0f115e8688e5ceefa988b6c8e Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 28 Sep 2022 12:30:45 +0000 Subject: [PATCH 092/266] Add function viewExplain --- src/Parsers/ASTExplainQuery.h | 52 ++++++++- src/Parsers/ExpressionElementParsers.cpp | 70 +++++++++++- src/Parsers/ParserExplainQuery.cpp | 9 ++ src/Parsers/ParserExplainQuery.h | 7 ++ src/TableFunctions/TableFunctionExplain.cpp | 106 ++++++++++++++++++ src/TableFunctions/TableFunctionExplain.h | 35 ++++++ src/TableFunctions/registerTableFunctions.cpp | 1 + src/TableFunctions/registerTableFunctions.h | 2 + .../02421_explain_subquery.reference | 4 + .../0_stateless/02421_explain_subquery.sql | 4 + 10 files changed, 284 insertions(+), 6 deletions(-) create mode 100644 src/TableFunctions/TableFunctionExplain.cpp create mode 100644 src/TableFunctions/TableFunctionExplain.h create mode 100644 tests/queries/0_stateless/02421_explain_subquery.reference create mode 100644 tests/queries/0_stateless/02421_explain_subquery.sql diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h index 3f169a93bad..986d9ba4d56 100644 --- a/src/Parsers/ASTExplainQuery.h +++ b/src/Parsers/ASTExplainQuery.h @@ -7,6 +7,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + /// AST, EXPLAIN or other query with meaning of explanation query instead of execution class ASTExplainQuery : public ASTQueryWithOutput { @@ -24,7 +29,7 @@ public: explicit ASTExplainQuery(ExplainKind kind_) : kind(kind_) {} - String getID(char delim) const override { return "Explain" + (delim + toString(kind)); } + String getID(char delim) const override { return "Explain" + (delim + formatString(kind)); } ExplainKind getKind() const { return kind; } ASTPtr clone() const 
override { @@ -67,7 +72,7 @@ public: protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - settings.ostr << (settings.hilite ? hilite_keyword : "") << toString(kind) << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << formatString(kind) << (settings.hilite ? hilite_none : ""); if (ast_settings) { @@ -102,7 +107,8 @@ private: ASTPtr table_function; ASTPtr table_override; - static String toString(ExplainKind kind) + /// format as it appears in the query text + static String formatString(ExplainKind kind) { switch (kind) { @@ -117,6 +123,46 @@ private: __builtin_unreachable(); } + +public: + static String kindToString(ExplainKind kind) + { + switch (kind) + { + case ParsedAST: return "AST"; + case AnalyzedSyntax: return "SYNTAX"; + case QueryTree: return "QUERY TREE"; + case QueryPlan: return "PLAN"; + case QueryPipeline: return "PIPELINE"; + case QueryEstimates: return "ESTIMATE"; + case TableOverride: return "TABLE OVERRIDE"; + case CurrentTransaction: return "CURRENT TRANSACTION"; + } + + __builtin_unreachable(); + } + + static ExplainKind kindFromString(const String & kind) + { + if (kind == "ast" || kind == "AST") + return ExplainKind::ParsedAST; + if (kind == "syntax" || kind == "SYNTAX") + return ExplainKind::AnalyzedSyntax; + if (kind == "query tree" || kind == "QUERY TREE") + return ExplainKind::QueryTree; + if (kind == "plan" || kind == "PLAN") + return ExplainKind::QueryPlan; + if (kind == "pipeline" || kind == "PIPELINE") + return ExplainKind::QueryPipeline; + if (kind == "estimate" || kind == "ESTIMATE") + return ExplainKind::QueryEstimates; + if (kind == "table override" || kind == "TABLE OVERRIDE") + return ExplainKind::TableOverride; + if (kind == "current transaction" || kind == "CURRENT TRANSACTION") + return ExplainKind::CurrentTransaction; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal explain kind '{}'", kind); + } 
}; + } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 81d6f34aced..e3cf36efd0f 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -27,6 +27,11 @@ #include #include #include +#include + +#include +#include + #include #include @@ -36,6 +41,7 @@ #include #include +#include #include @@ -52,25 +58,83 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +/* + * Build an AST with the following structure: + * + * ``` + * SelectWithUnionQuery (children 1) + * ExpressionList (children 1) + * SelectQuery (children 2) + * ExpressionList (children 1) + * Asterisk + * TablesInSelectQuery (children 1) + * TablesInSelectQueryElement (children 1) + * TableExpression (children 1) + * Function <...> + * ``` + */ +static ASTPtr buildSelectFromTableFunction(const std::shared_ptr & ast_function) +{ + auto result_select_query = std::make_shared(); + + { + auto select_ast = std::make_shared(); + select_ast->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared()); + select_ast->select()->children.push_back(std::make_shared()); + + auto list_of_selects = std::make_shared(); + list_of_selects->children.push_back(select_ast); + + result_select_query->children.push_back(std::move(list_of_selects)); + result_select_query->list_of_selects = result_select_query->children.back(); + + { + auto tables = std::make_shared(); + select_ast->setExpression(ASTSelectQuery::Expression::TABLES, tables); + auto tables_elem = std::make_shared(); + auto table_expr = std::make_shared(); + tables->children.push_back(tables_elem); + tables_elem->table_expression = table_expr; + tables_elem->children.push_back(table_expr); + + table_expr->table_function = ast_function; + table_expr->children.push_back(table_expr->table_function); + } + } + + return result_select_query; +} bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ASTPtr select_node; 
ParserSelectWithUnionQuery select; + ParserExplainQuery explain; if (pos->type != TokenType::OpeningRoundBracket) return false; ++pos; - if (!select.parse(pos, select_node, expected)) + ASTPtr result_node = nullptr; + + if (ASTPtr select_node; select.parse(pos, select_node, expected)) + { + result_node = std::move(select_node); + } + else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected)) + { + result_node = buildSelectFromTableFunction(makeASTFunction("viewExplain", explain_node)); + } + else + { return false; + } if (pos->type != TokenType::ClosingRoundBracket) return false; ++pos; node = std::make_shared(); - node->children.push_back(select_node); + node->children.push_back(result_node); return true; } diff --git a/src/Parsers/ParserExplainQuery.cpp b/src/Parsers/ParserExplainQuery.cpp index 4547cb6045f..d32d4444c36 100644 --- a/src/Parsers/ParserExplainQuery.cpp +++ b/src/Parsers/ParserExplainQuery.cpp @@ -88,11 +88,20 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected { /// Nothing to parse } + else if (select_only) + { + if (select_p.parse(pos, query, expected)) + explain_query->setExplainedQuery(std::move(query)); + else + return false; + } else if (select_p.parse(pos, query, expected) || create_p.parse(pos, query, expected) || insert_p.parse(pos, query, expected) || system_p.parse(pos, query, expected)) + { explain_query->setExplainedQuery(std::move(query)); + } else return false; diff --git a/src/Parsers/ParserExplainQuery.h b/src/Parsers/ParserExplainQuery.h index 1a415a04dde..00e140e9c77 100644 --- a/src/Parsers/ParserExplainQuery.h +++ b/src/Parsers/ParserExplainQuery.h @@ -11,6 +11,7 @@ class ParserExplainQuery : public IParserBase protected: const char * end; bool allow_settings_after_format_in_insert; + bool select_only; const char * getName() const override { return "EXPLAIN"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; @@ -18,7 +19,13 @@ public: explicit 
ParserExplainQuery(const char* end_, bool allow_settings_after_format_in_insert_) : end(end_) , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + , select_only(false) {} + + explicit ParserExplainQuery() + : end(nullptr) , allow_settings_after_format_in_insert(false) , select_only(true) + {} + }; } diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp new file mode 100644 index 00000000000..f35319684db --- /dev/null +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; +} + +void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPtr /*context*/) +{ + const auto * function = ast_function->as(); + + + if (function && function->arguments && function->arguments->children.size() == 2) + { + const auto * kind_literal = function->arguments->children[0]->as(); + if (!kind_literal) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}' requires `kind` argument to be a string literal, got: '{}'", + getName(), queryToString(function->arguments->children[0])); + + auto kind = ASTExplainQuery::kindFromString(kind_literal->value.safeGet()); + std::shared_ptr explain_query = std::make_shared(kind); + + const auto * select_query = function->arguments->children[1]->as(); + if (!select_query) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}' requires `query` argument to be a select query, got: '{}'", + getName(), queryToString(function->arguments->children[1])); + + explain_query->setExplainedQuery(select_query->clone()); + + query = std::move(explain_query); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}' requires `kind` and `query` arguments", getName()); + } +} + +ColumnsDescription 
TableFunctionExplain::getActualTableStructure(ContextPtr context) const +{ + Block sample_block = getInterpreter(context).getSampleBlock(query->as()->getKind()); + ColumnsDescription columns_description; + for (const auto & column : sample_block.getColumnsWithTypeAndName()) + columns_description.add(ColumnDescription(column.name, column.type)); + return columns_description; +} + +static Block executeMonoBlock(QueryPipeline & pipeline) +{ + if (!pipeline.pulling()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected pulling pipeline"); + + PullingPipelineExecutor pulling_executor(pipeline); + std::vector blocks; + while (true) + { + Block block; + if (pulling_executor.pull(block)) + blocks.push_back(std::move(block)); + else + break; + } + + if (blocks.size() == 1) + return blocks[0]; + + return concatenateBlocks(std::move(blocks)); +} + +StoragePtr TableFunctionExplain::executeImpl( + const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +{ + BlockIO blockio = getInterpreter(context).execute(); + Block block = executeMonoBlock(blockio.pipeline); + + StorageID storage_id(getDatabaseName(), table_name); + auto storage = std::make_shared(storage_id, getActualTableStructure(context), std::move(block)); + storage->startup(); + return storage; +} + +InterpreterExplainQuery TableFunctionExplain::getInterpreter(ContextPtr context) const +{ + if (!query) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' requires a explain query argument", getName()); + + return InterpreterExplainQuery(query, context); +} + +void registerTableFunctionExplain(TableFunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/TableFunctions/TableFunctionExplain.h b/src/TableFunctions/TableFunctionExplain.h new file mode 100644 index 00000000000..1cb61b3b047 --- /dev/null +++ b/src/TableFunctions/TableFunctionExplain.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include 
+#include +#include + + +namespace DB +{ + +/* Invoked via `SELECT * FROM (EXPLAIN )` + * Return result of EXPLAIN in a single string column. + * Can be used to further processing of the result of EXPLAIN using SQL (e.g. in tests). + */ +class TableFunctionExplain : public ITableFunction +{ +public: + static constexpr auto name = "viewExplain"; + std::string getName() const override { return name; } + +private: + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + const char * getStorageTypeName() const override { return "Explain"; } + + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + ColumnsDescription getActualTableStructure(ContextPtr context) const override; + + InterpreterExplainQuery getInterpreter(ContextPtr context) const; + + ASTPtr query = nullptr; +}; + + +} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 3ef93c9b69d..9328c12c122 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -59,6 +59,7 @@ void registerTableFunctions() registerTableFunctionDictionary(factory); registerTableFunctionFormat(factory); + registerTableFunctionExplain(factory); } } diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index d7e38403cae..e4127edb2c6 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -58,6 +58,8 @@ void registerTableFunctionDictionary(TableFunctionFactory & factory); void registerTableFunctionFormat(TableFunctionFactory & factory); +void registerTableFunctionExplain(TableFunctionFactory & factory); + void registerTableFunctions(); } diff --git a/tests/queries/0_stateless/02421_explain_subquery.reference b/tests/queries/0_stateless/02421_explain_subquery.reference new file mode 100644 index 
00000000000..98fb6a68656 --- /dev/null +++ b/tests/queries/0_stateless/02421_explain_subquery.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02421_explain_subquery.sql b/tests/queries/0_stateless/02421_explain_subquery.sql new file mode 100644 index 00000000000..431c4168749 --- /dev/null +++ b/tests/queries/0_stateless/02421_explain_subquery.sql @@ -0,0 +1,4 @@ +SELECT count() > 3 FROM (EXPLAIN PIPELINE header = 1 SELECT * FROM system.numbers ORDER BY number DESC) WHERE explain LIKE '%Header: number UInt64%'; +SELECT count() > 0 FROM (EXPLAIN PLAN SELECT * FROM system.numbers ORDER BY number DESC) WHERE explain ILIKE '%Sort%'; +SELECT count() > 0 FROM (EXPLAIN SELECT * FROM system.numbers ORDER BY number DESC) WHERE explain ILIKE '%Sort%'; +SELECT trim(explain) == 'Asterisk' FROM (EXPLAIN AST SELECT * FROM system.numbers LIMIT 10) WHERE explain LIKE '%Asterisk%'; From 3fdea908ffc87a11b25c45b4ed144215944e1832 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 28 Sep 2022 13:09:53 +0000 Subject: [PATCH 093/266] Use ASTExplain in viewExplain --- src/Parsers/ASTExplainQuery.h | 55 ++----------------- src/Parsers/ExpressionElementParsers.cpp | 1 + src/TableFunctions/TableFunctionExplain.cpp | 27 +++------ .../02421_explain_subquery.reference | 4 ++ .../0_stateless/02421_explain_subquery.sql | 19 +++++++ 5 files changed, 38 insertions(+), 68 deletions(-) diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h index 986d9ba4d56..a1b6c9a8de6 100644 --- a/src/Parsers/ASTExplainQuery.h +++ b/src/Parsers/ASTExplainQuery.h @@ -7,11 +7,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - /// AST, EXPLAIN or other query with meaning of explanation query instead of execution class ASTExplainQuery : public ASTQueryWithOutput { @@ -29,13 +24,14 @@ public: explicit ASTExplainQuery(ExplainKind kind_) : kind(kind_) {} - String getID(char delim) const override { return "Explain" + (delim + 
formatString(kind)); } + String getID(char delim) const override { return "Explain" + (delim + toString(kind)); } ExplainKind getKind() const { return kind; } ASTPtr clone() const override { auto res = std::make_shared(*this); res->children.clear(); - res->children.push_back(children[0]->clone()); + if (!children.empty()) + res->children.push_back(children[0]->clone()); cloneOutputOptions(*res); return res; } @@ -72,7 +68,7 @@ public: protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - settings.ostr << (settings.hilite ? hilite_keyword : "") << formatString(kind) << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << toString(kind) << (settings.hilite ? hilite_none : ""); if (ast_settings) { @@ -107,8 +103,7 @@ private: ASTPtr table_function; ASTPtr table_override; - /// format as it appears in the query text - static String formatString(ExplainKind kind) + static String toString(ExplainKind kind) { switch (kind) { @@ -123,46 +118,6 @@ private: __builtin_unreachable(); } - -public: - static String kindToString(ExplainKind kind) - { - switch (kind) - { - case ParsedAST: return "AST"; - case AnalyzedSyntax: return "SYNTAX"; - case QueryTree: return "QUERY TREE"; - case QueryPlan: return "PLAN"; - case QueryPipeline: return "PIPELINE"; - case QueryEstimates: return "ESTIMATE"; - case TableOverride: return "TABLE OVERRIDE"; - case CurrentTransaction: return "CURRENT TRANSACTION"; - } - - __builtin_unreachable(); - } - - static ExplainKind kindFromString(const String & kind) - { - if (kind == "ast" || kind == "AST") - return ExplainKind::ParsedAST; - if (kind == "syntax" || kind == "SYNTAX") - return ExplainKind::AnalyzedSyntax; - if (kind == "query tree" || kind == "QUERY TREE") - return ExplainKind::QueryTree; - if (kind == "plan" || kind == "PLAN") - return ExplainKind::QueryPlan; - if (kind == "pipeline" || kind == "PIPELINE") - return 
ExplainKind::QueryPipeline; - if (kind == "estimate" || kind == "ESTIMATE") - return ExplainKind::QueryEstimates; - if (kind == "table override" || kind == "TABLE OVERRIDE") - return ExplainKind::TableOverride; - if (kind == "current transaction" || kind == "CURRENT TRANSACTION") - return ExplainKind::CurrentTransaction; - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal explain kind '{}'", kind); - } }; - } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index e3cf36efd0f..88784329ece 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -122,6 +122,7 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected)) { + /// Replace SELECT * FROM (EXPLAIN SELECT ...) with SELECT * FROM viewExplain(EXPLAIN SELECT ...) result_node = buildSelectFromTableFunction(makeASTFunction("viewExplain", explain_node)); } else diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index f35319684db..d46a7c69f22 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -20,30 +20,21 @@ namespace ErrorCodes void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPtr /*context*/) { const auto * function = ast_function->as(); - - - if (function && function->arguments && function->arguments->children.size() == 2) + if (function && function->arguments && function->arguments->children.size() == 1) { - const auto * kind_literal = function->arguments->children[0]->as(); - if (!kind_literal) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}' requires `kind` argument to be a string literal, got: '{}'", - getName(), queryToString(function->arguments->children[0])); + const auto & query_arg = function->arguments->children[0]; - auto kind = 
ASTExplainQuery::kindFromString(kind_literal->value.safeGet()); - std::shared_ptr explain_query = std::make_shared(kind); + if (!query_arg->as()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Table function '{}' requires a explain query argument, got '{}'", + getName(), queryToString(query_arg)); - const auto * select_query = function->arguments->children[1]->as(); - if (!select_query) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}' requires `query` argument to be a select query, got: '{}'", - getName(), queryToString(function->arguments->children[1])); - - explain_query->setExplainedQuery(select_query->clone()); - - query = std::move(explain_query); + query = query_arg; } else { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}' requires `kind` and `query` arguments", getName()); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Table function '{}' cannot be called directly, use `SELECT * FROM (EXPLAIN ...)` syntax", getName()); } } diff --git a/tests/queries/0_stateless/02421_explain_subquery.reference b/tests/queries/0_stateless/02421_explain_subquery.reference index 98fb6a68656..c18b4e9b082 100644 --- a/tests/queries/0_stateless/02421_explain_subquery.reference +++ b/tests/queries/0_stateless/02421_explain_subquery.reference @@ -2,3 +2,7 @@ 1 1 1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02421_explain_subquery.sql b/tests/queries/0_stateless/02421_explain_subquery.sql index 431c4168749..af80e51bca3 100644 --- a/tests/queries/0_stateless/02421_explain_subquery.sql +++ b/tests/queries/0_stateless/02421_explain_subquery.sql @@ -1,4 +1,23 @@ SELECT count() > 3 FROM (EXPLAIN PIPELINE header = 1 SELECT * FROM system.numbers ORDER BY number DESC) WHERE explain LIKE '%Header: number UInt64%'; SELECT count() > 0 FROM (EXPLAIN PLAN SELECT * FROM system.numbers ORDER BY number DESC) WHERE explain ILIKE '%Sort%'; SELECT count() > 0 FROM (EXPLAIN SELECT * FROM system.numbers ORDER BY number DESC) WHERE explain ILIKE '%Sort%'; 
+SELECT count() > 0 FROM (EXPLAIN CURRENT TRANSACTION); +SELECT count() == 1 FROM (EXPLAIN SYNTAX SELECT number FROM system.numbers ORDER BY number DESC) WHERE explain ILIKE 'SELECT%'; SELECT trim(explain) == 'Asterisk' FROM (EXPLAIN AST SELECT * FROM system.numbers LIMIT 10) WHERE explain LIKE '%Asterisk%'; + +SELECT * FROM ( + EXPLAIN AST SELECT * FROM ( + EXPLAIN PLAN SELECT * FROM ( + EXPLAIN SYNTAX SELECT trim(explain) == 'Asterisk' FROM ( + EXPLAIN AST SELECT * FROM system.numbers LIMIT 10 + ) WHERE explain LIKE '%Asterisk%' + ) + ) +) FORMAT Null; + +CREATE TABLE t1 ( a UInt64 ) Engine = MergeTree ORDER BY tuple() AS SELECT number AS a FROM system.numbers LIMIT 100000; + +SELECT rows > 1000 FROM (EXPLAIN ESTIMATE SELECT sum(a) FROM t1); +SELECT count() == 1 FROM (EXPLAIN ESTIMATE SELECT sum(a) FROM t1); + +DROP TABLE IF EXISTS t1; From 06f504c16697ad625f3c8ffd84e03f493f8b5e22 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 29 Sep 2022 09:49:00 +0000 Subject: [PATCH 094/266] doc viewExplain --- src/TableFunctions/TableFunctionExplain.cpp | 15 ++++++++++++++- src/TableFunctions/TableFunctionExplain.h | 4 ---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index d46a7c69f22..6546a6cff9b 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -91,7 +91,20 @@ InterpreterExplainQuery TableFunctionExplain::getInterpreter(ContextPtr context) void registerTableFunctionExplain(TableFunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction({R"( +Returns result of EXPLAIN query. + +The function should not be called directly but can be invoked via `SELECT * FROM (EXPLAIN )`. + +You can use this query to process the result of EXPLAIN further using SQL (e.g., in tests). 
+ +Example: +[example:1] + +)", +{{"1", "SELECT explain FROM (EXPLAIN AST SELECT * FROM system.numbers) WHERE explain LIKE '%Asterisk%'"}} +}); + } } diff --git a/src/TableFunctions/TableFunctionExplain.h b/src/TableFunctions/TableFunctionExplain.h index 1cb61b3b047..9d6dde4760c 100644 --- a/src/TableFunctions/TableFunctionExplain.h +++ b/src/TableFunctions/TableFunctionExplain.h @@ -9,10 +9,6 @@ namespace DB { -/* Invoked via `SELECT * FROM (EXPLAIN )` - * Return result of EXPLAIN in a single string column. - * Can be used to further processing of the result of EXPLAIN using SQL (e.g. in tests). - */ class TableFunctionExplain : public ITableFunction { public: From fa254bcec3934ea9b4db3fa65782bb31485538d1 Mon Sep 17 00:00:00 2001 From: Vladimir C Date: Mon, 3 Oct 2022 19:14:30 +0200 Subject: [PATCH 095/266] Fix clang-tidy in TableFunctionExplain.cpp --- src/TableFunctions/TableFunctionExplain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index 6546a6cff9b..02493aa9b0e 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -66,7 +66,7 @@ static Block executeMonoBlock(QueryPipeline & pipeline) if (blocks.size() == 1) return blocks[0]; - return concatenateBlocks(std::move(blocks)); + return concatenateBlocks(blocks); } StoragePtr TableFunctionExplain::executeImpl( From 3dfbd5eb153cf2db3ab2ef3f0362cae46ff4fef0 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 3 Oct 2022 18:07:19 +0000 Subject: [PATCH 096/266] Fix test --- .../02451_async_insert_user_level_settings.python | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02451_async_insert_user_level_settings.python b/tests/queries/0_stateless/02451_async_insert_user_level_settings.python index 8c75f4898c4..1660fb78d2b 100644 --- 
a/tests/queries/0_stateless/02451_async_insert_user_level_settings.python +++ b/tests/queries/0_stateless/02451_async_insert_user_level_settings.python @@ -41,6 +41,10 @@ client.query( "CREATE TABLE t_async_insert_user_settings (id UInt64, s String, arr Array(UInt64)) ENGINE = Memory" ) +VALUE_IN_CI = int(client.query( + "select value from system.settings where name='async_insert_busy_timeout_ms';" +)) + start_ms = time.time() * 1000.0 for i in range(NUM_RUNS): client.query(query = insert_query.format(i,i,i), settings=settings) @@ -48,8 +52,15 @@ end_ms = time.time() * 1000.0 duration = end_ms - start_ms -expected = (NUM_RUNS - 1) * TIME_TO_WAIT_MS -if duration >= expected: +expected = (NUM_RUNS - 1) * VALUE_IN_CI + +def check_inequality(duration, expected): + if TIME_TO_WAIT_MS >= VALUE_IN_CI: + return duration >= expected + else: + return duration < expected + +if check_inequality(duration, expected): print("Ok.") else: print(f"Fail. Duration: {duration}. Expected: {expected}") From 2ab884359ebbd2e4091252312096ef6f10fc9b90 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 3 Oct 2022 22:15:04 +0200 Subject: [PATCH 097/266] More efficient WriteBufferFromAzureBlobStorage --- .../IO/WriteBufferFromAzureBlobStorage.cpp | 54 +++++++++++++------ .../IO/WriteBufferFromAzureBlobStorage.h | 7 +++ .../configs/config.d/storage_conf.xml | 9 +--- .../test.py | 34 +++++++++++- 4 files changed, 78 insertions(+), 26 deletions(-) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 3b5ca89d224..8a049725e3f 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -66,38 +66,60 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() { execWithRetry([this](){ next(); }, DEFAULT_RETRY_NUM); + if (tmp_buffer_write_offset > 0) + uploadBlock(tmp_buffer->data(), tmp_buffer_write_offset); + auto block_blob_client = 
blob_container_client->GetBlockBlobClient(blob_path); execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, DEFAULT_RETRY_NUM); LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); } +void WriteBufferFromAzureBlobStorage::uploadBlock(const char * data, size_t size) +{ + auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64)); + + Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(data), size); + execWithRetry([&](){ block_blob_client.StageBlock(block_id, memory_stream); }, DEFAULT_RETRY_NUM); + tmp_buffer_write_offset = 0; + + LOG_TRACE(log, "Staged block (id: {}) of size {} (blob path: {}).", block_id, size, blob_path); +} + +WriteBufferFromAzureBlobStorage::MemoryBufferPtr WriteBufferFromAzureBlobStorage::allocateBuffer() const +{ + return std::make_unique>(max_single_part_upload_size); +} + void WriteBufferFromAzureBlobStorage::nextImpl() { - if (!offset()) + size_t size_to_upload = offset(); + + if (size_to_upload == 0) return; - char * buffer_begin = working_buffer.begin(); - size_t total_size = offset(); + if (!tmp_buffer) + tmp_buffer = allocateBuffer(); - auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); - - size_t current_size = 0; - - while (current_size < total_size) + size_t uploaded_size = 0; + while (uploaded_size != size_to_upload) { - size_t part_len = std::min(total_size - current_size, max_single_part_upload_size); - const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64)); + size_t memory_buffer_remaining_size = max_single_part_upload_size - tmp_buffer_write_offset; + if (memory_buffer_remaining_size == 0) + uploadBlock(tmp_buffer->data(), tmp_buffer->size()); - Azure::Core::IO::MemoryBodyStream tmp_buffer(reinterpret_cast(buffer_begin + current_size), part_len); - execWithRetry([&](){ block_blob_client.StageBlock(block_id, 
tmp_buffer); }, DEFAULT_RETRY_NUM); - - current_size += part_len; - LOG_TRACE(log, "Staged block (id: {}) of size {} (written {}/{}, blob path: {}).", block_id, part_len, current_size, total_size, blob_path); + size_t size = std::min(memory_buffer_remaining_size, size_to_upload - uploaded_size); + memcpy(tmp_buffer->data() + tmp_buffer_write_offset, working_buffer.begin() + uploaded_size, size); + uploaded_size += size; + tmp_buffer_write_offset += size; } + if (tmp_buffer_write_offset == max_single_part_upload_size) + uploadBlock(tmp_buffer->data(), tmp_buffer->size()); + if (write_settings.remote_throttler) - write_settings.remote_throttler->add(total_size); + write_settings.remote_throttler->add(size_to_upload); } } diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index e7eaef86fa0..ebefe19dade 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -40,6 +40,7 @@ public: private: void finalizeImpl() override; void execWithRetry(std::function func, size_t num_tries); + void uploadBlock(const char * data, size_t size); Poco::Logger * log; @@ -49,6 +50,12 @@ private: AzureClientPtr blob_container_client; std::vector block_ids; + + using MemoryBufferPtr = std::unique_ptr>; + MemoryBufferPtr tmp_buffer; + size_t tmp_buffer_write_offset = 0; + + MemoryBufferPtr allocateBuffer() const; }; } diff --git a/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml index 09fa0d6c767..02945b619c2 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml @@ -10,12 +10,8 @@ devstoreaccount1 Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== - 33554432 + 100000 - - local - 
/ - @@ -23,9 +19,6 @@
blob_storage_disk
- - hdd -
diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index 68a783d2427..1f2290f0351 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -6,6 +6,7 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.utility import generate_values, replace_config, SafeThread +from azure.storage.blob import BlobServiceClient SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -571,10 +572,39 @@ def test_restart_during_load(cluster): def test_big_insert(cluster): node = cluster.instances[NODE_NAME] create_table(node, TABLE_NAME) + + check_query = "SELECT '2020-01-03', number, toString(number) FROM numbers(1000000)" + azure_query( node, - f"INSERT INTO {TABLE_NAME} SELECT '2020-01-03', number, toString(number) FROM numbers(1000000)", + f"INSERT INTO {TABLE_NAME} {check_query}", ) assert azure_query(node, f"SELECT * FROM {TABLE_NAME} ORDER BY id") == node.query( - "SELECT '2020-01-03', number, toString(number) FROM numbers(1000000)" + check_query ) + + blob_container_client = cluster.blob_service_client.get_container_client( + CONTAINER_NAME + ) + + blobs = blob_container_client.list_blobs() + max_single_part_upload_size = 100000 + + for blob in blobs: + blob_client = cluster.blob_service_client.get_blob_client( + CONTAINER_NAME, blob.name + ) + committed, uncommited = blob_client.get_block_list() + + blocks = committed + assert len(blocks) > 1 + last_id = len(blocks) + id = 1 + + for block in blocks: + print(f"blob: {blob.name}, block size: {block.size}") + if id == last_id: + assert max_single_part_upload_size >= block.size + else: + assert max_single_part_upload_size == block.size + id += 1 From 210882b9c4dfaf8a47de8d31b6d5c7b81d57da62 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 3 Oct 2022 23:30:50 +0200 Subject: [PATCH 098/266] Better fix --- 
src/Storages/StorageReplicatedMergeTree.cpp | 67 ++++++++++++++----- src/Storages/StorageReplicatedMergeTree.h | 2 +- .../02456_test_zero_copy_mutation.reference | 3 + .../02456_test_zero_copy_mutation.sql | 36 ++++++++++ 4 files changed, 89 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/02456_test_zero_copy_mutation.reference create mode 100644 tests/queries/0_stateless/02456_test_zero_copy_mutation.sql diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 886c5e7964f..365eacc4404 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1,5 +1,6 @@ #include +#include #include "Common/hex.h" #include #include @@ -7585,8 +7586,6 @@ std::pair StorageReplicatedMergeTree::unlockSharedData(const IMer if (!settings->allow_remote_fs_zero_copy_replication) return std::make_pair(true, NameSet{}); - bool only_zero_copy = part.isStoredOnRemoteDiskWithZeroCopySupport() && getDisks().size() == 1; - if (!part.data_part_storage) LOG_WARNING(log, "Datapart storage for part {} (temp: {}) is not initialzied", part.name, part.is_temp); @@ -7634,13 +7633,53 @@ std::pair StorageReplicatedMergeTree::unlockSharedData(const IMer return unlockSharedDataByID( part.getUniqueId(), getTableSharedID(), part.name, replica_name, - part.data_part_storage->getDiskType(), zookeeper, *getSettings(), log, zookeeper_path, only_zero_copy); + part.data_part_storage->getDiskType(), zookeeper, *getSettings(), log, zookeeper_path, format_version); +} + +namespace +{ + +NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const std::string & part_info_str, MergeTreeDataFormatVersion format_version) +{ + NameSet files_not_to_remove; + + MergeTreePartInfo part_info = MergeTreePartInfo::fromPartName(part_info_str, format_version); + if (part_info.mutation == 0) + return files_not_to_remove; + + auto parts_str = 
zookeeper_ptr->getChildren(zero_copy_part_path_prefix); + + + std::vector parts_infos; + for (const auto & part_str : parts_str) + { + MergeTreePartInfo parent_candidate_info = MergeTreePartInfo::fromPartName(part_str, format_version); + parts_infos.push_back(parent_candidate_info); + } + + std::sort(parts_infos.begin(), parts_infos.end()); + + for (const auto & parent_candidate_info : parts_infos | std::views::reverse) + { + if (parent_candidate_info == part_info) + continue; + + if (part_info.isMutationChildOf(parent_candidate_info)) + { + String files_not_to_remove_str = zookeeper_ptr->get(fs::path(zero_copy_part_path_prefix) / parent_candidate_info.getPartName()); + boost::split(files_not_to_remove, files_not_to_remove_str, boost::is_any_of("\n ")); + break; + } + } + return files_not_to_remove; +} + } std::pair StorageReplicatedMergeTree::unlockSharedDataByID( String part_id, const String & table_uuid, const String & part_name, const String & replica_name_, std::string disk_type, zkutil::ZooKeeperPtr zookeeper_ptr, const MergeTreeSettings & settings, - Poco::Logger * logger, const String & zookeeper_path_old, bool only_zero_copy) + Poco::Logger * logger, const String & zookeeper_path_old, MergeTreeDataFormatVersion data_format_version) { boost::replace_all(part_id, "/", "_"); @@ -7658,6 +7697,9 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( if (!files_not_to_remove_str.empty()) boost::split(files_not_to_remove, files_not_to_remove_str, boost::is_any_of("\n ")); + auto parent_not_to_remove = getParentLockedBlobs(zookeeper_ptr, fs::path(zc_zookeeper_path).parent_path(), part_name, data_format_version); + files_not_to_remove.insert(parent_not_to_remove.begin(), parent_not_to_remove.end()); + String zookeeper_part_uniq_node = fs::path(zc_zookeeper_path) / part_id; /// Delete our replica node for part from zookeeper (we are not interested in it anymore) @@ -7734,19 +7776,8 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else { - if 
(only_zero_copy) - { - /// If we have only_zero_copy configuration it means, that part was actually created on the same disk. It can happen - /// in extremely rare cases when both replicas decide to merge something due to one of tables in-progress drop. - LOG_TRACE(logger, "Can't remove parent zookeeper lock {} for part {}, because children {} ({}) exists, will not remove blobs", - zookeeper_part_node, part_name, children.size(), fmt::join(children, ", ")); - part_has_no_more_locks = false; - } - else - { - LOG_TRACE(logger, "Can't remove parent zookeeper lock {} for part {}, because children {} ({}) exists", - zookeeper_part_node, part_name, children.size(), fmt::join(children, ", ")); - } + LOG_TRACE(logger, "Can't remove parent zookeeper lock {} for part {}, because children {} ({}) exists", + zookeeper_part_node, part_name, children.size(), fmt::join(children, ", ")); } } @@ -8289,7 +8320,7 @@ bool StorageReplicatedMergeTree::removeSharedDetachedPart(DiskPtr disk, const St zookeeper, local_context->getReplicatedMergeTreeSettings(), &Poco::Logger::get("StorageReplicatedMergeTree"), detached_zookeeper_path, - false); + MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING); keep_shared = !can_remove; } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 7f672118d8b..e10ffcce22c 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -280,7 +280,7 @@ public: /// Return false if data is still used by another node static std::pair unlockSharedDataByID(String part_id, const String & table_uuid, const String & part_name, const String & replica_name_, std::string disk_type, zkutil::ZooKeeperPtr zookeeper_, const MergeTreeSettings & settings, Poco::Logger * logger, - const String & zookeeper_path_old, bool only_zero_copy); + const String & zookeeper_path_old, MergeTreeDataFormatVersion data_format_version); /// Fetch part only if some replica has it on shared 
storage like S3 DataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) override; diff --git a/tests/queries/0_stateless/02456_test_zero_copy_mutation.reference b/tests/queries/0_stateless/02456_test_zero_copy_mutation.reference new file mode 100644 index 00000000000..e75733cff47 --- /dev/null +++ b/tests/queries/0_stateless/02456_test_zero_copy_mutation.reference @@ -0,0 +1,3 @@ +1 Hello +1 Hello +1 Hello diff --git a/tests/queries/0_stateless/02456_test_zero_copy_mutation.sql b/tests/queries/0_stateless/02456_test_zero_copy_mutation.sql new file mode 100644 index 00000000000..99234585d83 --- /dev/null +++ b/tests/queries/0_stateless/02456_test_zero_copy_mutation.sql @@ -0,0 +1,36 @@ +DROP TABLE IF EXISTS mutation_1; +DROP TABLE IF EXISTS mutation_2; + +CREATE TABLE mutation_1 +( + a UInt64, + b String +) +ENGINE = ReplicatedMergeTree('/clickhouse/test/{database}/t', '1') +ORDER BY tuple() SETTINGS min_bytes_for_wide_part=0, allow_remote_fs_zero_copy_replication=1; + +CREATE TABLE mutation_2 +( + a UInt64, + b String +) +ENGINE = ReplicatedMergeTree('/clickhouse/test/{database}/t', '2') +ORDER BY tuple() SETTINGS min_bytes_for_wide_part=0, allow_remote_fs_zero_copy_replication=1; + +INSERT INTO mutation_1 VALUES (1, 'Hello'); + +SYSTEM SYNC REPLICA mutation_2; + +SYSTEM STOP REPLICATION QUEUES mutation_2; + +ALTER TABLE mutation_1 UPDATE a = 2 WHERE b = 'xxxxxx' SETTINGS mutations_sync=1; + +SELECT * from mutation_1; +SELECT * from mutation_2; + +DROP TABLE mutation_1 SYNC; + +SELECT * from mutation_2; + +DROP TABLE IF EXISTS mutation_1; +DROP TABLE IF EXISTS mutation_2; From 148995894f57d4a4fc828afa942084178e514d36 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 3 Oct 2022 18:52:14 +0000 Subject: [PATCH 099/266] add system table asynchronous_inserts_log --- programs/server/config.xml | 12 +++ src/Common/DateLUT.h | 20 ++++ src/Common/SystemLogBase.cpp | 1 + src/Common/SystemLogBase.h | 3 +- 
src/Interpreters/AsynchronousInsertLog.cpp | 82 ++++++++++++++++ src/Interpreters/AsynchronousInsertLog.h | 50 ++++++++++ src/Interpreters/AsynchronousInsertQueue.cpp | 94 +++++++++++++++++-- src/Interpreters/AsynchronousInsertQueue.h | 1 + src/Interpreters/Context.cpp | 10 ++ src/Interpreters/Context.h | 3 +- src/Interpreters/InterpreterSystemQuery.cpp | 4 +- src/Interpreters/MetricLog.cpp | 20 +--- src/Interpreters/PartLog.cpp | 14 +-- src/Interpreters/SessionLog.cpp | 12 +-- src/Interpreters/SystemLog.cpp | 4 + src/Interpreters/SystemLog.h | 2 + src/Interpreters/ThreadStatusExt.cpp | 35 ++----- src/Interpreters/executeQuery.cpp | 39 +++----- src/Storages/MergeTree/MergeTreeData.cpp | 18 +--- .../02456_async_inserts_logs.reference | 5 + .../0_stateless/02456_async_inserts_logs.sh | 34 +++++++ 21 files changed, 349 insertions(+), 114 deletions(-) create mode 100644 src/Interpreters/AsynchronousInsertLog.cpp create mode 100644 src/Interpreters/AsynchronousInsertLog.h create mode 100644 tests/queries/0_stateless/02456_async_inserts_logs.reference create mode 100755 tests/queries/0_stateless/02456_async_inserts_logs.sh diff --git a/programs/server/config.xml b/programs/server/config.xml index dcb8ac0804c..7f3a749b629 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1173,6 +1173,18 @@ 7500 + + + system + asynchronous_insert_log
+ + 7500 + event_date + event_date + INTERVAL 3 DAY +
+ diff --git a/docs/en/operations/requirements.md b/docs/en/operations/requirements.md deleted file mode 100644 index dc05a7b4896..00000000000 --- a/docs/en/operations/requirements.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -slug: /en/operations/requirements -sidebar_position: 44 -sidebar_label: Requirements ---- - -# Requirements - -## CPU - -For installation from prebuilt deb packages, use a CPU with x86_64 architecture and support for SSE 4.2 instructions. To run ClickHouse with processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources. - -ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz. - -It is recommended to use **Turbo Boost** and **hyper-threading** technologies. It significantly improves performance with a typical workload. - -## RAM {#ram} - -We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries. - -The required volume of RAM depends on: - -- The complexity of queries. -- The amount of data that is processed in queries. - -To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](../sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](../sql-reference/statements/select/distinct.md#select-distinct), [JOIN](../sql-reference/statements/select/join.md#select-join) and other operations you use. - -ClickHouse can use external memory for temporary data. 
See [GROUP BY in External Memory](../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details. - -## Swap File {#swap-file} - -Disable the swap file for production environments. - -## Storage Subsystem {#storage-subsystem} - -You need to have 2GB of free disk space to install ClickHouse. - -The volume of storage required for your data should be calculated separately. Assessment should include: - -- Estimation of the data volume. - - You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store. - -- The data compression coefficient. - - To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times. - -To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas. - -## Network {#network} - -If possible, use networks of 10G or higher class. - -The network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes. - -## Software {#software} - -ClickHouse is developed primarily for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system. - -ClickHouse can also work in other operating system families. See details in the [install guide](../getting-started/install.md) section of the documentation. 
From 63be310441b3f1cf4d2b6c0089dd36ad0dc25f25 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 5 Oct 2022 16:12:03 +0200 Subject: [PATCH 146/266] Update src/Storages/StorageReplicatedMergeTree.cpp Co-authored-by: Alexander Tokmakov --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 11c66783843..42512684f3b 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7646,7 +7646,7 @@ namespace /// all_0_0_0_1: ^ ^ /// a.bin a.mrk2 columns.txt /// So when we deleting all_0_0_0 it doesn't remove blobs for a.bin and a.mrk2 because all_0_0_0_1 use them. -/// But sometimes we need an opposite. When we deleting all_0_0_0_1 it can be non replicated to other replicas, so we are the only owner of this replica. +/// But sometimes we need an opposite. When we deleting all_0_0_0_1 it can be non replicated to other replicas, so we are the only owner of this part. /// In this case when we will drop all_0_0_0_1 we will drop blobs for all_0_0_0. But it will lead to dataloss. For such case we need to check that other replicas /// still need parent part. 
NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const std::string & part_info_str, MergeTreeDataFormatVersion format_version) From e6fe71b385a52331b2df340a850894149da2078f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 5 Oct 2022 16:12:08 +0200 Subject: [PATCH 147/266] Update src/Storages/StorageReplicatedMergeTree.cpp Co-authored-by: Alexander Tokmakov --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 42512684f3b..f2529213569 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7672,7 +7672,7 @@ NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::stri /// Sort is important. We need to find our closest parent, like: /// for part all_0_0_0_64 we can have parents - /// all_0_0_0_6 < we need closes parent, not others + /// all_0_0_0_6 < we need the closest parent, not others /// all_0_0_0_1 /// all_0_0_0 std::sort(parts_infos.begin(), parts_infos.end()); From d5249fb72ef5388a0f22be4afe0a97bd5c773eb3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 5 Oct 2022 16:35:09 +0200 Subject: [PATCH 148/266] Update 02456_bloom_filter_assert.sql.j2 --- tests/queries/0_stateless/02456_bloom_filter_assert.sql.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02456_bloom_filter_assert.sql.j2 b/tests/queries/0_stateless/02456_bloom_filter_assert.sql.j2 index b6e218105ca..8322d3d29e7 100644 --- a/tests/queries/0_stateless/02456_bloom_filter_assert.sql.j2 +++ b/tests/queries/0_stateless/02456_bloom_filter_assert.sql.j2 @@ -25,4 +25,4 @@ FROM numbers(1000); DROP TABLE IF EXISTS bftest__fuzz_21; -{% endfor -%} \ No newline at end of file +{% endfor -%} From 12acfb99436367c8d47c02b374d29411b9f9fc0c Mon Sep 17 00:00:00 2001 From: 
DanRoscigno Date: Wed, 5 Oct 2022 12:04:00 -0400 Subject: [PATCH 149/266] code review, preserve link --- docs/en/getting-started/install.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index c7eeea7f58b..922ab7e3141 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -89,6 +89,8 @@ ClickHouse is developed primarily for the Linux family of operating systems. The ## Self-Managed Install +### Available Installation Options {#available-installation-options} + ### From DEB Packages {#install-from-deb-packages} It is recommended to use official pre-compiled `deb` packages for Debian or Ubuntu. Run these commands to install packages: From e2786c51fcdd5142ce217a0a0b997cf1e5239d3a Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 5 Oct 2022 17:09:58 +0000 Subject: [PATCH 150/266] Disable 02122_join_group_by_timeout for debug --- tests/queries/0_stateless/02122_join_group_by_timeout.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.sh b/tests/queries/0_stateless/02122_join_group_by_timeout.sh index 4116453b69a..59719f75d7c 100755 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.sh +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.sh @@ -1,4 +1,10 @@ #!/usr/bin/env bash +# Tags: no-debug + +# no-debug: Query is canceled by timeout after max_execution_time, +# but sending an exception to the client may hang +# for more than MAX_PROCESS_WAIT seconds in a slow debug build, +# and test will fail. 
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 45bc243b10be26ab8cb453d93878541eb3851b30 Mon Sep 17 00:00:00 2001 From: Aleksandr Musorin Date: Tue, 4 Oct 2022 14:21:54 +0200 Subject: [PATCH 151/266] tests: cover progress for INSERT SELECT over HTTP --- .../02457_insert_select_progress_http.reference | 1 + .../0_stateless/02457_insert_select_progress_http.sh | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 tests/queries/0_stateless/02457_insert_select_progress_http.reference create mode 100755 tests/queries/0_stateless/02457_insert_select_progress_http.sh diff --git a/tests/queries/0_stateless/02457_insert_select_progress_http.reference b/tests/queries/0_stateless/02457_insert_select_progress_http.reference new file mode 100644 index 00000000000..942a2df48cf --- /dev/null +++ b/tests/queries/0_stateless/02457_insert_select_progress_http.reference @@ -0,0 +1 @@ +< X-ClickHouse-Summary: {"read_rows":"1000","read_bytes":"8000","written_rows":"1000","written_bytes":"2000","total_rows_to_read":"1000","result_rows":"1000","result_bytes":"2000"} diff --git a/tests/queries/0_stateless/02457_insert_select_progress_http.sh b/tests/queries/0_stateless/02457_insert_select_progress_http.sh new file mode 100755 index 00000000000..507cf913e3f --- /dev/null +++ b/tests/queries/0_stateless/02457_insert_select_progress_http.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<< "drop table if exists insert_select_progress_http" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<< "create table insert_select_progress_http(n UInt16) engine = MergeTree order by n" + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&send_progress_in_http_headers=1" -d @- <<< "insert into insert_select_progress_http select * from numbers(1e3)" -v |& grep X-ClickHouse-Summary + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<< "drop table insert_select_progress_http" From 7c1a3996e80c40eefb2b4dc1aab4ffdc244579c5 Mon Sep 17 00:00:00 2001 From: Aleksandr Musorin Date: Tue, 4 Oct 2022 14:22:21 +0200 Subject: [PATCH 152/266] tests: cover progress for INSERT SELECT over TCP --- .../02458_insert_select_progress_tcp.python | 230 ++++++++++++++++++ ...02458_insert_select_progress_tcp.reference | 1 + .../02458_insert_select_progress_tcp.sh | 15 ++ 3 files changed, 246 insertions(+) create mode 100644 tests/queries/0_stateless/02458_insert_select_progress_tcp.python create mode 100644 tests/queries/0_stateless/02458_insert_select_progress_tcp.reference create mode 100755 tests/queries/0_stateless/02458_insert_select_progress_tcp.sh diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python new file mode 100644 index 00000000000..ec4c76b50ae --- /dev/null +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python @@ -0,0 +1,230 @@ +#!/usr/bin/env python3 + +import socket +import os +import uuid +import json + +CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') +CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '900000')) +CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default') + +def writeVarUInt(x, ba): + for _ in range(0, 9): + + byte = x & 0x7F + if x > 0x7F: + byte |= 0x80 + + ba.append(byte) + + x >>= 7 + if x == 0: + return + + +def 
writeStringBinary(s, ba): + b = bytes(s, 'utf-8') + writeVarUInt(len(s), ba) + ba.extend(b) + + +def readStrict(s, size = 1): + res = bytearray() + while size: + cur = s.recv(size) + # if not res: + # raise "Socket is closed" + size -= len(cur) + res.extend(cur) + + return res + + +def readUInt(s, size=1): + res = readStrict(s, size) + val = 0 + for i in range(len(res)): + val += res[i] << (i * 8) + return val + +def readUInt8(s): + return readUInt(s) + +def readUInt16(s): + return readUInt(s, 2) + +def readUInt32(s): + return readUInt(s, 4) + +def readUInt64(s): + return readUInt(s, 8) + +def readVarUInt(s): + x = 0 + for i in range(9): + byte = readStrict(s)[0] + x |= (byte & 0x7F) << (7 * i) + + if not byte & 0x80: + return x + + return x + + +def readStringBinary(s): + size = readVarUInt(s) + s = readStrict(s, size) + return s.decode('utf-8') + + +def sendHello(s): + ba = bytearray() + writeVarUInt(0, ba) # Hello + writeStringBinary('simple native protocol', ba) + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary(CLICKHOUSE_DATABASE, ba) # database + writeStringBinary('default', ba) # user + writeStringBinary('', ba) # pwd + s.sendall(ba) + + +def receiveHello(s): + p_type = readVarUInt(s) + assert (p_type == 0) # Hello + server_name = readStringBinary(s) + # print("Server name: ", server_name) + server_version_major = readVarUInt(s) + # print("Major: ", server_version_major) + server_version_minor = readVarUInt(s) + # print("Minor: ", server_version_minor) + server_revision = readVarUInt(s) + # print("Revision: ", server_revision) + server_timezone = readStringBinary(s) + # print("Timezone: ", server_timezone) + server_display_name = readStringBinary(s) + # print("Display name: ", server_display_name) + server_version_patch = readVarUInt(s) + # print("Version patch: ", server_version_patch) + + +def serializeClientInfo(ba, query_id): + writeStringBinary('default', ba) # initial_user + writeStringBinary(query_id, ba) # 
initial_query_id + writeStringBinary('127.0.0.1:9000', ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary('os_user', ba) # os_user + writeStringBinary('client_hostname', ba) # client_hostname + writeStringBinary('client_name', ba) # client_name + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary('', ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry + + +def sendQuery(s, query): + ba = bytearray() + query_id = uuid.uuid4().hex + writeVarUInt(1, ba) # query + writeStringBinary(query_id, ba) + + ba.append(1) # INITIAL_QUERY + + # client info + serializeClientInfo(ba, query_id) + + writeStringBinary('', ba) # No settings + writeStringBinary('', ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally + s.sendall(ba) + + +def serializeBlockInfo(ba): + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num + + +def sendEmptyBlock(s): + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary('', ba) + serializeBlockInfo(ba) + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns + s.sendall(ba) + + +def assertPacket(packet, expected): + assert(packet == expected), packet + + +class Progress(): + def __init__(self): + # NOTE: this is done in ctor to initialize __dict__ + self.read_rows = 0 + self.read_bytes = 0 + self.total_rows_to_read = 0 + self.written_rows = 0 + self.written_bytes = 0 + + def __str__(self): + return json.dumps(self.__dict__) + +def readProgress(s, progress): + packet_type = readVarUInt(s) + if packet_type == 2: # Exception + raise RuntimeError(readException(s)) + + if packet_type == 5: # End stream + return False + + assertPacket(packet_type, 3) # Progress + 
progress.read_rows += readVarUInt(s) + progress.read_bytes += readVarUInt(s) + progress.total_rows_to_read += readVarUInt(s) + + progress.written_rows += readVarUInt(s) + progress.written_bytes += readVarUInt(s) + return True + +def readException(s): + code = readUInt32(s) + name = readStringBinary(s) + text = readStringBinary(s) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + return "code {}: {}".format(code, text.replace('DB::Exception:', '')) + + +def main(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery(s, 'insert into insert_select_progress_tcp select * from numbers(1000000)') + + # external tables + sendEmptyBlock(s) + + query_progress = Progress() + while readProgress(s, query_progress): + pass + print(f'{query_progress}') + + s.close() + + +if __name__ == "__main__": + main() diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference b/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference new file mode 100644 index 00000000000..2634b378178 --- /dev/null +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference @@ -0,0 +1 @@ +{"read_rows": 1000000, "read_bytes": 8000000, "total_rows_to_read": 1000000, "written_rows": 1000000, "written_bytes": 2000000} diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh b/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh new file mode 100755 index 00000000000..ae3ea017fbb --- /dev/null +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists insert_select_progress_tcp; + create table insert_select_progress_tcp(s UInt16) engine = MergeTree order by s; +" + +# We should have correct env vars from shell_config.sh to run this test +python3 "$CURDIR"/02458_insert_select_progress_tcp.python + +$CLICKHOUSE_CLIENT -q "drop table insert_select_progress_tcp" From 3efd9412331910341a6ebd765d1c75e7b6ab0992 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 4 Oct 2022 22:49:43 +0200 Subject: [PATCH 153/266] Fix final progress for INSERT SELECT over TCP Move sending of progress and profile events after calling BlockIO::onFinish() since it will call on_finish callback, that will do the final flush of progress (at least WriteProgress). Signed-off-by: Azat Khuzhin --- src/Server/TCPHandler.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 550ae1bff31..ac179472425 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -369,10 +369,12 @@ void TCPHandler::runImpl() { state.need_receive_data_for_insert = true; processInsertQuery(); + state.io.onFinish(); } else if (state.io.pipeline.pulling()) { processOrdinaryQueryWithProcessors(); + state.io.onFinish(); } else if (state.io.pipeline.completed()) { @@ -398,7 +400,8 @@ void TCPHandler::runImpl() } executor.execute(); - /// Send final progress + state.io.onFinish(); + /// Send final progress after calling onFinish(), since it will update the progress. /// /// NOTE: we cannot send Progress for regular INSERT (with VALUES) /// without breaking protocol compatibility, but it can be done @@ -406,8 +409,10 @@ void TCPHandler::runImpl() sendProgress(); sendSelectProfileEvents(); } - - state.io.onFinish(); + else + { + state.io.onFinish(); + } /// Do it before sending end of stream, to have a chance to show log message in client. 
query_scope->logPeakMemoryUsage(); From 5166a1d58ec30c59acc7b6a3db0fc88f3b496d26 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 5 Oct 2022 16:54:56 +0200 Subject: [PATCH 154/266] Fix written_bytes/written_rows in INSERT SELECT intermediate progress packets Signed-off-by: Azat Khuzhin --- src/Interpreters/InterpreterInsertQuery.cpp | 1 + src/Interpreters/executeQuery.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 6e4efdc5167..51a3dde261a 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -297,6 +297,7 @@ Chain InterpreterInsertQuery::buildChainImpl( auto counting = std::make_shared(out.getInputHeader(), thread_status, getContext()->getQuota()); counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); out.addSource(std::move(counting)); return out; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 6e17e05a754..69f0d5ad2b5 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -892,7 +892,7 @@ static std::tuple executeQueryImpl( auto progress_callback = context->getProgressCallback(); if (progress_callback) { - Progress p(WriteProgress{info.written_rows, info.written_bytes}); + Progress p; p.incrementPiecewiseAtomically(Progress{ResultProgress{elem.result_rows, elem.result_bytes}}); progress_callback(p); } From c5a4d618757dc94dc6dc563ecd90c51b1eaa006b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 5 Oct 2022 18:06:39 +0200 Subject: [PATCH 155/266] tests: improve INSERT SELECT over HTTP coverage Signed-off-by: Azat Khuzhin --- .../02457_insert_select_progress_http.reference | 14 +++++++++++++- .../02457_insert_select_progress_http.sh | 9 +++------ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git 
a/tests/queries/0_stateless/02457_insert_select_progress_http.reference b/tests/queries/0_stateless/02457_insert_select_progress_http.reference index 942a2df48cf..5f13f4b9285 100644 --- a/tests/queries/0_stateless/02457_insert_select_progress_http.reference +++ b/tests/queries/0_stateless/02457_insert_select_progress_http.reference @@ -1 +1,13 @@ -< X-ClickHouse-Summary: {"read_rows":"1000","read_bytes":"8000","written_rows":"1000","written_bytes":"2000","total_rows_to_read":"1000","result_rows":"1000","result_bytes":"2000"} +< X-ClickHouse-Progress: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"1","read_bytes":"8","written_rows":"0","written_bytes":"0","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"1","read_bytes":"8","written_rows":"1","written_bytes":"4","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"2","read_bytes":"16","written_rows":"1","written_bytes":"4","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"2","read_bytes":"16","written_rows":"2","written_bytes":"8","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"3","read_bytes":"24","written_rows":"2","written_bytes":"8","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"3","read_bytes":"24","written_rows":"3","written_bytes":"12","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"4","read_bytes":"32","written_rows":"3","written_bytes":"12","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"4","read_bytes":"32","written_rows":"4","written_bytes":"16","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} 
+< X-ClickHouse-Progress: {"read_rows":"5","read_bytes":"40","written_rows":"4","written_bytes":"16","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"5","read_bytes":"40","written_rows":"5","written_bytes":"20","total_rows_to_read":"5","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"5","read_bytes":"40","written_rows":"5","written_bytes":"20","total_rows_to_read":"5","result_rows":"5","result_bytes":"20"} +< X-ClickHouse-Summary: {"read_rows":"5","read_bytes":"40","written_rows":"5","written_bytes":"20","total_rows_to_read":"5","result_rows":"5","result_bytes":"20"} diff --git a/tests/queries/0_stateless/02457_insert_select_progress_http.sh b/tests/queries/0_stateless/02457_insert_select_progress_http.sh index 507cf913e3f..656ab3dc403 100755 --- a/tests/queries/0_stateless/02457_insert_select_progress_http.sh +++ b/tests/queries/0_stateless/02457_insert_select_progress_http.sh @@ -4,9 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<< "drop table if exists insert_select_progress_http" -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<< "create table insert_select_progress_http(n UInt16) engine = MergeTree order by n" - -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&send_progress_in_http_headers=1" -d @- <<< "insert into insert_select_progress_http select * from numbers(1e3)" -v |& grep X-ClickHouse-Summary - -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<< "drop table insert_select_progress_http" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d @- <<< "insert into function null('_ Int') select * from numbers(5) settings max_block_size=1" -v |& { + grep -F -e X-ClickHouse-Progress: -e X-ClickHouse-Summary: +} From 318042c1178f08f5c405a63a430089784fa19feb Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 5 Oct 2022 18:41:40 +0200 Subject: [PATCH 156/266] tests: improve INSERT SELECT over TCP coverage Signed-off-by: Azat Khuzhin --- .../02458_insert_select_progress_tcp.python | 31 ++++++++++++++----- ...02458_insert_select_progress_tcp.reference | 4 ++- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python index ec4c76b50ae..faa7c8779dd 100644 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python @@ -181,22 +181,32 @@ class Progress(): def __str__(self): return json.dumps(self.__dict__) -def readProgress(s, progress): + def __bool__(self): + return ( + self.read_rows > 0 or + self.read_bytes > 0 or + self.total_rows_to_read > 0 or + self.written_rows > 0 or + self.written_bytes > 0) + +def readProgress(s): packet_type = readVarUInt(s) if packet_type == 2: # Exception raise RuntimeError(readException(s)) if 
packet_type == 5: # End stream - return False + return None assertPacket(packet_type, 3) # Progress + + progress = Progress() progress.read_rows += readVarUInt(s) progress.read_bytes += readVarUInt(s) progress.total_rows_to_read += readVarUInt(s) progress.written_rows += readVarUInt(s) progress.written_bytes += readVarUInt(s) - return True + return progress def readException(s): code = readUInt32(s) @@ -213,15 +223,20 @@ def main(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into insert_select_progress_tcp select * from numbers(1000000)') + # For 1 second sleep and 1000ms of interactive_delay we definitelly should have non zero progress packet. + # NOTE: interactive_delay=0 cannot be used since in this case CompletedPipelineExecutor will not call cancelled callback. + sendQuery(s, "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000") # external tables sendEmptyBlock(s) - query_progress = Progress() - while readProgress(s, query_progress): - pass - print(f'{query_progress}') + while True: + progress = readProgress(s) + if progress is None: + break + # Print only non empty progress packets, eventually we should have 3 of them + if progress: + print(progress) s.close() diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference b/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference index 2634b378178..81f61f0d08f 100644 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference @@ -1 +1,3 @@ -{"read_rows": 1000000, "read_bytes": 8000000, "total_rows_to_read": 1000000, "written_rows": 1000000, "written_bytes": 2000000} +{"read_rows": 1, "read_bytes": 8, "total_rows_to_read": 2, "written_rows": 0, "written_bytes": 0} +{"read_rows": 1, "read_bytes": 8, "total_rows_to_read": 0, "written_rows": 1, "written_bytes": 4} 
+{"read_rows": 0, "read_bytes": 0, "total_rows_to_read": 0, "written_rows": 1, "written_bytes": 4} From 644376aa2c120fb68080e934cc69eba85474a0d1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 5 Oct 2022 20:32:43 +0200 Subject: [PATCH 157/266] Better fix --- src/Storages/StorageReplicatedMergeTree.cpp | 29 ++++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7b463361dd4..3c0242cba3a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -136,6 +136,7 @@ namespace ErrorCodes extern const int ABORTED; extern const int REPLICA_IS_NOT_IN_QUORUM; extern const int TABLE_IS_READ_ONLY; + extern const int TABLE_IS_DROPPED; extern const int NOT_FOUND_NODE; extern const int NO_ACTIVE_REPLICAS; extern const int NOT_A_LEADER; @@ -443,7 +444,8 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( createNewZooKeeperNodes(); syncPinnedPartUUIDs(); - createTableSharedID(); + if (!has_metadata_in_zookeeper.has_value() || *has_metadata_in_zookeeper) + createTableSharedID(); initialization_done = true; } @@ -7442,10 +7444,21 @@ String StorageReplicatedMergeTree::getTableSharedID() const /// can be called only during table initialization std::lock_guard lock(table_shared_id_mutex); - bool maybe_has_metadata_in_zookeeper = !has_metadata_in_zookeeper.has_value() || *has_metadata_in_zookeeper; /// Can happen if table was partially initialized before drop by DatabaseCatalog - if (maybe_has_metadata_in_zookeeper && table_shared_id == UUIDHelpers::Nil) - createTableSharedID(); + if (table_shared_id == UUIDHelpers::Nil) + { + if (has_metadata_in_zookeeper.has_value()) + { + if (*has_metadata_in_zookeeper) + createTableSharedID(); + else + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {} is already dropped", getStorageID().getNameForLogs()); + } + else + { + throw Exception(ErrorCodes::NO_ZOOKEEPER, 
"No connection to ZooKeeper, cannot get shared table ID. It will resolve automatically when connection will be established"); + } + } return toString(table_shared_id); } @@ -7625,6 +7638,9 @@ std::pair StorageReplicatedMergeTree::unlockSharedData(const IMer return std::make_pair(true, NameSet{}); } + if (has_metadata_in_zookeeper.has_value() && !has_metadata_in_zookeeper) + return std::make_pair(true, NameSet{}); + /// We remove parts during table shutdown. If exception happen, restarting thread will be already turned /// off and nobody will reconnect our zookeeper connection. In this case we use zookeeper connection from /// context. @@ -7634,6 +7650,11 @@ std::pair StorageReplicatedMergeTree::unlockSharedData(const IMer else zookeeper = getZooKeeper(); + /// It can happen that we didn't had the connection to zookeeper during table creation, but actually + /// table is completely dropped, so we can drop it without any additional checks. + if (!has_metadata_in_zookeeper.has_value() && !zookeeper->exists(zookeeper_path)) + return std::make_pair(true, NameSet{}); + return unlockSharedDataByID(part.getUniqueId(), getTableSharedID(), part.name, replica_name, part.data_part_storage->getDiskType(), zookeeper, *getSettings(), log, zookeeper_path); } From cf145910e3d39787c49bcd89d7b35cde4dc1bacb Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 5 Oct 2022 20:34:48 +0200 Subject: [PATCH 158/266] Revert first fix --- src/Storages/StorageReplicatedMergeTree.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 3c0242cba3a..04ab02f71a3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7492,10 +7492,6 @@ void StorageReplicatedMergeTree::createTableSharedID() const id = zookeeper->get(zookeeper_table_id_path); LOG_DEBUG(log, "Shared ID on path {} concurrently created, will set ID {}", zookeeper_table_id_path, id); } - 
else if (code == Coordination::Error::ZNONODE) - { - LOG_DEBUG(log, "Shared ID on path {} cannot be created, table is completely dropped, will use {}", zookeeper_table_id_path, id); - } else if (code != Coordination::Error::ZOK) { throw zkutil::KeeperException(code, zookeeper_table_id_path); From 4ed522d2a09bd95024694ae6b8b22430d38bca18 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 5 Oct 2022 20:36:55 +0200 Subject: [PATCH 159/266] Fix --- src/Storages/StorageReplicatedMergeTree.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 04ab02f71a3..69c0ff76ff3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7634,6 +7634,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedData(const IMer return std::make_pair(true, NameSet{}); } + /// If table was completely dropped (no meta in zookeeper) we can safely remove parts if (has_metadata_in_zookeeper.has_value() && !has_metadata_in_zookeeper) return std::make_pair(true, NameSet{}); From 1541d6860717ac21cae6b2b638839e57cc3d743e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 5 Oct 2022 20:40:35 +0200 Subject: [PATCH 160/266] Better exception --- src/Storages/StorageReplicatedMergeTree.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 69c0ff76ff3..2d3fd9734e0 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7456,7 +7456,8 @@ String StorageReplicatedMergeTree::getTableSharedID() const } else { - throw Exception(ErrorCodes::NO_ZOOKEEPER, "No connection to ZooKeeper, cannot get shared table ID. It will resolve automatically when connection will be established"); + throw Exception(ErrorCodes::NO_ZOOKEEPER, "No connection to ZooKeeper, cannot get shared table ID for table {}. 
" + "It will be resolve automatically when connection will be established", getStorageID().getNameForLogs()); } } From 25df3efca5652624471caa370695bedc33566fda Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 5 Oct 2022 20:57:55 +0200 Subject: [PATCH 161/266] check dependencies before table shutdown --- src/Interpreters/InterpreterDropQuery.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 71d65ee7fed..77a7cd9d067 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -233,6 +233,10 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue else table->checkTableCanBeDropped(); + /// Check dependencies before shutting table down + if (context_->getSettingsRef().check_table_dependencies) + DatabaseCatalog::instance().checkTableCanBeRemovedOrRenamed(table_id); + table->flushAndShutdown(); TableExclusiveLockHolder table_lock; From 690ec74bf235bd7c84b88acb8820f8d6efff9ec2 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 5 Oct 2022 20:58:27 +0200 Subject: [PATCH 162/266] better handling for expressions in dictGet --- src/Databases/DDLDependencyVisitor.cpp | 73 ++++++++++++++++--- src/Databases/DDLDependencyVisitor.h | 31 +++++++- .../getDictionaryConfigurationFromAST.cpp | 5 ++ src/Interpreters/InterpreterCreateQuery.cpp | 20 +++++ ..._dependencies_and_table_shutdown.reference | 6 ++ ..._check_dependencies_and_table_shutdown.sql | 40 ++++++++++ 6 files changed, 163 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.reference create mode 100644 tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.sql diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index 532691f7978..a9131635775 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ 
b/src/Databases/DDLDependencyVisitor.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -11,6 +12,8 @@ namespace DB { +using TableLoadingDependenciesVisitor = DDLDependencyVisitor::Visitor; + TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast) { assert(global_context == global_context->getGlobalContext()); @@ -35,7 +38,7 @@ void DDLDependencyVisitor::visit(const ASTPtr & ast, Data & data) visit(*storage, data); } -bool DDLDependencyVisitor::needChildVisit(const ASTPtr & node, const ASTPtr & child) +bool DDLMatcherBase::needChildVisit(const ASTPtr & node, const ASTPtr & child) { if (node->as()) return false; @@ -49,20 +52,26 @@ bool DDLDependencyVisitor::needChildVisit(const ASTPtr & node, const ASTPtr & ch return true; } -void DDLDependencyVisitor::visit(const ASTFunction & function, Data & data) +ssize_t DDLMatcherBase::getPositionOfTableNameArgument(const ASTFunction & function) { if (function.name == "joinGet" || function.name == "dictHas" || function.name == "dictIsIn" || function.name.starts_with("dictGet")) - { - extractTableNameFromArgument(function, data, 0); - } - else if (Poco::toLower(function.name) == "in") - { - extractTableNameFromArgument(function, data, 1); - } + return 0; + if (Poco::toLower(function.name) == "in") + return 1; + + return -1; +} + +void DDLDependencyVisitor::visit(const ASTFunction & function, Data & data) +{ + ssize_t table_name_arg_idx = getPositionOfTableNameArgument(function); + if (table_name_arg_idx < 0) + return; + extractTableNameFromArgument(function, data, table_name_arg_idx); } void DDLDependencyVisitor::visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data) @@ -140,4 +149,50 @@ void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & func data.dependencies.emplace(std::move(qualified_name)); } + +void NormalizeAndEvaluateConstants::visit(const ASTPtr & ast, Data & data) +{ 
+ assert(data.create_query_context->hasQueryContext()); + + /// Looking for functions in column default expressions and dictionary source definition + if (const auto * function = ast->as()) + visit(*function, data); + else if (const auto * dict_source = ast->as()) + visit(*dict_source, data); +} + +void NormalizeAndEvaluateConstants::visit(const ASTFunction & function, Data & data) +{ + /// Replace expressions like "dictGet(currentDatabase() || '.dict', 'value', toUInt32(1))" + /// with "dictGet('db_name.dict', 'value', toUInt32(1))" + ssize_t table_name_arg_idx = getPositionOfTableNameArgument(function); + if (table_name_arg_idx < 0) + return; + + if (!function.arguments || function.arguments->children.size() <= static_cast(table_name_arg_idx)) + return; + + auto & arg = function.arguments->as().children[table_name_arg_idx]; + if (arg->as()) + arg = evaluateConstantExpressionAsLiteral(arg, data.create_query_context); +} + + +void NormalizeAndEvaluateConstants::visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data) +{ + if (!dict_source.elements) + return; + + auto & expr_list = dict_source.elements->as(); + for (auto & child : expr_list.children) + { + ASTPair * pair = child->as(); + if (const auto * func = pair->second->as()) + { + auto ast_literal = evaluateConstantExpressionAsLiteral(pair->children[0], data.create_query_context); + pair->replace(pair->second, ast_literal); + } + } +} + } diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h index ae7f7aa94d9..d23a7a697a9 100644 --- a/src/Databases/DDLDependencyVisitor.h +++ b/src/Databases/DDLDependencyVisitor.h @@ -14,11 +14,19 @@ using TableNamesSet = std::unordered_set; TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast); + +class DDLMatcherBase +{ +public: + static bool needChildVisit(const ASTPtr & node, const ASTPtr & child); + static ssize_t 
getPositionOfTableNameArgument(const ASTFunction & function); +}; + /// Visits ASTCreateQuery and extracts names of table (or dictionary) dependencies /// from column default expressions (joinGet, dictGet, etc) /// or dictionary source (for dictionaries from local ClickHouse table). /// Does not validate AST, works a best-effort way. -class DDLDependencyVisitor +class DDLDependencyVisitor : public DDLMatcherBase { public: struct Data @@ -32,7 +40,6 @@ public: using Visitor = ConstInDepthNodeVisitor; static void visit(const ASTPtr & ast, Data & data); - static bool needChildVisit(const ASTPtr & node, const ASTPtr & child); private: static void visit(const ASTFunction & function, Data & data); @@ -42,6 +49,24 @@ private: static void extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx); }; -using TableLoadingDependenciesVisitor = DDLDependencyVisitor::Visitor; +class NormalizeAndEvaluateConstants : public DDLMatcherBase +{ +public: + struct Data + { + ContextPtr create_query_context; + }; + + using Visitor = ConstInDepthNodeVisitor; + + static void visit(const ASTPtr & ast, Data & data); + +private: + static void visit(const ASTFunction & function, Data & data); + static void visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data); + +}; + +using NormalizeAndEvaluateConstantsVisitor = NormalizeAndEvaluateConstants::Visitor; } diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index e19495a27a3..4868413dabd 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -452,6 +452,11 @@ void buildConfigurationFromFunctionWithKeyValueArguments( } else if (const auto * func = pair->second->as()) { + /// This branch exists only for compatibility. 
+ /// It's not possible to have a function in a dictionary definition since 22.10, + /// because query must be normalized on dictionary creation. It's possible only when we load old metadata. + /// For debug builds allow it only during server startup to avoid crash in BC check in Stress Tests. + assert(!Context::getGlobalContextInstance()->isServerCompletelyStarted()); auto builder = FunctionFactory::instance().tryGet(func->name, context); auto function = builder->build({}); function->prepare({}); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 41c378babcd..30763509b09 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -582,6 +582,15 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.default_expression) { + if (context_->hasQueryContext() && context_->getQueryContext().get() == context_.get()) + { + /// Normalize query only for original CREATE query, not on metadata loading. + /// And for CREATE query we can pass local context, because result will not change after restart. + NormalizeAndEvaluateConstantsVisitor::Data visitor_data{context_}; + NormalizeAndEvaluateConstantsVisitor visitor(visitor_data); + visitor.visit(col_decl.default_expression); + } + ASTPtr default_expr = col_decl.default_specifier == "EPHEMERAL" && col_decl.default_expression->as()->value.isNull() ? 
std::make_shared(DataTypeFactory::instance().get(col_decl.type)->getDefault()) : @@ -664,6 +673,9 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti if (create.as_table_function && (create.columns_list->indices || create.columns_list->constraints)) throw Exception("Indexes and constraints are not supported for table functions", ErrorCodes::INCORRECT_QUERY); + /// Dictionaries have dictionary_attributes_list instead of columns_list + assert(!create.is_dictionary); + if (create.columns_list->columns) { properties.columns = getColumnsDescription(*create.columns_list->columns, getContext(), create.attach); @@ -725,6 +737,14 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else if (create.is_dictionary) { + if (!create.dictionary || !create.dictionary->source) + return {}; + + /// Evaluate expressions (like currentDatabase() or tcpPort()) in dictionary source definition. + NormalizeAndEvaluateConstantsVisitor::Data visitor_data{getContext()}; + NormalizeAndEvaluateConstantsVisitor visitor(visitor_data); + visitor.visit(create.dictionary->source->ptr()); + return {}; } /// We can have queries like "CREATE TABLE ENGINE=" if diff --git a/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.reference b/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.reference new file mode 100644 index 00000000000..0ecea03c64e --- /dev/null +++ b/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.reference @@ -0,0 +1,6 @@ +CREATE DICTIONARY default.dict\n(\n `id` UInt32,\n `value` String\n)\nPRIMARY KEY id\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' DB \'default\' TABLE \'view\'))\nLIFETIME(MIN 0 MAX 600)\nLAYOUT(HASHED()) +CREATE TABLE default.table\n(\n `col` String MATERIALIZED dictGet(\'default.dict\', \'value\', toUInt32(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +1 v +1 v +1 v +2 a diff --git 
a/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.sql b/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.sql new file mode 100644 index 00000000000..49e1e36acc9 --- /dev/null +++ b/tests/queries/0_stateless/02449_check_dependencies_and_table_shutdown.sql @@ -0,0 +1,40 @@ +DROP TABLE IF EXISTS table; +DROP DICTIONARY IF EXISTS dict; +DROP TABLE IF EXISTS view; + +CREATE TABLE view (id UInt32, value String) ENGINE=ReplicatedMergeTree('/test/2449/{database}', '1') ORDER BY id; +INSERT INTO view VALUES (1, 'v'); + +CREATE DICTIONARY dict (id UInt32, value String) +PRIMARY KEY id +SOURCE(CLICKHOUSE(host 'localhost' port tcpPort() user 'default' db currentDatabase() table 'view')) +LAYOUT (HASHED()) LIFETIME (600); + +SHOW CREATE dict; + +CREATE TABLE table +( + col MATERIALIZED dictGet(currentDatabase() || '.dict', 'value', toUInt32(1)) +) +ENGINE = MergeTree() +ORDER BY tuple(); + +SHOW CREATE TABLE table; + +SELECT * FROM dictionary('dict'); + +DROP TABLE view; -- {serverError HAVE_DEPENDENT_OBJECTS} + +-- check that table is not readonly +INSERT INTO view VALUES (2, 'a'); + +DROP DICTIONARY dict; -- {serverError HAVE_DEPENDENT_OBJECTS} + +-- check that dictionary was not detached +SELECT * FROM dictionary('dict'); +SYSTEM RELOAD DICTIONARY dict; +SELECT * FROM dictionary('dict') ORDER BY id; + +DROP TABLE table; +DROP DICTIONARY dict; +DROP TABLE view; From 89b1a5188f3890af9bd8a39d5e51f51ab6307ca5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Oct 2022 22:59:44 +0300 Subject: [PATCH 163/266] Update CMakeLists.txt --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f1a22fe717b..7764c2b72e7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -495,7 +495,7 @@ endif () enable_testing() # Enable for tests without binary -option(ENABLE_EXTERNAL_OPENSSL "Provide the user to allow building of OpenSSL library. 
By default, uses in-house ClickHouse BoringSSL. It is not recommended and may be insecure" OFF) +option(ENABLE_EXTERNAL_OPENSSL "This option is insecure and not recommended for any occasions. If it is enabled, it allows building with alternative OpenSSL library. By default, ClickHouse is using BoringSSL, which is better. Do not use this option." OFF) message (STATUS "ENABLE_EXTERNAL_OPENSSL: ${ENABLE_EXTERNAL_OPENSSL}") if (NOT ENABLE_EXTERNAL_OPENSSL) From a46206571e8ff7abc2b2f075cffc31989e02f0ab Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Oct 2022 23:00:49 +0300 Subject: [PATCH 164/266] Update CMakeLists.txt --- CMakeLists.txt | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7764c2b72e7..0c802728061 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -497,14 +497,8 @@ enable_testing() # Enable for tests without binary option(ENABLE_EXTERNAL_OPENSSL "This option is insecure and not recommended for any occasions. If it is enabled, it allows building with alternative OpenSSL library. By default, ClickHouse is using BoringSSL, which is better. Do not use this option." OFF) -message (STATUS "ENABLE_EXTERNAL_OPENSSL: ${ENABLE_EXTERNAL_OPENSSL}") -if (NOT ENABLE_EXTERNAL_OPENSSL) - message (STATUS "Uses in-house ClickHouse BoringSSL library") -else () - message (STATUS "Build and uses OpenSSL library instead of BoringSSL") -endif () - if (ENABLE_EXTERNAL_OPENSSL) + message (STATUS "Build and uses OpenSSL library instead of BoringSSL. This is strongly discouraged. 
Your build of ClickHouse will be unsupported.") set(ENABLE_SSL 1) target_compile_options(global-group INTERFACE "-Wno-deprecated-declarations") endif () From d682c2ccdbafa43b3e335d44ef659011c07c1ffc Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 5 Oct 2022 22:13:16 +0200 Subject: [PATCH 165/266] fix --- src/Interpreters/DatabaseCatalog.cpp | 36 ++++++++++++------- src/Interpreters/DatabaseCatalog.h | 3 +- src/Interpreters/InterpreterDropQuery.cpp | 2 +- ...tionary_layout_without_arguments.reference | 2 +- .../01190_full_attach_syntax.reference | 4 +-- ...01224_no_superfluous_dict_reload.reference | 4 +-- .../01509_dictionary_preallocate.reference | 4 +-- ...4_create_dictionary_with_comment.reference | 2 +- 8 files changed, 34 insertions(+), 23 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 67fb256b1c9..72adf4c8816 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1103,16 +1103,7 @@ TableNamesSet DatabaseCatalog::tryRemoveLoadingDependenciesUnlocked(const Qualif /// For DROP DATABASE we should ignore dependent tables from the same database. 
/// TODO unload tables in reverse topological order and remove this code if (check_dependencies) - { - TableNames from_other_databases; - for (const auto & table : dependent) - if (table.database != removing_table.database) - from_other_databases.push_back(table); - - if (!from_other_databases.empty()) - throw Exception(ErrorCodes::HAVE_DEPENDENT_OBJECTS, "Cannot drop or rename {}, because some tables depend on it: {}", - removing_table, fmt::join(from_other_databases, ", ")); - } + checkTableCanBeRemovedOrRenamedImpl(dependent, removing_table, is_drop_database); for (const auto & table : dependent) { @@ -1133,7 +1124,7 @@ TableNamesSet DatabaseCatalog::tryRemoveLoadingDependenciesUnlocked(const Qualif return dependencies; } -void DatabaseCatalog::checkTableCanBeRemovedOrRenamed(const StorageID & table_id) const +void DatabaseCatalog::checkTableCanBeRemovedOrRenamed(const StorageID & table_id, bool is_drop_database) const { QualifiedTableName removing_table = table_id.getQualifiedName(); std::lock_guard lock{databases_mutex}; @@ -1142,9 +1133,28 @@ void DatabaseCatalog::checkTableCanBeRemovedOrRenamed(const StorageID & table_id return; const TableNamesSet & dependent = it->second.dependent_database_objects; - if (!dependent.empty()) + checkTableCanBeRemovedOrRenamedImpl(dependent, removing_table, is_drop_database); +} + +void DatabaseCatalog::checkTableCanBeRemovedOrRenamedImpl(const TableNamesSet & dependent, const QualifiedTableName & removing_table, bool is_drop_database) const +{ + if (!is_drop_database) + { + if (!dependent.empty()) + throw Exception(ErrorCodes::HAVE_DEPENDENT_OBJECTS, "Cannot drop or rename {}, because some tables depend on it: {}", + removing_table, fmt::join(dependent, ", ")); + } + + /// For DROP DATABASE we should ignore dependent tables from the same database. 
+ /// TODO unload tables in reverse topological order and remove this code + TableNames from_other_databases; + for (const auto & table : dependent) + if (table.database != removing_table.database) + from_other_databases.push_back(table); + + if (!from_other_databases.empty()) throw Exception(ErrorCodes::HAVE_DEPENDENT_OBJECTS, "Cannot drop or rename {}, because some tables depend on it: {}", - table_id.getNameForLogs(), fmt::join(dependent, ", ")); + removing_table, fmt::join(from_other_databases, ", ")); } void DatabaseCatalog::updateLoadingDependencies(const StorageID & table_id, TableNamesSet && new_dependencies) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 0b3daefb258..9fab2ee09f2 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -229,7 +229,7 @@ public: TableNamesSet tryRemoveLoadingDependencies(const StorageID & table_id, bool check_dependencies, bool is_drop_database = false); TableNamesSet tryRemoveLoadingDependenciesUnlocked(const QualifiedTableName & removing_table, bool check_dependencies, bool is_drop_database = false) TSA_REQUIRES(databases_mutex); - void checkTableCanBeRemovedOrRenamed(const StorageID & table_id) const; + void checkTableCanBeRemovedOrRenamed(const StorageID & table_id, bool is_drop_database = false) const; void updateLoadingDependencies(const StorageID & table_id, TableNamesSet && new_dependencies); @@ -245,6 +245,7 @@ private: void shutdownImpl(); + void checkTableCanBeRemovedOrRenamedImpl(const TableNamesSet & dependent, const QualifiedTableName & removing_table, bool is_drop_database) const; struct UUIDToStorageMapPart { diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 77a7cd9d067..5698d10621b 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -235,7 +235,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, 
ASTDropQue /// Check dependencies before shutting table down if (context_->getSettingsRef().check_table_dependencies) - DatabaseCatalog::instance().checkTableCanBeRemovedOrRenamed(table_id); + DatabaseCatalog::instance().checkTableCanBeRemovedOrRenamed(table_id, is_drop_or_detach_database); table->flushAndShutdown(); diff --git a/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference b/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference index 0a935516722..69018bef2ef 100644 --- a/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference +++ b/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference @@ -1,3 +1,3 @@ World -CREATE DICTIONARY db_for_dict.dict_with_hashed_layout\n(\n `key1` UInt64,\n `value` String\n)\nPRIMARY KEY key1\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' DB \'db_for_dict\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(HASHED) +CREATE DICTIONARY db_for_dict.dict_with_hashed_layout\n(\n `key1` UInt64,\n `value` String\n)\nPRIMARY KEY key1\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' DB \'db_for_dict\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(HASHED) Hello diff --git a/tests/queries/0_stateless/01190_full_attach_syntax.reference b/tests/queries/0_stateless/01190_full_attach_syntax.reference index 9d74a8cb3ce..f924c2ec780 100644 --- a/tests/queries/0_stateless/01190_full_attach_syntax.reference +++ b/tests/queries/0_stateless/01190_full_attach_syntax.reference @@ -1,5 +1,5 @@ -CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) -CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST 
\'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) CREATE TABLE test_01190.log\n(\n `s` String\n)\nENGINE = Log CREATE TABLE test_01190.log\n(\n `s` String\n)\nENGINE = Log test diff --git a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference index 5a05edcad58..d80501b3f4d 100644 --- a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference +++ b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference @@ -6,7 +6,7 @@ CREATE DICTIONARY dict_db_01224.dict `val` UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01224')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01224')) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()) NOT_LOADED @@ -17,7 +17,7 @@ CREATE TABLE dict_db_01224_dictionary.`dict_db_01224.dict` ) ENGINE = Dictionary(`dict_db_01224.dict`) NOT_LOADED -Dictionary 1 CREATE DICTIONARY dict_db_01224.dict (`key` UInt64 DEFAULT 0, `val` UInt64 DEFAULT 10) PRIMARY KEY key SOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'dict_data\' PASSWORD \'\' DB \'dict_db_01224\')) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()) +Dictionary 1 CREATE DICTIONARY dict_db_01224.dict (`key` 
UInt64 DEFAULT 0, `val` UInt64 DEFAULT 10) PRIMARY KEY key SOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'dict_data\' PASSWORD \'\' DB \'dict_db_01224\')) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()) NOT_LOADED key UInt64 val UInt64 diff --git a/tests/queries/0_stateless/01509_dictionary_preallocate.reference b/tests/queries/0_stateless/01509_dictionary_preallocate.reference index 2f1e1d2c386..fe42689bc81 100644 --- a/tests/queries/0_stateless/01509_dictionary_preallocate.reference +++ b/tests/queries/0_stateless/01509_dictionary_preallocate.reference @@ -1,5 +1,5 @@ -CREATE DICTIONARY default.dict_01509\n(\n `key` UInt64,\n `value` String DEFAULT \'-\'\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() TABLE \'data_01509\'))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(PREALLOCATE 0)) -CREATE DICTIONARY default.dict_01509_preallocate\n(\n `key` UInt64,\n `value` String DEFAULT \'-\'\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() TABLE \'data_01509\'))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(PREALLOCATE 1)) +CREATE DICTIONARY default.dict_01509\n(\n `key` UInt64,\n `value` String DEFAULT \'-\'\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 TABLE \'data_01509\'))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(PREALLOCATE 0)) +CREATE DICTIONARY default.dict_01509_preallocate\n(\n `key` UInt64,\n `value` String DEFAULT \'-\'\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 TABLE \'data_01509\'))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(PREALLOCATE 1)) HashedDictionary: Preallocated 10000 elements - 0 diff --git a/tests/queries/0_stateless/02024_create_dictionary_with_comment.reference b/tests/queries/0_stateless/02024_create_dictionary_with_comment.reference index 45f2c41f0b0..dd04f942e41 100644 --- a/tests/queries/0_stateless/02024_create_dictionary_with_comment.reference +++ b/tests/queries/0_stateless/02024_create_dictionary_with_comment.reference @@ -1,2 
+1,2 @@ -CREATE DICTIONARY default.`2024_dictionary_with_comment`\n(\n `id` UInt64,\n `value` String\n)\nPRIMARY KEY id\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() TABLE \'source_table\'))\nLIFETIME(MIN 0 MAX 1000)\nLAYOUT(FLAT())\nCOMMENT \'Test dictionary with comment\' +CREATE DICTIONARY default.`2024_dictionary_with_comment`\n(\n `id` UInt64,\n `value` String\n)\nPRIMARY KEY id\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 TABLE \'source_table\'))\nLIFETIME(MIN 0 MAX 1000)\nLAYOUT(FLAT())\nCOMMENT \'Test dictionary with comment\' Test dictionary with comment From 87f06bbe0b18799a08ebb2ad17714fda963a74df Mon Sep 17 00:00:00 2001 From: BoloniniD Date: Wed, 5 Oct 2022 23:38:44 +0300 Subject: [PATCH 166/266] Fix [[noreturn]] function variant --- src/Functions/FunctionsHashing.h | 43 ++++++++++++++------------------ 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index d6e5c523998..f970c98fe0b 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -627,30 +627,29 @@ struct ImplBLAKE3 enum { length = 32 }; #if !USE_BLAKE3 - [[ noreturn ]] - #endif + [[noreturn]] static void apply(const char * begin, const size_t size, unsigned char* out_char_data) { + (void) begin; + (void) size; + (void) out_char_data; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "BLAKE3 is not available. 
Rust code or BLAKE3 itself may be disabled."); + } + #else static void apply(const char * begin, const size_t size, unsigned char* out_char_data) { - #if USE_BLAKE3 - #if defined(MEMORY_SANITIZER) - auto err_msg = blake3_apply_shim_msan_compat(begin, size, out_char_data); - __msan_unpoison(out_char_data, length); - #else - auto err_msg = blake3_apply_shim(begin, size, out_char_data); - #endif - if (err_msg != nullptr) - { - auto err_st = std::string(err_msg); - blake3_free_char_pointer(err_msg); - throw Exception("Function returned error message: " + std::string(err_msg), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } + #if defined(MEMORY_SANITIZER) + auto err_msg = blake3_apply_shim_msan_compat(begin, size, out_char_data); + __msan_unpoison(out_char_data, length); #else - (void) begin; - (void) size; - (void) out_char_data; - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "BLAKE3 is not available. Rust code or BLAKE3 itself may be disabled."); + auto err_msg = blake3_apply_shim(begin, size, out_char_data); #endif + if (err_msg != nullptr) + { + auto err_st = std::string(err_msg); + blake3_free_char_pointer(err_msg); + throw Exception("Function returned error message: " + std::string(err_msg), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } } + #endif }; @@ -1513,9 +1512,5 @@ using FunctionXxHash32 = FunctionAnyHash; using FunctionXxHash64 = FunctionAnyHash; using FunctionWyHash64 = FunctionAnyHash; - -#if USE_BLAKE3 - using FunctionBLAKE3 = FunctionStringHashFixedString; -#endif - +using FunctionBLAKE3 = FunctionStringHashFixedString; } From fd65257153d72abf25bbeb89a92a22ecf59f22a9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 6 Oct 2022 08:36:14 +0000 Subject: [PATCH 167/266] Update version_date.tsv and changelogs after v22.6.9.11-stable --- docs/changelogs/v22.6.9.11-stable.md | 23 +++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 24 insertions(+) create mode 100644 docs/changelogs/v22.6.9.11-stable.md diff --git 
a/docs/changelogs/v22.6.9.11-stable.md b/docs/changelogs/v22.6.9.11-stable.md new file mode 100644 index 00000000000..ab2ff27f9eb --- /dev/null +++ b/docs/changelogs/v22.6.9.11-stable.md @@ -0,0 +1,23 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.6.9.11-stable (9ec61dcac49) FIXME as compared to v22.6.8.35-stable (b91dc59a565) + +#### Improvement +* Backported in [#42089](https://github.com/ClickHouse/ClickHouse/issues/42089): Replace back `clickhouse su` command with `sudo -u` in start in order to respect limits in `/etc/security/limits.conf`. [#41847](https://github.com/ClickHouse/ClickHouse/pull/41847) ([Eugene Konkov](https://github.com/ekonkov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#41558](https://github.com/ClickHouse/ClickHouse/issues/41558): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#41504](https://github.com/ClickHouse/ClickHouse/issues/41504): Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Build latest tags ONLY from master branch [#41567](https://github.com/ClickHouse/ClickHouse/pull/41567) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index ecd098a5108..65ec5ddec01 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -13,6 +13,7 @@ v22.7.4.16-stable 2022-08-23 v22.7.3.5-stable 2022-08-10 v22.7.2.15-stable 2022-08-03 v22.7.1.2484-stable 2022-07-21 +v22.6.9.11-stable 2022-10-06 v22.6.8.35-stable 2022-09-19 v22.6.7.7-stable 2022-08-29 v22.6.6.16-stable 2022-08-23 From 3a934591e4ea22faf487f6f3db4e9711696edd71 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 6 Oct 2022 13:11:54 +0200 Subject: [PATCH 168/266] fix --- tests/queries/0_stateless/01747_join_view_filter_dictionary.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql b/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql index 11ecaf1ca2e..050aa33464e 100644 --- a/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql +++ b/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql @@ -47,5 +47,5 @@ SELECT field2 FROM agg_view01747 WHERE field1 = 'test'; drop table summing_table01747; drop view rates01747; drop view agg_view01747; -drop table dictst01747; drop DICTIONARY default.dict01747; +drop table dictst01747; From 766107df0a2cbd1f3e8d2afad62476d67e827d90 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 6 Oct 2022 13:18:46 +0200 Subject: [PATCH 169/266] Fixed the restriction on maximum size of replicated fetches (#42090) --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- .../configs/custom_settings.xml | 6 +----- tests/integration/test_limited_replicated_fetches/test.py | 4 ++-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 46b368ccdfe..c12e9d0270a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -280,7 
+280,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( , restarting_thread(*this) , part_moves_between_shards_orchestrator(*this) , renaming_restrictions(renaming_restrictions_) - , replicated_fetches_pool_size(getContext()->getSettingsRef().background_fetches_pool_size) + , replicated_fetches_pool_size(getContext()->getFetchesExecutor()->getMaxTasksCount()) , replicated_fetches_throttler(std::make_shared(getSettings()->max_replicated_fetches_network_bandwidth, getContext()->getReplicatedFetchesThrottler())) , replicated_sends_throttler(std::make_shared(getSettings()->max_replicated_sends_network_bandwidth, getContext()->getReplicatedSendsThrottler())) { diff --git a/tests/integration/test_limited_replicated_fetches/configs/custom_settings.xml b/tests/integration/test_limited_replicated_fetches/configs/custom_settings.xml index 96301816401..443bc45d870 100644 --- a/tests/integration/test_limited_replicated_fetches/configs/custom_settings.xml +++ b/tests/integration/test_limited_replicated_fetches/configs/custom_settings.xml @@ -1,7 +1,3 @@ - - - 3 - - + 3 diff --git a/tests/integration/test_limited_replicated_fetches/test.py b/tests/integration/test_limited_replicated_fetches/test.py index e3271100b74..bec575df7cd 100644 --- a/tests/integration/test_limited_replicated_fetches/test.py +++ b/tests/integration/test_limited_replicated_fetches/test.py @@ -11,10 +11,10 @@ import os cluster = ClickHouseCluster(__file__) SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) node1 = cluster.add_instance( - "node1", user_configs=["configs/custom_settings.xml"], with_zookeeper=True + "node1", main_configs=["configs/custom_settings.xml"], with_zookeeper=True ) node2 = cluster.add_instance( - "node2", user_configs=["configs/custom_settings.xml"], with_zookeeper=True + "node2", main_configs=["configs/custom_settings.xml"], with_zookeeper=True ) MAX_THREADS_FOR_FETCH = 3 From 860e34e76007976f5f1ad0702ab7f7a972800b1f Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov 
Date: Thu, 6 Oct 2022 13:47:32 +0200 Subject: [PATCH 170/266] Resurrect parallel distributed insert select with s3Cluster (#41535) --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Storages/HDFS/StorageHDFSCluster.cpp | 26 ++- src/Storages/HDFS/StorageHDFSCluster.h | 6 +- src/Storages/IStorageCluster.h | 29 +++ src/Storages/StorageDistributed.cpp | 181 +++++++++++---- src/Storages/StorageDistributed.h | 4 + src/Storages/StorageReplicatedMergeTree.cpp | 105 +++++++++ src/Storages/StorageReplicatedMergeTree.h | 5 + src/Storages/StorageS3Cluster.cpp | 25 ++- src/Storages/StorageS3Cluster.h | 7 +- .../test_s3_cluster/configs/cluster.xml | 17 +- tests/integration/test_s3_cluster/test.py | 211 +++++++++++++++--- 12 files changed, 528 insertions(+), 90 deletions(-) create mode 100644 src/Storages/IStorageCluster.h diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 6e4efdc5167..841b0d946cb 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -331,7 +331,7 @@ BlockIO InterpreterInsertQuery::execute() if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); - if (query.select && table->isRemote() && settings.parallel_distributed_insert_select) + if (query.select && settings.parallel_distributed_insert_select) // Distributed INSERT SELECT distributed_pipeline = table->distributedWrite(query, getContext()); diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 47a6fbf5eaa..4372455c58f 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -1,4 +1,5 @@ #include +#include "Interpreters/Context_fwd.h" #if USE_HDFS @@ -41,7 +42,7 @@ StorageHDFSCluster::StorageHDFSCluster( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & compression_method_) - : 
IStorage(table_id_) + : IStorageCluster(table_id_) , cluster_name(cluster_name_) , uri(uri_) , format_name(format_name_) @@ -74,13 +75,8 @@ Pipe StorageHDFSCluster::read( size_t /*max_block_size*/, unsigned /*num_streams*/) { - auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); - - auto iterator = std::make_shared(context, uri); - auto callback = std::make_shared([iterator]() mutable -> String - { - return iterator->next(); - }); + auto cluster = getCluster(context); + auto extension = getTaskIteratorExtension(query_info.query, context); /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*) Block header = @@ -117,7 +113,7 @@ Pipe StorageHDFSCluster::read( scalars, Tables(), processed_stage, - RemoteQueryExecutor::Extension{.task_iterator = callback}); + extension); pipes.emplace_back(std::make_shared(remote_query_executor, add_agg_info, false)); } @@ -140,6 +136,18 @@ QueryProcessingStage::Enum StorageHDFSCluster::getQueryProcessingStage( } +ClusterPtr StorageHDFSCluster::getCluster(ContextPtr context) const +{ + return context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); +} + +RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(ASTPtr, ContextPtr context) const +{ + auto iterator = std::make_shared(context, uri); + auto callback = std::make_shared([iter = std::move(iterator)]() mutable -> String { return iter->next(); }); + return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; +} + NamesAndTypesList StorageHDFSCluster::getVirtuals() const { return NamesAndTypesList{ diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 21ae73c11ea..2bd9dbc0f47 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -9,6 +9,7 @@ #include #include +#include #include namespace DB 
@@ -16,7 +17,7 @@ namespace DB class Context; -class StorageHDFSCluster : public IStorage +class StorageHDFSCluster : public IStorageCluster { public: StorageHDFSCluster( @@ -39,6 +40,9 @@ public: NamesAndTypesList getVirtuals() const override; + ClusterPtr getCluster(ContextPtr context) const override; + RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, ContextPtr context) const override; + private: String cluster_name; String uri; diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h new file mode 100644 index 00000000000..35d297428ba --- /dev/null +++ b/src/Storages/IStorageCluster.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + + +/** + * Base cluster for Storages used in table functions like s3Cluster and hdfsCluster + * Needed for code simplification around parallel_distributed_insert_select + */ +class IStorageCluster : public IStorage +{ +public: + + explicit IStorageCluster(const StorageID & table_id_) : IStorage(table_id_) {} + + virtual ClusterPtr getCluster(ContextPtr context) const = 0; + /// Query is needed for pruning by virtual columns (_file, _path) + virtual RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, ContextPtr context) const = 0; + + bool isRemote() const override { return true; } +}; + + +} diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index c39f235c46c..d7af9790a85 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -59,6 +59,8 @@ #include #include +#include + #include #include #include @@ -759,55 +761,35 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata } -std::optional StorageDistributed::distributedWrite(const ASTInsertQuery & query, ContextPtr local_context) +std::optional StorageDistributed::distributedWriteBetweenDistributedTables(const StorageDistributed & src_distributed, const ASTInsertQuery & query, ContextPtr 
local_context) const { - QueryPipeline pipeline; - - const Settings & settings = local_context->getSettingsRef(); - if (settings.max_distributed_depth && local_context->getClientInfo().distributed_depth >= settings.max_distributed_depth) - throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); - - std::shared_ptr storage_src; - auto & select = query.select->as(); + const auto & settings = local_context->getSettingsRef(); auto new_query = std::dynamic_pointer_cast(query.clone()); - if (select.list_of_selects->children.size() == 1) + + /// Unwrap view() function. + if (src_distributed.remote_table_function_ptr) { - if (auto * select_query = select.list_of_selects->children.at(0)->as()) - { - JoinedTables joined_tables(Context::createCopy(local_context), *select_query); + const TableFunctionPtr src_table_function = + TableFunctionFactory::instance().get(src_distributed.remote_table_function_ptr, local_context); + const TableFunctionView * view_function = + assert_cast(src_table_function.get()); + new_query->select = view_function->getSelectQuery().clone(); + } + else + { + const auto select_with_union_query = std::make_shared(); + select_with_union_query->list_of_selects = std::make_shared(); - if (joined_tables.tablesCount() == 1) - { - storage_src = std::dynamic_pointer_cast(joined_tables.getLeftTableStorage()); - if (storage_src) - { - /// Unwrap view() function. 
- if (storage_src->remote_table_function_ptr) - { - const TableFunctionPtr src_table_function = - TableFunctionFactory::instance().get(storage_src->remote_table_function_ptr, local_context); - const TableFunctionView * view_function = - assert_cast(src_table_function.get()); - new_query->select = view_function->getSelectQuery().clone(); - } - else - { - const auto select_with_union_query = std::make_shared(); - select_with_union_query->list_of_selects = std::make_shared(); + auto * select = query.select->as().list_of_selects->children.at(0)->as(); + auto new_select_query = std::dynamic_pointer_cast(select->clone()); + select_with_union_query->list_of_selects->children.push_back(new_select_query); - auto new_select_query = std::dynamic_pointer_cast(select_query->clone()); - select_with_union_query->list_of_selects->children.push_back(new_select_query); + new_select_query->replaceDatabaseAndTable(src_distributed.getRemoteDatabaseName(), src_distributed.getRemoteTableName()); - new_select_query->replaceDatabaseAndTable(storage_src->getRemoteDatabaseName(), storage_src->getRemoteTableName()); - - new_query->select = select_with_union_query; - } - } - } - } + new_query->select = select_with_union_query; } - const Cluster::AddressesWithFailover & src_addresses = storage_src ? storage_src->getCluster()->getShardsAddresses() : Cluster::AddressesWithFailover{}; + const Cluster::AddressesWithFailover & src_addresses = src_distributed.getCluster()->getShardsAddresses(); const Cluster::AddressesWithFailover & dst_addresses = getCluster()->getShardsAddresses(); /// Compare addresses instead of cluster name, to handle remote()/cluster(). /// (since for remote()/cluster() the getClusterName() is empty string) @@ -822,7 +804,7 @@ std::optional StorageDistributed::distributedWrite(const ASTInser LOG_WARNING(log, "Parallel distributed INSERT SELECT is not possible " "(source cluster={} ({} addresses), destination cluster={} ({} addresses))", - storage_src ? 
storage_src->getClusterName() : "", + src_distributed.getClusterName(), src_addresses.size(), getClusterName(), dst_addresses.size()); @@ -849,6 +831,7 @@ std::optional StorageDistributed::distributedWrite(const ASTInser new_query_str = buf.str(); } + QueryPipeline pipeline; ContextMutablePtr query_context = Context::createCopy(local_context); ++query_context->getClientInfo().distributed_depth; @@ -882,6 +865,120 @@ std::optional StorageDistributed::distributedWrite(const ASTInser } +std::optional StorageDistributed::distributedWriteFromClusterStorage(const IStorageCluster & src_storage_cluster, const ASTInsertQuery & query, ContextPtr local_context) const +{ + const auto & settings = local_context->getSettingsRef(); + auto & select = query.select->as(); + /// Select query is needed for pruining on virtual columns + auto extension = src_storage_cluster.getTaskIteratorExtension( + select.list_of_selects->children.at(0)->as()->clone(), + local_context); + + auto dst_cluster = getCluster(); + + auto new_query = std::dynamic_pointer_cast(query.clone()); + if (settings.parallel_distributed_insert_select == PARALLEL_DISTRIBUTED_INSERT_SELECT_ALL) + { + new_query->table_id = StorageID(getRemoteDatabaseName(), getRemoteTableName()); + /// Reset table function for INSERT INTO remote()/cluster() + new_query->table_function.reset(); + } + + String new_query_str; + { + WriteBufferFromOwnString buf; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); + ast_format_settings.always_quote_identifiers = true; + new_query->IAST::format(ast_format_settings); + new_query_str = buf.str(); + } + + QueryPipeline pipeline; + ContextMutablePtr query_context = Context::createCopy(local_context); + ++query_context->getClientInfo().distributed_depth; + + /// Here we take addresses from destination cluster and assume source table exists on these nodes + for (const auto & replicas : getCluster()->getShardsAddresses()) + { + /// There will be only one replica, because we consider 
each replica as a shard + for (const auto & node : replicas) + { + auto connection = std::make_shared( + node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(), + node.user, node.password, node.quota_key, node.cluster, node.cluster_secret, + "ParallelInsertSelectInititiator", + node.compression, + node.secure + ); + + auto remote_query_executor = std::make_shared( + connection, + new_query_str, + Block{}, + query_context, + /*throttler=*/nullptr, + Scalars{}, + Tables{}, + QueryProcessingStage::Complete, + extension); + + QueryPipeline remote_pipeline(std::make_shared(remote_query_executor, false, settings.async_socket_for_remote)); + remote_pipeline.complete(std::make_shared(remote_query_executor->getHeader())); + + pipeline.addCompletedPipeline(std::move(remote_pipeline)); + } + } + + return pipeline; +} + + +std::optional StorageDistributed::distributedWrite(const ASTInsertQuery & query, ContextPtr local_context) +{ + const Settings & settings = local_context->getSettingsRef(); + if (settings.max_distributed_depth && local_context->getClientInfo().distributed_depth >= settings.max_distributed_depth) + throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); + + auto & select = query.select->as(); + + StoragePtr src_storage; + + /// Distributed write only works in the most trivial case INSERT ... 
SELECT + /// without any unions or joins on the right side + if (select.list_of_selects->children.size() == 1) + { + if (auto * select_query = select.list_of_selects->children.at(0)->as()) + { + JoinedTables joined_tables(Context::createCopy(local_context), *select_query); + + if (joined_tables.tablesCount() == 1) + { + src_storage = joined_tables.getLeftTableStorage(); + } + } + } + + if (!src_storage) + return {}; + + if (auto src_distributed = std::dynamic_pointer_cast(src_storage)) + { + return distributedWriteBetweenDistributedTables(*src_distributed, query, local_context); + } + if (auto src_storage_cluster = std::dynamic_pointer_cast(src_storage)) + { + return distributedWriteFromClusterStorage(*src_storage_cluster, query, local_context); + } + if (local_context->getClientInfo().distributed_depth == 0) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parallel distributed INSERT SELECT is not possible. "\ + "Reason: distributed reading is supported only from Distributed engine or *Cluster table functions, but got {} storage", src_storage->getName()); + } + + return {}; +} + + void StorageDistributed::checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const { auto name_deps = getDependentViewsByColumn(local_context); diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 7cb25ae46ab..3161f4b50f6 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -207,6 +208,9 @@ private: void delayInsertOrThrowIfNeeded() const; + std::optional distributedWriteFromClusterStorage(const IStorageCluster & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context) const; + std::optional distributedWriteBetweenDistributedTables(const StorageDistributed & src_distributed, const ASTInsertQuery & query, ContextPtr context) const; + String remote_database; String remote_table; ASTPtr 
remote_table_function_ptr; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c12e9d0270a..305d44df5a9 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -46,11 +46,13 @@ #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -60,6 +62,7 @@ #include #include #include +#include #include #include @@ -74,6 +77,7 @@ #include #include #include +#include #include #include @@ -162,6 +166,7 @@ namespace ErrorCodes extern const int CONCURRENT_ACCESS_NOT_SUPPORTED; extern const int CHECKSUM_DOESNT_MATCH; extern const int NOT_INITIALIZED; + extern const int TOO_LARGE_DISTRIBUTED_DEPTH; } namespace ActionLocks @@ -4452,6 +4457,106 @@ SinkToStoragePtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, con } +std::optional StorageReplicatedMergeTree::distributedWriteFromClusterStorage(const std::shared_ptr & src_storage_cluster, const ASTInsertQuery & query, ContextPtr local_context) +{ + const auto & settings = local_context->getSettingsRef(); + auto extension = src_storage_cluster->getTaskIteratorExtension(nullptr, local_context); + + /// Here we won't check that the cluster formed from table replicas is a subset of a cluster specified in s3Cluster/hdfsCluster table function + auto src_cluster = src_storage_cluster->getCluster(local_context); + + /// Actually the query doesn't change, we just serialize it to string + String query_str; + { + WriteBufferFromOwnString buf; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); + ast_format_settings.always_quote_identifiers = true; + query.IAST::format(ast_format_settings); + query_str = buf.str(); + } + + QueryPipeline pipeline; + ContextMutablePtr query_context = Context::createCopy(local_context); + ++query_context->getClientInfo().distributed_depth; + + for (const auto & replicas : src_cluster->getShardsAddresses()) + { + /// There 
will be only one replica, because we consider each replica as a shard + for (const auto & node : replicas) + { + auto connection = std::make_shared( + node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(), + node.user, node.password, node.quota_key, node.cluster, node.cluster_secret, + "ParallelInsertSelectInititiator", + node.compression, + node.secure + ); + + auto remote_query_executor = std::make_shared( + connection, + query_str, + Block{}, + query_context, + /*throttler=*/nullptr, + Scalars{}, + Tables{}, + QueryProcessingStage::Complete, + extension); + + QueryPipeline remote_pipeline(std::make_shared(remote_query_executor, false, settings.async_socket_for_remote)); + remote_pipeline.complete(std::make_shared(remote_query_executor->getHeader())); + + pipeline.addCompletedPipeline(std::move(remote_pipeline)); + } + } + + return pipeline; +} + +std::optional StorageReplicatedMergeTree::distributedWrite(const ASTInsertQuery & query, ContextPtr local_context) +{ + /// Do not enable parallel distributed INSERT SELECT in case when query probably comes from another server + if (local_context->getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY) + return {}; + + const Settings & settings = local_context->getSettingsRef(); + if (settings.max_distributed_depth && local_context->getClientInfo().distributed_depth >= settings.max_distributed_depth) + throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); + + auto & select = query.select->as(); + + StoragePtr src_storage; + + if (select.list_of_selects->children.size() == 1) + { + if (auto * select_query = select.list_of_selects->children.at(0)->as()) + { + JoinedTables joined_tables(Context::createCopy(local_context), *select_query); + + if (joined_tables.tablesCount() == 1) + { + src_storage = joined_tables.getLeftTableStorage(); + } + } + } + + if (!src_storage) + return {}; + + if (auto src_distributed = 
std::dynamic_pointer_cast(src_storage)) + { + return distributedWriteFromClusterStorage(src_distributed, query, local_context); + } + else if (local_context->getClientInfo().distributed_depth == 0) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parallel distributed INSERT SELECT is not possible. Reason: distributed " + "reading into Replicated table is supported only from *Cluster table functions, but got {} storage", src_storage->getName()); + } + + return {}; +} + + bool StorageReplicatedMergeTree::optimize( const ASTPtr &, const StorageMetadataPtr &, diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 86d78b788f1..5924819070c 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -139,6 +140,8 @@ public: SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; + std::optional distributedWrite(const ASTInsertQuery & /*query*/, ContextPtr /*context*/) override; + bool optimize( const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, @@ -483,6 +486,8 @@ private: std::mutex last_broken_disks_mutex; std::set last_broken_disks; + static std::optional distributedWriteFromClusterStorage(const std::shared_ptr & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context); + template void foreachActiveParts(Func && func, bool select_sequential_consistency) const; diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 800bce0afde..c8843ccc045 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -51,7 +51,7 @@ StorageS3Cluster::StorageS3Cluster( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, ContextPtr context_) - : IStorage(table_id_) + : IStorageCluster(table_id_) , 
s3_configuration{configuration_.url, configuration_.auth_settings, configuration_.rw_settings, configuration_.headers} , filename(configuration_.url) , cluster_name(configuration_.cluster_name) @@ -101,11 +101,8 @@ Pipe StorageS3Cluster::read( { StorageS3::updateS3Configuration(context, s3_configuration); - auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); - - auto iterator = std::make_shared( - *s3_configuration.client, s3_configuration.uri, query_info.query, virtual_block, context); - auto callback = std::make_shared([iterator]() mutable -> String { return iterator->next(); }); + auto cluster = getCluster(context); + auto extension = getTaskIteratorExtension(query_info.query, context); /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*) Block header = @@ -130,7 +127,6 @@ Pipe StorageS3Cluster::read( node.secure ); - /// For unknown reason global context is passed to IStorage::read() method /// So, task_identifier is passed as constructor argument. It is more obvious. 
auto remote_query_executor = std::make_shared( @@ -142,7 +138,7 @@ Pipe StorageS3Cluster::read( scalars, Tables(), processed_stage, - RemoteQueryExecutor::Extension{.task_iterator = callback}); + extension); pipes.emplace_back(std::make_shared(remote_query_executor, add_agg_info, false)); } @@ -165,6 +161,19 @@ QueryProcessingStage::Enum StorageS3Cluster::getQueryProcessingStage( } +ClusterPtr StorageS3Cluster::getCluster(ContextPtr context) const +{ + return context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); +} + +RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, ContextPtr context) const +{ + auto iterator = std::make_shared( + *s3_configuration.client, s3_configuration.uri, query, virtual_block, context); + auto callback = std::make_shared([iter = std::move(iterator)]() mutable -> String { return iter->next(); }); + return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; +} + NamesAndTypesList StorageS3Cluster::getVirtuals() const { return virtual_columns; diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index e5ca3b58123..deccf4c3421 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -10,6 +10,7 @@ #include "Client/Connection.h" #include #include +#include #include namespace DB @@ -17,7 +18,7 @@ namespace DB class Context; -class StorageS3Cluster : public IStorage +class StorageS3Cluster : public IStorageCluster { public: StorageS3Cluster( @@ -37,9 +38,11 @@ public: NamesAndTypesList getVirtuals() const override; + RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, ContextPtr context) const override; + ClusterPtr getCluster(ContextPtr context) const override; + private: StorageS3::S3Configuration s3_configuration; - String filename; String cluster_name; String format_name; diff --git a/tests/integration/test_s3_cluster/configs/cluster.xml 
b/tests/integration/test_s3_cluster/configs/cluster.xml index 18f15763633..39275e99abd 100644 --- a/tests/integration/test_s3_cluster/configs/cluster.xml +++ b/tests/integration/test_s3_cluster/configs/cluster.xml @@ -20,8 +20,23 @@ + + + + + + s0_0_0 + 9000 + + + s0_0_1 + 9000 + + + + cluster_simple - \ No newline at end of file + diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index 2cbb36fcf06..f1251719faf 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -1,5 +1,9 @@ +from email.errors import HeaderParseError import logging import os +import csv +import shutil +import time import pytest from helpers.cluster import ClickHouseCluster @@ -19,6 +23,21 @@ S3_DATA = [ def create_buckets_s3(cluster): minio = cluster.minio_client + + for file_number in range(100): + file_name = f"data/generated/file_{file_number}.csv" + os.makedirs(os.path.join(SCRIPT_DIR, "data/generated/"), exist_ok=True) + S3_DATA.append(file_name) + with open(os.path.join(SCRIPT_DIR, file_name), "w+", encoding="utf-8") as f: + # a String, b UInt64 + data = [] + + for number in range(100): + data.append([str(number) * 10, number]) + + writer = csv.writer(f) + writer.writerows(data) + for file in S3_DATA: minio.fput_object( bucket_name=cluster.minio_bucket, @@ -34,10 +53,24 @@ def started_cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance( - "s0_0_0", main_configs=["configs/cluster.xml"], with_minio=True + "s0_0_0", + main_configs=["configs/cluster.xml"], + macros={"replica": "node1", "shard": "shard1"}, + with_minio=True, + with_zookeeper=True, + ) + cluster.add_instance( + "s0_0_1", + main_configs=["configs/cluster.xml"], + macros={"replica": "replica2", "shard": "shard1"}, + with_zookeeper=True, + ) + cluster.add_instance( + "s0_1_0", + main_configs=["configs/cluster.xml"], + macros={"replica": "replica1", "shard": "shard2"}, + with_zookeeper=True, ) - 
cluster.add_instance("s0_0_1", main_configs=["configs/cluster.xml"]) - cluster.add_instance("s0_1_0", main_configs=["configs/cluster.xml"]) logging.info("Starting cluster...") cluster.start() @@ -47,6 +80,7 @@ def started_cluster(): yield cluster finally: + shutil.rmtree(os.path.join(SCRIPT_DIR, "data/generated/")) cluster.shutdown() @@ -55,17 +89,17 @@ def test_select_all(started_cluster): pure_s3 = node.query( """ SELECT * from s3( - 'http://minio1:9001/root/data/{clickhouse,database}/*', - 'minio', 'minio123', 'CSV', - 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon)""" ) # print(pure_s3) s3_distibuted = node.query( """ SELECT * from s3Cluster( - 'cluster_simple', - 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', + 'cluster_simple', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon)""" ) # print(s3_distibuted) @@ -78,15 +112,15 @@ def test_count(started_cluster): pure_s3 = node.query( """ SELECT count(*) from s3( - 'http://minio1:9001/root/data/{clickhouse,database}/*', - 'minio', 'minio123', 'CSV', + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')""" ) # print(pure_s3) s3_distibuted = node.query( """ SELECT count(*) from s3Cluster( - 'cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')""" ) @@ -125,13 +159,13 @@ def test_union_all(started_cluster): SELECT 
* FROM ( SELECT * from s3( - 'http://minio1:9001/root/data/{clickhouse,database}/*', - 'minio', 'minio123', 'CSV', - 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') UNION ALL SELECT * from s3( - 'http://minio1:9001/root/data/{clickhouse,database}/*', - 'minio', 'minio123', 'CSV', + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ) ORDER BY (name, value, polygon) @@ -143,13 +177,13 @@ def test_union_all(started_cluster): SELECT * FROM ( SELECT * from s3Cluster( - 'cluster_simple', - 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', + 'cluster_simple', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') UNION ALL SELECT * from s3Cluster( - 'cluster_simple', - 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', + 'cluster_simple', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ) ORDER BY (name, value, polygon) @@ -166,12 +200,12 @@ def test_wrong_cluster(started_cluster): """ SELECT count(*) from s3Cluster( 'non_existent_cluster', - 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') UNION ALL SELECT count(*) from s3Cluster( 'non_existent_cluster', - 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, 
polygon Array(Array(Tuple(Float64, Float64)))') """ ) @@ -184,14 +218,139 @@ def test_ambiguous_join(started_cluster): result = node.query( """ SELECT l.name, r.value from s3Cluster( - 'cluster_simple', - 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', + 'cluster_simple', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') as l JOIN s3Cluster( - 'cluster_simple', - 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', + 'cluster_simple', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') as r ON l.name = r.name """ ) assert "AMBIGUOUS_COLUMN_NAME" not in result + + +def test_distributed_insert_select(started_cluster): + first_replica_first_shard = started_cluster.instances["s0_0_0"] + second_replica_first_shard = started_cluster.instances["s0_0_1"] + first_replica_second_shard = started_cluster.instances["s0_1_0"] + + first_replica_first_shard.query( + """DROP TABLE IF EXISTS insert_select_local ON CLUSTER 'cluster_simple';""" + ) + first_replica_first_shard.query( + """DROP TABLE IF EXISTS insert_select_distributed ON CLUSTER 'cluster_simple';""" + ) + + first_replica_first_shard.query( + """ + CREATE TABLE insert_select_local ON CLUSTER 'cluster_simple' (a String, b UInt64) + ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/insert_select', '{replica}') + ORDER BY (a, b); + """ + ) + + first_replica_first_shard.query( + """ + CREATE TABLE insert_select_distributed ON CLUSTER 'cluster_simple' as insert_select_local + ENGINE = Distributed('cluster_simple', default, insert_select_local, b % 2); + """ + ) + + first_replica_first_shard.query( + """ + INSERT INTO insert_select_distributed SETTINGS insert_distributed_sync=1 SELECT * FROM s3Cluster( + 'cluster_simple', + 
'http://minio1:9001/root/data/generated/*.csv', 'minio', 'minio123', 'CSV','a String, b UInt64' + ) SETTINGS parallel_distributed_insert_select=1, insert_distributed_sync=1; + """ + ) + + for line in ( + first_replica_first_shard.query("""SELECT * FROM insert_select_local;""") + .strip() + .split("\n") + ): + _, b = line.split() + assert int(b) % 2 == 0 + + for line in ( + second_replica_first_shard.query("""SELECT * FROM insert_select_local;""") + .strip() + .split("\n") + ): + _, b = line.split() + assert int(b) % 2 == 0 + + for line in ( + first_replica_second_shard.query("""SELECT * FROM insert_select_local;""") + .strip() + .split("\n") + ): + _, b = line.split() + assert int(b) % 2 == 1 + + first_replica_first_shard.query( + """DROP TABLE IF EXISTS insert_select_local ON CLUSTER 'cluster_simple';""" + ) + first_replica_first_shard.query( + """DROP TABLE IF EXISTS insert_select_distributed ON CLUSTER 'cluster_simple';""" + ) + + +def test_distributed_insert_select_with_replicated(started_cluster): + first_replica_first_shard = started_cluster.instances["s0_0_0"] + second_replica_first_shard = started_cluster.instances["s0_0_1"] + + first_replica_first_shard.query( + """DROP TABLE IF EXISTS insert_select_replicated_local ON CLUSTER 'first_shard';""" + ) + + first_replica_first_shard.query( + """ + CREATE TABLE insert_select_replicated_local ON CLUSTER 'first_shard' (a String, b UInt64) + ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/insert_select_with_replicated', '{replica}') + ORDER BY (a, b); + """ + ) + + for replica in [first_replica_first_shard, second_replica_first_shard]: + replica.query( + """ + SYSTEM STOP FETCHES; + """ + ) + replica.query( + """ + SYSTEM STOP MERGES; + """ + ) + + first_replica_first_shard.query( + """ + INSERT INTO insert_select_replicated_local SELECT * FROM s3Cluster( + 'first_shard', + 'http://minio1:9001/root/data/generated_replicated/*.csv', 'minio', 'minio123', 'CSV','a String, b UInt64' + ) SETTINGS 
parallel_distributed_insert_select=1; + """ + ) + + for replica in [first_replica_first_shard, second_replica_first_shard]: + replica.query( + """ + SYSTEM FLUSH LOGS; + """ + ) + + second = int( + second_replica_first_shard.query( + """SELECT count(*) FROM system.query_log WHERE not is_initial_query and query like '%s3Cluster%';""" + ).strip() + ) + + assert second != 0 + + first_replica_first_shard.query( + """DROP TABLE IF EXISTS insert_select_replicated_local ON CLUSTER 'first_shard';""" + ) From adbaaca2f50194fd504bf3ee768a0365d0abff65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 6 Oct 2022 14:22:44 +0200 Subject: [PATCH 171/266] QOL log improvements (#41947) * Uniformize disk reservation logs * Remove log about destroying stuff that appears all the time * More tweaks on disk reservation logs * Reorder logs in hash join * Remove log that provides little information * Collapse part removal logs Co-authored-by: Sergei Trifonov --- src/Disks/DiskLocal.cpp | 14 ++++++-- .../ObjectStorages/DiskObjectStorage.cpp | 14 ++++++-- src/Disks/StoragePolicy.cpp | 7 ++-- src/Interpreters/Aggregator.cpp | 2 -- src/Interpreters/Cache/FileSegment.cpp | 1 - src/Interpreters/HashJoin.cpp | 3 +- src/Interpreters/HashJoin.h | 6 ++-- src/Storages/MergeTree/MergeTreeData.cpp | 33 +++++++++++++------ 8 files changed, 52 insertions(+), 28 deletions(-) diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 750d08ef80c..afd6a1b7b58 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -230,19 +230,27 @@ std::optional DiskLocal::tryReserve(UInt64 bytes) if (bytes == 0) { - LOG_DEBUG(logger, "Reserving 0 bytes on disk {}", backQuote(name)); + LOG_TRACE(logger, "Reserved 0 bytes on local disk {}", backQuote(name)); ++reservation_count; return {unreserved_space}; } if (unreserved_space >= bytes) { - LOG_DEBUG(logger, "Reserving {} on disk {}, having unreserved {}.", - ReadableSize(bytes), backQuote(name), 
ReadableSize(unreserved_space)); + LOG_TRACE( + logger, + "Reserved {} on local disk {}, having unreserved {}.", + ReadableSize(bytes), + backQuote(name), + ReadableSize(unreserved_space)); ++reservation_count; reserved_bytes += bytes; return {unreserved_space - bytes}; } + else + { + LOG_TRACE(logger, "Could not reserve {} on local disk {}. Not enough unreserved space", ReadableSize(bytes), backQuote(name)); + } return {}; } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index d58d462a8d5..fb13ed7eec8 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -459,19 +459,27 @@ std::optional DiskObjectStorage::tryReserve(UInt64 bytes) if (bytes == 0) { - LOG_TRACE(log, "Reserving 0 bytes on remote_fs disk {}", backQuote(name)); + LOG_TRACE(log, "Reserved 0 bytes on remote disk {}", backQuote(name)); ++reservation_count; return {unreserved_space}; } if (unreserved_space >= bytes) { - LOG_TRACE(log, "Reserving {} on disk {}, having unreserved {}.", - ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); + LOG_TRACE( + log, + "Reserved {} on remote disk {}, having unreserved {}.", + ReadableSize(bytes), + backQuote(name), + ReadableSize(unreserved_space)); ++reservation_count; reserved_bytes += bytes; return {unreserved_space - bytes}; } + else + { + LOG_TRACE(log, "Could not reserve {} on remote disk {}. 
Not enough unreserved space", ReadableSize(bytes), backQuote(name)); + } return {}; } diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 3662a9732b3..10513c6beae 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -212,17 +213,15 @@ UInt64 StoragePolicy::getMaxUnreservedFreeSpace() const ReservationPtr StoragePolicy::reserve(UInt64 bytes, size_t min_volume_index) const { - LOG_TRACE(log, "Reserving bytes {} from volume index {}, total volumes {}", bytes, min_volume_index, volumes.size()); for (size_t i = min_volume_index; i < volumes.size(); ++i) { const auto & volume = volumes[i]; auto reservation = volume->reserve(bytes); if (reservation) - { - LOG_TRACE(log, "Successfully reserved {} bytes on volume index {}", bytes, i); return reservation; - } } + LOG_TRACE(log, "Could not reserve {} from volume index {}, total volumes {}", ReadableSize(bytes), min_volume_index, volumes.size()); + return {}; } diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 4399faac5d2..3d68351110d 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -3275,8 +3275,6 @@ void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result) cons if (result.empty()) return; - LOG_TRACE(log, "Destroying aggregate states"); - /// In what data structure is the data aggregated? 
if (result.type == AggregatedDataVariants::Type::without_key || params.overflow_row) destroyWithoutKey(result); diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 708d60f56dc..cbfc5e50ae4 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -249,7 +249,6 @@ FileSegment::RemoteFileReaderPtr FileSegment::extractRemoteFileReader() return nullptr; } - LOG_TRACE(log, "Extracted reader from file segment"); return std::move(remote_file_reader); } diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index e559977be49..aa606ce1ec2 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -224,6 +224,7 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s , right_sample_block(right_sample_block_) , log(&Poco::Logger::get("HashJoin")) { + LOG_DEBUG(log, "HashJoin. Datatype: {}, kind: {}, strictness: {}", data->type, kind, strictness); LOG_DEBUG(log, "Right sample block: {}", right_sample_block.dumpStructure()); if (isCrossOrComma(kind)) @@ -303,8 +304,6 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s for (auto & maps : data->maps) dataMapInit(maps); - - LOG_DEBUG(log, "Join type: {}, kind: {}, strictness: {}", data->type, kind, strictness); } HashJoin::Type HashJoin::chooseMethod(JoinKind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes) diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 33955333aa2..50544ae9039 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -360,15 +360,15 @@ private: friend class JoinSource; std::shared_ptr table_join; - JoinKind kind; - JoinStrictness strictness; + const JoinKind kind; + const JoinStrictness strictness; /// This join was created from StorageJoin and it is already filled. 
bool from_storage_join = false; bool any_take_last_row; /// Overwrite existing values when encountering the same key again std::optional asof_type; - ASOFJoinInequality asof_inequality; + const ASOFJoinInequality asof_inequality; /// Right table data. StorageJoin shares it between many Join objects. /// Flags that indicate that particular row already used in join. diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f519fd75ecb..4a7d2b2dd63 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -91,6 +91,18 @@ #include #include +#include + +template <> +struct fmt::formatter : fmt::formatter +{ + template + auto format(const DB::DataPartPtr & part, FormatCtx & ctx) const + { + return fmt::formatter::format(part->name, ctx); + } +}; + namespace fs = std::filesystem; @@ -1905,6 +1917,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t ThreadPool pool(num_threads); /// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool. 
+ LOG_DEBUG(log, "Removing {} parts from filesystem: {} (concurrently)", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); for (const DataPartPtr & part : parts_to_remove) { pool.scheduleOrThrowOnError([&, thread_group = CurrentThread::getGroup()] @@ -1912,7 +1925,6 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t if (thread_group) CurrentThread::attachToIfDetached(thread_group); - LOG_DEBUG(log, "Removing part from filesystem {} (concurrently)", part->name); part->remove(); if (part_names_succeed) { @@ -1924,11 +1936,11 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t pool.wait(); } - else + else if (!parts_to_remove.empty()) { + LOG_DEBUG(log, "Removing {} parts from filesystem: {}", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); for (const DataPartPtr & part : parts_to_remove) { - LOG_DEBUG(log, "Removing part from filesystem {}", part->name); part->remove(); if (part_names_succeed) part_names_succeed->insert(part->name); @@ -4911,15 +4923,13 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules( DiskPtr selected_disk) const { expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); - - LOG_TRACE(log, "Trying reserve {} bytes preffering TTL rules", expected_size); ReservationPtr reservation; auto move_ttl_entry = selectTTLDescriptionForTTLInfos(metadata_snapshot->getMoveTTLs(), ttl_infos.moves_ttl, time_of_move, true); if (move_ttl_entry) { - LOG_TRACE(log, "Got move TTL entry, will try to reserver destination for move"); + LOG_TRACE(log, "Trying to reserve {} to apply a TTL rule. 
Will try to reserve in the destination", ReadableSize(expected_size)); SpacePtr destination_ptr = getDestinationForMoveTTL(*move_ttl_entry); bool perform_ttl_move_on_insert = is_insert && destination_ptr && shouldPerformTTLMoveOnInsert(destination_ptr); @@ -4949,11 +4959,9 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules( } else { - LOG_TRACE(log, "Reserving bytes on selected destination"); reservation = destination_ptr->reserve(expected_size); if (reservation) { - LOG_TRACE(log, "Reservation successful"); return reservation; } else @@ -4977,13 +4985,18 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules( // Prefer selected_disk if (selected_disk) { - LOG_DEBUG(log, "Disk for reservation provided: {} (with type {})", selected_disk->getName(), toString(selected_disk->getDataSourceDescription().type)); + LOG_TRACE( + log, + "Trying to reserve {} on the selected disk: {} (with type {})", + ReadableSize(expected_size), + selected_disk->getName(), + toString(selected_disk->getDataSourceDescription().type)); reservation = selected_disk->reserve(expected_size); } if (!reservation) { - LOG_DEBUG(log, "No reservation, reserving using storage policy from min volume index {}", min_volume_index); + LOG_TRACE(log, "Trying to reserve {} using storage policy from min volume index {}", ReadableSize(expected_size), min_volume_index); reservation = getStoragePolicy()->reserve(expected_size, min_volume_index); } From 72015dca4de214e81e0da4523aed781b337ef7d7 Mon Sep 17 00:00:00 2001 From: Jus <40656180+jus1096@users.noreply.github.com> Date: Thu, 6 Oct 2022 16:45:18 +0400 Subject: [PATCH 172/266] add %z offset Add placeholder %z --- docs/ru/sql-reference/functions/date-time-functions.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 897c4b3e86a..a7d2ce49fae 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ 
b/docs/ru/sql-reference/functions/date-time-functions.md @@ -1053,6 +1053,7 @@ formatDateTime(Time, Format[, Timezone]) | %w | ะฝะพะผะตั€ ะดะฝั ะฝะตะดะตะปะธ, ะฝะฐั‡ะธะฝะฐั ั ะฒะพัะบั€ะตัะตะฝัŒั (0-6) | 2 | | %y | ะณะพะด, ะฟะพัะปะตะดะฝะธะต 2 ั†ะธั„ั€ั‹ (00-99) | 18 | | %Y | ะณะพะด, 4 ั†ะธั„ั€ั‹ | 2018 | +| %z | ะกะผะตั‰ะตะฝะธะต ะฒั€ะตะผะตะฝะธ ะพั‚ UTC +HHMM ะธะปะธ -HHMM | -0500 | | %% | ัะธะผะฒะพะป % | % | **ะŸั€ะธะผะตั€** From 696a294ef30ef049cb623d26dacd04f4a34d1ea5 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Thu, 6 Oct 2022 05:48:04 -0700 Subject: [PATCH 173/266] Add null pointer checks --- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 41c378babcd..080f464cf08 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -729,7 +729,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } /// We can have queries like "CREATE TABLE
ENGINE=" if /// supports schema inference (will determine table structure in it's constructor). - else if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(create.storage->engine->name)) // NOLINT + else if (create.storage && create.storage->engine && !StorageFactory::instance().checkIfStorageSupportsSchemaInterface(create.storage->engine->name)) // NOLINT throw Exception("Incorrect CREATE query: required list of column descriptions or AS section or SELECT.", ErrorCodes::INCORRECT_QUERY); /// Even if query has list of columns, canonicalize it (unfold Nested columns). From 655d2b298d089c127042497e4b19928e08bb61c3 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 6 Oct 2022 09:38:30 -0400 Subject: [PATCH 174/266] adjust heading level --- docs/en/getting-started/install.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 922ab7e3141..61303eddab9 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -89,7 +89,7 @@ ClickHouse is developed primarily for the Linux family of operating systems. The ## Self-Managed Install -### Available Installation Options {#available-installation-options} +## Available Installation Options {#available-installation-options} ### From DEB Packages {#install-from-deb-packages} From 6b0ad8564074de7e1090a9877bf2b9e8a3f857fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 6 Oct 2022 16:26:48 +0200 Subject: [PATCH 175/266] Fix build without TSA --- base/base/defines.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/base/defines.h b/base/base/defines.h index 671253ed9e8..786a9c9813f 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -159,9 +159,9 @@ # define TSA_REQUIRES_SHARED(...) 
# define TSA_NO_THREAD_SAFETY_ANALYSIS -# define TSA_SUPPRESS_WARNING_FOR_READ(x) -# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) -# define TSA_READ_ONE_THREAD(x) +# define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() -> const auto & { return (x); }()) +# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() -> auto & { return (x); }()) +# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x) #endif /// A template function for suppressing warnings about unused variables or function results. From 2d96c81f4ce96aaea0b4f446e94dff89c4fa9e4e Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 6 Oct 2022 17:09:20 +0200 Subject: [PATCH 176/266] Fix --- ...chronousReadIndirectBufferFromRemoteFS.cpp | 4 ++- ...ynchronousReadIndirectBufferFromRemoteFS.h | 4 ++- .../IO/ReadIndirectBufferFromRemoteFS.cpp | 29 ++++++++++--------- src/Disks/IO/ReadIndirectBufferFromRemoteFS.h | 4 ++- 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp index 2717826f6ac..e60fea46ed4 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp @@ -4,7 +4,6 @@ #include #include #include -#include namespace CurrentMetrics @@ -42,6 +41,7 @@ AsynchronousReadIndirectBufferFromRemoteFS::AsynchronousReadIndirectBufferFromRe std::shared_ptr impl_, size_t min_bytes_for_seek_) : ReadBufferFromFileBase(settings_.remote_fs_buffer_size, nullptr, 0) + , read_settings(settings_) , reader(reader_) , priority(settings_.priority) , impl(impl_) @@ -125,6 +125,7 @@ void AsynchronousReadIndirectBufferFromRemoteFS::prefetch() return; /// Prefetch even in case hasPendingData() == true. 
+ chassert(prefetch_buffer.size() == read_settings.remote_fs_buffer_size); prefetch_future = asyncReadInto(prefetch_buffer.data(), prefetch_buffer.size()); ProfileEvents::increment(ProfileEvents::RemoteFSPrefetches); } @@ -199,6 +200,7 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() { ProfileEvents::increment(ProfileEvents::RemoteFSUnprefetchedReads); + chassert(memory.size() == read_settings.remote_fs_buffer_size); auto result = asyncReadInto(memory.data(), memory.size()).get(); size = result.size; auto offset = result.offset; diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h index cf7feb416b2..594799b5ca6 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace Poco { class Logger; } @@ -11,7 +12,6 @@ namespace DB { class ReadBufferFromRemoteFSGather; -struct ReadSettings; /** * Reads data from S3/HDFS/Web using stored paths in metadata. 
@@ -64,6 +64,8 @@ private: std::future asyncReadInto(char * data, size_t size); + ReadSettings read_settings; + IAsynchronousReader & reader; Int32 priority; diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp index 26947af23ec..9e688587b05 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp @@ -1,7 +1,6 @@ #include "ReadIndirectBufferFromRemoteFS.h" #include -#include namespace DB @@ -17,6 +16,7 @@ ReadIndirectBufferFromRemoteFS::ReadIndirectBufferFromRemoteFS( std::shared_ptr impl_, const ReadSettings & settings) : ReadBufferFromFileBase(settings.remote_fs_buffer_size, nullptr, 0) , impl(impl_) + , read_settings(settings) { } @@ -92,24 +92,27 @@ off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence) bool ReadIndirectBufferFromRemoteFS::nextImpl() { - /// Transfer current position and working_buffer to actual ReadBuffer - swap(*impl); + chassert(internal_buffer.size() == read_settings.remote_fs_buffer_size); + chassert(file_offset_of_buffer_end <= impl->getFileSize()); - assert(!impl->hasPendingData()); - /// Position and working_buffer will be updated in next() call - auto result = impl->next(); - /// and assigned to current buffer. - swap(*impl); + auto [size, offset] = impl->readInto(internal_buffer.begin(), internal_buffer.size(), file_offset_of_buffer_end, /* ignore */0); - if (result) + chassert(offset <= size); + chassert(size <= internal_buffer.size()); + + if (size) { - file_offset_of_buffer_end += available(); - BufferBase::set(working_buffer.begin() + offset(), available(), 0); + file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd(); + working_buffer = Buffer(internal_buffer.begin() + offset, internal_buffer.begin() + size); } - assert(file_offset_of_buffer_end == impl->file_offset_of_buffer_end); + /// In case of multiple files for the same file in clickhouse (i.e. 
log family) + /// file_offset_of_buffer_end will not match getImplementationBufferOffset() + /// so we use [impl->getImplementationBufferOffset(), impl->getFileSize()] + chassert(file_offset_of_buffer_end >= impl->getImplementationBufferOffset()); + chassert(file_offset_of_buffer_end <= impl->getFileSize()); - return result; + return size; } } diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h index 996e69296a6..6c3d05a8ac8 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h @@ -2,6 +2,7 @@ #include #include +#include #include @@ -9,7 +10,6 @@ namespace DB { class ReadBufferFromRemoteFSGather; -struct ReadSettings; /** * Reads data from S3/HDFS/Web using stored paths in metadata. @@ -40,6 +40,8 @@ private: std::shared_ptr impl; + ReadSettings read_settings; + size_t file_offset_of_buffer_end = 0; }; From c1cc04d44d6d5404df63577623a98c4b37f2fd89 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 6 Oct 2022 20:27:55 +0200 Subject: [PATCH 177/266] Revert #27787 --- src/Storages/AlterCommands.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index d68252679a7..dcd7abae68a 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -755,9 +755,9 @@ bool isMetadataOnlyConversion(const IDataType * from, const IDataType * to) const auto * nullable_from = typeid_cast(from); const auto * nullable_to = typeid_cast(to); - if (nullable_to) + if (nullable_from && nullable_to) { - from = nullable_from ? 
nullable_from->getNestedType().get() : from; + from = nullable_from->getNestedType().get(); to = nullable_to->getNestedType().get(); continue; } From d1886018818acb399e3a266f4dac659bc9359a6d Mon Sep 17 00:00:00 2001 From: BoloniniD Date: Thu, 6 Oct 2022 21:41:26 +0300 Subject: [PATCH 178/266] Fix style --- src/Functions/FunctionsHashing.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index f970c98fe0b..62262faf7c1 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -620,17 +620,17 @@ struct ImplXxHash64 static constexpr bool use_int_hash_for_pods = false; }; - struct ImplBLAKE3 { static constexpr auto name = "BLAKE3"; enum { length = 32 }; #if !USE_BLAKE3 - [[noreturn]] static void apply(const char * begin, const size_t size, unsigned char* out_char_data) { - (void) begin; - (void) size; - (void) out_char_data; + [[noreturn]] static void apply(const char * begin, const size_t size, unsigned char* out_char_data) + { + UNUSED(begin); + UNUSED(size); + UNUSED(out_char_data); throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "BLAKE3 is not available. 
Rust code or BLAKE3 itself may be disabled."); } #else @@ -652,8 +652,6 @@ struct ImplBLAKE3 #endif }; - - template class FunctionStringHashFixedString : public IFunction { From ef62f32dd0576612edd6b21fdd15d5a33756a9c9 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Thu, 6 Oct 2022 14:42:42 -0400 Subject: [PATCH 179/266] Update codespell-ignore-words.list --- utils/check-style/codespell-ignore-words.list | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/codespell-ignore-words.list b/utils/check-style/codespell-ignore-words.list index fc021920041..98a23e8bce9 100644 --- a/utils/check-style/codespell-ignore-words.list +++ b/utils/check-style/codespell-ignore-words.list @@ -21,3 +21,4 @@ rightt iiterator hastable nam +ubuntu From 700deb152f65eb8b39f4badc491836c08b43027a Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 6 Oct 2022 14:47:12 -0400 Subject: [PATCH 180/266] add toolchain to ignore --- utils/check-style/codespell-ignore-words.list | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/codespell-ignore-words.list b/utils/check-style/codespell-ignore-words.list index 98a23e8bce9..f331e222541 100644 --- a/utils/check-style/codespell-ignore-words.list +++ b/utils/check-style/codespell-ignore-words.list @@ -22,3 +22,4 @@ iiterator hastable nam ubuntu +toolchain From e3f341e97845f733258fd34e2bc1532cbddfe571 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 6 Oct 2022 20:02:23 +0000 Subject: [PATCH 181/266] Added test. 
--- .../02456_alter-nullable-column-bag.reference | 1 + .../02456_alter-nullable-column-bag.sql | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 tests/queries/0_stateless/02456_alter-nullable-column-bag.reference create mode 100644 tests/queries/0_stateless/02456_alter-nullable-column-bag.sql diff --git a/tests/queries/0_stateless/02456_alter-nullable-column-bag.reference b/tests/queries/0_stateless/02456_alter-nullable-column-bag.reference new file mode 100644 index 00000000000..fa033ae7677 --- /dev/null +++ b/tests/queries/0_stateless/02456_alter-nullable-column-bag.reference @@ -0,0 +1 @@ +1,"one",1,0 diff --git a/tests/queries/0_stateless/02456_alter-nullable-column-bag.sql b/tests/queries/0_stateless/02456_alter-nullable-column-bag.sql new file mode 100644 index 00000000000..6fab3fa3735 --- /dev/null +++ b/tests/queries/0_stateless/02456_alter-nullable-column-bag.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS column_modify_test; + +CREATE TABLE column_modify_test (id UInt64, val String, other_col UInt64) engine=MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part=0; +INSERT INTO column_modify_test VALUES (1,'one',0); +INSERT INTO column_modify_test VALUES (2,'two',0); + +-- on 21.9 that was done via mutations mechanism +ALTER TABLE column_modify_test MODIFY COLUMN val Nullable(String); + +-- but since 21.10 it only applies that to new part, so old parts keep the old schema +--SELECT * FROM system.mutations; + +INSERT INTO column_modify_test VALUES (3,Null,0); + +--select name, path, type, active, modification_time from system.parts_columns where table='column_modify_test' and column='val'; + +-- till now everythings looks ok +--SELECT * FROM column_modify_test; + +-- now we do mutation. It will affect one of the parts +-- and it what part it will update columns.txt to the latest 'correct' state w/o updating the column file! 
+alter table column_modify_test update other_col=1 where id = 1 SETTINGS mutations_sync=1; + +-- row 1 is damaged now: the column files of val columns & columns.txt is out of sync! +SELECT *, throwIf(val <> 'one') FROM column_modify_test WHERE id = 1 FORMAT CSV; + From fc1de0a56a192f62598d447137457724231f09ee Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Oct 2022 01:10:33 +0000 Subject: [PATCH 182/266] move some functionality to Server::buildProtocolStackFromConfig --- programs/server/Server.cpp | 109 +++++++++++++++++++++---------------- programs/server/Server.h | 9 +++ 2 files changed, 70 insertions(+), 48 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 3154af81ae8..f0e15ea536d 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -85,7 +85,6 @@ #include #include #include -#include #include #include #include @@ -1858,27 +1857,13 @@ int Server::main(const std::vector & /*args*/) return Application::EXIT_OK; } - -void Server::createServers( - Poco::Util::AbstractConfiguration & config, - const Strings & listen_hosts, - const Strings & interserver_listen_hosts, - bool listen_try, - Poco::ThreadPool & server_pool, +std::unique_ptr Server::buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, AsynchronousMetrics & async_metrics, - std::vector & servers, - bool start_servers) + bool & is_secure) { - const Settings & settings = global_context->getSettingsRef(); - - Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(keep_alive_timeout); - - Poco::Util::AbstractConfiguration::Keys protocols; - config.keys("protocols", protocols); - auto create_factory = [&](const std::string & type, const 
std::string & conf_name) -> TCPServerConnectionFactory::Ptr { if (type == "tcp") @@ -1914,6 +1899,61 @@ void Server::createServers( throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); }; + std::string conf_name = "protocols." + protocol; + std::string prefix = conf_name + "."; + std::unordered_set pset {conf_name}; + + auto stack = std::make_unique(*this, conf_name); + + while (true) + { + // if there is no "type" - it's a reference to another protocol and this is just an endpoint + if (config.has(prefix + "type")) + { + std::string type = config.getString(prefix + "type"); + if (type == "tls") + { + if (is_secure) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); + is_secure = true; + } + + stack->append(create_factory(type, conf_name)); + } + + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." + config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + } + + return stack; +} + +void Server::createServers( + Poco::Util::AbstractConfiguration & config, + const Strings & listen_hosts, + const Strings & interserver_listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers) +{ + const Settings & settings = global_context->getSettingsRef(); + + Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(keep_alive_timeout); + + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + for (const auto & protocol : 
protocols) { std::vector hosts; @@ -1926,7 +1966,6 @@ void Server::createServers( { std::string conf_name = "protocols." + protocol; std::string prefix = conf_name + "."; - std::unordered_set pset {conf_name}; if (!config.has(prefix + "port")) continue; @@ -1936,33 +1975,7 @@ void Server::createServers( description = config.getString(prefix + "description"); std::string port_name = prefix + "port"; bool is_secure = false; - auto stack = std::make_unique(*this, conf_name); - - while (true) - { - // if there is no "type" - it's a reference to another protocol and this is just an endpoint - if (config.has(prefix + "type")) - { - std::string type = config.getString(prefix + "type"); - if (type == "tls") - { - if (is_secure) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); - is_secure = true; - } - - stack->append(create_factory(type, conf_name)); - } - - if (!config.has(prefix + "impl")) - break; - - conf_name = "protocols." + config.getString(prefix + "impl"); - prefix = conf_name + "."; - - if (!pset.insert(conf_name).second) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); - } + auto stack = buildProtocolStackFromConfig(config, protocol, http_params, async_metrics, is_secure); if (stack->empty()) throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); diff --git a/programs/server/Server.h b/programs/server/Server.h index 44a5a441e43..53841b1fcd4 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -3,6 +3,8 @@ #include #include +#include +#include /** Server provides three interfaces: * 1. HTTP - simple interface for any applications. 
@@ -77,6 +79,13 @@ private: UInt16 port, [[maybe_unused]] bool secure = false) const; + std::unique_ptr buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, + AsynchronousMetrics & async_metrics, + bool & is_secure); + using CreateServerFunc = std::function; void createServer( Poco::Util::AbstractConfiguration & config, From 997fa5e2953f6b811bd6f8ad5fe44118df598496 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Oct 2022 01:16:22 +0000 Subject: [PATCH 183/266] review suggestions --- src/Server/TCPProtocolStackFactory.h | 2 +- tests/integration/test_composable_protocols/configs/users.xml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index 448b019b849..16b57649a72 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -81,7 +81,7 @@ public: void append(TCPServerConnectionFactory::Ptr factory) { - stack.push_back(factory); + stack.push_back(std::move(factory)); } size_t size() { return stack.size(); } diff --git a/tests/integration/test_composable_protocols/configs/users.xml b/tests/integration/test_composable_protocols/configs/users.xml index 6f94d1696e3..da8425b3695 100644 --- a/tests/integration/test_composable_protocols/configs/users.xml +++ b/tests/integration/test_composable_protocols/configs/users.xml @@ -1,8 +1,6 @@ - 10000000000 - 64999 From 76d6204f994094d1fe579a1f9633bd013841e667 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 6 Oct 2022 14:26:17 +0200 Subject: [PATCH 184/266] JIT compilation migration to LLVM 15 --- src/AggregateFunctions/AggregateFunctionAvg.h | 20 +- .../AggregateFunctionAvgWeighted.h | 7 +- .../AggregateFunctionBitwise.h | 13 +- .../AggregateFunctionCount.h | 16 +- .../AggregateFunctionIf.cpp | 6 +- .../AggregateFunctionMinMaxAny.h | 22 +- .../AggregateFunctionNull.h | 22 +- 
src/AggregateFunctions/AggregateFunctionSum.h | 10 +- src/DataTypes/Native.h | 6 + src/Interpreters/JIT/CHJIT.h | 1 + src/Interpreters/JIT/compileFunction.cpp | 825 ++++++------------ 11 files changed, 320 insertions(+), 628 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index cd7f1c4ea65..ee46a40023d 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -153,10 +153,10 @@ public: auto * numerator_type = toNativeType(b); - auto * numerator_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, numerator_type->getPointerTo()); + auto * numerator_dst_ptr = aggregate_data_dst_ptr; auto * numerator_dst_value = b.CreateLoad(numerator_type, numerator_dst_ptr); - auto * numerator_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, numerator_type->getPointerTo()); + auto * numerator_src_ptr = aggregate_data_src_ptr; auto * numerator_src_value = b.CreateLoad(numerator_type, numerator_src_ptr); auto * numerator_result_value = numerator_type->isIntegerTy() ? 
b.CreateAdd(numerator_dst_value, numerator_src_value) : b.CreateFAdd(numerator_dst_value, numerator_src_value); @@ -164,10 +164,8 @@ public: auto * denominator_type = toNativeType(b); static constexpr size_t denominator_offset = offsetof(Fraction, denominator); - auto * ty_aggregate_data_dst_ptr = llvm::cast(aggregate_data_dst_ptr->getType()->getScalarType())->getElementType(); - auto * denominator_dst_ptr = b.CreatePointerCast(b.CreateConstInBoundsGEP1_64(ty_aggregate_data_dst_ptr, aggregate_data_dst_ptr, denominator_offset), denominator_type->getPointerTo()); - auto * ty_aggregate_data_src_ptr = llvm::cast(aggregate_data_src_ptr->getType()->getScalarType())->getElementType(); - auto * denominator_src_ptr = b.CreatePointerCast(b.CreateConstInBoundsGEP1_64(ty_aggregate_data_src_ptr, aggregate_data_src_ptr, denominator_offset), denominator_type->getPointerTo()); + auto * denominator_dst_ptr = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_dst_ptr, denominator_offset); + auto * denominator_src_ptr = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_src_ptr, denominator_offset); auto * denominator_dst_value = b.CreateLoad(denominator_type, denominator_dst_ptr); auto * denominator_src_value = b.CreateLoad(denominator_type, denominator_src_ptr); @@ -181,13 +179,12 @@ public: llvm::IRBuilder<> & b = static_cast &>(builder); auto * numerator_type = toNativeType(b); - auto * numerator_ptr = b.CreatePointerCast(aggregate_data_ptr, numerator_type->getPointerTo()); + auto * numerator_ptr = aggregate_data_ptr; auto * numerator_value = b.CreateLoad(numerator_type, numerator_ptr); auto * denominator_type = toNativeType(b); static constexpr size_t denominator_offset = offsetof(Fraction, denominator); - auto * ty_aggregate_data_ptr = llvm::cast(aggregate_data_ptr->getType()->getScalarType())->getElementType(); - auto * denominator_ptr = b.CreatePointerCast(b.CreateConstGEP1_32(ty_aggregate_data_ptr, aggregate_data_ptr, denominator_offset), 
denominator_type->getPointerTo()); + auto * denominator_ptr = b.CreateConstGEP1_32(b.getInt8Ty(), aggregate_data_ptr, denominator_offset); auto * denominator_value = b.CreateLoad(denominator_type, denominator_ptr); auto * double_numerator = nativeCast(b, numerator_value, b.getDoubleTy()); @@ -306,7 +303,7 @@ public: auto * numerator_type = toNativeType(b); - auto * numerator_ptr = b.CreatePointerCast(aggregate_data_ptr, numerator_type->getPointerTo()); + auto * numerator_ptr = aggregate_data_ptr; auto * numerator_value = b.CreateLoad(numerator_type, numerator_ptr); auto * value_cast_to_numerator = nativeCast(b, arguments_types[0], argument_values[0], numerator_type); auto * numerator_result_value = numerator_type->isIntegerTy() ? b.CreateAdd(numerator_value, value_cast_to_numerator) : b.CreateFAdd(numerator_value, value_cast_to_numerator); @@ -314,8 +311,7 @@ public: auto * denominator_type = toNativeType(b); static constexpr size_t denominator_offset = offsetof(Fraction, denominator); - auto * ty_aggregate_data_ptr = llvm::cast(aggregate_data_ptr->getType()->getScalarType())->getElementType(); - auto * denominator_ptr = b.CreatePointerCast(b.CreateConstGEP1_32(ty_aggregate_data_ptr, aggregate_data_ptr, denominator_offset), denominator_type->getPointerTo()); + auto * denominator_ptr = b.CreateConstGEP1_32(b.getInt8Ty(), aggregate_data_ptr, denominator_offset); auto * denominator_value_updated = b.CreateAdd(b.CreateLoad(denominator_type, denominator_ptr), llvm::ConstantInt::get(denominator_type, 1)); b.CreateStore(denominator_value_updated, denominator_ptr); } diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h index bd9e12b97b6..bc3e3a32a71 100644 --- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h +++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h @@ -60,8 +60,7 @@ public: llvm::IRBuilder<> & b = static_cast &>(builder); auto * numerator_type = toNativeType(b); - - auto * 
numerator_ptr = b.CreatePointerCast(aggregate_data_ptr, numerator_type->getPointerTo()); + auto * numerator_ptr = aggregate_data_ptr; auto * numerator_value = b.CreateLoad(numerator_type, numerator_ptr); auto * argument = nativeCast(b, arguments_types[0], argument_values[0], numerator_type); @@ -74,9 +73,7 @@ public: auto * denominator_type = toNativeType(b); static constexpr size_t denominator_offset = offsetof(Fraction, denominator); - auto * ty_aggregate_data_ptr = llvm::cast(aggregate_data_ptr->getType()->getScalarType())->getElementType(); - auto * denominator_offset_ptr = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, denominator_offset); - auto * denominator_ptr = b.CreatePointerCast(denominator_offset_ptr, denominator_type->getPointerTo()); + auto * denominator_ptr = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_ptr, denominator_offset); auto * weight_cast_to_denominator = nativeCast(b, arguments_types[1], argument_values[1], denominator_type); diff --git a/src/AggregateFunctions/AggregateFunctionBitwise.h b/src/AggregateFunctions/AggregateFunctionBitwise.h index 2b46f86bf30..b8d3bc79007 100644 --- a/src/AggregateFunctions/AggregateFunctionBitwise.h +++ b/src/AggregateFunctions/AggregateFunctionBitwise.h @@ -143,10 +143,7 @@ public: void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override { - llvm::IRBuilder<> & b = static_cast &>(builder); - - auto * return_type = toNativeType(b, getReturnType()); - auto * value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo()); + auto * value_ptr = aggregate_data_ptr; Data::compileCreate(builder, value_ptr); } @@ -156,7 +153,7 @@ public: auto * return_type = toNativeType(b, getReturnType()); - auto * value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo()); + auto * value_ptr = aggregate_data_ptr; auto * value = b.CreateLoad(return_type, value_ptr); const auto & argument_value = 
argument_values[0]; @@ -171,10 +168,10 @@ public: auto * return_type = toNativeType(b, getReturnType()); - auto * value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, return_type->getPointerTo()); + auto * value_dst_ptr = aggregate_data_dst_ptr; auto * value_dst = b.CreateLoad(return_type, value_dst_ptr); - auto * value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, return_type->getPointerTo()); + auto * value_src_ptr = aggregate_data_src_ptr; auto * value_src = b.CreateLoad(return_type, value_src_ptr); auto * result_value = Data::compileUpdate(builder, value_dst, value_src); @@ -187,7 +184,7 @@ public: llvm::IRBuilder<> & b = static_cast &>(builder); auto * return_type = toNativeType(b, getReturnType()); - auto * value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo()); + auto * value_ptr = aggregate_data_ptr; return b.CreateLoad(return_type, value_ptr); } diff --git a/src/AggregateFunctions/AggregateFunctionCount.h b/src/AggregateFunctions/AggregateFunctionCount.h index 3e53190ae8c..6e2c86f065b 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.h +++ b/src/AggregateFunctions/AggregateFunctionCount.h @@ -169,7 +169,7 @@ public: auto * return_type = toNativeType(b, getReturnType()); - auto * count_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo()); + auto * count_value_ptr = aggregate_data_ptr; auto * count_value = b.CreateLoad(return_type, count_value_ptr); auto * updated_count_value = b.CreateAdd(count_value, llvm::ConstantInt::get(return_type, 1)); @@ -182,10 +182,10 @@ public: auto * return_type = toNativeType(b, getReturnType()); - auto * count_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, return_type->getPointerTo()); + auto * count_value_dst_ptr = aggregate_data_dst_ptr; auto * count_value_dst = b.CreateLoad(return_type, count_value_dst_ptr); - auto * count_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, return_type->getPointerTo()); + auto * 
count_value_src_ptr = aggregate_data_src_ptr; auto * count_value_src = b.CreateLoad(return_type, count_value_src_ptr); auto * count_value_dst_updated = b.CreateAdd(count_value_dst, count_value_src); @@ -198,7 +198,7 @@ public: llvm::IRBuilder<> & b = static_cast &>(builder); auto * return_type = toNativeType(b, getReturnType()); - auto * count_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo()); + auto * count_value_ptr = aggregate_data_ptr; return b.CreateLoad(return_type, count_value_ptr); } @@ -316,7 +316,7 @@ public: auto * is_null_value = b.CreateExtractValue(values[0], {1}); auto * increment_value = b.CreateSelect(is_null_value, llvm::ConstantInt::get(return_type, 0), llvm::ConstantInt::get(return_type, 1)); - auto * count_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo()); + auto * count_value_ptr = aggregate_data_ptr; auto * count_value = b.CreateLoad(return_type, count_value_ptr); auto * updated_count_value = b.CreateAdd(count_value, increment_value); @@ -329,10 +329,10 @@ public: auto * return_type = toNativeType(b, getReturnType()); - auto * count_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, return_type->getPointerTo()); + auto * count_value_dst_ptr = aggregate_data_dst_ptr; auto * count_value_dst = b.CreateLoad(return_type, count_value_dst_ptr); - auto * count_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, return_type->getPointerTo()); + auto * count_value_src_ptr = aggregate_data_src_ptr; auto * count_value_src = b.CreateLoad(return_type, count_value_src_ptr); auto * count_value_dst_updated = b.CreateAdd(count_value_dst, count_value_src); @@ -345,7 +345,7 @@ public: llvm::IRBuilder<> & b = static_cast &>(builder); auto * return_type = toNativeType(b, getReturnType()); - auto * count_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo()); + auto * count_value_ptr = aggregate_data_ptr; return b.CreateLoad(return_type, count_value_ptr); } diff 
--git a/src/AggregateFunctions/AggregateFunctionIf.cpp b/src/AggregateFunctions/AggregateFunctionIf.cpp index 9b548e1b3f3..c32454b10e4 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.cpp +++ b/src/AggregateFunctions/AggregateFunctionIf.cpp @@ -207,8 +207,7 @@ public: if constexpr (result_is_nullable) b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr); - auto * ty_aggregate_data_ptr = llvm::cast(aggregate_data_ptr->getType()->getScalarType())->getElementType(); - auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, this->prefix_size); + auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_ptr, this->prefix_size); this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, { removeNullable(nullable_type) }, { wrapped_value }); b.CreateBr(join_block); @@ -420,8 +419,7 @@ public: if constexpr (result_is_nullable) b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr); - auto * ty_aggregate_data_ptr = llvm::cast(aggregate_data_ptr->getType()->getScalarType())->getElementType(); - auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, this->prefix_size); + auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_ptr, this->prefix_size); this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, non_nullable_types, wrapped_values); b.CreateBr(join_block); diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index f8418f7a055..783fa0606b5 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -199,11 +199,7 @@ public: llvm::IRBuilder<> & b = static_cast &>(builder); static constexpr size_t 
value_offset_from_structure = offsetof(SingleValueDataFixed, value); - - auto * type = toNativeType(builder); - auto * ty_aggregate_data_ptr = llvm::cast(aggregate_data_ptr->getType()->getScalarType())->getElementType(); - auto * value_ptr_with_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, value_offset_from_structure); - auto * value_ptr = b.CreatePointerCast(value_ptr_with_offset, type->getPointerTo()); + auto * value_ptr = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_ptr, value_offset_from_structure); return value_ptr; } @@ -222,7 +218,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * has_value_ptr = b.CreatePointerCast(aggregate_data_ptr, b.getInt1Ty()->getPointerTo()); + auto * has_value_ptr = aggregate_data_ptr; b.CreateStore(b.getInt1(true), has_value_ptr); auto * value_ptr = getValuePtrFromAggregateDataPtr(b, aggregate_data_ptr); @@ -240,7 +236,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * has_value_ptr = b.CreatePointerCast(aggregate_data_ptr, b.getInt1Ty()->getPointerTo()); + auto * has_value_ptr = aggregate_data_ptr; auto * has_value_value = b.CreateLoad(b.getInt1Ty(), has_value_ptr); auto * head = b.GetInsertBlock(); @@ -265,10 +261,10 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * has_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, b.getInt1Ty()->getPointerTo()); + auto * has_value_dst_ptr = aggregate_data_dst_ptr; auto * has_value_dst = b.CreateLoad(b.getInt1Ty(), has_value_dst_ptr); - auto * has_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, b.getInt1Ty()->getPointerTo()); + auto * has_value_src_ptr = aggregate_data_src_ptr; auto * has_value_src = b.CreateLoad(b.getInt1Ty(), has_value_src_ptr); auto * head = b.GetInsertBlock(); @@ -298,7 +294,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * has_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, 
b.getInt1Ty()->getPointerTo()); + auto * has_value_src_ptr = aggregate_data_src_ptr; auto * has_value_src = b.CreateLoad(b.getInt1Ty(), has_value_src_ptr); auto * head = b.GetInsertBlock(); @@ -324,7 +320,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * has_value_ptr = b.CreatePointerCast(aggregate_data_ptr, b.getInt1Ty()->getPointerTo()); + auto * has_value_ptr = aggregate_data_ptr; auto * has_value_value = b.CreateLoad(b.getInt1Ty(), has_value_ptr); auto * value = getValueFromAggregateDataPtr(b, aggregate_data_ptr); @@ -371,12 +367,12 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * has_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, b.getInt1Ty()->getPointerTo()); + auto * has_value_dst_ptr = aggregate_data_dst_ptr; auto * has_value_dst = b.CreateLoad(b.getInt1Ty(), has_value_dst_ptr); auto * value_dst = getValueFromAggregateDataPtr(b, aggregate_data_dst_ptr); - auto * has_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, b.getInt1Ty()->getPointerTo()); + auto * has_value_src_ptr = aggregate_data_src_ptr; auto * has_value_src = b.CreateLoad(b.getInt1Ty(), has_value_src_ptr); auto * value_src = getValueFromAggregateDataPtr(b, aggregate_data_src_ptr); diff --git a/src/AggregateFunctions/AggregateFunctionNull.h b/src/AggregateFunctions/AggregateFunctionNull.h index 0af45e96ae4..f3a3f55972f 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.h +++ b/src/AggregateFunctions/AggregateFunctionNull.h @@ -225,8 +225,7 @@ public: if constexpr (result_is_nullable) b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), this->prefix_size, llvm::assumeAligned(this->alignOfData())); - auto * ty_aggregate_data_ptr = llvm::cast(aggregate_data_ptr->getType()->getScalarType())->getElementType(); - auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, this->prefix_size); + auto * 
aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_ptr, this->prefix_size); this->nested_function->compileCreate(b, aggregate_data_ptr_with_prefix_size_offset); } @@ -236,18 +235,16 @@ public: if constexpr (result_is_nullable) { - auto * aggregate_data_is_null_dst_value = b.CreateLoad(aggregate_data_dst_ptr->getType()->getPointerElementType(), aggregate_data_dst_ptr); - auto * aggregate_data_is_null_src_value = b.CreateLoad(aggregate_data_src_ptr->getType()->getPointerElementType(), aggregate_data_src_ptr); + auto * aggregate_data_is_null_dst_value = b.CreateLoad(b.getInt8Ty(), aggregate_data_dst_ptr); + auto * aggregate_data_is_null_src_value = b.CreateLoad(b.getInt8Ty(), aggregate_data_src_ptr); auto * is_src_null = nativeBoolCast(b, std::make_shared(), aggregate_data_is_null_src_value); auto * is_null_result_value = b.CreateSelect(is_src_null, llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_is_null_dst_value); b.CreateStore(is_null_result_value, aggregate_data_dst_ptr); } - auto * ty_aggregate_data_dst_ptr = llvm::cast(aggregate_data_dst_ptr->getType()->getScalarType())->getElementType(); - auto * aggregate_data_dst_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_dst_ptr, aggregate_data_dst_ptr, this->prefix_size); - auto * ty_aggregate_data_src_ptr = llvm::cast(aggregate_data_src_ptr->getType()->getScalarType())->getElementType(); - auto * aggregate_data_src_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_src_ptr, aggregate_data_src_ptr, this->prefix_size); + auto * aggregate_data_dst_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_dst_ptr, this->prefix_size); + auto * aggregate_data_src_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_src_ptr, this->prefix_size); this->nested_function->compileMerge(b, aggregate_data_dst_ptr_with_prefix_size_offset, 
aggregate_data_src_ptr_with_prefix_size_offset); } @@ -281,8 +278,7 @@ public: b.CreateBr(join_block); b.SetInsertPoint(if_not_null); - auto * ty_aggregate_data_ptr = llvm::cast(aggregate_data_ptr->getType()->getScalarType())->getElementType(); - auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, this->prefix_size); + auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_ptr, this->prefix_size); auto * nested_result = this->nested_function->compileGetResult(builder, aggregate_data_ptr_with_prefix_size_offset); b.CreateStore(b.CreateInsertValue(nullable_value, nested_result, {0}), nullable_value_ptr); b.CreateBr(join_block); @@ -378,8 +374,7 @@ public: if constexpr (result_is_nullable) b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr); - auto * ty_aggregate_data_ptr = llvm::cast(aggregate_data_ptr->getType()->getScalarType())->getElementType(); - auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, this->prefix_size); + auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_ptr, this->prefix_size); this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, { removeNullable(nullable_type) }, { wrapped_value }); b.CreateBr(join_block); @@ -603,8 +598,7 @@ public: if constexpr (result_is_nullable) b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr); - auto * ty_aggregate_data_ptr = llvm::cast(aggregate_data_ptr->getType()->getScalarType())->getElementType(); - auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, this->prefix_size); + auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_ptr, this->prefix_size); 
this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, arguments_types, wrapped_values); b.CreateBr(join_block); diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 8e24b288fff..ae6c51134f9 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -559,7 +559,7 @@ public: llvm::IRBuilder<> & b = static_cast &>(builder); auto * return_type = toNativeType(b, getReturnType()); - auto * aggregate_sum_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo()); + auto * aggregate_sum_ptr = aggregate_data_ptr; b.CreateStore(llvm::Constant::getNullValue(return_type), aggregate_sum_ptr); } @@ -570,7 +570,7 @@ public: auto * return_type = toNativeType(b, getReturnType()); - auto * sum_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo()); + auto * sum_value_ptr = aggregate_data_ptr; auto * sum_value = b.CreateLoad(return_type, sum_value_ptr); const auto & argument_type = arguments_types[0]; @@ -588,10 +588,10 @@ public: auto * return_type = toNativeType(b, getReturnType()); - auto * sum_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, return_type->getPointerTo()); + auto * sum_value_dst_ptr = aggregate_data_dst_ptr; auto * sum_value_dst = b.CreateLoad(return_type, sum_value_dst_ptr); - auto * sum_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, return_type->getPointerTo()); + auto * sum_value_src_ptr = aggregate_data_src_ptr; auto * sum_value_src = b.CreateLoad(return_type, sum_value_src_ptr); auto * sum_return_value = sum_value_dst->getType()->isIntegerTy() ? 
b.CreateAdd(sum_value_dst, sum_value_src) : b.CreateFAdd(sum_value_dst, sum_value_src); @@ -603,7 +603,7 @@ public: llvm::IRBuilder<> & b = static_cast &>(builder); auto * return_type = toNativeType(b, getReturnType()); - auto * sum_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo()); + auto * sum_value_ptr = aggregate_data_ptr; return b.CreateLoad(return_type, sum_value_ptr); } diff --git a/src/DataTypes/Native.h b/src/DataTypes/Native.h index ab00ad0e2e0..40086b14a0c 100644 --- a/src/DataTypes/Native.h +++ b/src/DataTypes/Native.h @@ -30,6 +30,12 @@ static inline bool typeIsSigned(const IDataType & type) return data_type.isNativeInt() || data_type.isFloat() || data_type.isEnum(); } +static inline llvm::Type * toNullableType(llvm::IRBuilderBase & builder, llvm::Type * type) +{ + auto * is_null_type = builder.getInt1Ty(); + return llvm::StructType::get(type, is_null_type); +} + static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const IDataType & type) { WhichDataType data_type(type); diff --git a/src/Interpreters/JIT/CHJIT.h b/src/Interpreters/JIT/CHJIT.h index 3efbfa4daf9..cde1129c010 100644 --- a/src/Interpreters/JIT/CHJIT.h +++ b/src/Interpreters/JIT/CHJIT.h @@ -6,6 +6,7 @@ #include #include +#include #include #include diff --git a/src/Interpreters/JIT/compileFunction.cpp b/src/Interpreters/JIT/compileFunction.cpp index cf8abe6c3ee..d3a7eb0cfaa 100644 --- a/src/Interpreters/JIT/compileFunction.cpp +++ b/src/Interpreters/JIT/compileFunction.cpp @@ -13,13 +13,17 @@ namespace { - struct ColumnDataPlaceholder - { - llvm::Value * data_init = nullptr; /// first row - llvm::Value * null_init = nullptr; - llvm::PHINode * data = nullptr; /// current row - llvm::PHINode * null = nullptr; - }; + +struct ColumnDataPlaceholder +{ + /// Pointer to column raw data + llvm::Value * data_ptr = nullptr; + /// Data type of column raw data element + llvm::Type * data_element_type = nullptr; + /// Pointer to null column raw data. 
Data type UInt8 + llvm::Value * null_data_ptr = nullptr; +}; + } namespace ProfileEvents @@ -39,16 +43,17 @@ namespace ErrorCodes ColumnData getColumnData(const IColumn * column) { - ColumnData result; const bool is_const = isColumnConst(*column); if (is_const) throw Exception(ErrorCodes::LOGICAL_ERROR, "Input columns should not be constant"); + ColumnData result; + if (const auto * nullable = typeid_cast(column)) { result.null_data = nullable->getNullMapColumn().getRawData().data(); - column = & nullable->getNestedColumn(); + column = &nullable->getNestedColumn(); } result.data = column->getRawData().data(); @@ -58,92 +63,7 @@ ColumnData getColumnData(const IColumn * column) static void compileFunction(llvm::Module & module, const IFunctionBase & function) { - /** Algorithm is to create a loop that iterate over ColumnDataRowsSize size_t argument and - * over ColumnData data and null_data. On each step compiled expression from function - * will be executed over column data and null_data row. - * - * Example of preudocode of generated instructions of function with 1 input column. - * In case of multiple columns more column_i_data, column_i_null_data is created. 
- * - * void compiled_function(size_t rows_count, ColumnData * columns) - * { - * /// Initialize column values - * - * Column0Type * column_0_data = static_cast(columns[0].data); - * UInt8 * column_0_null_data = static_cast(columns[0].null_data); - * - * /// Initialize other input columns data with indexes < input_columns_count - * - * ResultType * result_column_data = static_cast(columns[input_columns_count].data); - * UInt8 * result_column_null_data = static_cast(columns[input_columns_count].data); - * - * if (rows_count == 0) - * goto end; - * - * /// Loop - * - * size_t counter = 0; - * - * loop: - * - * /// Create column values tuple in case of non nullable type it is just column value - * /// In case of nullable type it is tuple of column value and is column row nullable - * - * Column0Tuple column_0_value; - * if (Column0Type is nullable) - * { - * value[0] = column_0_data; - * value[1] = static_cast(column_1_null_data); - * } - * else - * { - * value[0] = column_0_data - * } - * - * /// Initialize other input column values tuple with indexes < input_columns_count - * /// execute_compiled_expressions function takes input columns values and must return single result value - * - * if (ResultType is nullable) - * { - * (ResultType, bool) result_column_value = execute_compiled_expressions(column_0_value, ...); - * *result_column_data = result_column_value[0]; - * *result_column_null_data = static_cast(result_column_value[1]); - * } - * else - * { - * ResultType result_column_value = execute_compiled_expressions(column_0_value, ...); - * *result_column_data = result_column_value; - * } - * - * /// Increment input and result column current row pointer - * - * ++column_0_data; - * if (Column 0 type is nullable) - * { - * ++column_0_null_data; - * } - * - * ++result_column_data; - * if (ResultType is nullable) - * { - * ++result_column_null_data; - * } - * - * /// Increment loop counter and check if we should exit. 
- * - * ++counter; - * if (counter == rows_count) - * goto end; - * else - * goto loop; - * - * /// End - * end: - * return; - * } - */ - - const auto & arg_types = function.getArgumentTypes(); + const auto & function_argument_types = function.getArgumentTypes(); llvm::IRBuilder<> b(module.getContext()); auto * size_type = b.getIntNTy(sizeof(size_t) * 8); @@ -162,13 +82,14 @@ static void compileFunction(llvm::Module & module, const IFunctionBase & functio auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", func); b.SetInsertPoint(entry); - std::vector columns(arg_types.size() + 1); - for (size_t i = 0; i <= arg_types.size(); ++i) + std::vector columns(function_argument_types.size() + 1); + for (size_t i = 0; i <= function_argument_types.size(); ++i) { - const auto & type = i == arg_types.size() ? function.getResultType() : arg_types[i]; + const auto & function_argument_type = i == function_argument_types.size() ? function.getResultType() : function_argument_types[i]; auto * data = b.CreateLoad(data_type, b.CreateConstInBoundsGEP1_64(data_type, columns_arg, i)); - columns[i].data_init = b.CreatePointerCast(b.CreateExtractValue(data, {0}), toNativeType(b, removeNullable(type))->getPointerTo()); - columns[i].null_init = type->isNullable() ? b.CreateExtractValue(data, {1}) : nullptr; + columns[i].data_ptr = b.CreateExtractValue(data, {0}); + columns[i].data_element_type = toNativeType(b, removeNullable(function_argument_type)); + columns[i].null_data_ptr = function_argument_type->isNullable() ? 
b.CreateExtractValue(data, {1}) : nullptr; } /// Initialize loop @@ -179,74 +100,61 @@ static void compileFunction(llvm::Module & module, const IFunctionBase & functio b.SetInsertPoint(loop); + /// Loop + auto * counter_phi = b.CreatePHI(rows_count_arg->getType(), 2); counter_phi->addIncoming(llvm::ConstantInt::get(size_type, 0), entry); - for (auto & col : columns) - { - col.data = b.CreatePHI(col.data_init->getType(), 2); - col.data->addIncoming(col.data_init, entry); - if (col.null_init) - { - col.null = b.CreatePHI(col.null_init->getType(), 2); - col.null->addIncoming(col.null_init, entry); - } - } - /// Initialize column row values Values arguments; - arguments.reserve(arg_types.size()); + arguments.reserve(function_argument_types.size()); - for (size_t i = 0; i < arg_types.size(); ++i) + for (size_t i = 0; i < function_argument_types.size(); ++i) { auto & column = columns[i]; - const auto & type = arg_types[i]; + const auto & type = function_argument_types[i]; + + auto * column_data_ptr = column.data_ptr; + auto * column_element_value = b.CreateLoad(column.data_element_type, b.CreateGEP(column.data_element_type, column_data_ptr, counter_phi)); - auto * value = b.CreateLoad(toNativeType(b, removeNullable(type)), column.data); if (!type->isNullable()) { - arguments.emplace_back(value); + arguments.emplace_back(column_element_value); continue; } - auto * is_null = b.CreateICmpNE(b.CreateLoad(b.getInt8Ty(), column.null), b.getInt8(0)); - auto * nullable_unitilized = llvm::Constant::getNullValue(toNativeType(b, type)); - auto * nullable_value = b.CreateInsertValue(b.CreateInsertValue(nullable_unitilized, value, {0}), is_null, {1}); + auto * column_is_null_element_value = b.CreateLoad(b.getInt8Ty(), b.CreateGEP(b.getInt8Ty(), column.null_data_ptr, counter_phi)); + auto * is_null = b.CreateICmpNE(column_is_null_element_value, b.getInt8(0)); + auto * nullable_unitialized = llvm::Constant::getNullValue(toNullableType(b, column.data_element_type)); + auto * 
nullable_value = b.CreateInsertValue(b.CreateInsertValue(nullable_unitialized, column_element_value, {0}), is_null, {1}); arguments.emplace_back(nullable_value); } /// Compile values for column rows and store compiled value in result column auto * result = function.compile(b, std::move(arguments)); - if (columns.back().null) + auto * result_column_element_ptr = b.CreateGEP(columns.back().data_element_type, columns.back().data_ptr, counter_phi); + + if (columns.back().null_data_ptr) { - b.CreateStore(b.CreateExtractValue(result, {0}), columns.back().data); - b.CreateStore(b.CreateSelect(b.CreateExtractValue(result, {1}), b.getInt8(1), b.getInt8(0)), columns.back().null); + b.CreateStore(b.CreateExtractValue(result, {0}), result_column_element_ptr); + auto * result_column_is_null_element_ptr = b.CreateGEP(b.getInt8Ty(), columns.back().null_data_ptr, counter_phi); + auto * is_result_column_element_null = b.CreateSelect(b.CreateExtractValue(result, {1}), b.getInt8(1), b.getInt8(0)); + b.CreateStore(is_result_column_element_null, result_column_is_null_element_ptr); } else { - b.CreateStore(result, columns.back().data); + b.CreateStore(result, result_column_element_ptr); } /// End of loop - auto * cur_block = b.GetInsertBlock(); - for (auto & col : columns) - { - auto * ty_data = llvm::cast(col.data->getType()->getScalarType())->getElementType(); - col.data->addIncoming(b.CreateConstInBoundsGEP1_64(ty_data, col.data, 1), cur_block); - if (col.null) - { - auto * ty_null = llvm::cast(col.null->getType()->getScalarType())->getElementType(); - col.null->addIncoming(b.CreateConstInBoundsGEP1_64(ty_null, col.null, 1), cur_block); - } - } + auto * current_block = b.GetInsertBlock(); + auto * incremeted_counter = b.CreateAdd(counter_phi, llvm::ConstantInt::get(size_type, 1)); + counter_phi->addIncoming(incremeted_counter, current_block); - auto * value = b.CreateAdd(counter_phi, llvm::ConstantInt::get(size_type, 1)); - counter_phi->addIncoming(value, cur_block); - - 
b.CreateCondBr(b.CreateICmpEQ(value, rows_count_arg), end, loop); + b.CreateCondBr(b.CreateICmpEQ(incremeted_counter, rows_count_arg), end, loop); b.SetInsertPoint(end); b.CreateRetVoid(); @@ -292,32 +200,46 @@ static void compileCreateAggregateStatesFunctions(llvm::Module & module, const s auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", create_aggregate_states_function); b.SetInsertPoint(entry); - std::vector columns(functions.size()); for (const auto & function_to_compile : functions) { size_t aggregate_function_offset = function_to_compile.aggregate_data_offset; + auto * aggregation_place_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_arg, aggregate_function_offset); + const auto * aggregate_function = function_to_compile.function; - auto * ty_aggregate_data_place_arg = llvm::cast(aggregate_data_place_arg->getType()->getScalarType())->getElementType(); - auto * aggregation_place_with_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_place_arg, aggregate_data_place_arg, aggregate_function_offset); aggregate_function->compileCreate(b, aggregation_place_with_offset); } b.CreateRetVoid(); } -static void compileAddIntoAggregateStatesFunctions(llvm::Module & module, const std::vector & functions, const std::string & name) +enum class AddIntoAggregateStatesPlacesArgumentType +{ + SinglePlace, + MultiplePlaces, +}; + +static void compileAddIntoAggregateStatesFunctions(llvm::Module & module, + const std::vector & functions, + const std::string & name, + AddIntoAggregateStatesPlacesArgumentType places_argument_type) { auto & context = module.getContext(); llvm::IRBuilder<> b(context); auto * size_type = b.getIntNTy(sizeof(size_t) * 8); - auto * places_type = b.getInt8Ty()->getPointerTo()->getPointerTo(); - auto * column_data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy()); + llvm::Type * places_type = nullptr; - auto * aggregate_loop_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { 
size_type, size_type, column_data_type->getPointerTo(), places_type }, false); - auto * aggregate_loop_func_definition = llvm::Function::Create(aggregate_loop_func_declaration, llvm::Function::ExternalLinkage, name, module); + if (places_argument_type == AddIntoAggregateStatesPlacesArgumentType::MultiplePlaces) + places_type = b.getInt8Ty()->getPointerTo()->getPointerTo(); + else + places_type = b.getInt8Ty()->getPointerTo(); - auto * arguments = aggregate_loop_func_definition->args().begin(); + auto * column_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy()); + + auto * add_into_aggregate_states_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { size_type, size_type, column_type->getPointerTo(), places_type }, false); + auto * add_into_aggregate_states_func = llvm::Function::Create(add_into_aggregate_states_func_declaration, llvm::Function::ExternalLinkage, name, module); + + auto * arguments = add_into_aggregate_states_func->args().begin(); llvm::Value * row_start_arg = arguments++; llvm::Value * row_end_arg = arguments++; llvm::Value * columns_arg = arguments++; @@ -325,41 +247,30 @@ static void compileAddIntoAggregateStatesFunctions(llvm::Module & module, const /// Initialize ColumnDataPlaceholder llvm representation of ColumnData - auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", aggregate_loop_func_definition); + auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", add_into_aggregate_states_func); b.SetInsertPoint(entry); - llvm::IRBuilder<> entry_builder(entry); - auto * ty_places_arg = llvm::cast(places_arg->getType()->getScalarType())->getElementType(); - auto * places_start_arg = entry_builder.CreateInBoundsGEP(ty_places_arg, places_arg, row_start_arg); - std::vector columns; size_t previous_columns_size = 0; for (const auto & function : functions) { auto argument_types = function.function->getArgumentTypes(); + size_t function_arguments_size = argument_types.size(); ColumnDataPlaceholder 
data_placeholder; - size_t function_arguments_size = argument_types.size(); - for (size_t column_argument_index = 0; column_argument_index < function_arguments_size; ++column_argument_index) { const auto & argument_type = argument_types[column_argument_index]; - auto * data = b.CreateLoad(column_data_type, b.CreateConstInBoundsGEP1_64(column_data_type, columns_arg, previous_columns_size + column_argument_index)); - data_placeholder.data_init = b.CreatePointerCast(b.CreateExtractValue(data, {0}), toNativeType(b, removeNullable(argument_type))->getPointerTo()); - auto * ty_data_init = llvm::cast(data_placeholder.data_init->getType()->getScalarType())->getElementType(); - data_placeholder.data_init = entry_builder.CreateInBoundsGEP(ty_data_init, data_placeholder.data_init, row_start_arg); + auto * data = b.CreateLoad(column_type, b.CreateConstInBoundsGEP1_64(column_type, columns_arg, previous_columns_size + column_argument_index)); + + data_placeholder.data_ptr = b.CreateExtractValue(data, {0}); + data_placeholder.data_element_type = toNativeType(b, removeNullable(argument_type)); + if (argument_type->isNullable()) - { - data_placeholder.null_init = b.CreateExtractValue(data, {1}); - auto * ty_null_init = llvm::cast(data_placeholder.null_init->getType()->getScalarType())->getElementType(); - data_placeholder.null_init = entry_builder.CreateInBoundsGEP(ty_null_init, data_placeholder.null_init, row_start_arg); - } - else - { - data_placeholder.null_init = nullptr; - } + data_placeholder.null_data_ptr = b.CreateExtractValue(data, {1}); + columns.emplace_back(data_placeholder); } @@ -368,238 +279,70 @@ static void compileAddIntoAggregateStatesFunctions(llvm::Module & module, const /// Initialize loop - auto * end = llvm::BasicBlock::Create(b.getContext(), "end", aggregate_loop_func_definition); - auto * loop = llvm::BasicBlock::Create(b.getContext(), "loop", aggregate_loop_func_definition); + auto * end = llvm::BasicBlock::Create(b.getContext(), "end", 
add_into_aggregate_states_func); + auto * loop = llvm::BasicBlock::Create(b.getContext(), "loop", add_into_aggregate_states_func); b.CreateCondBr(b.CreateICmpEQ(row_start_arg, row_end_arg), end, loop); b.SetInsertPoint(loop); + /// Loop + auto * counter_phi = b.CreatePHI(row_start_arg->getType(), 2); counter_phi->addIncoming(row_start_arg, entry); - auto * places_phi = b.CreatePHI(places_start_arg->getType(), 2); - places_phi->addIncoming(places_start_arg, entry); + llvm::Value * aggregation_place = nullptr; - for (auto & col : columns) - { - col.data = b.CreatePHI(col.data_init->getType(), 2); - col.data->addIncoming(col.data_init, entry); - - if (col.null_init) - { - col.null = b.CreatePHI(col.null_init->getType(), 2); - col.null->addIncoming(col.null_init, entry); - } - } - - auto * aggregation_place = b.CreateLoad(b.getInt8Ty()->getPointerTo(), places_phi); + if (places_argument_type == AddIntoAggregateStatesPlacesArgumentType::MultiplePlaces) + aggregation_place = b.CreateLoad(b.getInt8Ty()->getPointerTo(), b.CreateGEP(b.getInt8Ty()->getPointerTo(), places_arg, counter_phi)); + else + aggregation_place = places_arg; + std::vector function_arguments_values; previous_columns_size = 0; + for (const auto & function : functions) { - size_t aggregate_function_offset = function.aggregate_data_offset; - const auto * aggregate_function_ptr = function.function; - auto arguments_types = function.function->getArgumentTypes(); - std::vector arguments_values; - size_t function_arguments_size = arguments_types.size(); - arguments_values.resize(function_arguments_size); for (size_t column_argument_index = 0; column_argument_index < function_arguments_size; ++column_argument_index) { - auto * column_argument_data = columns[previous_columns_size + column_argument_index].data; - auto * column_argument_null_data = columns[previous_columns_size + column_argument_index].null; - + auto & column = columns[previous_columns_size + column_argument_index]; auto & argument_type = 
arguments_types[column_argument_index]; - auto * value = b.CreateLoad(toNativeType(b, removeNullable(argument_type)), column_argument_data); + auto * column_data_element = b.CreateLoad(column.data_element_type, b.CreateGEP(column.data_element_type, column.data_ptr, counter_phi)); + if (!argument_type->isNullable()) { - arguments_values[column_argument_index] = value; + function_arguments_values.push_back(column_data_element); continue; } - auto * is_null = b.CreateICmpNE(b.CreateLoad(b.getInt8Ty(), column_argument_null_data), b.getInt8(0)); - auto * nullable_unitilized = llvm::Constant::getNullValue(toNativeType(b, argument_type)); - auto * nullable_value = b.CreateInsertValue(b.CreateInsertValue(nullable_unitilized, value, {0}), is_null, {1}); - arguments_values[column_argument_index] = nullable_value; + auto * column_null_data_with_offset = b.CreateGEP(b.getInt8Ty(), column.null_data_ptr, counter_phi); + auto * is_null = b.CreateICmpNE(b.CreateLoad(b.getInt8Ty(), column_null_data_with_offset), b.getInt8(0)); + auto * nullable_unitialized = llvm::Constant::getNullValue(toNullableType(b, column.data_element_type)); + auto * first_insert = b.CreateInsertValue(nullable_unitialized, column_data_element, {0}); + auto * nullable_value = b.CreateInsertValue(first_insert, is_null, {1}); + function_arguments_values.push_back(nullable_value); } - auto * ty_aggregation_place = llvm::cast(aggregation_place->getType()->getScalarType())->getElementType(); - auto * aggregation_place_with_offset = b.CreateConstInBoundsGEP1_64(ty_aggregation_place, aggregation_place, aggregate_function_offset); - aggregate_function_ptr->compileAdd(b, aggregation_place_with_offset, arguments_types, arguments_values); + size_t aggregate_function_offset = function.aggregate_data_offset; + auto * aggregation_place_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregation_place, aggregate_function_offset); + + const auto * aggregate_function_ptr = function.function; + 
aggregate_function_ptr->compileAdd(b, aggregation_place_with_offset, arguments_types, function_arguments_values); + + function_arguments_values.clear(); previous_columns_size += function_arguments_size; } /// End of loop - auto * cur_block = b.GetInsertBlock(); - for (auto & col : columns) - { - auto * ty_data = llvm::cast(col.data->getType()->getScalarType())->getElementType(); - col.data->addIncoming(b.CreateConstInBoundsGEP1_64(ty_data, col.data, 1), cur_block); - - if (col.null) - { - auto * ty_null = llvm::cast(col.null->getType()->getScalarType())->getElementType(); - col.null->addIncoming(b.CreateConstInBoundsGEP1_64(ty_null, col.null, 1), cur_block); - } - } - - auto * ty_places_phi = llvm::cast(places_phi->getType()->getScalarType())->getElementType(); - places_phi->addIncoming(b.CreateConstInBoundsGEP1_64(ty_places_phi, places_phi, 1), cur_block); - + auto * current_block = b.GetInsertBlock(); auto * value = b.CreateAdd(counter_phi, llvm::ConstantInt::get(size_type, 1)); - counter_phi->addIncoming(value, cur_block); - - b.CreateCondBr(b.CreateICmpEQ(value, row_end_arg), end, loop); - - b.SetInsertPoint(end); - b.CreateRetVoid(); -} - -static void compileAddIntoAggregateStatesFunctionsSinglePlace(llvm::Module & module, const std::vector & functions, const std::string & name) -{ - auto & context = module.getContext(); - llvm::IRBuilder<> b(context); - - auto * size_type = b.getIntNTy(sizeof(size_t) * 8); - auto * places_type = b.getInt8Ty()->getPointerTo(); - auto * column_data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy()); - - auto * aggregate_loop_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { size_type, size_type, column_data_type->getPointerTo(), places_type }, false); - auto * aggregate_loop_func_definition = llvm::Function::Create(aggregate_loop_func_declaration, llvm::Function::ExternalLinkage, name, module); - - auto * arguments = aggregate_loop_func_definition->args().begin(); - llvm::Value * row_start_arg = 
arguments++; - llvm::Value * row_end_arg = arguments++; - llvm::Value * columns_arg = arguments++; - llvm::Value * place_arg = arguments++; - - /// Initialize ColumnDataPlaceholder llvm representation of ColumnData - - auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", aggregate_loop_func_definition); - b.SetInsertPoint(entry); - - llvm::IRBuilder<> entry_builder(entry); - - std::vector columns; - size_t previous_columns_size = 0; - - for (const auto & function : functions) - { - auto argument_types = function.function->getArgumentTypes(); - - ColumnDataPlaceholder data_placeholder; - - size_t function_arguments_size = argument_types.size(); - - for (size_t column_argument_index = 0; column_argument_index < function_arguments_size; ++column_argument_index) - { - const auto & argument_type = argument_types[column_argument_index]; - auto * data = b.CreateLoad(column_data_type, b.CreateConstInBoundsGEP1_64(column_data_type, columns_arg, previous_columns_size + column_argument_index)); - data_placeholder.data_init = b.CreatePointerCast(b.CreateExtractValue(data, {0}), toNativeType(b, removeNullable(argument_type))->getPointerTo()); - auto * ty_data_init = llvm::cast(data_placeholder.data_init->getType()->getScalarType())->getElementType(); - data_placeholder.data_init = entry_builder.CreateInBoundsGEP(ty_data_init, data_placeholder.data_init, row_start_arg); - if (argument_type->isNullable()) - { - data_placeholder.null_init = b.CreateExtractValue(data, {1}); - auto * ty_null_init = llvm::cast(data_placeholder.null_init->getType()->getScalarType())->getElementType(); - data_placeholder.null_init = entry_builder.CreateInBoundsGEP(ty_null_init, data_placeholder.null_init, row_start_arg); - } - else - { - data_placeholder.null_init = nullptr; - } - columns.emplace_back(data_placeholder); - } - - previous_columns_size += function_arguments_size; - } - - /// Initialize loop - - auto * end = llvm::BasicBlock::Create(b.getContext(), "end", 
aggregate_loop_func_definition); - auto * loop = llvm::BasicBlock::Create(b.getContext(), "loop", aggregate_loop_func_definition); - - b.CreateCondBr(b.CreateICmpEQ(row_start_arg, row_end_arg), end, loop); - - b.SetInsertPoint(loop); - - auto * counter_phi = b.CreatePHI(row_start_arg->getType(), 2); - counter_phi->addIncoming(row_start_arg, entry); - - for (auto & col : columns) - { - col.data = b.CreatePHI(col.data_init->getType(), 2); - col.data->addIncoming(col.data_init, entry); - - if (col.null_init) - { - col.null = b.CreatePHI(col.null_init->getType(), 2); - col.null->addIncoming(col.null_init, entry); - } - } - - previous_columns_size = 0; - for (const auto & function : functions) - { - size_t aggregate_function_offset = function.aggregate_data_offset; - const auto * aggregate_function_ptr = function.function; - - auto arguments_types = function.function->getArgumentTypes(); - std::vector arguments_values; - - size_t function_arguments_size = arguments_types.size(); - arguments_values.resize(function_arguments_size); - - for (size_t column_argument_index = 0; column_argument_index < function_arguments_size; ++column_argument_index) - { - auto * column_argument_data = columns[previous_columns_size + column_argument_index].data; - auto * column_argument_null_data = columns[previous_columns_size + column_argument_index].null; - - auto & argument_type = arguments_types[column_argument_index]; - - auto * value = b.CreateLoad(toNativeType(b, removeNullable(argument_type)), column_argument_data); - if (!argument_type->isNullable()) - { - arguments_values[column_argument_index] = value; - continue; - } - - auto * is_null = b.CreateICmpNE(b.CreateLoad(b.getInt8Ty(), column_argument_null_data), b.getInt8(0)); - auto * nullable_unitilized = llvm::Constant::getNullValue(toNativeType(b, argument_type)); - auto * nullable_value = b.CreateInsertValue(b.CreateInsertValue(nullable_unitilized, value, {0}), is_null, {1}); - arguments_values[column_argument_index] = 
nullable_value; - } - - auto * ty_place_arg = llvm::cast(place_arg->getType()->getScalarType())->getElementType(); - auto * aggregation_place_with_offset = b.CreateConstInBoundsGEP1_64(ty_place_arg, place_arg, aggregate_function_offset); - aggregate_function_ptr->compileAdd(b, aggregation_place_with_offset, arguments_types, arguments_values); - - previous_columns_size += function_arguments_size; - } - - /// End of loop - - auto * cur_block = b.GetInsertBlock(); - for (auto & col : columns) - { - auto * ty_data = llvm::cast(col.data->getType()->getScalarType())->getElementType(); - col.data->addIncoming(b.CreateConstInBoundsGEP1_64(ty_data, col.data, 1), cur_block); - - if (col.null) - { - auto * ty_null = llvm::cast(col.null->getType()->getScalarType())->getElementType(); - col.null->addIncoming(b.CreateConstInBoundsGEP1_64(ty_null, col.null, 1), cur_block); - } - } - - auto * value = b.CreateAdd(counter_phi, llvm::ConstantInt::get(size_type, 1)); - counter_phi->addIncoming(value, cur_block); + counter_phi->addIncoming(value, current_block); b.CreateCondBr(b.CreateICmpEQ(value, row_end_arg), end, loop); @@ -609,30 +352,27 @@ static void compileAddIntoAggregateStatesFunctionsSinglePlace(llvm::Module & mod static void compileMergeAggregatesStates(llvm::Module & module, const std::vector & functions, const std::string & name) { - auto & context = module.getContext(); - llvm::IRBuilder<> b(context); + llvm::IRBuilder<> b(module.getContext()); - auto * aggregate_data_places_type = b.getInt8Ty()->getPointerTo(); - auto * aggregate_loop_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { aggregate_data_places_type, aggregate_data_places_type }, false); - auto * aggregate_loop_func = llvm::Function::Create(aggregate_loop_func_declaration, llvm::Function::ExternalLinkage, name, module); + auto * aggregate_data_place_type = b.getInt8Ty()->getPointerTo(); + auto * merge_aggregates_states_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { 
aggregate_data_place_type, aggregate_data_place_type }, false); + auto * merge_aggregates_states_func = llvm::Function::Create(merge_aggregates_states_func_declaration, llvm::Function::ExternalLinkage, name, module); - auto * arguments = aggregate_loop_func->args().begin(); + auto * arguments = merge_aggregates_states_func->args().begin(); llvm::Value * aggregate_data_place_dst_arg = arguments++; llvm::Value * aggregate_data_place_src_arg = arguments++; - auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", aggregate_loop_func); + auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", merge_aggregates_states_func); b.SetInsertPoint(entry); for (const auto & function_to_compile : functions) { size_t aggregate_function_offset = function_to_compile.aggregate_data_offset; + + auto * aggregate_data_place_merge_dst_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_dst_arg, aggregate_function_offset); + auto * aggregate_data_place_merge_src_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_src_arg, aggregate_function_offset); + const auto * aggregate_function_ptr = function_to_compile.function; - - auto * ty_aggregate_data_place_dst_arg = llvm::cast(aggregate_data_place_dst_arg->getType()->getScalarType())->getElementType(); - auto * aggregate_data_place_merge_dst_with_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_place_dst_arg, aggregate_data_place_dst_arg, aggregate_function_offset); - auto * ty_aggregate_data_place_src_arg = llvm::cast(aggregate_data_place_src_arg->getType()->getScalarType())->getElementType(); - auto * aggregate_data_place_merge_src_with_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_place_src_arg, aggregate_data_place_src_arg, aggregate_function_offset); - aggregate_function_ptr->compileMerge(b, aggregate_data_place_merge_dst_with_offset, aggregate_data_place_merge_src_with_offset); } @@ -646,44 +386,37 @@ static void 
compileInsertAggregatesIntoResultColumns(llvm::Module & module, cons auto * size_type = b.getIntNTy(sizeof(size_t) * 8); - auto * column_data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy()); + auto * column_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy()); auto * aggregate_data_places_type = b.getInt8Ty()->getPointerTo()->getPointerTo(); - auto * aggregate_loop_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { size_type, size_type, column_data_type->getPointerTo(), aggregate_data_places_type }, false); - auto * aggregate_loop_func = llvm::Function::Create(aggregate_loop_func_declaration, llvm::Function::ExternalLinkage, name, module); + auto * insert_aggregates_into_result_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { size_type, size_type, column_type->getPointerTo(), aggregate_data_places_type }, false); + auto * insert_aggregates_into_result_func = llvm::Function::Create(insert_aggregates_into_result_func_declaration, llvm::Function::ExternalLinkage, name, module); - auto * arguments = aggregate_loop_func->args().begin(); - llvm::Value * row_start_arg = &*arguments++; - llvm::Value * row_end_arg = &*arguments++; - llvm::Value * columns_arg = &*arguments++; - llvm::Value * aggregate_data_places_arg = &*arguments++; + auto * arguments = insert_aggregates_into_result_func->args().begin(); + llvm::Value * row_start_arg = arguments++; + llvm::Value * row_end_arg = arguments++; + llvm::Value * columns_arg = arguments++; + llvm::Value * aggregate_data_places_arg = arguments++; - auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", aggregate_loop_func); + auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", insert_aggregates_into_result_func); b.SetInsertPoint(entry); - llvm::IRBuilder<> entry_builder(entry); - std::vector columns(functions.size()); for (size_t i = 0; i < functions.size(); ++i) { auto return_type = functions[i].function->getReturnType(); - auto * data = 
b.CreateLoad(column_data_type, b.CreateConstInBoundsGEP1_64(column_data_type, columns_arg, i)); - columns[i].data_init = b.CreatePointerCast(b.CreateExtractValue(data, {0}), toNativeType(b, removeNullable(return_type))->getPointerTo()); - auto * ty_data_init = llvm::cast(columns[i].data_init->getType()->getScalarType())->getElementType(); - columns[i].data_init = entry_builder.CreateInBoundsGEP(ty_data_init, columns[i].data_init, row_start_arg); + auto * data = b.CreateLoad(column_type, b.CreateConstInBoundsGEP1_64(column_type, columns_arg, i)); + + auto * column_data_type = toNativeType(b, removeNullable(return_type)); + + columns[i].data_ptr = b.CreateExtractValue(data, {0}); + columns[i].data_element_type = column_data_type; + if (return_type->isNullable()) - { - columns[i].null_init = b.CreateExtractValue(data, {1}); - auto * ty_null_init = llvm::cast(columns[i].null_init->getType()->getScalarType())->getElementType(); - columns[i].null_init = entry_builder.CreateInBoundsGEP(ty_null_init, columns[i].null_init, row_start_arg); - } - else - { - columns[i].null_init = nullptr; - } + columns[i].null_data_ptr = b.CreateExtractValue(data, {1}); } - auto * end = llvm::BasicBlock::Create(b.getContext(), "end", aggregate_loop_func); - auto * loop = llvm::BasicBlock::Create(b.getContext(), "loop", aggregate_loop_func); + auto * end = llvm::BasicBlock::Create(b.getContext(), "end", insert_aggregates_into_result_func); + auto * loop = llvm::BasicBlock::Create(b.getContext(), "loop", insert_aggregates_into_result_func); b.CreateCondBr(b.CreateICmpEQ(row_start_arg, row_end_arg), end, loop); @@ -692,65 +425,36 @@ static void compileInsertAggregatesIntoResultColumns(llvm::Module & module, cons auto * counter_phi = b.CreatePHI(row_start_arg->getType(), 2); counter_phi->addIncoming(row_start_arg, entry); - auto * aggregate_data_place_phi = b.CreatePHI(aggregate_data_places_type, 2); - aggregate_data_place_phi->addIncoming(aggregate_data_places_arg, entry); - - for (auto & col : 
columns) - { - col.data = b.CreatePHI(col.data_init->getType(), 2); - col.data->addIncoming(col.data_init, entry); - - if (col.null_init) - { - col.null = b.CreatePHI(col.null_init->getType(), 2); - col.null->addIncoming(col.null_init, entry); - } - } + auto * aggregate_data_place = b.CreateLoad(b.getInt8Ty()->getPointerTo(), b.CreateGEP(b.getInt8Ty()->getPointerTo(), aggregate_data_places_arg, counter_phi)); for (size_t i = 0; i < functions.size(); ++i) { size_t aggregate_function_offset = functions[i].aggregate_data_offset; + auto * aggregation_place_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place, aggregate_function_offset); + const auto * aggregate_function_ptr = functions[i].function; - - auto * aggregate_data_place = b.CreateLoad(b.getInt8Ty()->getPointerTo(), aggregate_data_place_phi); - auto * ty_aggregate_data_place = llvm::cast(aggregate_data_place->getType()->getScalarType())->getElementType(); - auto * aggregation_place_with_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_place, aggregate_data_place, aggregate_function_offset); - auto * final_value = aggregate_function_ptr->compileGetResult(b, aggregation_place_with_offset); - if (columns[i].null_init) + auto * result_column_data_element = b.CreateGEP(columns[i].data_element_type, columns[i].data_ptr, counter_phi); + if (columns[i].null_data_ptr) { - b.CreateStore(b.CreateExtractValue(final_value, {0}), columns[i].data); - b.CreateStore(b.CreateSelect(b.CreateExtractValue(final_value, {1}), b.getInt8(1), b.getInt8(0)), columns[i].null); + b.CreateStore(b.CreateExtractValue(final_value, {0}), result_column_data_element); + auto * result_column_is_null_element = b.CreateGEP(b.getInt8Ty(), columns[i].null_data_ptr, counter_phi); + b.CreateStore(b.CreateSelect(b.CreateExtractValue(final_value, {1}), b.getInt8(1), b.getInt8(0)), result_column_is_null_element); } else { - b.CreateStore(final_value, columns[i].data); + b.CreateStore(final_value, 
result_column_data_element); } } /// End of loop - auto * cur_block = b.GetInsertBlock(); - for (auto & col : columns) - { - auto * ty_col_data = llvm::cast(col.data->getType()->getScalarType())->getElementType(); - col.data->addIncoming(b.CreateConstInBoundsGEP1_64(ty_col_data, col.data, 1), cur_block); + auto * current_block = b.GetInsertBlock(); + auto * incremented_counter = b.CreateAdd(counter_phi, llvm::ConstantInt::get(size_type, 1)); + counter_phi->addIncoming(incremented_counter, current_block); - if (col.null) - { - auto * ty_col_null = llvm::cast(col.null->getType()->getScalarType())->getElementType(); - col.null->addIncoming(b.CreateConstInBoundsGEP1_64(ty_col_null, col.null, 1), cur_block); - } - } - - auto * value = b.CreateAdd(counter_phi, llvm::ConstantInt::get(size_type, 1), "", true, true); - counter_phi->addIncoming(value, cur_block); - - auto * ty_aggregate_data_place_phi = llvm::cast(aggregate_data_place_phi->getType()->getScalarType())->getElementType(); - aggregate_data_place_phi->addIncoming(b.CreateConstInBoundsGEP1_64(ty_aggregate_data_place_phi, aggregate_data_place_phi, 1), cur_block); - - b.CreateCondBr(b.CreateICmpEQ(value, row_end_arg), end, loop); + b.CreateCondBr(b.CreateICmpEQ(incremented_counter, row_end_arg), end, loop); b.SetInsertPoint(end); b.CreateRetVoid(); @@ -769,11 +473,8 @@ CompiledAggregateFunctions compileAggregateFunctions(CHJIT & jit, const std::vec auto compiled_module = jit.compileModule([&](llvm::Module & module) { compileCreateAggregateStatesFunctions(module, functions, create_aggregate_states_functions_name); - compileAddIntoAggregateStatesFunctions(module, functions, add_aggregate_states_functions_name); - /// FIXME: this leads to use-of-uninitialized-value in llvm - /// But for now, it is safe, since it is not used by Aggregator anyway - (void)compileAddIntoAggregateStatesFunctionsSinglePlace; - /// compileAddIntoAggregateStatesFunctionsSinglePlace(module, functions, 
add_aggregate_states_functions_name_single_place); + compileAddIntoAggregateStatesFunctions(module, functions, add_aggregate_states_functions_name, AddIntoAggregateStatesPlacesArgumentType::MultiplePlaces); + compileAddIntoAggregateStatesFunctions(module, functions, add_aggregate_states_functions_name_single_place, AddIntoAggregateStatesPlacesArgumentType::SinglePlace); compileMergeAggregatesStates(module, functions, merge_aggregate_states_functions_name); compileInsertAggregatesIntoResultColumns(module, functions, insert_aggregate_states_functions_name); }); @@ -786,7 +487,7 @@ CompiledAggregateFunctions compileAggregateFunctions(CHJIT & jit, const std::vec assert(create_aggregate_states_function); assert(add_into_aggregate_states_function); - /// assert(add_into_aggregate_states_function_single_place); /// FIXME + assert(add_into_aggregate_states_function_single_place); assert(merge_aggregate_states_function); assert(insert_aggregate_states_function); @@ -809,6 +510,118 @@ CompiledAggregateFunctions compileAggregateFunctions(CHJIT & jit, const std::vec return compiled_aggregate_functions; } +static void compileSortDescription(llvm::Module & module, + SortDescription & description, + const DataTypes & sort_description_types, + const std::string & sort_description_dump) +{ + llvm::IRBuilder<> b(module.getContext()); + + auto * size_type = b.getIntNTy(sizeof(size_t) * 8); + + auto * column_data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy()); + + std::vector function_argument_types = {size_type, size_type, column_data_type->getPointerTo(), column_data_type->getPointerTo()}; + auto * comparator_func_declaration = llvm::FunctionType::get(b.getInt8Ty(), function_argument_types, false); + auto * comparator_func = llvm::Function::Create(comparator_func_declaration, llvm::Function::ExternalLinkage, sort_description_dump, module); + + auto * arguments = comparator_func->args().begin(); + llvm::Value * lhs_index_arg = arguments++; + llvm::Value * 
rhs_index_arg = arguments++; + llvm::Value * columns_lhs_arg = arguments++; + llvm::Value * columns_rhs_arg = arguments++; + + size_t columns_size = description.size(); + + std::vector> comparator_steps_and_results; + for (size_t i = 0; i < columns_size; ++i) + { + auto * step = llvm::BasicBlock::Create(b.getContext(), "step_" + std::to_string(i), comparator_func); + comparator_steps_and_results.emplace_back(step, nullptr); + } + + auto * lhs_equals_rhs_result = llvm::ConstantInt::getSigned(b.getInt8Ty(), 0); + + auto * comparator_join = llvm::BasicBlock::Create(b.getContext(), "comparator_join", comparator_func); + + for (size_t i = 0; i < columns_size; ++i) + { + b.SetInsertPoint(comparator_steps_and_results[i].first); + + const auto & sort_description = description[i]; + const auto & column_type = sort_description_types[i]; + + auto dummy_column = column_type->createColumn(); + + auto * column_native_type = toNativeType(b, removeNullable(column_type)); + if (!column_native_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No native type for column type {}", column_type->getName()); + + bool column_type_is_nullable = column_type->isNullable(); + + auto * nullable_unitialized = llvm::Constant::getNullValue(toNullableType(b, column_native_type)); + + auto * lhs_column = b.CreateLoad(column_data_type, b.CreateConstInBoundsGEP1_64(column_data_type, columns_lhs_arg, i)); + auto * lhs_column_data = b.CreateExtractValue(lhs_column, {0}); + auto * lhs_column_null_data = column_type_is_nullable ? 
b.CreateExtractValue(lhs_column, {1}) : nullptr; + + llvm::Value * lhs_column_element_offset = b.CreateInBoundsGEP(column_native_type, lhs_column_data, lhs_index_arg); + llvm::Value * lhs_value = b.CreateLoad(column_native_type, lhs_column_element_offset); + + if (lhs_column_null_data) + { + auto * is_null_value_pointer = b.CreateInBoundsGEP(b.getInt8Ty(), lhs_column_null_data, lhs_index_arg); + auto * is_null = b.CreateICmpNE(b.CreateLoad(b.getInt8Ty(), is_null_value_pointer), b.getInt8(0)); + auto * lhs_nullable_value = b.CreateInsertValue(b.CreateInsertValue(nullable_unitialized, lhs_value, {0}), is_null, {1}); + lhs_value = lhs_nullable_value; + } + + auto * rhs_column = b.CreateLoad(column_data_type, b.CreateConstInBoundsGEP1_64(column_data_type, columns_rhs_arg, i)); + auto * rhs_column_data = b.CreateExtractValue(rhs_column, {0}); + auto * rhs_column_null_data = column_type_is_nullable ? b.CreateExtractValue(rhs_column, {1}) : nullptr; + + llvm::Value * rhs_column_element_offset = b.CreateInBoundsGEP(column_native_type, rhs_column_data, rhs_index_arg); + llvm::Value * rhs_value = b.CreateLoad(column_native_type, rhs_column_element_offset); + + if (rhs_column_null_data) + { + auto * is_null_value_pointer = b.CreateInBoundsGEP(b.getInt8Ty(), rhs_column_null_data, rhs_index_arg); + auto * is_null = b.CreateICmpNE(b.CreateLoad(b.getInt8Ty(), is_null_value_pointer), b.getInt8(0)); + auto * rhs_nullable_value = b.CreateInsertValue(b.CreateInsertValue(nullable_unitialized, rhs_value, {0}), is_null, {1}); + rhs_value = rhs_nullable_value; + } + + llvm::Value * direction = llvm::ConstantInt::getSigned(b.getInt8Ty(), sort_description.direction); + llvm::Value * nan_direction_hint = llvm::ConstantInt::getSigned(b.getInt8Ty(), sort_description.nulls_direction); + llvm::Value * compare_result = dummy_column->compileComparator(b, lhs_value, rhs_value, nan_direction_hint); + llvm::Value * result = b.CreateMul(direction, compare_result); + + 
comparator_steps_and_results[i].first = b.GetInsertBlock(); + comparator_steps_and_results[i].second = result; + + /** 1. If it is last condition block move to join block. + * 2. If column elements are not equal move to join block. + * 3. If column elements are equal move to next column condition. + */ + if (i == columns_size - 1) + b.CreateBr(comparator_join); + else + b.CreateCondBr(b.CreateICmpEQ(result, lhs_equals_rhs_result), comparator_steps_and_results[i + 1].first, comparator_join); + } + + b.SetInsertPoint(comparator_join); + + /** Join results from all comparator steps. + * Result of columns comparison equals to first compare block where lhs is not equal to rhs or last compare block. + */ + auto * compare_result_phi = b.CreatePHI(b.getInt8Ty(), comparator_steps_and_results.size()); + + for (const auto & [block, result_value] : comparator_steps_and_results) + compare_result_phi->addIncoming(result_value, block); + + b.CreateRet(compare_result_phi); +} + CompiledSortDescriptionFunction compileSortDescription( CHJIT & jit, SortDescription & description, @@ -819,113 +632,7 @@ CompiledSortDescriptionFunction compileSortDescription( auto compiled_module = jit.compileModule([&](llvm::Module & module) { - auto & context = module.getContext(); - llvm::IRBuilder<> b(context); - - auto * size_type = b.getIntNTy(sizeof(size_t) * 8); - - auto * column_data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy()); - - std::vector types = { size_type, size_type, column_data_type->getPointerTo(), column_data_type->getPointerTo() }; - auto * comparator_func_declaration = llvm::FunctionType::get(b.getInt8Ty(), types, false); - auto * comparator_func = llvm::Function::Create(comparator_func_declaration, llvm::Function::ExternalLinkage, sort_description_dump, module); - - auto * arguments = comparator_func->args().begin(); - llvm::Value * lhs_index_arg = &*arguments++; - llvm::Value * rhs_index_arg = &*arguments++; - llvm::Value * columns_lhs_arg = &*arguments++; - 
llvm::Value * columns_rhs_arg = &*arguments++; - - size_t columns_size = description.size(); - - std::vector> comparator_steps_and_results; - for (size_t i = 0; i < columns_size; ++i) - { - auto * step = llvm::BasicBlock::Create(b.getContext(), "step_" + std::to_string(i), comparator_func); - llvm::Value * result_value = nullptr; - comparator_steps_and_results.emplace_back(step, result_value); - } - - auto * lhs_equals_rhs_result = llvm::ConstantInt::getSigned(b.getInt8Ty(), 0); - - auto * comparator_join = llvm::BasicBlock::Create(b.getContext(), "comparator_join", comparator_func); - - for (size_t i = 0; i < columns_size; ++i) - { - b.SetInsertPoint(comparator_steps_and_results[i].first); - - const auto & sort_description = description[i]; - const auto & column_type = sort_description_types[i]; - - auto dummy_column = column_type->createColumn(); - - auto * column_native_type_nullable = toNativeType(b, column_type); - auto * column_native_type = toNativeType(b, removeNullable(column_type)); - if (!column_native_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No native type for column type {}", column_type->getName()); - - auto * column_native_type_pointer = column_native_type->getPointerTo(); - bool column_type_is_nullable = column_type->isNullable(); - - auto * nullable_unitilized = llvm::Constant::getNullValue(column_native_type_nullable); - - auto * lhs_column = b.CreateLoad(column_data_type, b.CreateConstInBoundsGEP1_64(column_data_type, columns_lhs_arg, i)); - auto * lhs_column_data = b.CreatePointerCast(b.CreateExtractValue(lhs_column, {0}), column_native_type_pointer); - auto * lhs_column_null_data = column_type_is_nullable ? 
b.CreateExtractValue(lhs_column, {1}) : nullptr; - - auto * ty_lhs_column_data = llvm::cast(lhs_column_data->getType()->getScalarType())->getElementType(); - - llvm::Value * lhs_cib_gep = b.CreateInBoundsGEP(ty_lhs_column_data, lhs_column_data, lhs_index_arg); - llvm::Value * lhs_value = b.CreateLoad(lhs_cib_gep->getType()->getPointerElementType(), lhs_cib_gep); - - if (lhs_column_null_data) - { - auto * ty_lhs_column_null_data = llvm::cast(lhs_column_null_data->getType()->getScalarType())->getElementType(); - auto * is_null_value_pointer = b.CreateInBoundsGEP(ty_lhs_column_null_data, lhs_column_null_data, lhs_index_arg); - auto * is_null = b.CreateICmpNE(b.CreateLoad(b.getInt8Ty(), is_null_value_pointer), b.getInt8(0)); - auto * lhs_nullable_value = b.CreateInsertValue(b.CreateInsertValue(nullable_unitilized, lhs_value, {0}), is_null, {1}); - lhs_value = lhs_nullable_value; - } - - auto * rhs_column = b.CreateLoad(column_data_type, b.CreateConstInBoundsGEP1_64(column_data_type, columns_rhs_arg, i)); - auto * rhs_column_data = b.CreatePointerCast(b.CreateExtractValue(rhs_column, {0}), column_native_type_pointer); - auto * rhs_column_null_data = column_type_is_nullable ? 
b.CreateExtractValue(rhs_column, {1}) : nullptr; - - auto * ty_rhs_column_data = llvm::cast(rhs_column_data->getType()->getScalarType())->getElementType(); - - llvm::Value * rhs_cib_gep = b.CreateInBoundsGEP(ty_rhs_column_data, rhs_column_data, rhs_index_arg); - llvm::Value * rhs_value = b.CreateLoad(rhs_cib_gep->getType()->getPointerElementType(), rhs_cib_gep); - - if (rhs_column_null_data) - { - auto * ty_rhs_column_null_data = llvm::cast(rhs_column_null_data->getType()->getScalarType())->getElementType(); - auto * is_null_value_pointer = b.CreateInBoundsGEP(ty_rhs_column_null_data, rhs_column_null_data, rhs_index_arg); - auto * is_null = b.CreateICmpNE(b.CreateLoad(b.getInt8Ty(), is_null_value_pointer), b.getInt8(0)); - auto * rhs_nullable_value = b.CreateInsertValue(b.CreateInsertValue(nullable_unitilized, rhs_value, {0}), is_null, {1}); - rhs_value = rhs_nullable_value; - } - - llvm::Value * direction = llvm::ConstantInt::getSigned(b.getInt8Ty(), sort_description.direction); - llvm::Value * nan_direction_hint = llvm::ConstantInt::getSigned(b.getInt8Ty(), sort_description.nulls_direction); - llvm::Value * compare_result = dummy_column->compileComparator(b, lhs_value, rhs_value, nan_direction_hint); - llvm::Value * result = b.CreateMul(direction, compare_result); - - comparator_steps_and_results[i].first = b.GetInsertBlock(); - comparator_steps_and_results[i].second = result; - - if (i == columns_size - 1) - b.CreateBr(comparator_join); - else - b.CreateCondBr(b.CreateICmpEQ(result, lhs_equals_rhs_result), comparator_steps_and_results[i + 1].first, comparator_join); - } - - b.SetInsertPoint(comparator_join); - auto * phi = b.CreatePHI(b.getInt8Ty(), comparator_steps_and_results.size()); - - for (const auto & [block, result_value] : comparator_steps_and_results) - phi->addIncoming(result_value, block); - - b.CreateRet(phi); + compileSortDescription(module, description, sort_description_types, sort_description_dump); }); 
ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds()); From 4eb29f7428751c3bd79223a4accbff1efc3ad88b Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 7 Oct 2022 10:51:54 +0200 Subject: [PATCH 185/266] Updated submodule --- contrib/llvm-project | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/llvm-project b/contrib/llvm-project index 328e4602120..c7f7cfc85e4 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit 328e4602120ddd6b2c1fb91bf2d50bd7bc249711 +Subproject commit c7f7cfc85e4b81c1c76cdd633dd8808d2dfd6114 From 62599e39011fe8748205ba05213e488ce150ee09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 7 Oct 2022 11:14:35 +0200 Subject: [PATCH 186/266] Simpler macro --- base/base/defines.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/base/defines.h b/base/base/defines.h index 786a9c9813f..a053e9dc183 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -159,8 +159,8 @@ # define TSA_REQUIRES_SHARED(...) 
# define TSA_NO_THREAD_SAFETY_ANALYSIS -# define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() -> const auto & { return (x); }()) -# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() -> auto & { return (x); }()) +# define TSA_SUPPRESS_WARNING_FOR_READ(x) (x) +# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) (x) # define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x) #endif From 53d54ae6c9cca68614108be5afd14d475e9d55b8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 7 Oct 2022 12:26:54 +0200 Subject: [PATCH 187/266] Fix --- src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp index 9e688587b05..98ca31b1426 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp @@ -100,11 +100,11 @@ bool ReadIndirectBufferFromRemoteFS::nextImpl() chassert(offset <= size); chassert(size <= internal_buffer.size()); - if (size) - { - file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd(); + size_t bytes_read = size - offset; + if (bytes_read) working_buffer = Buffer(internal_buffer.begin() + offset, internal_buffer.begin() + size); - } + + file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd(); /// In case of multiple files for the same file in clickhouse (i.e. log family) /// file_offset_of_buffer_end will not match getImplementationBufferOffset() @@ -112,7 +112,7 @@ bool ReadIndirectBufferFromRemoteFS::nextImpl() chassert(file_offset_of_buffer_end >= impl->getImplementationBufferOffset()); chassert(file_offset_of_buffer_end <= impl->getFileSize()); - return size; + return bytes_read; } } From 984fe4f05889c60266acd1290d2644ebff7fee1e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 7 Oct 2022 10:28:15 +0000 Subject: [PATCH 188/266] Add test. 
--- .../02456_alter-nullable-column-bag-2.reference | 1 + .../02456_alter-nullable-column-bag-2.sql | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/02456_alter-nullable-column-bag-2.reference create mode 100644 tests/queries/0_stateless/02456_alter-nullable-column-bag-2.sql diff --git a/tests/queries/0_stateless/02456_alter-nullable-column-bag-2.reference b/tests/queries/0_stateless/02456_alter-nullable-column-bag-2.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02456_alter-nullable-column-bag-2.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02456_alter-nullable-column-bag-2.sql b/tests/queries/0_stateless/02456_alter-nullable-column-bag-2.sql new file mode 100644 index 00000000000..d66c5f0e59e --- /dev/null +++ b/tests/queries/0_stateless/02456_alter-nullable-column-bag-2.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS t1 SYNC; +CREATE TABLE t1 (v UInt64) ENGINE=ReplicatedMergeTree('/test/tables/{database}/test/t1', 'r1') ORDER BY v PARTITION BY v; +INSERT INTO t1 values(1); +ALTER TABLE t1 ADD COLUMN s String; +INSERT INTO t1 values(1, '1'); +ALTER TABLE t1 MODIFY COLUMN s Nullable(String); +-- SELECT _part, * FROM t1; + +alter table t1 detach partition 1; + +SELECT _part, * FROM t1; +--0 rows in set. Elapsed: 0.001 sec. 
+ +alter table t1 attach partition 1; +select count() from t1; + From 81aec3c3400b3237cc2c5f39c2f16b4710ccfb40 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 7 Oct 2022 11:39:47 +0000 Subject: [PATCH 189/266] Grouping sets, rollup, cube, totals can be only with GROUP BY --- src/Client/QueryFuzzer.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index cd010d7b15d..6ef0ad08a1e 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -548,6 +548,26 @@ void QueryFuzzer::fuzz(ASTPtr & ast) { select->groupBy()->children.clear(); select->setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); + select->group_by_with_grouping_sets = false; + select->group_by_with_rollup = false; + select->group_by_with_cube = false; + select->group_by_with_totals = true; + } + else if (fuzz_rand() % 100 == 0) + { + select->group_by_with_grouping_sets = !select->group_by_with_grouping_sets; + } + else if (fuzz_rand() % 100 == 0) + { + select->group_by_with_rollup = !select->group_by_with_rollup; + } + else if (fuzz_rand() % 100 == 0) + { + select->group_by_with_cube = !select->group_by_with_cube; + } + else if (fuzz_rand() % 100 == 0) + { + select->group_by_with_totals = !select->group_by_with_totals; } } else if (fuzz_rand() % 50 == 0) From 5d9c4a39c0b482bce5449067ce536a5ee05c3942 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 7 Oct 2022 14:06:28 +0200 Subject: [PATCH 190/266] Update comment --- base/base/defines.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/base/base/defines.h b/base/base/defines.h index a053e9dc183..c9b6cfed4a4 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -142,7 +142,9 @@ # define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function /// Macros for suppressing TSA warnings for specific reads/writes 
(instead of suppressing it for the whole function) -/// Consider adding a comment before using these macros. +/// They use a lambda function to apply function attribute to a single statement. This enables us to suppress warnings locally instead of +/// suppressing them in the whole function. +/// Consider adding a comment when using these macros. # define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()) # define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }()) From 2c84ad30baeb910a8bd43797456e94b7cb9008e5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 7 Oct 2022 12:14:59 +0200 Subject: [PATCH 191/266] Fix double "file" in "Writing backup for file" message Signed-off-by: Azat Khuzhin --- src/Backups/BackupImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index ffd20e02dd3..3a57e0fb8ee 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -691,7 +691,7 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry) std::string from_file_name = "memory buffer"; if (auto fname = entry->getFilePath(); !fname.empty()) from_file_name = "file " + fname; - LOG_TRACE(log, "Writing backup for file {} from file {}", file_name, from_file_name); + LOG_TRACE(log, "Writing backup for file {} from {}", file_name, from_file_name); auto adjusted_path = removeLeadingSlash(file_name); if (coordination->getFileInfo(adjusted_path)) From e1859e9ee81cfae01ff6a10f14c698689273b91c Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 7 Oct 2022 12:43:20 +0000 Subject: [PATCH 192/266] fix style --- src/Client/QueryFuzzer.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index d69371af7e9..b28cc89b2df 100644 --- 
a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -858,7 +858,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast) { fuzzColumnLikeExpressionList(select->select().get()); - if(select->groupBy().get()) + if (select->groupBy().get()) { if (fuzz_rand() % 50 == 0) { @@ -869,19 +869,19 @@ void QueryFuzzer::fuzz(ASTPtr & ast) select->group_by_with_cube = false; select->group_by_with_totals = true; } - else if (fuzz_rand() % 100 == 0) + else if (fuzz_rand() % 100 == 0) { select->group_by_with_grouping_sets = !select->group_by_with_grouping_sets; } - else if (fuzz_rand() % 100 == 0) + else if (fuzz_rand() % 100 == 0) { select->group_by_with_rollup = !select->group_by_with_rollup; } - else if (fuzz_rand() % 100 == 0) + else if (fuzz_rand() % 100 == 0) { select->group_by_with_cube = !select->group_by_with_cube; } - else if (fuzz_rand() % 100 == 0) + else if (fuzz_rand() % 100 == 0) { select->group_by_with_totals = !select->group_by_with_totals; } @@ -891,7 +891,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast) select->setExpression(ASTSelectQuery::Expression::GROUP_BY, getRandomExpressionList()); } - if(select->where().get()) + if (select->where().get()) { if (fuzz_rand() % 50 == 0) { From f1d93d52d19f979466cfbbb145279cfde11a12f2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 7 Oct 2022 14:49:25 +0200 Subject: [PATCH 193/266] Fix checking parent for old-format parts --- src/Storages/StorageReplicatedMergeTree.cpp | 27 +++++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7ff9d73c0fc..94a98d0e923 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7778,7 +7778,7 @@ namespace /// But sometimes we need an opposite. When we deleting all_0_0_0_1 it can be non replicated to other replicas, so we are the only owner of this part. 
/// In this case when we will drop all_0_0_0_1 we will drop blobs for all_0_0_0. But it will lead to dataloss. For such case we need to check that other replicas /// still need parent part. -NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const std::string & part_info_str, MergeTreeDataFormatVersion format_version) +NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const std::string & part_info_str, MergeTreeDataFormatVersion format_version, Poco::Logger * log) { NameSet files_not_to_remove; @@ -7791,12 +7791,13 @@ NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::stri Strings parts_str; zookeeper_ptr->tryGetChildren(zero_copy_part_path_prefix, parts_str); - /// Parsing infos - std::vector parts_infos; + /// Parsing infos. It's hard to convert info -> string for old-format merge tree + /// so storing string as is. + std::vector> parts_infos; for (const auto & part_str : parts_str) { MergeTreePartInfo parent_candidate_info = MergeTreePartInfo::fromPartName(part_str, format_version); - parts_infos.push_back(parent_candidate_info); + parts_infos.emplace_back(parent_candidate_info, part_str); } /// Sort is important. 
We need to find our closest parent, like: @@ -7807,7 +7808,7 @@ NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::stri std::sort(parts_infos.begin(), parts_infos.end()); /// In reverse order to process from bigger to smaller - for (const auto & parent_candidate_info : parts_infos | std::views::reverse) + for (const auto & [parent_candidate_info, part_candidate_info_str] : parts_infos | std::views::reverse) { if (parent_candidate_info == part_info) continue; @@ -7815,10 +7816,20 @@ NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::stri /// We are mutation child of this parent if (part_info.isMutationChildOf(parent_candidate_info)) { + LOG_TRACE(log, "Found mutation parent {} for part {}", part_candidate_info_str, part_info_str); /// Get hardlinked files String files_not_to_remove_str; - zookeeper_ptr->tryGet(fs::path(zero_copy_part_path_prefix) / parent_candidate_info.getPartName(), files_not_to_remove_str); - boost::split(files_not_to_remove, files_not_to_remove_str, boost::is_any_of("\n ")); + Coordination::Error code; + zookeeper_ptr->tryGet(fs::path(zero_copy_part_path_prefix) / part_candidate_info_str, files_not_to_remove_str, nullptr, nullptr, &code); + if (code != Coordination::Error::ZOK) + LOG_TRACE(log, "Cannot get parent files from ZooKeeper on path ({}), error {}", (fs::path(zero_copy_part_path_prefix) / part_candidate_info_str).string(), errorMessage(code)); + + if (!files_not_to_remove_str.empty()) + { + boost::split(files_not_to_remove, files_not_to_remove_str, boost::is_any_of("\n ")); + LOG_TRACE(log, "Found files not to remove from parent part {}: [{}]", part_candidate_info_str, fmt::join(files_not_to_remove, ", ")); + } + break; } } @@ -7848,7 +7859,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( if (!files_not_to_remove_str.empty()) boost::split(files_not_to_remove, files_not_to_remove_str, boost::is_any_of("\n ")); - auto parent_not_to_remove = 
getParentLockedBlobs(zookeeper_ptr, fs::path(zc_zookeeper_path).parent_path(), part_name, data_format_version); + auto parent_not_to_remove = getParentLockedBlobs(zookeeper_ptr, fs::path(zc_zookeeper_path).parent_path(), part_name, data_format_version, logger); files_not_to_remove.insert(parent_not_to_remove.begin(), parent_not_to_remove.end()); String zookeeper_part_uniq_node = fs::path(zc_zookeeper_path) / part_id; From 94566abda95706f7d4a33a0a6b75c3adddc63291 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 7 Oct 2022 12:59:11 +0200 Subject: [PATCH 194/266] Fix reusing of files > 4GB from base backup Previosly u64 numbers was truncated to u32 numbers during writing to the mdatadata xml file, and further incremental backup cannot reuse them, since the file in base backup is smaller. P.S. There can be other places, I thought about enabling -Wshorten-64-to-32, but there are lots of warnings right now. Signed-off-by: Azat Khuzhin --- src/Backups/BackupImpl.cpp | 12 +++---- .../test_backup_restore_new/test.py | 36 +++++++++++++++++++ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 3a57e0fb8ee..5b5e4aa7d92 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -275,7 +275,7 @@ void BackupImpl::writeBackupMetadata() assert(!is_internal_backup); Poco::AutoPtr config{new Poco::Util::XMLConfiguration()}; - config->setUInt("version", CURRENT_BACKUP_VERSION); + config->setUInt64("version", CURRENT_BACKUP_VERSION); config->setString("timestamp", toString(LocalDateTime{timestamp})); config->setString("uuid", toString(*uuid)); @@ -302,7 +302,7 @@ void BackupImpl::writeBackupMetadata() { String prefix = index ? "contents.file[" + std::to_string(index) + "]." 
: "contents.file."; config->setString(prefix + "name", info.file_name); - config->setUInt(prefix + "size", info.size); + config->setUInt64(prefix + "size", info.size); if (info.size) { config->setString(prefix + "checksum", hexChecksum(info.checksum)); @@ -311,7 +311,7 @@ void BackupImpl::writeBackupMetadata() config->setBool(prefix + "use_base", true); if (info.base_size != info.size) { - config->setUInt(prefix + "base_size", info.base_size); + config->setUInt64(prefix + "base_size", info.base_size); config->setString(prefix + "base_checksum", hexChecksum(info.base_checksum)); } } @@ -367,7 +367,7 @@ void BackupImpl::readBackupMetadata() Poco::AutoPtr config{new Poco::Util::XMLConfiguration()}; config->load(stream); - version = config->getUInt("version"); + version = config->getUInt64("version"); if ((version < INITIAL_BACKUP_VERSION) || (version > CURRENT_BACKUP_VERSION)) throw Exception(ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", backup_name, version); @@ -389,13 +389,13 @@ void BackupImpl::readBackupMetadata() String prefix = "contents." + key + "."; FileInfo info; info.file_name = config->getString(prefix + "name"); - info.size = config->getUInt(prefix + "size"); + info.size = config->getUInt64(prefix + "size"); if (info.size) { info.checksum = unhexChecksum(config->getString(prefix + "checksum")); bool use_base = config->getBool(prefix + "use_base", false); - info.base_size = config->getUInt(prefix + "base_size", use_base ? info.size : 0); + info.base_size = config->getUInt64(prefix + "base_size", use_base ? 
info.size : 0); if (info.base_size) use_base = true; diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index ca0d6a632a0..c94dc6d4a87 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -191,6 +191,42 @@ def test_incremental_backup(): assert instance.query("SELECT count(), sum(x) FROM test.table2") == "102\t5081\n" +def test_incremental_backup_overflow(): + backup_name = new_backup_name() + incremental_backup_name = new_backup_name() + + instance.query("CREATE DATABASE test") + instance.query( + "CREATE TABLE test.table(y String CODEC(NONE)) ENGINE=MergeTree ORDER BY tuple()" + ) + # Create a column of 4GB+10K + instance.query( + "INSERT INTO test.table SELECT toString(repeat('A', 1024)) FROM numbers((4*1024*1024)+10)" + ) + # Force one part + instance.query("OPTIMIZE TABLE test.table FINAL") + + # ensure that the column's size on disk is indeed greater then 4GB + assert ( + int( + instance.query( + "SELECT bytes_on_disk FROM system.parts_columns WHERE active AND database = 'test' AND table = 'table' AND column = 'y'" + ) + ) + > 4 * 1024 * 1024 * 1024 + ) + + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + instance.query( + f"BACKUP TABLE test.table TO {incremental_backup_name} SETTINGS base_backup = {backup_name}" + ) + + # And now check that incremental backup does not have any files + assert os.listdir(os.path.join(get_path_to_backup(incremental_backup_name))) == [ + ".backup" + ] + + def test_incremental_backup_after_renaming_table(): backup_name = new_backup_name() incremental_backup_name = new_backup_name() From dae8d6b316958eab2db393067c83c36b4aa26863 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 7 Oct 2022 14:59:38 +0200 Subject: [PATCH 195/266] Convert backup version from UInt64 to int Signed-off-by: Azat Khuzhin --- src/Backups/BackupImpl.cpp | 8 ++++---- src/Backups/BackupImpl.h | 2 +- 2 files 
changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 5b5e4aa7d92..8b648af44ec 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -43,8 +43,8 @@ namespace ErrorCodes namespace { - const UInt64 INITIAL_BACKUP_VERSION = 1; - const UInt64 CURRENT_BACKUP_VERSION = 1; + const int INITIAL_BACKUP_VERSION = 1; + const int CURRENT_BACKUP_VERSION = 1; using SizeAndChecksum = IBackup::SizeAndChecksum; using FileInfo = IBackupCoordination::FileInfo; @@ -275,7 +275,7 @@ void BackupImpl::writeBackupMetadata() assert(!is_internal_backup); Poco::AutoPtr config{new Poco::Util::XMLConfiguration()}; - config->setUInt64("version", CURRENT_BACKUP_VERSION); + config->setInt("version", CURRENT_BACKUP_VERSION); config->setString("timestamp", toString(LocalDateTime{timestamp})); config->setString("uuid", toString(*uuid)); @@ -367,7 +367,7 @@ void BackupImpl::readBackupMetadata() Poco::AutoPtr config{new Poco::Util::XMLConfiguration()}; config->load(stream); - version = config->getUInt64("version"); + version = config->getInt("version"); if ((version < INITIAL_BACKUP_VERSION) || (version > CURRENT_BACKUP_VERSION)) throw Exception(ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", backup_name, version); diff --git a/src/Backups/BackupImpl.h b/src/Backups/BackupImpl.h index e539239d3ef..67742c77139 100644 --- a/src/Backups/BackupImpl.h +++ b/src/Backups/BackupImpl.h @@ -122,7 +122,7 @@ private: size_t num_files = 0; UInt64 uncompressed_size = 0; UInt64 compressed_size = 0; - UInt64 version; + int version; std::optional base_backup_info; std::shared_ptr base_backup; std::optional base_backup_uuid; From e759a64d38b5ed0027b3c1556f0dfd28159b54a7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 7 Oct 2022 13:22:40 +0000 Subject: [PATCH 196/266] Docs: Redirect links to misc.md --> https://clickhouse.com/docs/en/sql-reference/statements/misc This page indexes a bunch of SQL 
statements but they are easily reachable by the sidebar already. --- docs/en/operations/settings/settings.md | 2 +- docs/en/operations/system-tables/columns.md | 2 +- docs/en/sql-reference/statements/alter/column.md | 4 ++-- docs/en/sql-reference/statements/alter/index.md | 2 +- .../en/sql-reference/statements/alter/partition.md | 2 +- docs/en/sql-reference/statements/grant.md | 14 +++++++------- docs/ru/getting-started/tutorial.md | 2 +- docs/ru/operations/access-rights.md | 14 +++++++------- docs/ru/operations/settings/settings.md | 2 +- docs/ru/operations/system-tables/columns.md | 2 +- docs/ru/sql-reference/index.md | 1 - docs/ru/sql-reference/statements/alter/column.md | 4 ++-- docs/ru/sql-reference/statements/create/role.md | 6 +++--- docs/ru/sql-reference/statements/grant.md | 8 ++++---- 14 files changed, 32 insertions(+), 33 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index bb8198b8b72..74cf3a5dd14 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1599,7 +1599,7 @@ Right now it requires `optimize_skip_unused_shards` (the reason behind this is t ## optimize_throw_if_noop {#setting-optimize_throw_if_noop} -Enables or disables throwing an exception if an [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) query didnโ€™t perform a merge. +Enables or disables throwing an exception if an [OPTIMIZE](../../sql-reference/statements/optimize.md) query didnโ€™t perform a merge. By default, `OPTIMIZE` returns successfully even if it didnโ€™t do anything. This setting lets you differentiate these situations and get the reason in an exception message. 
diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md index a2b26c3684c..8b633fbe2f0 100644 --- a/docs/en/operations/system-tables/columns.md +++ b/docs/en/operations/system-tables/columns.md @@ -5,7 +5,7 @@ slug: /en/operations/system-tables/columns Contains information about columns in all the tables. -You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once. +You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md) query, but for multiple tables at once. Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in the `system.columns` only in those session where they have been created. They are shown with the empty `database` field. diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 12584909688..067a350dca7 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -127,7 +127,7 @@ Adds a comment to the column. If the `IF EXISTS` clause is specified, the query Each column can have one comment. If a comment already exists for the column, a new comment overwrites the previous comment. -Comments are stored in the `comment_expression` column returned by the [DESCRIBE TABLE](../../../sql-reference/statements/misc.md#misc-describe-table) query. +Comments are stored in the `comment_expression` column returned by the [DESCRIBE TABLE](../../../sql-reference/statements/describe-table.md) query. Example: @@ -253,7 +253,7 @@ The `ALTER` query lets you create and delete separate elements (columns) in nest There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). 
Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`). -If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](../../../sql-reference/statements/insert-into.md#insert_query_insert-select) query, then switch the tables using the [RENAME](../../../sql-reference/statements/misc.md#misc_operations-rename) query and delete the old table. You can use the [clickhouse-copier](../../../operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. +If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](../../../sql-reference/statements/insert-into.md#insert_query_insert-select) query, then switch the tables using the [RENAME](../../../sql-reference/statements/rename.md#rename-table) query and delete the old table. You can use the [clickhouse-copier](../../../operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running. diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index eeee5e03c8b..4027429cf0d 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -44,7 +44,7 @@ For `*MergeTree` tables mutations execute by **rewriting whole data parts**. The Mutations are totally ordered by their creation order and are applied to each part in that order. 
Mutations are also partially ordered with `INSERT INTO` queries: data that was inserted into the table before the mutation was submitted will be mutated and data that was inserted after that will not be mutated. Note that mutations do not block inserts in any way. -A mutation query returns immediately after the mutation entry is added (in case of replicated tables to ZooKeeper, for non-replicated tables - to the filesystem). The mutation itself executes asynchronously using the system profile settings. To track the progress of mutations you can use the [`system.mutations`](../../../operations/system-tables/mutations.md#system_tables-mutations) table. A mutation that was successfully submitted will continue to execute even if ClickHouse servers are restarted. There is no way to roll back the mutation once it is submitted, but if the mutation is stuck for some reason it can be cancelled with the [`KILL MUTATION`](../../../sql-reference/statements/misc.md#kill-mutation) query. +A mutation query returns immediately after the mutation entry is added (in case of replicated tables to ZooKeeper, for non-replicated tables - to the filesystem). The mutation itself executes asynchronously using the system profile settings. To track the progress of mutations you can use the [`system.mutations`](../../../operations/system-tables/mutations.md#system_tables-mutations) table. A mutation that was successfully submitted will continue to execute even if ClickHouse servers are restarted. There is no way to roll back the mutation once it is submitted, but if the mutation is stuck for some reason it can be cancelled with the [`KILL MUTATION`](../../../sql-reference/statements/kill.md#kill-mutation) query. Entries for finished mutations are not deleted right away (the number of preserved entries is determined by the `finished_mutations_to_keep` storage engine parameter). Older mutation entries are deleted. 
diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index a2d142c2a6d..a216de85cfc 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -319,7 +319,7 @@ You can specify the partition expression in `ALTER ... PARTITION` queries in dif Usage of quotes when specifying the partition depends on the type of partition expression. For example, for the `String` type, you have to specify its name in quotes (`'`). For the `Date` and `Int*` types no quotes are needed. -All the rules above are also true for the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query. If you need to specify the only partition when optimizing a non-partitioned table, set the expression `PARTITION tuple()`. For example: +All the rules above are also true for the [OPTIMIZE](../../../sql-reference/statements/optimize.md) query. If you need to specify the only partition when optimizing a non-partitioned table, set the expression `PARTITION tuple()`. For example: ``` sql OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL; diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 56bb4cd4b65..546a8b0958d 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -221,7 +221,7 @@ By default, a user account or a role has no privileges. If a user or a role has no privileges, it is displayed as [NONE](#grant-none) privilege. -Some queries by their implementation require a set of privileges. For example, to execute the [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename) query you need the following privileges: `SELECT`, `CREATE TABLE`, `INSERT` and `DROP TABLE`. +Some queries by their implementation require a set of privileges. 
For example, to execute the [RENAME](../../sql-reference/statements/optimize.md) query you need the following privileges: `SELECT`, `CREATE TABLE`, `INSERT` and `DROP TABLE`. ### SELECT @@ -304,11 +304,11 @@ Examples of how this hierarchy is treated: - The `MODIFY SETTING` privilege allows modifying table engine settings. It does not affect settings or server configuration parameters. - The `ATTACH` operation needs the [CREATE](#grant-create) privilege. - The `DETACH` operation needs the [DROP](#grant-drop) privilege. -- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/misc.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege. +- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/kill.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege. ### CREATE -Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [ATTACH](../../sql-reference/statements/misc.md#attach) DDL-queries according to the following hierarchy of privileges: +Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [ATTACH](../../sql-reference/statements/attach.md) DDL-queries according to the following hierarchy of privileges: - `CREATE`. Level: `GROUP` - `CREATE DATABASE`. 
Level: `DATABASE` @@ -323,7 +323,7 @@ Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [A ### DROP -Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH](../../sql-reference/statements/misc.md#detach) queries according to the following hierarchy of privileges: +Allows executing [DROP](../../sql-reference/statements/drop.md) and [DETACH](../../sql-reference/statements/detach.md) queries according to the following hierarchy of privileges: - `DROP`. Level: `GROUP` - `DROP DATABASE`. Level: `DATABASE` @@ -333,13 +333,13 @@ Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH ### TRUNCATE -Allows executing [TRUNCATE](../../sql-reference/statements/misc.md#truncate-statement) queries. +Allows executing [TRUNCATE](../../sql-reference/statements/truncate.md) queries. Privilege level: `TABLE`. ### OPTIMIZE -Allows executing [OPTIMIZE TABLE](../../sql-reference/statements/misc.md#misc_operations-optimize) queries. +Allows executing [OPTIMIZE TABLE](../../sql-reference/statements/optimize.md) queries. Privilege level: `TABLE`. @@ -359,7 +359,7 @@ A user has the `SHOW` privilege if it has any other privilege concerning the spe ### KILL QUERY -Allows executing [KILL](../../sql-reference/statements/misc.md#kill-query-statement) queries according to the following hierarchy of privileges: +Allows executing [KILL](../../sql-reference/statements/kill.md#kill-query) queries according to the following hierarchy of privileges: Privilege level: `GLOBAL`. diff --git a/docs/ru/getting-started/tutorial.md b/docs/ru/getting-started/tutorial.md index b1abc787c5d..be6d138669a 100644 --- a/docs/ru/getting-started/tutorial.md +++ b/docs/ru/getting-started/tutorial.md @@ -488,7 +488,7 @@ FORMAT TSV max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion." 
``` -Optionally you can [OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize) the tables after import. Tables that are configured with an engine from MergeTree-family always do merges of data parts in the background to optimize data storage (or at least check if it makes sense). These queries force the table engine to do storage optimization right now instead of some time later: +Optionally you can [OPTIMIZE](../sql-reference/statements/optimize.md) the tables after import. Tables that are configured with an engine from MergeTree-family always do merges of data parts in the background to optimize data storage (or at least check if it makes sense). These queries force the table engine to do storage optimization right now instead of some time later: ``` bash clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL" diff --git a/docs/ru/operations/access-rights.md b/docs/ru/operations/access-rights.md index 987f7fecc55..7f4e06205be 100644 --- a/docs/ru/operations/access-rights.md +++ b/docs/ru/operations/access-rights.md @@ -64,7 +64,7 @@ ClickHouse ะฟะพะดะดะตั€ะถะธะฒะฐะตั‚ ัƒะฟั€ะฐะฒะปะตะฝะธะต ะดะพัั‚ัƒะฟะพะผ ะฝะฐ - [CREATE USER](../sql-reference/statements/create/user.md#create-user-statement) - [ALTER USER](../sql-reference/statements/alter/user.md) -- [DROP USER](../sql-reference/statements/misc.md#drop-user-statement) +- [DROP USER](../sql-reference/statements/drop.md#drop-user) - [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement) ### ะŸั€ะธะผะตะฝะตะฝะธะต ะฝะฐัั‚ั€ะพะตะบ {#access-control-settings-applying} @@ -91,9 +91,9 @@ ClickHouse ะฟะพะดะดะตั€ะถะธะฒะฐะตั‚ ัƒะฟั€ะฐะฒะปะตะฝะธะต ะดะพัั‚ัƒะฟะพะผ ะฝะฐ - [CREATE ROLE](../sql-reference/statements/create/index.md#create-role-statement) - [ALTER ROLE](../sql-reference/statements/alter/role.md) -- [DROP ROLE](../sql-reference/statements/misc.md#drop-role-statement) -- [SET ROLE](../sql-reference/statements/misc.md#set-role-statement) -- [SET DEFAULT 
ROLE](../sql-reference/statements/misc.md#set-default-role-statement) +- [DROP ROLE](../sql-reference/statements/drop.md#drop-role) +- [SET ROLE](../sql-reference/statements/set-role.md) +- [SET DEFAULT ROLE](../sql-reference/statements/set-role.md#set-default-role) - [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement) ะŸั€ะธะฒะธะปะตะณะธะธ ะผะพะถะฝะพ ะฟั€ะธัะฒะพะธั‚ัŒ ั€ะพะปะธ ั ะฟะพะผะพั‰ัŒัŽ ะทะฐะฟั€ะพัะฐ [GRANT](../sql-reference/statements/grant.md). ะ”ะปั ะพั‚ะทั‹ะฒะฐ ะฟั€ะธะฒะธะปะตะณะธะน ัƒ ั€ะพะปะธ ClickHouse ะฟั€ะตะดะพัั‚ะฐะฒะปัะตั‚ ะทะฐะฟั€ะพั [REVOKE](../sql-reference/statements/revoke.md). @@ -106,7 +106,7 @@ ClickHouse ะฟะพะดะดะตั€ะถะธะฒะฐะตั‚ ัƒะฟั€ะฐะฒะปะตะฝะธะต ะดะพัั‚ัƒะฟะพะผ ะฝะฐ - [CREATE ROW POLICY](../sql-reference/statements/create/index.md#create-row-policy-statement) - [ALTER ROW POLICY](../sql-reference/statements/alter/row-policy.md) -- [DROP ROW POLICY](../sql-reference/statements/misc.md#drop-row-policy-statement) +- [DROP ROW POLICY](../sql-reference/statements/drop.md#drop-row-policy) - [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement) @@ -118,7 +118,7 @@ ClickHouse ะฟะพะดะดะตั€ะถะธะฒะฐะตั‚ ัƒะฟั€ะฐะฒะปะตะฝะธะต ะดะพัั‚ัƒะฟะพะผ ะฝะฐ - [CREATE SETTINGS PROFILE](../sql-reference/statements/create/index.md#create-settings-profile-statement) - [ALTER SETTINGS PROFILE](../sql-reference/statements/alter/settings-profile.md) -- [DROP SETTINGS PROFILE](../sql-reference/statements/misc.md#drop-settings-profile-statement) +- [DROP SETTINGS PROFILE](../sql-reference/statements/drop.md#drop-settings-profile) - [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement) @@ -132,7 +132,7 @@ ClickHouse ะฟะพะดะดะตั€ะถะธะฒะฐะตั‚ ัƒะฟั€ะฐะฒะปะตะฝะธะต ะดะพัั‚ัƒะฟะพะผ ะฝะฐ - [CREATE QUOTA](../sql-reference/statements/create/index.md#create-quota-statement) - [ALTER QUOTA](../sql-reference/statements/alter/quota.md) -- [DROP 
QUOTA](../sql-reference/statements/misc.md#drop-quota-statement) +- [DROP QUOTA](../sql-reference/statements/drop.md#drop-quota) - [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 3d765b03d58..01cc696b574 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1986,7 +1986,7 @@ SELECT * FROM test_table ## optimize_throw_if_noop {#setting-optimize_throw_if_noop} -ะ’ะบะปัŽั‡ะฐะตั‚ ะธะปะธ ะพั‚ะบะปัŽั‡ะฐะตั‚ ะณะตะฝะตั€ะธั€ะพะฒะฐะฝะธะต ะธัะบะปัŽั‡ะตะฝะธั ะฒ ัะปัƒั‡ะฐัั…, ะบะพะณะดะฐ ะทะฐะฟั€ะพั [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) ะฝะต ะฒั‹ะฟะพะปะฝัะตั‚ ะผั‘ั€ะถ. +ะ’ะบะปัŽั‡ะฐะตั‚ ะธะปะธ ะพั‚ะบะปัŽั‡ะฐะตั‚ ะณะตะฝะตั€ะธั€ะพะฒะฐะฝะธะต ะธัะบะปัŽั‡ะตะฝะธั ะฒ ัะปัƒั‡ะฐัั…, ะบะพะณะดะฐ ะทะฐะฟั€ะพั [OPTIMIZE](../../sql-reference/statements/optimize.md) ะฝะต ะฒั‹ะฟะพะปะฝัะตั‚ ะผั‘ั€ะถ. ะŸะพ ัƒะผะพะปั‡ะฐะฝะธัŽ, `OPTIMIZE` ะทะฐะฒะตั€ัˆะฐะตั‚ัั ัƒัะฟะตัˆะฝะพ ะธ ะฒ ั‚ะตั… ัะปัƒั‡ะฐัั…, ะบะพะณะดะฐ ะพะฝ ะฝะธั‡ะตะณะพ ะฝะต ัะดะตะปะฐะป. ะะฐัั‚ั€ะพะนะบะฐ ะฟะพะทะฒะพะปัะตั‚ ะพั‚ะดะตะปะธั‚ัŒ ะฟะพะดะพะฑะฝั‹ะต ัะปัƒั‡ะฐะธ ะธ ะฒะบะปัŽั‡ะฐะตั‚ ะณะตะฝะตั€ะธั€ะพะฒะฐะฝะธะต ะธัะบะปัŽั‡ะตะฝะธั ั ะฟะพััะฝััŽั‰ะธะผ ัะพะพะฑั‰ะตะฝะธะตะผ. diff --git a/docs/ru/operations/system-tables/columns.md b/docs/ru/operations/system-tables/columns.md index 818da3d6ac6..cade6f0a557 100644 --- a/docs/ru/operations/system-tables/columns.md +++ b/docs/ru/operations/system-tables/columns.md @@ -5,7 +5,7 @@ slug: /ru/operations/system-tables/columns ะกะพะดะตั€ะถะธั‚ ะธะฝั„ะพั€ะผะฐั†ะธัŽ ะพ ัั‚ะพะปะฑั†ะฐั… ะฒัะตั… ั‚ะฐะฑะปะธั†. 
-ะก ะฟะพะผะพั‰ัŒัŽ ัั‚ะพะน ั‚ะฐะฑะปะธั†ั‹ ะผะพะถะฝะพ ะฟะพะปัƒั‡ะธั‚ัŒ ะธะฝั„ะพั€ะผะฐั†ะธัŽ ะฐะฝะฐะปะพะณะธั‡ะฝะพ ะทะฐะฟั€ะพััƒ [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table), ะฝะพ ะดะปั ะผะฝะพะณะธั… ั‚ะฐะฑะปะธั† ัั€ะฐะทัƒ. +ะก ะฟะพะผะพั‰ัŒัŽ ัั‚ะพะน ั‚ะฐะฑะปะธั†ั‹ ะผะพะถะฝะพ ะฟะพะปัƒั‡ะธั‚ัŒ ะธะฝั„ะพั€ะผะฐั†ะธัŽ ะฐะฝะฐะปะพะณะธั‡ะฝะพ ะทะฐะฟั€ะพััƒ [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md), ะฝะพ ะดะปั ะผะฝะพะณะธั… ั‚ะฐะฑะปะธั† ัั€ะฐะทัƒ. ะšะพะปะพะฝะบะธ [ะฒั€ะตะผะตะฝะฝั‹ั… ั‚ะฐะฑะปะธั†](../../sql-reference/statements/create/table.md#temporary-tables) ัะพะดะตั€ะถะฐั‚ัั ะฒ `system.columns` ั‚ะพะปัŒะบะพ ะฒ ั‚ะตั… ัะตััะธัั…, ะฒ ะบะพั‚ะพั€ั‹ั… ัั‚ะธ ั‚ะฐะฑะปะธั†ั‹ ะฑั‹ะปะธ ัะพะทะดะฐะฝั‹. ะŸะพะปะต `database` ัƒ ั‚ะฐะบะธั… ะบะพะปะพะฝะพะบ ะฟัƒัั‚ะพะต. diff --git a/docs/ru/sql-reference/index.md b/docs/ru/sql-reference/index.md index f55c5e859f1..95e2d6a3918 100644 --- a/docs/ru/sql-reference/index.md +++ b/docs/ru/sql-reference/index.md @@ -10,5 +10,4 @@ sidebar_position: 28 - [INSERT INTO](statements/insert-into.md) - [CREATE](statements/create/index.md) - [ALTER](statements/alter/index.md#query_language_queries_alter) -- [ะŸั€ะพั‡ะธะต ะฒะธะดั‹ ะทะฐะฟั€ะพัะพะฒ](statements/misc.md) diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index c337b64f1d6..11ec72596c4 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -128,7 +128,7 @@ COMMENT COLUMN [IF EXISTS] name 'Text comment' ะšะฐะถะดั‹ะน ัั‚ะพะปะฑะตั† ะผะพะถะตั‚ ัะพะดะตั€ะถะฐั‚ัŒ ั‚ะพะปัŒะบะพ ะพะดะธะฝ ะบะพะผะผะตะฝั‚ะฐั€ะธะน. ะŸั€ะธ ะฒั‹ะฟะพะปะฝะตะฝะธะธ ะทะฐะฟั€ะพัะฐ ััƒั‰ะตัั‚ะฒัƒัŽั‰ะธะน ะบะพะผะผะตะฝั‚ะฐั€ะธะน ะทะฐะผะตะฝัะตั‚ัั ะฝะฐ ะฝะพะฒั‹ะน. -ะŸะพัะผะพั‚ั€ะตั‚ัŒ ะบะพะผะผะตะฝั‚ะฐั€ะธะธ ะผะพะถะฝะพ ะฒ ัั‚ะพะปะฑั†ะต `comment_expression` ะธะท ะทะฐะฟั€ะพัะฐ [DESCRIBE TABLE](../misc.md#misc-describe-table). 
+ะŸะพัะผะพั‚ั€ะตั‚ัŒ ะบะพะผะผะตะฝั‚ะฐั€ะธะธ ะผะพะถะฝะพ ะฒ ัั‚ะพะปะฑั†ะต `comment_expression` ะธะท ะทะฐะฟั€ะพัะฐ [DESCRIBE TABLE](../describe-table.md). ะŸั€ะธะผะตั€: @@ -254,7 +254,7 @@ SELECT groupArray(x), groupArray(s) FROM tmp; ะžั‚ััƒั‚ัั‚ะฒัƒะตั‚ ะฒะพะทะผะพะถะฝะพัั‚ัŒ ัƒะดะฐะปัั‚ัŒ ัั‚ะพะปะฑั†ั‹, ะฒั…ะพะดัั‰ะธะต ะฒ ะฟะตั€ะฒะธั‡ะฝั‹ะน ะบะปัŽั‡ ะธะปะธ ะบะปัŽั‡ ะดะปั ััะผะฟะปะธั€ะพะฒะฐะฝะธั (ะฒ ะพะฑั‰ะตะผ, ะฒั…ะพะดัั‰ะธะต ะฒ ะฒั‹ั€ะฐะถะตะฝะธะต `ENGINE`). ะ˜ะทะผะตะฝะตะฝะธะต ั‚ะธะฟะฐ ัƒ ัั‚ะพะปะฑั†ะพะฒ, ะฒั…ะพะดัั‰ะธั… ะฒ ะฟะตั€ะฒะธั‡ะฝั‹ะน ะบะปัŽั‡ ะฒะพะทะผะพะถะฝะพ ั‚ะพะปัŒะบะพ ะฒ ั‚ะพะผ ัะปัƒั‡ะฐะต, ะตัะปะธ ัั‚ะพ ะธะทะผะตะฝะตะฝะธะต ะฝะต ะฟั€ะธะฒะพะดะธั‚ ะบ ะธะทะผะตะฝะตะฝะธัŽ ะดะฐะฝะฝั‹ั… (ะฝะฐะฟั€ะธะผะตั€, ั€ะฐะทั€ะตัˆะตะฝะพ ะดะพะฑะฐะฒะปะตะฝะธะต ะทะฝะฐั‡ะตะฝะธั ะฒ Enum ะธะปะธ ะธะทะผะตะฝะตะฝะธะต ั‚ะธะฟะฐ ั `DateTime` ะฝะฐ `UInt32`). -ะ•ัะปะธ ะฒะพะทะผะพะถะฝะพัั‚ะตะน ะทะฐะฟั€ะพัะฐ `ALTER` ะฝะต ั…ะฒะฐั‚ะฐะตั‚ ะดะปั ะฝัƒะถะฝะพะณะพ ะธะทะผะตะฝะตะฝะธั ั‚ะฐะฑะปะธั†ั‹, ะฒั‹ ะผะพะถะตั‚ะต ัะพะทะดะฐั‚ัŒ ะฝะพะฒัƒัŽ ั‚ะฐะฑะปะธั†ัƒ, ัะบะพะฟะธั€ะพะฒะฐั‚ัŒ ั‚ัƒะดะฐ ะดะฐะฝะฝั‹ะต ั ะฟะพะผะพั‰ัŒัŽ ะทะฐะฟั€ะพัะฐ [INSERT SELECT](../insert-into.md#insert_query_insert-select), ะทะฐั‚ะตะผ ะฟะพะผะตะฝัั‚ัŒ ั‚ะฐะฑะปะธั†ั‹ ะผะตัั‚ะฐะผะธ ั ะฟะพะผะพั‰ัŒัŽ ะทะฐะฟั€ะพัะฐ [RENAME](../misc.md#misc_operations-rename), ะธ ัƒะดะฐะปะธั‚ัŒ ัั‚ะฐั€ัƒัŽ ั‚ะฐะฑะปะธั†ัƒ. ะ’ ะบะฐั‡ะตัั‚ะฒะต ะฐะปัŒั‚ะตั€ะฝะฐั‚ะธะฒั‹ ะดะปั ะทะฐะฟั€ะพัะฐ `INSERT SELECT`, ะผะพะถะฝะพ ะธัะฟะพะปัŒะทะพะฒะฐั‚ัŒ ะธะฝัั‚ั€ัƒะผะตะฝั‚ [clickhouse-copier](../../../sql-reference/statements/alter/index.md). 
+ะ•ัะปะธ ะฒะพะทะผะพะถะฝะพัั‚ะตะน ะทะฐะฟั€ะพัะฐ `ALTER` ะฝะต ั…ะฒะฐั‚ะฐะตั‚ ะดะปั ะฝัƒะถะฝะพะณะพ ะธะทะผะตะฝะตะฝะธั ั‚ะฐะฑะปะธั†ั‹, ะฒั‹ ะผะพะถะตั‚ะต ัะพะทะดะฐั‚ัŒ ะฝะพะฒัƒัŽ ั‚ะฐะฑะปะธั†ัƒ, ัะบะพะฟะธั€ะพะฒะฐั‚ัŒ ั‚ัƒะดะฐ ะดะฐะฝะฝั‹ะต ั ะฟะพะผะพั‰ัŒัŽ ะทะฐะฟั€ะพัะฐ [INSERT SELECT](../insert-into.md#insert_query_insert-select), ะทะฐั‚ะตะผ ะฟะพะผะตะฝัั‚ัŒ ั‚ะฐะฑะปะธั†ั‹ ะผะตัั‚ะฐะผะธ ั ะฟะพะผะพั‰ัŒัŽ ะทะฐะฟั€ะพัะฐ [RENAME](../rename.md#rename-table), ะธ ัƒะดะฐะปะธั‚ัŒ ัั‚ะฐั€ัƒัŽ ั‚ะฐะฑะปะธั†ัƒ. ะ’ ะบะฐั‡ะตัั‚ะฒะต ะฐะปัŒั‚ะตั€ะฝะฐั‚ะธะฒั‹ ะดะปั ะทะฐะฟั€ะพัะฐ `INSERT SELECT`, ะผะพะถะฝะพ ะธัะฟะพะปัŒะทะพะฒะฐั‚ัŒ ะธะฝัั‚ั€ัƒะผะตะฝั‚ [clickhouse-copier](../../../sql-reference/statements/alter/index.md). ะ—ะฐะฟั€ะพั `ALTER` ะฑะปะพะบะธั€ัƒะตั‚ ะฒัะต ั‡ั‚ะตะฝะธั ะธ ะทะฐะฟะธัะธ ะดะปั ั‚ะฐะฑะปะธั†ั‹. ะขะพ ะตัั‚ัŒ ะตัะปะธ ะฝะฐ ะผะพะผะตะฝั‚ ะทะฐะฟั€ะพัะฐ `ALTER` ะฒั‹ะฟะพะปะฝัะปัั ะดะพะปะณะธะน `SELECT`, ั‚ะพ ะทะฐะฟั€ะพั `ALTER` ัะฝะฐั‡ะฐะปะฐ ะดะพะถะดั‘ั‚ัั ะตะณะพ ะฒั‹ะฟะพะปะฝะตะฝะธั. ะ˜ ะฒ ัั‚ะพ ะฒั€ะตะผั ะฒัะต ะฝะพะฒั‹ะต ะทะฐะฟั€ะพัั‹ ะบ ั‚ะพะน ะถะต ั‚ะฐะฑะปะธั†ะต ะฑัƒะดัƒั‚ ะถะดะฐั‚ัŒ, ะฟะพะบะฐ ะทะฐะฒะตั€ัˆะธั‚ัั ัั‚ะพั‚ `ALTER`. diff --git a/docs/ru/sql-reference/statements/create/role.md b/docs/ru/sql-reference/statements/create/role.md index bd1141be4c5..1aa222d4de1 100644 --- a/docs/ru/sql-reference/statements/create/role.md +++ b/docs/ru/sql-reference/statements/create/role.md @@ -17,13 +17,13 @@ CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, nam ## ะฃะฟั€ะฐะฒะปะตะฝะธะต ั€ะพะปัะผะธ {#managing-roles} -ะžะดะฝะพะผัƒ ะฟะพะปัŒะทะพะฒะฐั‚ะตะปัŽ ะผะพะถะฝะพ ะฝะฐะทะฝะฐั‡ะธั‚ัŒ ะฝะตัะบะพะปัŒะบะพ ั€ะพะปะตะน. ะŸะพะปัŒะทะพะฒะฐั‚ะตะปะธ ะผะพะณัƒั‚ ะฟั€ะธะผะตะฝัั‚ัŒ ะฝะฐะทะฝะฐั‡ะตะฝะฝั‹ะต ั€ะพะปะธ ะฒ ะฟั€ะพะธะทะฒะพะปัŒะฝั‹ั… ะบะพะผะฑะธะฝะฐั†ะธัั… ั ะฟะพะผะพั‰ัŒัŽ ะฒั‹ั€ะฐะถะตะฝะธั [SET ROLE](../misc.md#set-role-statement). 
ะšะพะฝะตั‡ะฝั‹ะน ะพะฑัŠะตะผ ะฟั€ะธะฒะธะปะตะณะธะน โ€” ัั‚ะพ ะบะพะผะฑะธะฝะฐั†ะธั ะฒัะตั… ะฟั€ะธะฒะธะปะตะณะธะน ะฒัะตั… ะฟั€ะธะผะตะฝะตะฝะฝั‹ั… ั€ะพะปะตะน. ะ•ัะปะธ ัƒ ะฟะพะปัŒะทะพะฒะฐั‚ะตะปั ะธะผะตัŽั‚ัั ะฟั€ะธะฒะธะปะตะณะธะธ, ะฟั€ะธัะฒะพะตะฝะฝั‹ะต ะตะณะพ ะฐะบะบะฐัƒะฝั‚ัƒ ะฝะฐะฟั€ัะผัƒัŽ, ะพะฝะธ ั‚ะฐะบะถะต ะฟั€ะธะฑะฐะฒะปััŽั‚ัั ะบ ะฟั€ะธะฒะธะปะตะณะธัะผ, ะฟั€ะธัะฒะพะตะฝะฝั‹ะผ ั‡ะตั€ะตะท ั€ะพะปะธ. +ะžะดะฝะพะผัƒ ะฟะพะปัŒะทะพะฒะฐั‚ะตะปัŽ ะผะพะถะฝะพ ะฝะฐะทะฝะฐั‡ะธั‚ัŒ ะฝะตัะบะพะปัŒะบะพ ั€ะพะปะตะน. ะŸะพะปัŒะทะพะฒะฐั‚ะตะปะธ ะผะพะณัƒั‚ ะฟั€ะธะผะตะฝัั‚ัŒ ะฝะฐะทะฝะฐั‡ะตะฝะฝั‹ะต ั€ะพะปะธ ะฒ ะฟั€ะพะธะทะฒะพะปัŒะฝั‹ั… ะบะพะผะฑะธะฝะฐั†ะธัั… ั ะฟะพะผะพั‰ัŒัŽ ะฒั‹ั€ะฐะถะตะฝะธั [SET ROLE](../set-role.md). ะšะพะฝะตั‡ะฝั‹ะน ะพะฑัŠะตะผ ะฟั€ะธะฒะธะปะตะณะธะน โ€” ัั‚ะพ ะบะพะผะฑะธะฝะฐั†ะธั ะฒัะตั… ะฟั€ะธะฒะธะปะตะณะธะน ะฒัะตั… ะฟั€ะธะผะตะฝะตะฝะฝั‹ั… ั€ะพะปะตะน. ะ•ัะปะธ ัƒ ะฟะพะปัŒะทะพะฒะฐั‚ะตะปั ะธะผะตัŽั‚ัั ะฟั€ะธะฒะธะปะตะณะธะธ, ะฟั€ะธัะฒะพะตะฝะฝั‹ะต ะตะณะพ ะฐะบะบะฐัƒะฝั‚ัƒ ะฝะฐะฟั€ัะผัƒัŽ, ะพะฝะธ ั‚ะฐะบะถะต ะฟั€ะธะฑะฐะฒะปััŽั‚ัั ะบ ะฟั€ะธะฒะธะปะตะณะธัะผ, ะฟั€ะธัะฒะพะตะฝะฝั‹ะผ ั‡ะตั€ะตะท ั€ะพะปะธ. -ะ ะพะปะธ ะฟะพ ัƒะผะพะปั‡ะฐะฝะธัŽ ะฟั€ะธะผะตะฝััŽั‚ัั ะฟั€ะธ ะฒั…ะพะดะต ะฟะพะปัŒะทะพะฒะฐั‚ะตะปั ะฒ ัะธัั‚ะตะผัƒ. ะฃัั‚ะฐะฝะพะฒะธั‚ัŒ ั€ะพะปะธ ะฟะพ ัƒะผะพะปั‡ะฐะฝะธัŽ ะผะพะถะฝะพ ั ะฟะพะผะพั‰ัŒัŽ ะฒั‹ั€ะฐะถะตะฝะธะน [SET DEFAULT ROLE](../misc.md#set-default-role-statement) ะธะปะธ [ALTER USER](../alter/index.md#alter-user-statement). +ะ ะพะปะธ ะฟะพ ัƒะผะพะปั‡ะฐะฝะธัŽ ะฟั€ะธะผะตะฝััŽั‚ัั ะฟั€ะธ ะฒั…ะพะดะต ะฟะพะปัŒะทะพะฒะฐั‚ะตะปั ะฒ ัะธัั‚ะตะผัƒ. ะฃัั‚ะฐะฝะพะฒะธั‚ัŒ ั€ะพะปะธ ะฟะพ ัƒะผะพะปั‡ะฐะฝะธัŽ ะผะพะถะฝะพ ั ะฟะพะผะพั‰ัŒัŽ ะฒั‹ั€ะฐะถะตะฝะธะน [SET DEFAULT ROLE](../set-role.md#set-default-role) ะธะปะธ [ALTER USER](../alter/index.md#alter-user-statement). ะ”ะปั ะพั‚ะทั‹ะฒะฐ ั€ะพะปะธ ะธัะฟะพะปัŒะทัƒะตั‚ัั ะฒั‹ั€ะฐะถะตะฝะธะต [REVOKE](../../../sql-reference/statements/revoke.md). 
-ะ”ะปั ัƒะดะฐะปะตะฝะธั ั€ะพะปะธ ะธัะฟะพะปัŒะทัƒะตั‚ัั ะฒั‹ั€ะฐะถะตะฝะธะต [DROP ROLE](../misc.md#drop-role-statement). ะฃะดะฐะปะตะฝะฝะฐั ั€ะพะปัŒ ะฐะฒั‚ะพะผะฐั‚ะธั‡ะตัะบะธ ะพั‚ะทั‹ะฒะฐะตั‚ัั ัƒ ะฒัะตั… ะฟะพะปัŒะทะพะฒะฐั‚ะตะปะตะน, ะบะพั‚ะพั€ั‹ะผ ะฑั‹ะปะฐ ะฝะฐะทะฝะฐั‡ะตะฝะฐ. +ะ”ะปั ัƒะดะฐะปะตะฝะธั ั€ะพะปะธ ะธัะฟะพะปัŒะทัƒะตั‚ัั ะฒั‹ั€ะฐะถะตะฝะธะต [DROP ROLE](../drop.md#drop-role). ะฃะดะฐะปะตะฝะฝะฐั ั€ะพะปัŒ ะฐะฒั‚ะพะผะฐั‚ะธั‡ะตัะบะธ ะพั‚ะทั‹ะฒะฐะตั‚ัั ัƒ ะฒัะตั… ะฟะพะปัŒะทะพะฒะฐั‚ะตะปะตะน, ะบะพั‚ะพั€ั‹ะผ ะฑั‹ะปะฐ ะฝะฐะทะฝะฐั‡ะตะฝะฐ. ## ะŸั€ะธะผะตั€ั‹ {#create-role-examples} diff --git a/docs/ru/sql-reference/statements/grant.md b/docs/ru/sql-reference/statements/grant.md index 79e3006d4ad..7c281634c98 100644 --- a/docs/ru/sql-reference/statements/grant.md +++ b/docs/ru/sql-reference/statements/grant.md @@ -221,7 +221,7 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION ะžั‚ััƒั‚ัั‚ะฒะธะต ะฟั€ะธะฒะธะปะตะณะธะน ัƒ ะฟะพะปัŒะทะพะฒะฐั‚ะตะปั ะธะปะธ ั€ะพะปะธ ะพั‚ะพะฑั€ะฐะถะฐะตั‚ัั ะบะฐะบ ะฟั€ะธะฒะธะปะตะณะธั [NONE](#grant-none). -ะ’ั‹ะฟะพะปะฝะตะฝะธะต ะฝะตะบะพั‚ะพั€ั‹ั… ะทะฐะฟั€ะพัะพะฒ ั‚ั€ะตะฑัƒะตั‚ ะพะฟั€ะตะดะตะปะตะฝะฝะพะณะพ ะฝะฐะฑะพั€ะฐ ะฟั€ะธะฒะธะปะตะณะธะน. ะะฐะฟั€ะธะผะตั€, ั‡ั‚ะพะฑั‹ ะฒั‹ะฟะพะปะฝะธั‚ัŒ ะทะฐะฟั€ะพั [RENAME](misc.md#misc_operations-rename), ะฝัƒะถะฝั‹ ัะปะตะดัƒัŽั‰ะธะต ะฟั€ะธะฒะธะปะตะณะธะธ: `SELECT`, `CREATE TABLE`, `INSERT` ะธ `DROP TABLE`. +ะ’ั‹ะฟะพะปะฝะตะฝะธะต ะฝะตะบะพั‚ะพั€ั‹ั… ะทะฐะฟั€ะพัะพะฒ ั‚ั€ะตะฑัƒะตั‚ ะพะฟั€ะตะดะตะปะตะฝะฝะพะณะพ ะฝะฐะฑะพั€ะฐ ะฟั€ะธะฒะธะปะตะณะธะน. ะะฐะฟั€ะธะผะตั€, ั‡ั‚ะพะฑั‹ ะฒั‹ะฟะพะปะฝะธั‚ัŒ ะทะฐะฟั€ะพั [RENAME](rename.md#rename-table), ะฝัƒะถะฝั‹ ัะปะตะดัƒัŽั‰ะธะต ะฟั€ะธะฒะธะปะตะณะธะธ: `SELECT`, `CREATE TABLE`, `INSERT` ะธ `DROP TABLE`. 
### SELECT {#grant-select} @@ -309,7 +309,7 @@ GRANT INSERT(x,y) ON db.table TO john ### CREATE {#grant-create} -ะ ะฐะทั€ะตัˆะฐะตั‚ ะฒั‹ะฟะพะปะฝัั‚ัŒ DDL-ะทะฐะฟั€ะพัั‹ [CREATE](../../sql-reference/statements/create/index.md) ะธ [ATTACH](misc.md#attach) ะฒ ัะพะพั‚ะฒะตั‚ัั‚ะฒะธะธ ัะพ ัะปะตะดัƒัŽั‰ะตะน ะธะตั€ะฐั€ั…ะธะตะน ะฟั€ะธะฒะธะปะตะณะธะน: +ะ ะฐะทั€ะตัˆะฐะตั‚ ะฒั‹ะฟะพะปะฝัั‚ัŒ DDL-ะทะฐะฟั€ะพัั‹ [CREATE](../../sql-reference/statements/create/index.md) ะธ [ATTACH](attach.md) ะฒ ัะพะพั‚ะฒะตั‚ัั‚ะฒะธะธ ัะพ ัะปะตะดัƒัŽั‰ะตะน ะธะตั€ะฐั€ั…ะธะตะน ะฟั€ะธะฒะธะปะตะณะธะน: - `CREATE`. ะฃั€ะพะฒะตะฝัŒ: `GROUP` - `CREATE DATABASE`. ะฃั€ะพะฒะตะฝัŒ: `DATABASE` @@ -324,7 +324,7 @@ GRANT INSERT(x,y) ON db.table TO john ### DROP {#grant-drop} -ะ ะฐะทั€ะตัˆะฐะตั‚ ะฒั‹ะฟะพะปะฝัั‚ัŒ ะทะฐะฟั€ะพัั‹ [DROP](misc.md#drop) ะธ [DETACH](misc.md#detach-statement) ะฒ ัะพะพั‚ะฒะตั‚ัั‚ะฒะธะธ ัะพ ัะปะตะดัƒัŽั‰ะตะน ะธะตั€ะฐั€ั…ะธะตะน ะฟั€ะธะฒะธะปะตะณะธะน: +ะ ะฐะทั€ะตัˆะฐะตั‚ ะฒั‹ะฟะพะปะฝัั‚ัŒ ะทะฐะฟั€ะพัั‹ [DROP](drop.md) ะธ [DETACH](detach.md) ะฒ ัะพะพั‚ะฒะตั‚ัั‚ะฒะธะธ ัะพ ัะปะตะดัƒัŽั‰ะตะน ะธะตั€ะฐั€ั…ะธะตะน ะฟั€ะธะฒะธะปะตะณะธะน: - `DROP`. ะฃั€ะพะฒะตะฝัŒ: `GROUP` - `DROP DATABASE`. ะฃั€ะพะฒะตะฝัŒ: `DATABASE` @@ -340,7 +340,7 @@ GRANT INSERT(x,y) ON db.table TO john ### OPTIMIZE {#grant-optimize} -ะ ะฐะทั€ะตัˆะฐะตั‚ ะฒั‹ะฟะพะปะฝัั‚ัŒ ะทะฐะฟั€ะพัั‹ [OPTIMIZE TABLE](misc.md#misc_operations-optimize). +ะ ะฐะทั€ะตัˆะฐะตั‚ ะฒั‹ะฟะพะปะฝัั‚ัŒ ะทะฐะฟั€ะพัั‹ [OPTIMIZE TABLE](optimize.md). ะฃั€ะพะฒะตะฝัŒ: `TABLE`. 
From 72c52e6b07eb55a3149dbf3e326d8115e2481f17 Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Fri, 7 Oct 2022 15:28:17 +0200 Subject: [PATCH 197/266] Update README.md Add 22.10, add Cloud webinar, remove 22.9 and completed US meeetup --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c851fca27fc..9f4a39a2c97 100644 --- a/README.md +++ b/README.md @@ -16,5 +16,5 @@ ClickHouseยฎ is an open-source column-oriented database management system that a * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. ## Upcoming events -* [**v22.9 Release Webinar**](https://clickhouse.com/company/events/v22-9-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap. -* [**ClickHouse for Analytics @ Barracuda Networks**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/288140358/) Join us for this in person meetup hosted by our friends at Barracuda in Bay Area. +* [**v22.10 Release Webinar**](https://clickhouse.com/company/events/v22-10-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap. +* [**Introducing ClickHouse Cloud**](https://clickhouse.com/company/events/cloud-beta) Introducing ClickHouse as a service, built by creators and maintainers of the fastest OLAP database on earth. Join Tanya Bragin for a detailed walkthrough of ClickHouse Cloud capabilities, as well as a peek behind the curtain to understand the unique architecture that makes our service tick. 
From b8d906600483fac2422d6387f2293c50cec057e7 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Oct 2022 15:53:30 +0200 Subject: [PATCH 198/266] Revert "Resurrect parallel distributed insert select with s3Cluster (#41535)" This reverts commit 860e34e76007976f5f1ad0702ab7f7a972800b1f. --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Storages/HDFS/StorageHDFSCluster.cpp | 26 +-- src/Storages/HDFS/StorageHDFSCluster.h | 6 +- src/Storages/IStorageCluster.h | 29 --- src/Storages/StorageDistributed.cpp | 181 ++++----------- src/Storages/StorageDistributed.h | 4 - src/Storages/StorageReplicatedMergeTree.cpp | 105 --------- src/Storages/StorageReplicatedMergeTree.h | 5 - src/Storages/StorageS3Cluster.cpp | 25 +-- src/Storages/StorageS3Cluster.h | 7 +- .../test_s3_cluster/configs/cluster.xml | 17 +- tests/integration/test_s3_cluster/test.py | 211 +++--------------- 12 files changed, 90 insertions(+), 528 deletions(-) delete mode 100644 src/Storages/IStorageCluster.h diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 841b0d946cb..6e4efdc5167 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -331,7 +331,7 @@ BlockIO InterpreterInsertQuery::execute() if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); - if (query.select && settings.parallel_distributed_insert_select) + if (query.select && table->isRemote() && settings.parallel_distributed_insert_select) // Distributed INSERT SELECT distributed_pipeline = table->distributedWrite(query, getContext()); diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 16074026bc0..467203c58f6 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -1,5 +1,4 @@ #include "config.h" -#include "Interpreters/Context_fwd.h" #if USE_HDFS @@ -42,7 
+41,7 @@ StorageHDFSCluster::StorageHDFSCluster( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & compression_method_) - : IStorageCluster(table_id_) + : IStorage(table_id_) , cluster_name(cluster_name_) , uri(uri_) , format_name(format_name_) @@ -75,8 +74,13 @@ Pipe StorageHDFSCluster::read( size_t /*max_block_size*/, unsigned /*num_streams*/) { - auto cluster = getCluster(context); - auto extension = getTaskIteratorExtension(query_info.query, context); + auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); + + auto iterator = std::make_shared(context, uri); + auto callback = std::make_shared([iterator]() mutable -> String + { + return iterator->next(); + }); /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*) Block header = @@ -113,7 +117,7 @@ Pipe StorageHDFSCluster::read( scalars, Tables(), processed_stage, - extension); + RemoteQueryExecutor::Extension{.task_iterator = callback}); pipes.emplace_back(std::make_shared(remote_query_executor, add_agg_info, false)); } @@ -136,18 +140,6 @@ QueryProcessingStage::Enum StorageHDFSCluster::getQueryProcessingStage( } -ClusterPtr StorageHDFSCluster::getCluster(ContextPtr context) const -{ - return context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); -} - -RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(ASTPtr, ContextPtr context) const -{ - auto iterator = std::make_shared(context, uri); - auto callback = std::make_shared([iter = std::move(iterator)]() mutable -> String { return iter->next(); }); - return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; -} - NamesAndTypesList StorageHDFSCluster::getVirtuals() const { return NamesAndTypesList{ diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 
db83762e7f4..3239a1e4076 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -9,7 +9,6 @@ #include #include -#include #include namespace DB @@ -17,7 +16,7 @@ namespace DB class Context; -class StorageHDFSCluster : public IStorageCluster +class StorageHDFSCluster : public IStorage { public: StorageHDFSCluster( @@ -40,9 +39,6 @@ public: NamesAndTypesList getVirtuals() const override; - ClusterPtr getCluster(ContextPtr context) const override; - RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, ContextPtr context) const override; - private: String cluster_name; String uri; diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h deleted file mode 100644 index 35d297428ba..00000000000 --- a/src/Storages/IStorageCluster.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace DB -{ - - -/** - * Base cluster for Storages used in table functions like s3Cluster and hdfsCluster - * Needed for code simplification around parallel_distributed_insert_select - */ -class IStorageCluster : public IStorage -{ -public: - - explicit IStorageCluster(const StorageID & table_id_) : IStorage(table_id_) {} - - virtual ClusterPtr getCluster(ContextPtr context) const = 0; - /// Query is needed for pruning by virtual columns (_file, _path) - virtual RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, ContextPtr context) const = 0; - - bool isRemote() const override { return true; } -}; - - -} diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index d7af9790a85..c39f235c46c 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -59,8 +59,6 @@ #include #include -#include - #include #include #include @@ -761,35 +759,55 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata } -std::optional 
StorageDistributed::distributedWriteBetweenDistributedTables(const StorageDistributed & src_distributed, const ASTInsertQuery & query, ContextPtr local_context) const +std::optional StorageDistributed::distributedWrite(const ASTInsertQuery & query, ContextPtr local_context) { - const auto & settings = local_context->getSettingsRef(); + QueryPipeline pipeline; + + const Settings & settings = local_context->getSettingsRef(); + if (settings.max_distributed_depth && local_context->getClientInfo().distributed_depth >= settings.max_distributed_depth) + throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); + + std::shared_ptr storage_src; + auto & select = query.select->as(); auto new_query = std::dynamic_pointer_cast(query.clone()); - - /// Unwrap view() function. - if (src_distributed.remote_table_function_ptr) + if (select.list_of_selects->children.size() == 1) { - const TableFunctionPtr src_table_function = - TableFunctionFactory::instance().get(src_distributed.remote_table_function_ptr, local_context); - const TableFunctionView * view_function = - assert_cast(src_table_function.get()); - new_query->select = view_function->getSelectQuery().clone(); - } - else - { - const auto select_with_union_query = std::make_shared(); - select_with_union_query->list_of_selects = std::make_shared(); + if (auto * select_query = select.list_of_selects->children.at(0)->as()) + { + JoinedTables joined_tables(Context::createCopy(local_context), *select_query); - auto * select = query.select->as().list_of_selects->children.at(0)->as(); - auto new_select_query = std::dynamic_pointer_cast(select->clone()); - select_with_union_query->list_of_selects->children.push_back(new_select_query); + if (joined_tables.tablesCount() == 1) + { + storage_src = std::dynamic_pointer_cast(joined_tables.getLeftTableStorage()); + if (storage_src) + { + /// Unwrap view() function. 
+ if (storage_src->remote_table_function_ptr) + { + const TableFunctionPtr src_table_function = + TableFunctionFactory::instance().get(storage_src->remote_table_function_ptr, local_context); + const TableFunctionView * view_function = + assert_cast(src_table_function.get()); + new_query->select = view_function->getSelectQuery().clone(); + } + else + { + const auto select_with_union_query = std::make_shared(); + select_with_union_query->list_of_selects = std::make_shared(); - new_select_query->replaceDatabaseAndTable(src_distributed.getRemoteDatabaseName(), src_distributed.getRemoteTableName()); + auto new_select_query = std::dynamic_pointer_cast(select_query->clone()); + select_with_union_query->list_of_selects->children.push_back(new_select_query); - new_query->select = select_with_union_query; + new_select_query->replaceDatabaseAndTable(storage_src->getRemoteDatabaseName(), storage_src->getRemoteTableName()); + + new_query->select = select_with_union_query; + } + } + } + } } - const Cluster::AddressesWithFailover & src_addresses = src_distributed.getCluster()->getShardsAddresses(); + const Cluster::AddressesWithFailover & src_addresses = storage_src ? storage_src->getCluster()->getShardsAddresses() : Cluster::AddressesWithFailover{}; const Cluster::AddressesWithFailover & dst_addresses = getCluster()->getShardsAddresses(); /// Compare addresses instead of cluster name, to handle remote()/cluster(). /// (since for remote()/cluster() the getClusterName() is empty string) @@ -804,7 +822,7 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu LOG_WARNING(log, "Parallel distributed INSERT SELECT is not possible " "(source cluster={} ({} addresses), destination cluster={} ({} addresses))", - src_distributed.getClusterName(), + storage_src ? 
storage_src->getClusterName() : "", src_addresses.size(), getClusterName(), dst_addresses.size()); @@ -831,7 +849,6 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu new_query_str = buf.str(); } - QueryPipeline pipeline; ContextMutablePtr query_context = Context::createCopy(local_context); ++query_context->getClientInfo().distributed_depth; @@ -865,120 +882,6 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu } -std::optional StorageDistributed::distributedWriteFromClusterStorage(const IStorageCluster & src_storage_cluster, const ASTInsertQuery & query, ContextPtr local_context) const -{ - const auto & settings = local_context->getSettingsRef(); - auto & select = query.select->as(); - /// Select query is needed for pruining on virtual columns - auto extension = src_storage_cluster.getTaskIteratorExtension( - select.list_of_selects->children.at(0)->as()->clone(), - local_context); - - auto dst_cluster = getCluster(); - - auto new_query = std::dynamic_pointer_cast(query.clone()); - if (settings.parallel_distributed_insert_select == PARALLEL_DISTRIBUTED_INSERT_SELECT_ALL) - { - new_query->table_id = StorageID(getRemoteDatabaseName(), getRemoteTableName()); - /// Reset table function for INSERT INTO remote()/cluster() - new_query->table_function.reset(); - } - - String new_query_str; - { - WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.always_quote_identifiers = true; - new_query->IAST::format(ast_format_settings); - new_query_str = buf.str(); - } - - QueryPipeline pipeline; - ContextMutablePtr query_context = Context::createCopy(local_context); - ++query_context->getClientInfo().distributed_depth; - - /// Here we take addresses from destination cluster and assume source table exists on these nodes - for (const auto & replicas : getCluster()->getShardsAddresses()) - { - /// There will be only one replica, because we consider each replica as a shard - for (const 
auto & node : replicas) - { - auto connection = std::make_shared( - node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(), - node.user, node.password, node.quota_key, node.cluster, node.cluster_secret, - "ParallelInsertSelectInititiator", - node.compression, - node.secure - ); - - auto remote_query_executor = std::make_shared( - connection, - new_query_str, - Block{}, - query_context, - /*throttler=*/nullptr, - Scalars{}, - Tables{}, - QueryProcessingStage::Complete, - extension); - - QueryPipeline remote_pipeline(std::make_shared(remote_query_executor, false, settings.async_socket_for_remote)); - remote_pipeline.complete(std::make_shared(remote_query_executor->getHeader())); - - pipeline.addCompletedPipeline(std::move(remote_pipeline)); - } - } - - return pipeline; -} - - -std::optional StorageDistributed::distributedWrite(const ASTInsertQuery & query, ContextPtr local_context) -{ - const Settings & settings = local_context->getSettingsRef(); - if (settings.max_distributed_depth && local_context->getClientInfo().distributed_depth >= settings.max_distributed_depth) - throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); - - auto & select = query.select->as(); - - StoragePtr src_storage; - - /// Distributed write only works in the most trivial case INSERT ... 
SELECT - /// without any unions or joins on the right side - if (select.list_of_selects->children.size() == 1) - { - if (auto * select_query = select.list_of_selects->children.at(0)->as()) - { - JoinedTables joined_tables(Context::createCopy(local_context), *select_query); - - if (joined_tables.tablesCount() == 1) - { - src_storage = joined_tables.getLeftTableStorage(); - } - } - } - - if (!src_storage) - return {}; - - if (auto src_distributed = std::dynamic_pointer_cast(src_storage)) - { - return distributedWriteBetweenDistributedTables(*src_distributed, query, local_context); - } - if (auto src_storage_cluster = std::dynamic_pointer_cast(src_storage)) - { - return distributedWriteFromClusterStorage(*src_storage_cluster, query, local_context); - } - if (local_context->getClientInfo().distributed_depth == 0) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parallel distributed INSERT SELECT is not possible. "\ - "Reason: distributed reading is supported only from Distributed engine or *Cluster table functions, but got {} storage", src_storage->getName()); - } - - return {}; -} - - void StorageDistributed::checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const { auto name_deps = getDependentViewsByColumn(local_context); diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 3161f4b50f6..7cb25ae46ab 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -208,9 +207,6 @@ private: void delayInsertOrThrowIfNeeded() const; - std::optional distributedWriteFromClusterStorage(const IStorageCluster & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context) const; - std::optional distributedWriteBetweenDistributedTables(const StorageDistributed & src_distributed, const ASTInsertQuery & query, ContextPtr context) const; - String remote_database; String remote_table; ASTPtr 
remote_table_function_ptr; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7ff9d73c0fc..f8112910dea 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -47,13 +47,11 @@ #include #include -#include #include #include #include #include #include -#include #include #include #include @@ -63,7 +61,6 @@ #include #include #include -#include #include #include @@ -78,7 +75,6 @@ #include #include #include -#include #include #include @@ -167,7 +163,6 @@ namespace ErrorCodes extern const int CONCURRENT_ACCESS_NOT_SUPPORTED; extern const int CHECKSUM_DOESNT_MATCH; extern const int NOT_INITIALIZED; - extern const int TOO_LARGE_DISTRIBUTED_DEPTH; } namespace ActionLocks @@ -4458,106 +4453,6 @@ SinkToStoragePtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, con } -std::optional StorageReplicatedMergeTree::distributedWriteFromClusterStorage(const std::shared_ptr & src_storage_cluster, const ASTInsertQuery & query, ContextPtr local_context) -{ - const auto & settings = local_context->getSettingsRef(); - auto extension = src_storage_cluster->getTaskIteratorExtension(nullptr, local_context); - - /// Here we won't check that the cluster formed from table replicas is a subset of a cluster specified in s3Cluster/hdfsCluster table function - auto src_cluster = src_storage_cluster->getCluster(local_context); - - /// Actually the query doesn't change, we just serialize it to string - String query_str; - { - WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.always_quote_identifiers = true; - query.IAST::format(ast_format_settings); - query_str = buf.str(); - } - - QueryPipeline pipeline; - ContextMutablePtr query_context = Context::createCopy(local_context); - ++query_context->getClientInfo().distributed_depth; - - for (const auto & replicas : src_cluster->getShardsAddresses()) - { - /// There 
will be only one replica, because we consider each replica as a shard - for (const auto & node : replicas) - { - auto connection = std::make_shared( - node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(), - node.user, node.password, node.quota_key, node.cluster, node.cluster_secret, - "ParallelInsertSelectInititiator", - node.compression, - node.secure - ); - - auto remote_query_executor = std::make_shared( - connection, - query_str, - Block{}, - query_context, - /*throttler=*/nullptr, - Scalars{}, - Tables{}, - QueryProcessingStage::Complete, - extension); - - QueryPipeline remote_pipeline(std::make_shared(remote_query_executor, false, settings.async_socket_for_remote)); - remote_pipeline.complete(std::make_shared(remote_query_executor->getHeader())); - - pipeline.addCompletedPipeline(std::move(remote_pipeline)); - } - } - - return pipeline; -} - -std::optional StorageReplicatedMergeTree::distributedWrite(const ASTInsertQuery & query, ContextPtr local_context) -{ - /// Do not enable parallel distributed INSERT SELECT in case when query probably comes from another server - if (local_context->getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY) - return {}; - - const Settings & settings = local_context->getSettingsRef(); - if (settings.max_distributed_depth && local_context->getClientInfo().distributed_depth >= settings.max_distributed_depth) - throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); - - auto & select = query.select->as(); - - StoragePtr src_storage; - - if (select.list_of_selects->children.size() == 1) - { - if (auto * select_query = select.list_of_selects->children.at(0)->as()) - { - JoinedTables joined_tables(Context::createCopy(local_context), *select_query); - - if (joined_tables.tablesCount() == 1) - { - src_storage = joined_tables.getLeftTableStorage(); - } - } - } - - if (!src_storage) - return {}; - - if (auto src_distributed = 
std::dynamic_pointer_cast(src_storage)) - { - return distributedWriteFromClusterStorage(src_distributed, query, local_context); - } - else if (local_context->getClientInfo().distributed_depth == 0) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parallel distributed INSERT SELECT is not possible. Reason: distributed " - "reading into Replicated table is supported only from *Cluster table functions, but got {} storage", src_storage->getName()); - } - - return {}; -} - - bool StorageReplicatedMergeTree::optimize( const ASTPtr &, const StorageMetadataPtr &, diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index c06c67a5154..e10ffcce22c 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -140,8 +139,6 @@ public: SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; - std::optional distributedWrite(const ASTInsertQuery & /*query*/, ContextPtr /*context*/) override; - bool optimize( const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, @@ -486,8 +483,6 @@ private: std::mutex last_broken_disks_mutex; std::set last_broken_disks; - static std::optional distributedWriteFromClusterStorage(const std::shared_ptr & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context); - template void foreachActiveParts(Func && func, bool select_sequential_consistency) const; diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 0e6aa8fcfb8..df927069bb0 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -51,7 +51,7 @@ StorageS3Cluster::StorageS3Cluster( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, ContextPtr context_) - : IStorageCluster(table_id_) + : IStorage(table_id_) , 
s3_configuration{configuration_.url, configuration_.auth_settings, configuration_.rw_settings, configuration_.headers} , filename(configuration_.url) , cluster_name(configuration_.cluster_name) @@ -101,8 +101,11 @@ Pipe StorageS3Cluster::read( { StorageS3::updateS3Configuration(context, s3_configuration); - auto cluster = getCluster(context); - auto extension = getTaskIteratorExtension(query_info.query, context); + auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); + + auto iterator = std::make_shared( + *s3_configuration.client, s3_configuration.uri, query_info.query, virtual_block, context); + auto callback = std::make_shared([iterator]() mutable -> String { return iterator->next(); }); /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*) Block header = @@ -127,6 +130,7 @@ Pipe StorageS3Cluster::read( node.secure ); + /// For unknown reason global context is passed to IStorage::read() method /// So, task_identifier is passed as constructor argument. It is more obvious. 
auto remote_query_executor = std::make_shared( @@ -138,7 +142,7 @@ Pipe StorageS3Cluster::read( scalars, Tables(), processed_stage, - extension); + RemoteQueryExecutor::Extension{.task_iterator = callback}); pipes.emplace_back(std::make_shared(remote_query_executor, add_agg_info, false)); } @@ -161,19 +165,6 @@ QueryProcessingStage::Enum StorageS3Cluster::getQueryProcessingStage( } -ClusterPtr StorageS3Cluster::getCluster(ContextPtr context) const -{ - return context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); -} - -RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, ContextPtr context) const -{ - auto iterator = std::make_shared( - *s3_configuration.client, s3_configuration.uri, query, virtual_block, context); - auto callback = std::make_shared([iter = std::move(iterator)]() mutable -> String { return iter->next(); }); - return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; -} - NamesAndTypesList StorageS3Cluster::getVirtuals() const { return virtual_columns; diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index 0e6fd7ef66a..d2cf1b917a1 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -10,7 +10,6 @@ #include "Client/Connection.h" #include #include -#include #include namespace DB @@ -18,7 +17,7 @@ namespace DB class Context; -class StorageS3Cluster : public IStorageCluster +class StorageS3Cluster : public IStorage { public: StorageS3Cluster( @@ -38,11 +37,9 @@ public: NamesAndTypesList getVirtuals() const override; - RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, ContextPtr context) const override; - ClusterPtr getCluster(ContextPtr context) const override; - private: StorageS3::S3Configuration s3_configuration; + String filename; String cluster_name; String format_name; diff --git a/tests/integration/test_s3_cluster/configs/cluster.xml 
b/tests/integration/test_s3_cluster/configs/cluster.xml index 39275e99abd..18f15763633 100644 --- a/tests/integration/test_s3_cluster/configs/cluster.xml +++ b/tests/integration/test_s3_cluster/configs/cluster.xml @@ -20,23 +20,8 @@ - - - - - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - cluster_simple - + \ No newline at end of file diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index f1251719faf..2cbb36fcf06 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -1,9 +1,5 @@ -from email.errors import HeaderParseError import logging import os -import csv -import shutil -import time import pytest from helpers.cluster import ClickHouseCluster @@ -23,21 +19,6 @@ S3_DATA = [ def create_buckets_s3(cluster): minio = cluster.minio_client - - for file_number in range(100): - file_name = f"data/generated/file_{file_number}.csv" - os.makedirs(os.path.join(SCRIPT_DIR, "data/generated/"), exist_ok=True) - S3_DATA.append(file_name) - with open(os.path.join(SCRIPT_DIR, file_name), "w+", encoding="utf-8") as f: - # a String, b UInt64 - data = [] - - for number in range(100): - data.append([str(number) * 10, number]) - - writer = csv.writer(f) - writer.writerows(data) - for file in S3_DATA: minio.fput_object( bucket_name=cluster.minio_bucket, @@ -53,24 +34,10 @@ def started_cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance( - "s0_0_0", - main_configs=["configs/cluster.xml"], - macros={"replica": "node1", "shard": "shard1"}, - with_minio=True, - with_zookeeper=True, - ) - cluster.add_instance( - "s0_0_1", - main_configs=["configs/cluster.xml"], - macros={"replica": "replica2", "shard": "shard1"}, - with_zookeeper=True, - ) - cluster.add_instance( - "s0_1_0", - main_configs=["configs/cluster.xml"], - macros={"replica": "replica1", "shard": "shard2"}, - with_zookeeper=True, + "s0_0_0", main_configs=["configs/cluster.xml"], with_minio=True ) + 
cluster.add_instance("s0_0_1", main_configs=["configs/cluster.xml"]) + cluster.add_instance("s0_1_0", main_configs=["configs/cluster.xml"]) logging.info("Starting cluster...") cluster.start() @@ -80,7 +47,6 @@ def started_cluster(): yield cluster finally: - shutil.rmtree(os.path.join(SCRIPT_DIR, "data/generated/")) cluster.shutdown() @@ -89,17 +55,17 @@ def test_select_all(started_cluster): pure_s3 = node.query( """ SELECT * from s3( - 'http://minio1:9001/root/data/{clickhouse,database}/*', - 'minio', 'minio123', 'CSV', - 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon)""" ) # print(pure_s3) s3_distibuted = node.query( """ SELECT * from s3Cluster( - 'cluster_simple', - 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', + 'cluster_simple', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon)""" ) # print(s3_distibuted) @@ -112,15 +78,15 @@ def test_count(started_cluster): pure_s3 = node.query( """ SELECT count(*) from s3( - 'http://minio1:9001/root/data/{clickhouse,database}/*', - 'minio', 'minio123', 'CSV', + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')""" ) # print(pure_s3) s3_distibuted = node.query( """ SELECT count(*) from s3Cluster( - 'cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')""" ) @@ -159,13 +125,13 @@ def test_union_all(started_cluster): SELECT 
* FROM ( SELECT * from s3( - 'http://minio1:9001/root/data/{clickhouse,database}/*', - 'minio', 'minio123', 'CSV', - 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') UNION ALL SELECT * from s3( - 'http://minio1:9001/root/data/{clickhouse,database}/*', - 'minio', 'minio123', 'CSV', + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ) ORDER BY (name, value, polygon) @@ -177,13 +143,13 @@ def test_union_all(started_cluster): SELECT * FROM ( SELECT * from s3Cluster( - 'cluster_simple', - 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', + 'cluster_simple', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') UNION ALL SELECT * from s3Cluster( - 'cluster_simple', - 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', + 'cluster_simple', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ) ORDER BY (name, value, polygon) @@ -200,12 +166,12 @@ def test_wrong_cluster(started_cluster): """ SELECT count(*) from s3Cluster( 'non_existent_cluster', - 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') UNION ALL SELECT count(*) from s3Cluster( 'non_existent_cluster', - 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, 
polygon Array(Array(Tuple(Float64, Float64)))') """ ) @@ -218,139 +184,14 @@ def test_ambiguous_join(started_cluster): result = node.query( """ SELECT l.name, r.value from s3Cluster( - 'cluster_simple', - 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', + 'cluster_simple', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') as l JOIN s3Cluster( - 'cluster_simple', - 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', + 'cluster_simple', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') as r ON l.name = r.name """ ) assert "AMBIGUOUS_COLUMN_NAME" not in result - - -def test_distributed_insert_select(started_cluster): - first_replica_first_shard = started_cluster.instances["s0_0_0"] - second_replica_first_shard = started_cluster.instances["s0_0_1"] - first_replica_second_shard = started_cluster.instances["s0_1_0"] - - first_replica_first_shard.query( - """DROP TABLE IF EXISTS insert_select_local ON CLUSTER 'cluster_simple';""" - ) - first_replica_first_shard.query( - """DROP TABLE IF EXISTS insert_select_distributed ON CLUSTER 'cluster_simple';""" - ) - - first_replica_first_shard.query( - """ - CREATE TABLE insert_select_local ON CLUSTER 'cluster_simple' (a String, b UInt64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/insert_select', '{replica}') - ORDER BY (a, b); - """ - ) - - first_replica_first_shard.query( - """ - CREATE TABLE insert_select_distributed ON CLUSTER 'cluster_simple' as insert_select_local - ENGINE = Distributed('cluster_simple', default, insert_select_local, b % 2); - """ - ) - - first_replica_first_shard.query( - """ - INSERT INTO insert_select_distributed SETTINGS insert_distributed_sync=1 SELECT * FROM s3Cluster( - 'cluster_simple', - 
'http://minio1:9001/root/data/generated/*.csv', 'minio', 'minio123', 'CSV','a String, b UInt64' - ) SETTINGS parallel_distributed_insert_select=1, insert_distributed_sync=1; - """ - ) - - for line in ( - first_replica_first_shard.query("""SELECT * FROM insert_select_local;""") - .strip() - .split("\n") - ): - _, b = line.split() - assert int(b) % 2 == 0 - - for line in ( - second_replica_first_shard.query("""SELECT * FROM insert_select_local;""") - .strip() - .split("\n") - ): - _, b = line.split() - assert int(b) % 2 == 0 - - for line in ( - first_replica_second_shard.query("""SELECT * FROM insert_select_local;""") - .strip() - .split("\n") - ): - _, b = line.split() - assert int(b) % 2 == 1 - - first_replica_first_shard.query( - """DROP TABLE IF EXISTS insert_select_local ON CLUSTER 'cluster_simple';""" - ) - first_replica_first_shard.query( - """DROP TABLE IF EXISTS insert_select_distributed ON CLUSTER 'cluster_simple';""" - ) - - -def test_distributed_insert_select_with_replicated(started_cluster): - first_replica_first_shard = started_cluster.instances["s0_0_0"] - second_replica_first_shard = started_cluster.instances["s0_0_1"] - - first_replica_first_shard.query( - """DROP TABLE IF EXISTS insert_select_replicated_local ON CLUSTER 'first_shard';""" - ) - - first_replica_first_shard.query( - """ - CREATE TABLE insert_select_replicated_local ON CLUSTER 'first_shard' (a String, b UInt64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{shard}/insert_select_with_replicated', '{replica}') - ORDER BY (a, b); - """ - ) - - for replica in [first_replica_first_shard, second_replica_first_shard]: - replica.query( - """ - SYSTEM STOP FETCHES; - """ - ) - replica.query( - """ - SYSTEM STOP MERGES; - """ - ) - - first_replica_first_shard.query( - """ - INSERT INTO insert_select_replicated_local SELECT * FROM s3Cluster( - 'first_shard', - 'http://minio1:9001/root/data/generated_replicated/*.csv', 'minio', 'minio123', 'CSV','a String, b UInt64' - ) SETTINGS 
parallel_distributed_insert_select=1; - """ - ) - - for replica in [first_replica_first_shard, second_replica_first_shard]: - replica.query( - """ - SYSTEM FLUSH LOGS; - """ - ) - - second = int( - second_replica_first_shard.query( - """SELECT count(*) FROM system.query_log WHERE not is_initial_query and query like '%s3Cluster%';""" - ).strip() - ) - - assert second != 0 - - first_replica_first_shard.query( - """DROP TABLE IF EXISTS insert_select_replicated_local ON CLUSTER 'first_shard';""" - ) From 3e3c0a7bb668a398e93ea573c4961f546cfd49c8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 7 Oct 2022 14:41:00 +0000 Subject: [PATCH 199/266] Docs: Add "TABLE" to CHECK/DESCRIBE statements in sidebar --- docs/en/sql-reference/statements/check-table.md | 2 +- docs/en/sql-reference/statements/describe-table.md | 2 +- docs/ru/sql-reference/statements/check-table.md | 2 +- docs/ru/sql-reference/statements/describe-table.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/statements/check-table.md b/docs/en/sql-reference/statements/check-table.md index 1a2ec69e4f9..f9b428b74a1 100644 --- a/docs/en/sql-reference/statements/check-table.md +++ b/docs/en/sql-reference/statements/check-table.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/statements/check-table sidebar_position: 41 -sidebar_label: CHECK +sidebar_label: CHECK TABLE title: "CHECK TABLE Statement" --- diff --git a/docs/en/sql-reference/statements/describe-table.md b/docs/en/sql-reference/statements/describe-table.md index ff3361caadb..4864743abbc 100644 --- a/docs/en/sql-reference/statements/describe-table.md +++ b/docs/en/sql-reference/statements/describe-table.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/statements/describe-table sidebar_position: 42 -sidebar_label: DESCRIBE +sidebar_label: DESCRIBE TABLE title: "DESCRIBE TABLE" --- diff --git a/docs/ru/sql-reference/statements/check-table.md b/docs/ru/sql-reference/statements/check-table.md index 
77d246b631e..633c3899006 100644 --- a/docs/ru/sql-reference/statements/check-table.md +++ b/docs/ru/sql-reference/statements/check-table.md @@ -1,7 +1,7 @@ --- slug: /ru/sql-reference/statements/check-table sidebar_position: 41 -sidebar_label: CHECK +sidebar_label: CHECK TABLE --- # CHECK TABLE Statement {#check-table} diff --git a/docs/ru/sql-reference/statements/describe-table.md b/docs/ru/sql-reference/statements/describe-table.md index 73b4278352a..14f97af1dd5 100644 --- a/docs/ru/sql-reference/statements/describe-table.md +++ b/docs/ru/sql-reference/statements/describe-table.md @@ -1,7 +1,7 @@ --- slug: /ru/sql-reference/statements/describe-table sidebar_position: 42 -sidebar_label: DESCRIBE +sidebar_label: DESCRIBE TABLE --- # DESCRIBE TABLE {#misc-describe-table} From a1ff83589456c9020966cc3c458cb313f9bf6490 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Oct 2022 19:52:34 +0200 Subject: [PATCH 200/266] fix --- src/Interpreters/DatabaseCatalog.cpp | 2 +- src/Interpreters/DatabaseCatalog.h | 2 +- .../0_stateless/01018_ddl_dictionaries_create.reference | 4 ++-- .../0_stateless/01192_rename_database_zookeeper.reference | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 3a1d0d4d071..2b6e6c83ff7 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1136,7 +1136,7 @@ void DatabaseCatalog::checkTableCanBeRemovedOrRenamed(const StorageID & table_id checkTableCanBeRemovedOrRenamedImpl(dependent, removing_table, is_drop_database); } -void DatabaseCatalog::checkTableCanBeRemovedOrRenamedImpl(const TableNamesSet & dependent, const QualifiedTableName & removing_table, bool is_drop_database) const +void DatabaseCatalog::checkTableCanBeRemovedOrRenamedImpl(const TableNamesSet & dependent, const QualifiedTableName & removing_table, bool is_drop_database) { if (!is_drop_database) { diff --git 
a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 9fab2ee09f2..a44099b9fdc 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -245,7 +245,7 @@ private: void shutdownImpl(); - void checkTableCanBeRemovedOrRenamedImpl(const TableNamesSet & dependent, const QualifiedTableName & removing_table, bool is_drop_database) const; + static void checkTableCanBeRemovedOrRenamedImpl(const TableNamesSet & dependent, const QualifiedTableName & removing_table, bool is_drop_database); struct UUIDToStorageMapPart { diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference b/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference index a4e2f380eb8..9b130f11df6 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference @@ -1,5 +1,5 @@ =DICTIONARY in Ordinary DB -CREATE DICTIONARY db_01018.dict1\n(\n `key_column` UInt64 DEFAULT 0,\n `second_column` UInt8 DEFAULT 1,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY db_01018.dict1\n(\n `key_column` UInt64 DEFAULT 0,\n `second_column` UInt8 DEFAULT 1,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) dict1 1 db_01018 dict1 @@ -12,7 +12,7 @@ db_01018 dict1 ==DROP DICTIONARY 0 =DICTIONARY in Memory DB -CREATE DICTIONARY memory_db.dict2\n(\n `key_column` UInt64 DEFAULT 0 INJECTIVE,\n `second_column` UInt8 DEFAULT 1 EXPRESSION rand() % 222,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 
tcpPort() USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY memory_db.dict2\n(\n `key_column` UInt64 DEFAULT 0 INJECTIVE,\n `second_column` UInt8 DEFAULT 1 EXPRESSION rand() % 222,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) dict2 1 memory_db dict2 diff --git a/tests/queries/0_stateless/01192_rename_database_zookeeper.reference b/tests/queries/0_stateless/01192_rename_database_zookeeper.reference index 5b430f0a5b1..13f2a780e0b 100644 --- a/tests/queries/0_stateless/01192_rename_database_zookeeper.reference +++ b/tests/queries/0_stateless/01192_rename_database_zookeeper.reference @@ -14,7 +14,7 @@ renamed 10 45 10 45 ok -CREATE DICTIONARY test_01192_atomic.dict UUID \'00001192-0000-4000-8000-000000000002\'\n(\n `n` UInt64,\n `_part` String DEFAULT \'no\'\n)\nPRIMARY KEY n\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'mt\' DB \'test_01192\'))\nLAYOUT(DIRECT()) +CREATE DICTIONARY test_01192_atomic.dict UUID \'00001192-0000-4000-8000-000000000002\'\n(\n `n` UInt64,\n `_part` String DEFAULT \'no\'\n)\nPRIMARY KEY n\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'mt\' DB \'test_01192\'))\nLAYOUT(DIRECT()) test_01192_atomic dict NOT_LOADED 00001192-0000-4000-8000-000000000002 no ok From 1a1a20587ec2e2cf606585410992e59dea69f8aa Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Oct 2022 21:26:09 +0300 Subject: [PATCH 201/266] Update defines.h --- base/base/defines.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/base/defines.h b/base/base/defines.h index c9b6cfed4a4..24e6f9e9eaa 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -142,7 +142,7 @@ # define 
TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function /// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function) -/// They use a lambda function to apply function attribute to a single statement. This enable us to supress warnings locally instead of +/// They use a lambda function to apply function attribute to a single statement. This enable us to suppress warnings locally instead of /// suppressing them in the whole function /// Consider adding a comment when using these macros. # define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()) From 4175f8cde6883d7480a8820fe698abcdc4b75b43 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Oct 2022 21:20:14 +0200 Subject: [PATCH 202/266] abort instead of __builtin_unreachable in debug builds --- base/base/defines.h | 4 ++++ base/base/safeExit.cpp | 2 +- programs/main.cpp | 2 +- src/Access/AccessEntityIO.cpp | 2 +- src/Access/AccessRights.cpp | 2 +- src/Access/IAccessStorage.cpp | 6 +++--- .../AggregateFunctionGroupArray.h | 2 +- .../AggregateFunctionSequenceNextNode.h | 2 +- .../AggregateFunctionStatisticsSimple.h | 2 +- src/AggregateFunctions/AggregateFunctionSum.h | 2 +- .../AggregateFunctionUniqCombined.cpp | 2 +- src/Common/DateLUTImpl.cpp | 2 +- src/Common/HashTable/Hash.h | 8 +++----- src/Common/HashTable/HashTable.h | 4 ++-- src/Common/IntervalKind.cpp | 16 ++++++++-------- src/Common/TargetSpecific.cpp | 3 ++- src/Common/ThreadProfileEvents.cpp | 2 +- src/Common/ZooKeeper/IKeeper.cpp | 2 +- .../gtest_thread_pool_schedule_exception.cpp | 2 +- src/Compression/CompressionCodecDeflateQpl.cpp | 2 +- src/Compression/CompressionCodecDoubleDelta.cpp | 2 +- src/Core/AccurateComparison.h | 2 +- src/Core/Field.cpp | 2 +- src/Core/Field.h | 2 +- src/DataTypes/Serializations/ISerialization.cpp | 2 +- src/Databases/DatabaseReplicated.cpp | 3 +-- 
src/Dictionaries/RedisDictionarySource.cpp | 2 +- src/Disks/DiskType.h | 3 ++- src/Disks/IO/CachedOnDiskReadBufferFromFile.h | 2 +- .../MetadataFromDiskTransactionState.cpp | 3 ++- src/Disks/VolumeJBOD.cpp | 4 ++-- src/Formats/EscapingRuleUtils.cpp | 4 ++-- src/Formats/ProtobufSerializer.cpp | 2 +- src/Functions/FunctionsConversion.h | 2 +- src/Functions/FunctionsRound.h | 8 ++++---- src/Functions/FunctionsTimeWindow.cpp | 6 +++--- src/Functions/PolygonUtils.h | 2 +- src/Functions/toStartOfInterval.cpp | 2 +- src/IO/CompressionMethod.cpp | 2 +- src/IO/DoubleConverter.h | 1 + src/IO/HadoopSnappyReadBuffer.h | 2 +- src/Interpreters/Aggregator.cpp | 2 +- src/Interpreters/Aggregator.h | 8 ++++---- src/Interpreters/Cache/FileSegment.cpp | 2 +- src/Interpreters/ComparisonGraph.cpp | 2 +- src/Interpreters/FilesystemCacheLog.cpp | 2 +- src/Interpreters/HashJoin.cpp | 4 ++-- src/Interpreters/HashJoin.h | 6 +++--- .../InterpreterTransactionControlQuery.cpp | 3 +-- src/Interpreters/RowRefs.cpp | 2 +- src/Interpreters/SetVariants.cpp | 4 ++-- src/Parsers/ASTAlterQuery.cpp | 2 +- src/Parsers/ASTExplainQuery.h | 2 +- src/Parsers/Lexer.cpp | 5 +++-- .../Formats/Impl/MsgPackRowInputFormat.cpp | 2 +- src/Processors/IProcessor.cpp | 2 +- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 6 +++--- src/Processors/QueryPlan/TotalsHavingStep.cpp | 2 +- src/Processors/Transforms/FillingTransform.cpp | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 2 +- src/Server/GRPCServer.cpp | 4 ++-- src/Server/HTTPHandler.cpp | 3 +-- src/Storages/ColumnsDescription.cpp | 2 +- .../MergeTree/BackgroundJobsAssignee.cpp | 2 +- src/Storages/MergeTree/KeyCondition.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +- .../ParallelReplicasReadingCoordinator.cpp | 2 +- .../PartMovesBetweenShardsOrchestrator.cpp | 2 +- .../ReplicatedMergeTreeRestartingThread.cpp | 2 +- src/Storages/StorageDistributed.cpp | 2 +- 
src/Storages/WindowView/StorageWindowView.cpp | 6 +++--- utils/check-style/check-style | 5 +++++ 73 files changed, 116 insertions(+), 107 deletions(-) diff --git a/base/base/defines.h b/base/base/defines.h index 671253ed9e8..9f14d5413d9 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -123,11 +123,15 @@ /// - tries to print failed assertion into server log /// It can be used for all assertions except heavy ones. /// Heavy assertions (that run loops or call complex functions) are allowed in debug builds only. +/// Also it makes sense to call abort() instead of __builtin_unreachable() in debug builds, +/// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy) #if !defined(chassert) #if defined(ABORT_ON_LOGICAL_ERROR) #define chassert(x) static_cast(x) ? void(0) : abortOnFailedAssertion(#x) + #define UNREACHABLE() abort() #else #define chassert(x) ((void)0) + #define UNREACHABLE() __builtin_unreachable() #endif #endif diff --git a/base/base/safeExit.cpp b/base/base/safeExit.cpp index 4ccfee80643..44d92643e91 100644 --- a/base/base/safeExit.cpp +++ b/base/base/safeExit.cpp @@ -11,7 +11,7 @@ /// Thread sanitizer tries to do something on exit that we don't need if we want to exit immediately, /// while connection handling threads are still run. 
(void)syscall(SYS_exit_group, code); - __builtin_unreachable(); + UNREACHABLE(); #else _exit(code); #endif diff --git a/programs/main.cpp b/programs/main.cpp index e1947652f97..f40bafc7027 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -219,7 +219,7 @@ auto instructionFailToString(InstructionFail fail) case InstructionFail::AVX512: ret("AVX512"); } - __builtin_unreachable(); + UNREACHABLE(); } diff --git a/src/Access/AccessEntityIO.cpp b/src/Access/AccessEntityIO.cpp index c0dbcb5c6bf..5b362829629 100644 --- a/src/Access/AccessEntityIO.cpp +++ b/src/Access/AccessEntityIO.cpp @@ -145,7 +145,7 @@ AccessEntityPtr deserializeAccessEntity(const String & definition, const String { e.addMessage("Could not parse " + file_path); e.rethrow(); - __builtin_unreachable(); + UNREACHABLE(); } } diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index 20afc916901..8482a2ded8a 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -209,7 +209,7 @@ namespace case TABLE_LEVEL: return AccessFlags::allFlagsGrantableOnTableLevel(); case COLUMN_LEVEL: return AccessFlags::allFlagsGrantableOnColumnLevel(); } - __builtin_unreachable(); + UNREACHABLE(); } } diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index f562a6ebeec..e5031139e7b 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -215,7 +215,7 @@ std::vector IAccessStorage::insert(const std::vector & mu e.addMessage("After successfully inserting {}/{}: {}", successfully_inserted.size(), multiple_entities.size(), successfully_inserted_str); } e.rethrow(); - __builtin_unreachable(); + UNREACHABLE(); } } @@ -319,7 +319,7 @@ std::vector IAccessStorage::remove(const std::vector & ids, bool thr e.addMessage("After successfully removing {}/{}: {}", removed_names.size(), ids.size(), removed_names_str); } e.rethrow(); - __builtin_unreachable(); + UNREACHABLE(); } } @@ -416,7 +416,7 @@ std::vector IAccessStorage::update(const 
std::vector & ids, const Up e.addMessage("After successfully updating {}/{}: {}", names_of_updated.size(), ids.size(), names_of_updated_str); } e.rethrow(); - __builtin_unreachable(); + UNREACHABLE(); } } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index 116a998ccc3..6888c113556 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -56,7 +56,7 @@ static constexpr const char * getNameByTrait() return "groupArraySample"; // else if (Trait::sampler == Sampler::DETERMINATOR) // TODO - __builtin_unreachable(); + UNREACHABLE(); } template diff --git a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h index e761adf8f74..423b53b03f3 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h @@ -395,7 +395,7 @@ public: break; return (i == events_size) ? 
base - i : unmatched_idx; } - __builtin_unreachable(); + UNREACHABLE(); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h index 5c35bec503c..d57b043b491 100644 --- a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h +++ b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h @@ -114,7 +114,7 @@ public: return "covarSamp"; if constexpr (StatFunc::kind == StatisticsFunctionKind::corr) return "corr"; - __builtin_unreachable(); + UNREACHABLE(); } DataTypePtr getReturnType() const override diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 8e24b288fff..01704714395 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -407,7 +407,7 @@ public: return "sumWithOverflow"; else if constexpr (Type == AggregateFunctionTypeSumKahan) return "sumKahan"; - __builtin_unreachable(); + UNREACHABLE(); } explicit AggregateFunctionSum(const DataTypes & argument_types_) diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp index 2b7e0d97372..5652a6a2b44 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -131,7 +131,7 @@ namespace return createAggregateFunctionWithHashType<20>(use_64_bit_hash, argument_types, params); } - __builtin_unreachable(); + UNREACHABLE(); } } diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp index 31290c53b49..c4b32a3466b 100644 --- a/src/Common/DateLUTImpl.cpp +++ b/src/Common/DateLUTImpl.cpp @@ -30,7 +30,7 @@ UInt8 getDayOfWeek(const cctz::civil_day & date) case cctz::weekday::saturday: return 6; case cctz::weekday::sunday: return 7; } - __builtin_unreachable(); + 
UNREACHABLE(); } } diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h index 9825c8f6519..4d798173698 100644 --- a/src/Common/HashTable/Hash.h +++ b/src/Common/HashTable/Hash.h @@ -119,7 +119,7 @@ inline UInt32 updateWeakHash32(const DB::UInt8 * pos, size_t size, DB::UInt32 up __builtin_memcpy(&value, pos, 7); break; default: - __builtin_unreachable(); + UNREACHABLE(); } reinterpret_cast(&value)[7] = size; @@ -194,8 +194,7 @@ inline size_t DefaultHash64(T key) static_cast(key >> 128) ^ static_cast(key >> 256)); } - assert(false); - __builtin_unreachable(); + UNREACHABLE(); } template @@ -454,8 +453,7 @@ struct IntHash32 return intHash32(u.out); } - assert(false); - __builtin_unreachable(); + UNREACHABLE(); } }; diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h index 6fa002139df..7aa375cfa79 100644 --- a/src/Common/HashTable/HashTable.h +++ b/src/Common/HashTable/HashTable.h @@ -353,11 +353,11 @@ struct HashTableFixedGrower size_t bufSize() const { return 1ULL << key_bits; } size_t place(size_t x) const { return x; } - /// You could write __builtin_unreachable(), but the compiler does not optimize everything, and it turns out less efficiently. + /// You could write UNREACHABLE(), but the compiler does not optimize everything, and it turns out less efficiently. size_t next(size_t pos) const { return pos + 1; } bool overflow(size_t /*elems*/) const { return false; } - void increaseSize() { __builtin_unreachable(); } + void increaseSize() { UNREACHABLE(); } void set(size_t /*num_elems*/) {} void setBufSize(size_t /*buf_size_*/) {} }; diff --git a/src/Common/IntervalKind.cpp b/src/Common/IntervalKind.cpp index 4e923fdba55..f66ba4b086c 100644 --- a/src/Common/IntervalKind.cpp +++ b/src/Common/IntervalKind.cpp @@ -26,7 +26,7 @@ Int32 IntervalKind::toAvgSeconds() const case IntervalKind::Quarter: return 7889238; /// Exactly 1/4 of a year. 
case IntervalKind::Year: return 31556952; /// The average length of a Gregorian year is equal to 365.2425 days } - __builtin_unreachable(); + UNREACHABLE(); } Float64 IntervalKind::toSeconds() const @@ -52,7 +52,7 @@ Float64 IntervalKind::toSeconds() const default: throw Exception("Not possible to get precise number of seconds in non-precise interval", ErrorCodes::BAD_ARGUMENTS); } - __builtin_unreachable(); + UNREACHABLE(); } bool IntervalKind::isFixedLength() const @@ -71,7 +71,7 @@ bool IntervalKind::isFixedLength() const case IntervalKind::Quarter: case IntervalKind::Year: return false; } - __builtin_unreachable(); + UNREACHABLE(); } IntervalKind IntervalKind::fromAvgSeconds(Int64 num_seconds) @@ -113,7 +113,7 @@ const char * IntervalKind::toKeyword() const case IntervalKind::Quarter: return "QUARTER"; case IntervalKind::Year: return "YEAR"; } - __builtin_unreachable(); + UNREACHABLE(); } @@ -133,7 +133,7 @@ const char * IntervalKind::toLowercasedKeyword() const case IntervalKind::Quarter: return "quarter"; case IntervalKind::Year: return "year"; } - __builtin_unreachable(); + UNREACHABLE(); } @@ -164,7 +164,7 @@ const char * IntervalKind::toDateDiffUnit() const case IntervalKind::Year: return "year"; } - __builtin_unreachable(); + UNREACHABLE(); } @@ -195,7 +195,7 @@ const char * IntervalKind::toNameOfFunctionToIntervalDataType() const case IntervalKind::Year: return "toIntervalYear"; } - __builtin_unreachable(); + UNREACHABLE(); } @@ -229,7 +229,7 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const case IntervalKind::Year: return "toYear"; } - __builtin_unreachable(); + UNREACHABLE(); } diff --git a/src/Common/TargetSpecific.cpp b/src/Common/TargetSpecific.cpp index 70b03833775..9a445ea0fc1 100644 --- a/src/Common/TargetSpecific.cpp +++ b/src/Common/TargetSpecific.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -42,7 +43,7 @@ String toString(TargetArch arch) case TargetArch::AVX512VBMI: return "avx512vbmi"; } - __builtin_unreachable(); + 
UNREACHABLE(); } } diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index 82b03f6ed91..baa77468a13 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -76,7 +76,7 @@ const char * TasksStatsCounters::metricsProviderString(MetricsProvider provider) case MetricsProvider::Netlink: return "netlink"; } - __builtin_unreachable(); + UNREACHABLE(); } bool TasksStatsCounters::checkIfAvailable() diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp index 23d29ed3019..f0a07241735 100644 --- a/src/Common/ZooKeeper/IKeeper.cpp +++ b/src/Common/ZooKeeper/IKeeper.cpp @@ -112,7 +112,7 @@ const char * errorMessage(Error code) case Error::ZSESSIONMOVED: return "Session moved to another server, so operation is ignored"; } - __builtin_unreachable(); + UNREACHABLE(); } bool isHardwareError(Error zk_return_code) diff --git a/src/Common/tests/gtest_thread_pool_schedule_exception.cpp b/src/Common/tests/gtest_thread_pool_schedule_exception.cpp index 95b983cc3bb..69362c34cd2 100644 --- a/src/Common/tests/gtest_thread_pool_schedule_exception.cpp +++ b/src/Common/tests/gtest_thread_pool_schedule_exception.cpp @@ -36,7 +36,7 @@ static bool check() return true; } - __builtin_unreachable(); + UNREACHABLE(); } diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp index 9e165a9c913..87c0ead0795 100644 --- a/src/Compression/CompressionCodecDeflateQpl.cpp +++ b/src/Compression/CompressionCodecDeflateQpl.cpp @@ -393,7 +393,7 @@ void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 so sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); return; } - __builtin_unreachable(); + UNREACHABLE(); } void CompressionCodecDeflateQpl::flushAsynchronousDecompressRequests() diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index 017c82701f5..816f242672a 
100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -164,7 +164,7 @@ inline Int64 getMaxValueForByteSize(Int8 byte_size) default: assert(false && "only 1, 2, 4 and 8 data sizes are supported"); } - __builtin_unreachable(); + UNREACHABLE(); } struct WriteSpec diff --git a/src/Core/AccurateComparison.h b/src/Core/AccurateComparison.h index dfd305830c3..a201c136e3a 100644 --- a/src/Core/AccurateComparison.h +++ b/src/Core/AccurateComparison.h @@ -67,7 +67,7 @@ bool lessOp(A a, B b) static_assert(is_integer || std::is_floating_point_v); static_assert(is_integer || std::is_floating_point_v); - __builtin_unreachable(); + UNREACHABLE(); } template diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index acdfca7a7b2..71a6d27e5b4 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -492,7 +492,7 @@ Field Field::restoreFromDump(std::string_view dump_) } show_error(); - __builtin_unreachable(); + UNREACHABLE(); } diff --git a/src/Core/Field.h b/src/Core/Field.h index 8732e8de2a9..c3516b705a6 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -604,7 +604,7 @@ public: case Types::AggregateFunctionState: return f(field.template get()); } - __builtin_unreachable(); + UNREACHABLE(); } String dump() const; diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index da0142a5d57..782b890841a 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -36,7 +36,7 @@ String ISerialization::kindToString(Kind kind) case Kind::SPARSE: return "Sparse"; } - __builtin_unreachable(); + UNREACHABLE(); } ISerialization::Kind ISerialization::stringToKind(const String & str) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index db27b4fa975..4b7599affb0 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -357,8 +357,7 @@ 
bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperP /// Other codes are unexpected, will throw zkutil::KeeperMultiException::check(res, ops, responses); - chassert(false); - __builtin_unreachable(); + UNREACHABLE(); } bool DatabaseReplicated::looksLikeReplicatedDatabasePath(const ZooKeeperPtr & current_zookeeper, const String & path) diff --git a/src/Dictionaries/RedisDictionarySource.cpp b/src/Dictionaries/RedisDictionarySource.cpp index b7aa72b253a..e597a5e7fb6 100644 --- a/src/Dictionaries/RedisDictionarySource.cpp +++ b/src/Dictionaries/RedisDictionarySource.cpp @@ -124,7 +124,7 @@ namespace DB return "none"; } - __builtin_unreachable(); + UNREACHABLE(); } QueryPipeline RedisDictionarySource::loadAll() diff --git a/src/Disks/DiskType.h b/src/Disks/DiskType.h index 1a5c7312cb3..037b65f8e07 100644 --- a/src/Disks/DiskType.h +++ b/src/Disks/DiskType.h @@ -1,5 +1,6 @@ #pragma once +#include #include namespace DB @@ -32,7 +33,7 @@ inline String toString(DataSourceType data_source_type) case DataSourceType::AzureBlobStorage: return "azure_blob_storage"; } - __builtin_unreachable(); + UNREACHABLE(); } struct DataSourceDescription diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index 535d01f3a8c..14e8ea6c7e7 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -127,7 +127,7 @@ private: case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE: return "REMOTE_FS_READ_AND_PUT_IN_CACHE"; } - __builtin_unreachable(); + UNREACHABLE(); } size_t first_offset = 0; diff --git a/src/Disks/ObjectStorages/MetadataFromDiskTransactionState.cpp b/src/Disks/ObjectStorages/MetadataFromDiskTransactionState.cpp index 1ee87537114..f6915370b10 100644 --- a/src/Disks/ObjectStorages/MetadataFromDiskTransactionState.cpp +++ b/src/Disks/ObjectStorages/MetadataFromDiskTransactionState.cpp @@ -1,3 +1,4 @@ +#include #include namespace DB @@ -16,7 +17,7 @@ 
std::string toString(MetadataFromDiskTransactionState state) case MetadataFromDiskTransactionState::PARTIALLY_ROLLED_BACK: return "PARTIALLY_ROLLED_BACK"; } - __builtin_unreachable(); + UNREACHABLE(); } } diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index 401822fc901..423e1ef1d71 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -97,7 +97,7 @@ DiskPtr VolumeJBOD::getDisk(size_t /* index */) const return disks_by_size.top().disk; } } - __builtin_unreachable(); + UNREACHABLE(); } ReservationPtr VolumeJBOD::reserve(UInt64 bytes) @@ -137,7 +137,7 @@ ReservationPtr VolumeJBOD::reserve(UInt64 bytes) return reservation; } } - __builtin_unreachable(); + UNREACHABLE(); } bool VolumeJBOD::areMergesAvoided() const diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index ef554ecdcdf..f1a97c84fec 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -70,7 +70,7 @@ String escapingRuleToString(FormatSettings::EscapingRule escaping_rule) case FormatSettings::EscapingRule::Raw: return "Raw"; } - __builtin_unreachable(); + UNREACHABLE(); } void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings) @@ -99,7 +99,7 @@ void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule esca readStringInto(out, buf); break; default: - __builtin_unreachable(); + UNREACHABLE(); } } diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index 1c666012df1..567a2a9ee98 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -3535,7 +3535,7 @@ namespace } } - __builtin_unreachable(); + UNREACHABLE(); } } diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index e740380637f..8cbe3b0e532 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2273,7 +2273,7 @@ struct 
ToNumberMonotonicity } } - __builtin_unreachable(); + UNREACHABLE(); } }; diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index b43d6eb0a4e..ccab6e9feca 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -151,7 +151,7 @@ struct IntegerRoundingComputation } } - __builtin_unreachable(); + UNREACHABLE(); } static ALWAYS_INLINE T compute(T x, T scale) @@ -165,7 +165,7 @@ struct IntegerRoundingComputation return computeImpl(x, scale); } - __builtin_unreachable(); + UNREACHABLE(); } static ALWAYS_INLINE void compute(const T * __restrict in, size_t scale, T * __restrict out) requires std::integral @@ -249,7 +249,7 @@ inline float roundWithMode(float x, RoundingMode mode) case RoundingMode::Trunc: return truncf(x); } - __builtin_unreachable(); + UNREACHABLE(); } inline double roundWithMode(double x, RoundingMode mode) @@ -262,7 +262,7 @@ inline double roundWithMode(double x, RoundingMode mode) case RoundingMode::Trunc: return trunc(x); } - __builtin_unreachable(); + UNREACHABLE(); } template diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index 61d4f694a61..286ed4a729d 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -194,7 +194,7 @@ struct TimeWindowImpl default: throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR); } - __builtin_unreachable(); + UNREACHABLE(); } template @@ -397,7 +397,7 @@ struct TimeWindowImpl default: throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR); } - __builtin_unreachable(); + UNREACHABLE(); } template @@ -546,7 +546,7 @@ struct TimeWindowImpl default: throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR); } - __builtin_unreachable(); + UNREACHABLE(); } template diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 0295dac90c5..0ffe05fbffc 100644 --- 
a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -384,7 +384,7 @@ bool PointInPolygonWithGrid::contains(CoordinateType x, Coordina return boost::geometry::within(Point(x, y), polygons[cell.index_of_inner_polygon]); } - __builtin_unreachable(); + UNREACHABLE(); } diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 3190bcfed46..ac648b87448 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -510,7 +510,7 @@ private: return execute(from, time_column, num_units, result_type, time_zone, scale); } - __builtin_unreachable(); + UNREACHABLE(); } template diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index 8046931ba08..e38cd811187 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -52,7 +52,7 @@ std::string toContentEncodingName(CompressionMethod method) case CompressionMethod::None: return ""; } - __builtin_unreachable(); + UNREACHABLE(); } CompressionMethod chooseHTTPCompressionMethod(const std::string & list) diff --git a/src/IO/DoubleConverter.h b/src/IO/DoubleConverter.h index 75429967390..18cbe4e3a1d 100644 --- a/src/IO/DoubleConverter.h +++ b/src/IO/DoubleConverter.h @@ -5,6 +5,7 @@ #pragma clang diagnostic ignored "-Wdouble-promotion" #endif +#include #include #include diff --git a/src/IO/HadoopSnappyReadBuffer.h b/src/IO/HadoopSnappyReadBuffer.h index e23971f75b3..6d1b95f6813 100644 --- a/src/IO/HadoopSnappyReadBuffer.h +++ b/src/IO/HadoopSnappyReadBuffer.h @@ -85,7 +85,7 @@ public: case Status::NEEDS_MORE_INPUT: return "NEEDS_MORE_INPUT"; } - __builtin_unreachable(); + UNREACHABLE(); } explicit HadoopSnappyReadBuffer( diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 3d68351110d..3a838d373e0 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -206,7 +206,7 @@ DB::AggregatedDataVariants::Type convertToTwoLevelTypeIfPossible(DB::AggregatedD 
default: return type; } - __builtin_unreachable(); + UNREACHABLE(); } void initDataVariantsWithSizeHint( diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 0697d67af78..b8aab7a3343 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -688,7 +688,7 @@ struct AggregatedDataVariants : private boost::noncopyable #undef M } - __builtin_unreachable(); + UNREACHABLE(); } /// The size without taking into account the row in which data is written for the calculation of TOTALS. @@ -705,7 +705,7 @@ struct AggregatedDataVariants : private boost::noncopyable #undef M } - __builtin_unreachable(); + UNREACHABLE(); } const char * getMethodName() const @@ -721,7 +721,7 @@ struct AggregatedDataVariants : private boost::noncopyable #undef M } - __builtin_unreachable(); + UNREACHABLE(); } bool isTwoLevel() const @@ -737,7 +737,7 @@ struct AggregatedDataVariants : private boost::noncopyable #undef M } - __builtin_unreachable(); + UNREACHABLE(); } #define APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \ diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index cbfc5e50ae4..cf48c5cd976 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -653,7 +653,7 @@ String FileSegment::stateToString(FileSegment::State state) case FileSegment::State::SKIP_CACHE: return "SKIP_CACHE"; } - __builtin_unreachable(); + UNREACHABLE(); } void FileSegment::assertCorrectness() const diff --git a/src/Interpreters/ComparisonGraph.cpp b/src/Interpreters/ComparisonGraph.cpp index 37d603b4923..aa44a03a0ce 100644 --- a/src/Interpreters/ComparisonGraph.cpp +++ b/src/Interpreters/ComparisonGraph.cpp @@ -156,7 +156,7 @@ ComparisonGraph::CompareResult ComparisonGraph::pathToCompareResult(Path path, b case Path::GREATER: return inverse ? CompareResult::LESS : CompareResult::GREATER; case Path::GREATER_OR_EQUAL: return inverse ? 
CompareResult::LESS_OR_EQUAL : CompareResult::GREATER_OR_EQUAL; } - __builtin_unreachable(); + UNREACHABLE(); } std::optional ComparisonGraph::findPath(size_t start, size_t finish) const diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index ea2aa3c6bea..17f0fda71ec 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -23,7 +23,7 @@ static String typeToString(FilesystemCacheLogElement::CacheType type) case FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE: return "WRITE_THROUGH_CACHE"; } - __builtin_unreachable(); + UNREACHABLE(); } NamesAndTypesList FilesystemCacheLogElement::getNamesAndTypes() diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index aa606ce1ec2..7780b335128 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -624,7 +624,7 @@ namespace APPLY_FOR_JOIN_VARIANTS(M) #undef M } - __builtin_unreachable(); + UNREACHABLE(); } } @@ -1852,7 +1852,7 @@ private: throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type) ; } - __builtin_unreachable(); + UNREACHABLE(); } template diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 50544ae9039..587fed9b4a6 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -279,7 +279,7 @@ public: #undef M } - __builtin_unreachable(); + UNREACHABLE(); } size_t getTotalByteCountImpl(Type which) const @@ -295,7 +295,7 @@ public: #undef M } - __builtin_unreachable(); + UNREACHABLE(); } size_t getBufferSizeInCells(Type which) const @@ -311,7 +311,7 @@ public: #undef M } - __builtin_unreachable(); + UNREACHABLE(); } }; diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp index bdb523de880..a0a82121ba8 100644 --- a/src/Interpreters/InterpreterTransactionControlQuery.cpp +++ 
b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -32,8 +32,7 @@ BlockIO InterpreterTransactionControlQuery::execute() case ASTTransactionControl::SET_SNAPSHOT: return executeSetSnapshot(session_context, tcl.snapshot); } - assert(false); - __builtin_unreachable(); + UNREACHABLE(); } BlockIO InterpreterTransactionControlQuery::executeBegin(ContextMutablePtr session_context) diff --git a/src/Interpreters/RowRefs.cpp b/src/Interpreters/RowRefs.cpp index 09af04bc7e5..68076e1fec2 100644 --- a/src/Interpreters/RowRefs.cpp +++ b/src/Interpreters/RowRefs.cpp @@ -37,7 +37,7 @@ void callWithType(TypeIndex type, F && f) DISPATCH(DateTime64) #undef DISPATCH - __builtin_unreachable(); + UNREACHABLE(); } template diff --git a/src/Interpreters/SetVariants.cpp b/src/Interpreters/SetVariants.cpp index 1cc0f767ecc..f1fdc6c4095 100644 --- a/src/Interpreters/SetVariants.cpp +++ b/src/Interpreters/SetVariants.cpp @@ -42,7 +42,7 @@ size_t SetVariantsTemplate::getTotalRowCount() const #undef M } - __builtin_unreachable(); + UNREACHABLE(); } template @@ -58,7 +58,7 @@ size_t SetVariantsTemplate::getTotalByteCount() const #undef M } - __builtin_unreachable(); + UNREACHABLE(); } template diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index f53c39b192f..2d8193871b0 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -124,7 +124,7 @@ const char * ASTAlterCommand::typeToString(ASTAlterCommand::Type type) case MODIFY_DATABASE_SETTING: return "MODIFY_DATABASE_SETTING"; case MODIFY_COMMENT: return "MODIFY_COMMENT"; } - __builtin_unreachable(); + UNREACHABLE(); } void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h index a1b6c9a8de6..ea9ccf5a4f4 100644 --- a/src/Parsers/ASTExplainQuery.h +++ b/src/Parsers/ASTExplainQuery.h @@ -116,7 +116,7 @@ private: case CurrentTransaction: return "EXPLAIN 
CURRENT TRANSACTION"; } - __builtin_unreachable(); + UNREACHABLE(); } }; diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 892c0ad4718..debcd9e7fd4 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -39,7 +40,7 @@ Token quotedString(const char *& pos, const char * const token_begin, const char continue; } - __builtin_unreachable(); + UNREACHABLE(); } } @@ -414,7 +415,7 @@ APPLY_FOR_TOKENS(M) #undef M } - __builtin_unreachable(); + UNREACHABLE(); } diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index bfc4f726edb..931a7587903 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -512,7 +512,7 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {:x} is not supported", object_ext.type()); } } - __builtin_unreachable(); + UNREACHABLE(); } DataTypes MsgPackSchemaReader::readRowAndGetDataTypes() diff --git a/src/Processors/IProcessor.cpp b/src/Processors/IProcessor.cpp index a2533ee4c8c..8b160153733 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -37,7 +37,7 @@ std::string IProcessor::statusToName(Status status) return "ExpandPipeline"; } - __builtin_unreachable(); + UNREACHABLE(); } } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 57eeb5dba2d..b340073e73d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -639,7 +639,7 @@ static void addMergingFinal( sort_description, max_block_size, merging_params.graphite_params, now); } - __builtin_unreachable(); + UNREACHABLE(); }; pipe.addTransform(get_merging_processor()); @@ -1240,7 +1240,7 @@ static const char * 
indexTypeToString(ReadFromMergeTree::IndexType type) return "Skip"; } - __builtin_unreachable(); + UNREACHABLE(); } static const char * readTypeToString(ReadFromMergeTree::ReadType type) @@ -1255,7 +1255,7 @@ static const char * readTypeToString(ReadFromMergeTree::ReadType type) return "InReverseOrder"; } - __builtin_unreachable(); + UNREACHABLE(); } void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index bb918a1a02d..63991655426 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -88,7 +88,7 @@ static String totalsModeToString(TotalsMode totals_mode, double auto_include_thr return "after_having_auto threshold " + std::to_string(auto_include_threshold); } - __builtin_unreachable(); + UNREACHABLE(); } void TotalsHavingStep::describeActions(FormatSettings & settings) const diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 786b28dc02e..16abb72cbd4 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -45,7 +45,7 @@ static FillColumnDescription::StepFunction getStepFunction( FOR_EACH_INTERVAL_KIND(DECLARE_CASE) #undef DECLARE_CASE } - __builtin_unreachable(); + UNREACHABLE(); } static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & type) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ea088c45471..174aaf67ec5 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -744,7 +744,7 @@ static std::exception_ptr addStorageToException(std::exception_ptr ptr, const St return std::current_exception(); } - __builtin_unreachable(); + UNREACHABLE(); } void 
FinalizingViewsTransform::work() diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index f9b3041ad40..a2a2db75d68 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -390,7 +390,7 @@ namespace case CALL_WITH_STREAM_IO: return "ExecuteQueryWithStreamIO()"; case CALL_MAX: break; } - __builtin_unreachable(); + UNREACHABLE(); } bool isInputStreaming(CallType call_type) @@ -550,7 +550,7 @@ namespace case CALL_WITH_STREAM_IO: return std::make_unique>(); case CALL_MAX: break; } - __builtin_unreachable(); + UNREACHABLE(); } diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 93e9c4656b2..d02da92c613 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -878,8 +878,7 @@ try } else { - assert(false); - __builtin_unreachable(); + UNREACHABLE(); } used_output.finalize(); diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index d2490858a72..5fa267a964b 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -490,7 +490,7 @@ static GetColumnsOptions::Kind defaultKindToGetKind(ColumnDefaultKind kind) case ColumnDefaultKind::Ephemeral: return GetColumnsOptions::Ephemeral; } - __builtin_unreachable(); + UNREACHABLE(); } NamesAndTypesList ColumnsDescription::getByNames(const GetColumnsOptions & options, const Names & names) const diff --git a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp index ee721c5b920..f06ff909799 100644 --- a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp +++ b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp @@ -90,7 +90,7 @@ String BackgroundJobsAssignee::toString(Type type) case Type::Moving: return "Moving"; } - __builtin_unreachable(); + UNREACHABLE(); } void BackgroundJobsAssignee::start() diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 24b3a4a60b9..d7c33c8663b 100644 --- 
a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -2581,7 +2581,7 @@ String KeyCondition::RPNElement::toString(std::string_view column_name, bool pri return "true"; } - __builtin_unreachable(); + UNREACHABLE(); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 4a7d2b2dd63..e65614b0fc0 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -928,7 +928,7 @@ String MergeTreeData::MergingParams::getModeName() const case VersionedCollapsing: return "VersionedCollapsing"; } - __builtin_unreachable(); + UNREACHABLE(); } Int64 MergeTreeData::getMaxBlockNumber() const diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 95faef6aac7..7b99819340e 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -244,7 +244,7 @@ Block MergeTreeDataWriter::mergeBlock( block, 1, sort_description, block_size + 1, merging_params.graphite_params, time(nullptr)); } - __builtin_unreachable(); + UNREACHABLE(); }; auto merging_algorithm = get_merging_algorithm(); diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index a32c6d70665..730f9a05814 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -125,7 +125,7 @@ PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(Pa } } - __builtin_unreachable(); + UNREACHABLE(); } PartitionReadResponse ParallelReplicasReadingCoordinator::handleRequest(PartitionReadRequest request) diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index c4cb470923b..d5f35ea1b3c 100644 --- 
a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -609,7 +609,7 @@ PartMovesBetweenShardsOrchestrator::Entry PartMovesBetweenShardsOrchestrator::st } } - __builtin_unreachable(); + UNREACHABLE(); } void PartMovesBetweenShardsOrchestrator::removePins(const Entry & entry, zkutil::ZooKeeperPtr zk) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 2392ea2eee8..e2b23d75746 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -123,7 +123,7 @@ bool ReplicatedMergeTreeRestartingThread::runImpl() } else { - __builtin_unreachable(); + UNREACHABLE(); } try diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index d7af9790a85..4fec8861765 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1422,7 +1422,7 @@ size_t StorageDistributed::getRandomShardIndex(const Cluster::ShardsInfo & shard res -= shards[i].weight; } - __builtin_unreachable(); + UNREACHABLE(); } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 7e0194ffd30..d34066de769 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -296,7 +296,7 @@ namespace CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } - __builtin_unreachable(); + UNREACHABLE(); } class AddingAggregatedChunkInfoTransform : public ISimpleTransform @@ -895,7 +895,7 @@ UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec) CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } - __builtin_unreachable(); + UNREACHABLE(); } UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) @@ -923,7 +923,7 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) 
CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } - __builtin_unreachable(); + UNREACHABLE(); } void StorageWindowView::addFireSignal(std::set & signals) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index a4810701dee..772f48ad088 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -352,3 +352,8 @@ find $ROOT_PATH | sort -f | uniq -i -c | awk '{ if ($1 > 1) print }' find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | grep -vP $EXCLUDE_DIRS | xargs grep -P '::(is|read)_symlink' | grep -v "STYLE_CHECK_ALLOW_STD_FS_SYMLINK" && echo "Use DB::FS::isSymlink and DB::FS::readSymlink instead" + +# Forbid __builtin_unreachable(), because it's hard to debug when it becomes reachable +find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | + grep -vP $EXCLUDE_DIRS | + xargs grep -P '__builtin_unreachable' && echo "Use UNREACHABLE() from defines.h instead" From b2917464a90eb0b768a97a3148a371a5812be895 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 7 Oct 2022 21:57:36 +0200 Subject: [PATCH 203/266] tests: fix 02458_insert_select_progress_tcp flakiness CI: https://s3.amazonaws.com/clickhouse-test-reports/42078/318042c1178f08f5c405a63a430089784fa19feb/stateless_tests_flaky_check__asan_.html Signed-off-by: Azat Khuzhin --- .../02458_insert_select_progress_tcp.python | 35 ++++++++++++++----- ...02458_insert_select_progress_tcp.reference | 4 +-- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python index faa7c8779dd..c638b3d2040 100644 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python @@ -181,6 +181,21 @@ class Progress(): def __str__(self): return json.dumps(self.__dict__) + def __add__(self, b): + self.read_rows += b.read_rows + self.read_bytes += 
b.read_bytes + self.total_rows_to_read += b.total_rows_to_read + self.written_rows += b.written_rows + self.written_bytes += b.written_bytes + return self + + def readPacket(self, s): + self.read_rows += readVarUInt(s) + self.read_bytes += readVarUInt(s) + self.total_rows_to_read += readVarUInt(s) + self.written_rows += readVarUInt(s) + self.written_bytes += readVarUInt(s) + def __bool__(self): return ( self.read_rows > 0 or @@ -189,6 +204,7 @@ class Progress(): self.written_rows > 0 or self.written_bytes > 0) + def readProgress(s): packet_type = readVarUInt(s) if packet_type == 2: # Exception @@ -200,12 +216,7 @@ def readProgress(s): assertPacket(packet_type, 3) # Progress progress = Progress() - progress.read_rows += readVarUInt(s) - progress.read_bytes += readVarUInt(s) - progress.total_rows_to_read += readVarUInt(s) - - progress.written_rows += readVarUInt(s) - progress.written_bytes += readVarUInt(s) + progress.readPacket(s) return progress def readException(s): @@ -230,13 +241,21 @@ def main(): # external tables sendEmptyBlock(s) + summary_progress = Progress() + non_empty_progress_packets = 0 while True: progress = readProgress(s) if progress is None: break - # Print only non empty progress packets, eventually we should have 3 of them + summary_progress += progress if progress: - print(progress) + non_empty_progress_packets += 1 + + print(summary_progress) + # Print only non empty progress packets, eventually we should have at least 3 of them + # - 2 for each INSERT block (one of them can be merged with read block, heance 3 or for) + # - 1 or 2 for each SELECT block + assert non_empty_progress_packets in (3, 4), f"{non_empty_progress_packets=:}" s.close() diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference b/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference index 81f61f0d08f..2ec5c9652b5 100644 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference +++ 
b/tests/queries/0_stateless/02458_insert_select_progress_tcp.reference @@ -1,3 +1 @@ -{"read_rows": 1, "read_bytes": 8, "total_rows_to_read": 2, "written_rows": 0, "written_bytes": 0} -{"read_rows": 1, "read_bytes": 8, "total_rows_to_read": 0, "written_rows": 1, "written_bytes": 4} -{"read_rows": 0, "read_bytes": 0, "total_rows_to_read": 0, "written_rows": 1, "written_bytes": 4} +{"read_rows": 2, "read_bytes": 16, "total_rows_to_read": 2, "written_rows": 2, "written_bytes": 8} From b9159db82f9e877d6a74b87e1776b5c78c669db9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Oct 2022 00:16:36 +0200 Subject: [PATCH 204/266] Remove temporary live views --- docs/en/operations/settings/settings.md | 6 - .../sql-reference/statements/create/view.md | 31 ---- docs/ru/operations/settings/settings.md | 6 - .../sql-reference/statements/create/view.md | 31 ---- .../sql-reference/statements/create/view.md | 31 ---- src/Core/Defines.h | 1 - src/Core/Settings.h | 4 +- src/Interpreters/DatabaseCatalog.cpp | 8 - src/Parsers/ASTCreateQuery.cpp | 12 +- src/Parsers/ASTCreateQuery.h | 1 - src/Parsers/ParserCreateQuery.cpp | 18 -- src/Storages/LiveView/StorageLiveView.cpp | 15 -- src/Storages/LiveView/StorageLiveView.h | 16 -- .../LiveView/TemporaryLiveViewCleaner.cpp | 165 ------------------ .../LiveView/TemporaryLiveViewCleaner.h | 57 ------ .../00961_temporary_live_view_watch.reference | 3 - .../00961_temporary_live_view_watch.sql | 22 --- ..._live_view_periodic_refresh_and_timeout.py | 15 +- .../00962_temporary_live_view_watch_live.py | 52 ------ ...2_temporary_live_view_watch_live.reference | 0 ...ary_live_view_watch_live_timeout.reference | 0 .../00964_live_view_watch_events_heartbeat.py | 2 +- .../00965_live_view_watch_heartbeat.py | 2 +- ...00980_create_temporary_live_view.reference | 4 - .../00980_create_temporary_live_view.sql | 18 -- ...ry_live_view_watch_events_heartbeat.python | 83 --------- ...live_view_watch_events_heartbeat.reference | 0 
...0991_temporary_live_view_watch_live.python | 81 --------- ...1_temporary_live_view_watch_live.reference | 7 - 29 files changed, 7 insertions(+), 684 deletions(-) delete mode 100644 src/Storages/LiveView/TemporaryLiveViewCleaner.cpp delete mode 100644 src/Storages/LiveView/TemporaryLiveViewCleaner.h delete mode 100644 tests/queries/0_stateless/00961_temporary_live_view_watch.reference delete mode 100644 tests/queries/0_stateless/00961_temporary_live_view_watch.sql delete mode 100755 tests/queries/0_stateless/00962_temporary_live_view_watch_live.py delete mode 100644 tests/queries/0_stateless/00962_temporary_live_view_watch_live.reference delete mode 100644 tests/queries/0_stateless/00963_temporary_live_view_watch_live_timeout.reference delete mode 100644 tests/queries/0_stateless/00980_create_temporary_live_view.reference delete mode 100644 tests/queries/0_stateless/00980_create_temporary_live_view.sql delete mode 100644 tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.python delete mode 100644 tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.reference delete mode 100644 tests/queries/0_stateless/00991_temporary_live_view_watch_live.python delete mode 100644 tests/queries/0_stateless/00991_temporary_live_view_watch_live.reference diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index dc74b607289..deef82ee699 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2629,12 +2629,6 @@ Sets the maximum number of inserted blocks after which mergeable blocks are drop Default value: `64`. -## temporary_live_view_timeout {#temporary-live-view-timeout} - -Sets the interval in seconds after which [live view](../../sql-reference/statements/create/view.md#live-view) with timeout is deleted. - -Default value: `5`. 
- ## periodic_live_view_refresh {#periodic-live-view-refresh} Sets the interval in seconds after which periodically refreshed [live view](../../sql-reference/statements/create/view.md#live-view) is forced to refresh. diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 46dd7e6fdd7..5833c43f55d 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -166,23 +166,6 @@ SELECT * FROM [db.]live_view WHERE ... You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement. -### WITH TIMEOUT Clause - -When a live view is created with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query that was watching the live view. - -```sql -CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ... -``` - -If the timeout value is not specified then the value specified by the [temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout) setting is used. - -**Example:** - -```sql -CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x; -CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt; -``` - ### WITH REFRESH Clause When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger. @@ -212,20 +195,6 @@ WATCH lv โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ ``` -You can combine `WITH TIMEOUT` and `WITH REFRESH` clauses using an `AND` clause. - -```sql -CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ... 
-``` - -**Example:** - -```sql -CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now(); -``` - -After 15 sec the live view will be automatically dropped if there are no active `WATCH` queries. - ```sql WATCH lv ``` diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 3d765b03d58..05cfbcb4a87 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -3258,12 +3258,6 @@ SELECT * FROM test2; ะ—ะฝะฐั‡ะตะฝะธะต ะฟะพ ัƒะผะพะปั‡ะฐะฝะธัŽ: `64`. -## temporary_live_view_timeout {#temporary-live-view-timeout} - -ะ—ะฐะดะฐะตั‚ ะฒั€ะตะผั ะฒ ัะตะบัƒะฝะดะฐั…, ะฟะพัะปะต ะบะพั‚ะพั€ะพะณะพ [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) ัƒะดะฐะปัะตั‚ัั. - -ะ—ะฝะฐั‡ะตะฝะธะต ะฟะพ ัƒะผะพะปั‡ะฐะฝะธัŽ: `5`. - ## periodic_live_view_refresh {#periodic-live-view-refresh} ะ—ะฐะดะฐะตั‚ ะฒั€ะตะผั ะฒ ัะตะบัƒะฝะดะฐั…, ะฟะพ ะธัั‚ะตั‡ะตะฝะธะธ ะบะพั‚ะพั€ะพะณะพ [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) ั ัƒัั‚ะฐะฝะพะฒะปะตะฝะฝั‹ะผ ะฐะฒั‚ะพะพะฑะฝะพะฒะปะตะฝะธะตะผ ะพะฑะฝะพะฒะปัะตั‚ัั. diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index 573db8938b2..6cbd4c6a30c 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -156,23 +156,6 @@ SELECT * FROM [db.]live_view WHERE ... ะงั‚ะพะฑั‹ ะฟั€ะธะฝัƒะดะธั‚ะตะปัŒะฝะพ ะพะฑะฝะพะฒะธั‚ัŒ LIVE-ะฟั€ะตะดัั‚ะฐะฒะปะตะฝะธะต, ะธัะฟะพะปัŒะทัƒะนั‚ะต ะทะฐะฟั€ะพั `ALTER LIVE VIEW [db.]table_name REFRESH`. 
-### ะกะตะบั†ะธั WITH TIMEOUT {#live-view-with-timeout} - -LIVE-ะฟั€ะตะดัั‚ะฐะฒะปะตะฝะธะต, ัะพะทะดะฐะฝะฝะพะต ั ะฟะฐั€ะฐะผะตั‚ั€ะพะผ `WITH TIMEOUT`, ะฑัƒะดะตั‚ ะฐะฒั‚ะพะผะฐั‚ะธั‡ะตัะบะธ ัƒะดะฐะปะตะฝะพ ั‡ะตั€ะตะท ะพะฟั€ะตะดะตะปะตะฝะฝะพะต ะบะพะปะธั‡ะตัั‚ะฒะพ ัะตะบัƒะฝะด ั ะผะพะผะตะฝั‚ะฐ ะฟั€ะตะดั‹ะดัƒั‰ะตะณะพ ะทะฐะฟั€ะพัะฐ [WATCH](../../../sql-reference/statements/watch.md), ะฟั€ะธะผะตะฝะตะฝะฝะพะณะพ ะบ ะดะฐะฝะฝะพะผัƒ LIVE-ะฟั€ะตะดัั‚ะฐะฒะปะตะฝะธัŽ. - -```sql -CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ... -``` - -ะ•ัะปะธ ะฒั€ะตะผะตะฝะฝะพะน ะฟั€ะพะผะตะถัƒั‚ะพะบ ะฝะต ัƒะบะฐะทะฐะฝ, ะธัะฟะพะปัŒะทัƒะตั‚ัั ะทะฝะฐั‡ะตะฝะธะต ะฝะฐัั‚ั€ะพะนะบะธ [temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout). - -**ะŸั€ะธะผะตั€:** - -```sql -CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x; -CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt; -``` - ### ะกะตะบั†ะธั WITH REFRESH {#live-view-with-refresh} LIVE-ะฟั€ะตะดัั‚ะฐะฒะปะตะฝะธะต, ัะพะทะดะฐะฝะฝะพะต ั ะฟะฐั€ะฐะผะตั‚ั€ะพะผ `WITH REFRESH`, ะฑัƒะดะตั‚ ะฐะฒั‚ะพะผะฐั‚ะธั‡ะตัะบะธ ะพะฑะฝะพะฒะปัั‚ัŒัั ั‡ะตั€ะตะท ัƒะบะฐะทะฐะฝะฝั‹ะต ะฟั€ะพะผะตะถัƒั‚ะบะธ ะฒั€ะตะผะตะฝะธ, ะฝะฐั‡ะธะฝะฐั ั ะผะพะผะตะฝั‚ะฐ ะฟะพัะปะตะดะฝะตะณะพ ะพะฑะฝะพะฒะปะตะฝะธั. @@ -202,20 +185,6 @@ WATCH lv; โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ ``` -ะŸะฐั€ะฐะผะตั‚ั€ั‹ `WITH TIMEOUT` ะธ `WITH REFRESH` ะผะพะถะฝะพ ัะพั‡ะตั‚ะฐั‚ัŒ ั ะฟะพะผะพั‰ัŒัŽ `AND`. - -```sql -CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ... -``` - -**ะŸั€ะธะผะตั€:** - -```sql -CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now(); -``` - -ะŸะพ ะธัั‚ะตั‡ะตะฝะธะธ 15 ัะตะบัƒะฝะด ะฟั€ะตะดัั‚ะฐะฒะปะตะฝะธะต ะฑัƒะดะตั‚ ะฐะฒั‚ะพะผะฐั‚ะธั‡ะตัะบะธ ัƒะดะฐะปะตะฝะพ, ะตัะปะธ ะฝะตั‚ ะฐะบั‚ะธะฒะฝะพะณะพ ะทะฐะฟั€ะพัะฐ `WATCH`. 
- ```sql WATCH lv; ``` diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index 12ffe35dde0..be2f8d6ded3 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -164,23 +164,6 @@ SELECT * FROM [db.]live_view WHERE ... ๆ‚จๅฏไปฅไฝฟ็”จ`ALTER LIVE VIEW [db.]table_name REFRESH`่ฏญๆณ•. -### WITH TIMEOUTๆกไปถ {#live-view-with-timeout} - -ๅฝ“ไฝฟ็”จ`WITH TIMEOUT`ๅญๅฅๅˆ›ๅปบๅฎžๆ—ถ่ง†ๅ›พๆ—ถ๏ผŒ[WATCH](../../../sql-reference/statements/watch.md)่ง‚ๅฏŸๅฎžๆ—ถ่ง†ๅ›พ็š„ๆŸฅ่ฏขใ€‚ - -```sql -CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ... -``` - -ๅฆ‚ๆžœๆœชๆŒ‡ๅฎš่ถ…ๆ—ถๅ€ผ๏ผŒๅˆ™็”ฑๆŒ‡ๅฎš็š„ๅ€ผ[temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout)ๅ†ณๅฎš. - -**็คบไพ‹:** - -```sql -CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x; -CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt; -``` - ### WITH REFRESHๆกไปถ {#live-view-with-refresh} ๅฝ“ไฝฟ็”จ`WITH REFRESH`ๅญๅฅๅˆ›ๅปบๅฎžๆ—ถ่ง†ๅ›พๆ—ถ๏ผŒๅฎƒๅฐ†ๅœจ่‡ชไธŠๆฌกๅˆทๆ–ฐๆˆ–่งฆๅ‘ๅŽ็ป่ฟ‡ๆŒ‡ๅฎš็š„็ง’ๆ•ฐๅŽ่‡ชๅŠจๅˆทๆ–ฐใ€‚ @@ -210,20 +193,6 @@ WATCH lv โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ ``` -ๆ‚จๅฏไปฅไฝฟ็”จ`AND`ๅญๅฅ็ป„ๅˆ`WITH TIMEOUT`ๅ’Œ`WITH REFRESH`ๅญๅฅใ€‚ - -```sql -CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ... 
-``` - -**็คบไพ‹:** - -```sql -CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now(); -``` - -15 ็ง’ๅŽ๏ผŒๅฆ‚ๆžœๆฒกๆœ‰ๆดปๅŠจ็š„`WATCH`ๆŸฅ่ฏข๏ผŒๅฎžๆ—ถ่ง†ๅ›พๅฐ†่‡ชๅŠจๅˆ ้™คใ€‚ - ```sql WATCH lv ``` diff --git a/src/Core/Defines.h b/src/Core/Defines.h index 9665a20a397..80efe4f77bf 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -31,7 +31,6 @@ */ #define DEFAULT_MERGE_BLOCK_SIZE 8192 -#define DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC 5 #define DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC 60 #define SHOW_CHARS_ON_SYNTAX_ERROR ptrdiff_t(160) #define DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3 diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 26b6fd56ade..f429b7560d5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -501,7 +501,6 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ M(Bool, allow_settings_after_format_in_insert, false, "Allow SETTINGS after FORMAT, but note, that this is not always safe (note: this is a compatibility setting).", 0) \ - M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \ M(Seconds, periodic_live_view_refresh, DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC, "Interval after which periodically refreshed live view is forced to refresh.", 0) \ M(Bool, transform_null_in, false, "If enabled, NULL values will be matched with 'IN' operator as if they are considered equal.", 0) \ M(Bool, allow_nondeterministic_mutations, false, "Allow non-deterministic functions in ALTER UPDATE/ALTER DELETE statements", 0) \ @@ -691,7 +690,8 @@ 
static constexpr UInt64 operator""_GiB(unsigned long long value) MAKE_OBSOLETE(M, UInt64, background_message_broker_schedule_pool_size, 16) \ MAKE_OBSOLETE(M, UInt64, background_distributed_schedule_pool_size, 16) \ MAKE_OBSOLETE(M, DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic) \ - MAKE_OBSOLETE(M, UInt64, max_pipeline_depth, 0) \ + MAKE_OBSOLETE(M, UInt64, max_pipeline_depth, 0) \ + MAKE_OBSOLETE(M, Seconds, temporary_live_view_timeout, 1) \ /** The section above is for obsolete settings. Do not add anything there. */ diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 67fb256b1c9..73189979d85 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -171,16 +170,10 @@ void DatabaseCatalog::loadDatabases() std::lock_guard lock{tables_marked_dropped_mutex}; if (!tables_marked_dropped.empty()) (*drop_task)->schedule(); - - /// Another background thread which drops temporary LiveViews. - /// We should start it after loadMarkedAsDroppedTables() to avoid race condition. 
- TemporaryLiveViewCleaner::instance().startup(); } void DatabaseCatalog::shutdownImpl() { - TemporaryLiveViewCleaner::shutdown(); - if (cleanup_task) (*cleanup_task)->deactivate(); @@ -657,7 +650,6 @@ std::unique_ptr DatabaseCatalog::database_catalog; DatabaseCatalog::DatabaseCatalog(ContextMutablePtr global_context_) : WithMutableContext(global_context_), log(&Poco::Logger::get("DatabaseCatalog")) { - TemporaryLiveViewCleaner::init(global_context_); } DatabaseCatalog & DatabaseCatalog::init(ContextMutablePtr global_context_) diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index a277960643b..f8853d21178 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -297,18 +297,10 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") << quoteString(*attach_from_path); - if (live_view_timeout) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH TIMEOUT " << (settings.hilite ? hilite_none : "") - << *live_view_timeout; - if (live_view_periodic_refresh) { - if (live_view_timeout) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " AND" << (settings.hilite ? hilite_none : ""); - else - settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH" << (settings.hilite ? hilite_none : ""); - - settings.ostr << (settings.hilite ? hilite_keyword : "") << " PERIODIC REFRESH " << (settings.hilite ? hilite_none : "") + settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH" << (settings.hilite ? hilite_none : "") + << (settings.hilite ? hilite_keyword : "") << " PERIODIC REFRESH " << (settings.hilite ? 
hilite_none : "") << *live_view_periodic_refresh; } diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index f3729b1523f..de0f187f0e2 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -93,7 +93,6 @@ public: ASTExpressionList * dictionary_attributes_list = nullptr; /// attributes of ASTDictionary * dictionary = nullptr; /// dictionary definition (layout, primary key, etc.) - std::optional live_view_timeout; /// For CREATE LIVE VIEW ... WITH TIMEOUT ... std::optional live_view_periodic_refresh; /// For CREATE LIVE VIEW ... WITH [PERIODIC] REFRESH ... bool is_watermark_strictly_ascending{false}; /// STRICTLY ASCENDING WATERMARK STRATEGY FOR WINDOW VIEW diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index fc90f9ce3ed..77540141b53 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -708,7 +708,6 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e ASTPtr as_database; ASTPtr as_table; ASTPtr select; - ASTPtr live_view_timeout; ASTPtr live_view_periodic_refresh; String cluster_str; @@ -740,20 +739,6 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e if (ParserKeyword{"WITH"}.ignore(pos, expected)) { - if (ParserKeyword{"TIMEOUT"}.ignore(pos, expected)) - { - if (!ParserNumber{}.parse(pos, live_view_timeout, expected)) - { - live_view_timeout = std::make_shared(static_cast(DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC)); - } - - /// Optional - AND - if (ParserKeyword{"AND"}.ignore(pos, expected)) - with_and = true; - - with_timeout = true; - } - if (ParserKeyword{"REFRESH"}.ignore(pos, expected) || ParserKeyword{"PERIODIC REFRESH"}.ignore(pos, expected)) { if (!ParserNumber{}.parse(pos, live_view_periodic_refresh, expected)) @@ -828,9 +813,6 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e tryGetIdentifierNameInto(as_table, query->as_table); 
query->set(query->select, select); - if (live_view_timeout) - query->live_view_timeout.emplace(live_view_timeout->as().value.safeGet()); - if (live_view_periodic_refresh) query->live_view_periodic_refresh.emplace(live_view_periodic_refresh->as().value.safeGet()); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 4f52267a7fe..e3d19d0a433 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,25 +21,19 @@ limitations under the License. */ #include #include #include -#include #include #include #include #include #include -#include "QueryPipeline/printPipeline.h" #include #include #include #include #include -#include #include -#include -#include -#include #include #include #include @@ -312,12 +306,6 @@ StorageLiveView::StorageLiveView( DatabaseCatalog::instance().addDependency(select_table_id, table_id_); - if (query.live_view_timeout) - { - is_temporary = true; - temporary_live_view_timeout = Seconds {*query.live_view_timeout}; - } - if (query.live_view_periodic_refresh) { is_periodically_refreshed = true; @@ -456,9 +444,6 @@ void StorageLiveView::checkTableCanBeDropped() const void StorageLiveView::startup() { - if (is_temporary) - TemporaryLiveViewCleaner::instance().addView(std::static_pointer_cast(shared_from_this())); - if (is_periodically_refreshed) periodic_refresh_task->activate(); } diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 4d140f6a82a..c6a0379e2ab 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -86,19 +86,6 @@ public: NamesAndTypesList getVirtuals() const override; - bool isTemporary() const { return is_temporary; } - bool isPeriodicallyRefreshed() const { return is_periodically_refreshed; } - - Seconds getTimeout() const { return temporary_live_view_timeout; } - Seconds getPeriodicRefresh() const { return periodic_live_view_refresh; } - - 
/// Check if we have any readers - /// must be called with mutex locked - bool hasUsers() - { - return blocks_ptr.use_count() > 1; - } - /// Check we have any active readers /// must be called with mutex locked bool hasActiveUsers() @@ -200,10 +187,7 @@ private: Poco::Logger * log; - bool is_temporary = false; bool is_periodically_refreshed = false; - - Seconds temporary_live_view_timeout; Seconds periodic_live_view_refresh; /// Mutex to protect access to sample block and inner_blocks_query diff --git a/src/Storages/LiveView/TemporaryLiveViewCleaner.cpp b/src/Storages/LiveView/TemporaryLiveViewCleaner.cpp deleted file mode 100644 index 12af472247d..00000000000 --- a/src/Storages/LiveView/TemporaryLiveViewCleaner.cpp +++ /dev/null @@ -1,165 +0,0 @@ -#include - -#include -#include -#include -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - - -namespace -{ - void executeDropQuery(const StorageID & storage_id, ContextMutablePtr context) - { - if (!DatabaseCatalog::instance().isTableExist(storage_id, context)) - return; - try - { - /// We create and execute `drop` query for this table - auto drop_query = std::make_shared(); - drop_query->setDatabase(storage_id.database_name); - drop_query->setTable(storage_id.table_name); - drop_query->kind = ASTDropQuery::Kind::Drop; - ASTPtr ast_drop_query = drop_query; - InterpreterDropQuery drop_interpreter(ast_drop_query, context); - drop_interpreter.execute(); - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } -} - - -std::unique_ptr TemporaryLiveViewCleaner::the_instance; - - -void TemporaryLiveViewCleaner::init(ContextMutablePtr global_context_) -{ - if (the_instance) - throw Exception("TemporaryLiveViewCleaner already initialized", ErrorCodes::LOGICAL_ERROR); - the_instance.reset(new TemporaryLiveViewCleaner(global_context_)); -} - -void TemporaryLiveViewCleaner::startup() -{ - background_thread_can_start = true; - - std::lock_guard lock{mutex}; - if (!views.empty()) - startBackgroundThread(); -} - -void TemporaryLiveViewCleaner::shutdown() -{ - the_instance.reset(); -} - -TemporaryLiveViewCleaner::TemporaryLiveViewCleaner(ContextMutablePtr global_context_) : WithMutableContext(global_context_) -{ -} - -TemporaryLiveViewCleaner::~TemporaryLiveViewCleaner() -{ - stopBackgroundThread(); -} - - -void TemporaryLiveViewCleaner::addView(const std::shared_ptr & view) -{ - if (!view->isTemporary() || background_thread_should_exit) - return; - - auto current_time = std::chrono::system_clock::now(); - auto time_of_next_check = current_time + view->getTimeout(); - - /// Keep the vector `views` sorted by time of next check. 
- StorageAndTimeOfCheck storage_and_time_of_check{view, time_of_next_check}; - std::lock_guard lock{mutex}; - views.insert(std::upper_bound(views.begin(), views.end(), storage_and_time_of_check), storage_and_time_of_check); - - if (background_thread_can_start) - { - startBackgroundThread(); - background_thread_wake_up.notify_one(); - } -} - - -void TemporaryLiveViewCleaner::backgroundThreadFunc() -{ - std::unique_lock lock{mutex}; - while (!background_thread_should_exit) - { - if (views.empty()) - background_thread_wake_up.wait(lock); - else - background_thread_wake_up.wait_until(lock, views.front().time_of_check); - - if (background_thread_should_exit) - break; - - auto current_time = std::chrono::system_clock::now(); - std::vector storages_to_drop; - - auto it = views.begin(); - while (it != views.end()) - { - std::shared_ptr storage = it->storage.lock(); - auto & time_of_check = it->time_of_check; - if (!storage) - { - /// Storage has been already removed. - it = views.erase(it); - continue; - } - - if (current_time < time_of_check) - break; /// It's not the time to check it yet. - - auto storage_id = storage->getStorageID(); - if (!storage->hasUsers() && DatabaseCatalog::instance().getDependencies(storage_id).empty()) - { - /// No users and no dependencies so we can remove the storage. - storages_to_drop.emplace_back(storage_id); - it = views.erase(it); - continue; - } - - /// Calculate time of the next check. 
- time_of_check = current_time + storage->getTimeout(); - - ++it; - } - - lock.unlock(); - for (const auto & storage_id : storages_to_drop) - executeDropQuery(storage_id, getContext()); - lock.lock(); - } -} - - -void TemporaryLiveViewCleaner::startBackgroundThread() -{ - if (!background_thread.joinable() && background_thread_can_start && !background_thread_should_exit) - background_thread = ThreadFromGlobalPool{&TemporaryLiveViewCleaner::backgroundThreadFunc, this}; -} - -void TemporaryLiveViewCleaner::stopBackgroundThread() -{ - background_thread_should_exit = true; - background_thread_wake_up.notify_one(); - if (background_thread.joinable()) - background_thread.join(); -} - -} diff --git a/src/Storages/LiveView/TemporaryLiveViewCleaner.h b/src/Storages/LiveView/TemporaryLiveViewCleaner.h deleted file mode 100644 index 9cc5933eb89..00000000000 --- a/src/Storages/LiveView/TemporaryLiveViewCleaner.h +++ /dev/null @@ -1,57 +0,0 @@ -#pragma once - -#include -#include - -#include - - -namespace DB -{ - -class StorageLiveView; -struct StorageID; - -/// This class removes temporary live views in the background thread when it's possible. -/// There should only a single instance of this class. -class TemporaryLiveViewCleaner : WithMutableContext -{ -public: - static TemporaryLiveViewCleaner & instance() { return *the_instance; } - - /// Drops a specified live view after a while if it's temporary. - void addView(const std::shared_ptr & view); - - /// Should be called once. 
- static void init(ContextMutablePtr global_context_); - static void shutdown(); - - void startup(); - -private: - friend std::unique_ptr::deleter_type; - - explicit TemporaryLiveViewCleaner(ContextMutablePtr global_context_); - ~TemporaryLiveViewCleaner(); - - void backgroundThreadFunc(); - void startBackgroundThread(); - void stopBackgroundThread(); - - struct StorageAndTimeOfCheck - { - std::weak_ptr storage; - std::chrono::system_clock::time_point time_of_check; - bool operator <(const StorageAndTimeOfCheck & other) const { return time_of_check < other.time_of_check; } - }; - - static std::unique_ptr the_instance; - std::mutex mutex; - std::vector views; - ThreadFromGlobalPool background_thread; - std::atomic background_thread_can_start = false; - std::atomic background_thread_should_exit = false; - std::condition_variable background_thread_wake_up; -}; - -} diff --git a/tests/queries/0_stateless/00961_temporary_live_view_watch.reference b/tests/queries/0_stateless/00961_temporary_live_view_watch.reference deleted file mode 100644 index 6fbbedf1b21..00000000000 --- a/tests/queries/0_stateless/00961_temporary_live_view_watch.reference +++ /dev/null @@ -1,3 +0,0 @@ -0 1 -6 2 -21 3 diff --git a/tests/queries/0_stateless/00961_temporary_live_view_watch.sql b/tests/queries/0_stateless/00961_temporary_live_view_watch.sql deleted file mode 100644 index 8bf6fa5e07b..00000000000 --- a/tests/queries/0_stateless/00961_temporary_live_view_watch.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv WITH TIMEOUT AS SELECT sum(a) FROM mt; - -WATCH lv LIMIT 0; - -INSERT INTO mt VALUES (1),(2),(3); - -WATCH lv LIMIT 0; - -INSERT INTO mt VALUES (4),(5),(6); - -WATCH lv LIMIT 0; - -DROP TABLE lv; -DROP TABLE mt; diff --git 
a/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py index 3bc649e92dc..983b330e24a 100755 --- a/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py +++ b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py @@ -29,7 +29,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("DROP TABLE IF EXISTS test.lv") client1.expect(prompt) client1.send( - "CREATE LIVE VIEW test.lv WITH TIMEOUT 60 AND REFRESH 1" + "CREATE LIVE VIEW test.lv WITH REFRESH 1" " AS SELECT value FROM system.events WHERE event = 'OSCPUVirtualTimeMicroseconds'" ) client1.expect(prompt) @@ -43,16 +43,3 @@ with client(name="client1>", log=log) as client1, client( if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) - # poll until live view table is dropped - start_time = time.time() - while True: - client1.send("SELECT * FROM test.lv FORMAT JSONEachRow") - client1.expect(prompt) - if "Table test.lv doesn't exist" in client1.before: - break - if time.time() - start_time > 90: - break - # check table is dropped - client1.send("DROP TABLE test.lv") - client1.expect("Table test.lv doesn't exist") - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00962_temporary_live_view_watch_live.py b/tests/queries/0_stateless/00962_temporary_live_view_watch_live.py deleted file mode 100755 index 0358c28bf91..00000000000 --- a/tests/queries/0_stateless/00962_temporary_live_view_watch_live.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python3 -# Tags: no-replicated-database, no-parallel, no-fasttest - -import os -import sys -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -# log=sys.stdout - -with client(name="client1>", log=log) as client1, 
client( - name="client2>", log=log -) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("SET allow_experimental_live_view = 1") - client1.expect(prompt) - client2.send("SET allow_experimental_live_view = 1") - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS test.mt") - client1.expect(prompt) - client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()") - client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv WITH TIMEOUT AS SELECT sum(a) FROM test.mt") - client1.expect(prompt) - client1.send("WATCH test.lv") - client1.expect("_version") - client1.expect(r"0.*1" + end_of_block) - client2.send("INSERT INTO test.mt VALUES (1),(2),(3)") - client1.expect(r"6.*2" + end_of_block) - client2.send("INSERT INTO test.mt VALUES (4),(5),(6)") - client1.expect(r"21.*3" + end_of_block) - # send Ctrl-C - client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - client1.send("DROP TABLE test.lv") - client1.expect(prompt) - client1.send("DROP TABLE test.mt") - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00962_temporary_live_view_watch_live.reference b/tests/queries/0_stateless/00962_temporary_live_view_watch_live.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/00963_temporary_live_view_watch_live_timeout.reference b/tests/queries/0_stateless/00963_temporary_live_view_watch_live_timeout.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py b/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py index bafb283e487..c8902203a3d 100755 --- a/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py +++ 
b/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py @@ -33,7 +33,7 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()") client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv WITH TIMEOUT AS SELECT sum(a) FROM test.mt") + client1.send("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt") client1.expect(prompt) client1.send("WATCH test.lv EVENTS FORMAT CSV") client1.expect("Progress: 1.00 rows.*\)") diff --git a/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py b/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py index 3cb1220bb49..b499f673cc0 100755 --- a/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py +++ b/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py @@ -33,7 +33,7 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()") client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv WITH TIMEOUT AS SELECT sum(a) FROM test.mt") + client1.send("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt") client1.expect(prompt) client1.send("WATCH test.lv") client1.expect("_version") diff --git a/tests/queries/0_stateless/00980_create_temporary_live_view.reference b/tests/queries/0_stateless/00980_create_temporary_live_view.reference deleted file mode 100644 index 49d86fc2fbf..00000000000 --- a/tests/queries/0_stateless/00980_create_temporary_live_view.reference +++ /dev/null @@ -1,4 +0,0 @@ -temporary_live_view_timeout 5 -live_view_heartbeat_interval 15 -lv -0 diff --git a/tests/queries/0_stateless/00980_create_temporary_live_view.sql b/tests/queries/0_stateless/00980_create_temporary_live_view.sql deleted file mode 100644 index e01a6d9643e..00000000000 --- a/tests/queries/0_stateless/00980_create_temporary_live_view.sql +++ /dev/null @@ -1,18 +0,0 @@ --- Tags: 
no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -SELECT name, value from system.settings WHERE name = 'temporary_live_view_timeout'; -SELECT name, value from system.settings WHERE name = 'live_view_heartbeat_interval'; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv WITH TIMEOUT 1 AS SELECT sum(a) FROM mt; - -SHOW TABLES WHERE database=currentDatabase() and name LIKE 'lv'; -SELECT sleep(2); -SHOW TABLES WHERE database=currentDatabase() and name LIKE 'lv'; - -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.python b/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.python deleted file mode 100644 index 8ddb1a1ea81..00000000000 --- a/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.python +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python3 - -import subprocess -import threading -import queue as queue -import os -import sys -import signal - - -CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT') -CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL') -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') - - -def send_query(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout - - -def send_query_in_process_group(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query, '--live_view_heartbeat_interval=1', '--progress'] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid) - - -def read_lines_and_push_to_queue(pipe, queue): - try: - for line in iter(pipe.readline, ''): - line = line.strip() - # print(line) - sys.stdout.flush() - queue.put(line) - except KeyboardInterrupt: - pass - - queue.put(None) - - -def test(): - 
send_query('DROP TABLE IF EXISTS test.lv').read() - send_query('DROP TABLE IF EXISTS test.mt').read() - send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read() - send_query('CREATE LIVE VIEW test.lv WITH TIMEOUT AS SELECT sum(a) FROM test.mt').read() - - q = queue.Queue() - p = send_query_in_process_group('WATCH test.lv') - thread = threading.Thread(target=read_lines_and_push_to_queue, args=(p.stdout, q)) - thread.start() - - line = q.get() - # print(line) - assert (line.endswith('0\t1')) - assert ('Progress: 0.00 rows' in line) - - send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read() - line = q.get() - assert (line.endswith('6\t2')) - assert ('Progress: 1.00 rows' in line) - - # send_query('INSERT INTO test.mt VALUES (4),(5),(6)').read() - # line = q.get() - # print(line) - # assert (line.endswith('6\t2')) - # assert ('Progress: 1.00 rows' in line) - - # Send Ctrl+C to client. - os.killpg(os.getpgid(p.pid), signal.SIGINT) - # This insert shouldn't affect lv. 
- send_query('INSERT INTO test.mt VALUES (7),(8),(9)').read() - line = q.get() - # print(line) - # assert (line is None) - - send_query('DROP TABLE if exists test.lv').read() - send_query('DROP TABLE if exists test.lv').read() - - thread.join() - -test() diff --git a/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.reference b/tests/queries/0_stateless/00991_temporary_live_view_watch_events_heartbeat.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/00991_temporary_live_view_watch_live.python b/tests/queries/0_stateless/00991_temporary_live_view_watch_live.python deleted file mode 100644 index a417cdf2937..00000000000 --- a/tests/queries/0_stateless/00991_temporary_live_view_watch_live.python +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python3 - -import subprocess -import threading -import queue as queue -import os -import sys -import signal - - -CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT') -CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL') -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') - - -def send_query(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout - - -def send_query_in_process_group(query): - cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] - # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid) - - -def read_lines_and_push_to_queue(pipe, queue): - try: - for line in iter(pipe.readline, ''): - line = line.strip() - print(line) - sys.stdout.flush() - queue.put(line) - except KeyboardInterrupt: - pass - - queue.put(None) - - -def test(): - send_query('DROP TABLE IF EXISTS test.lv').read() - send_query('DROP TABLE IF EXISTS test.mt').read() - send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read() - send_query('CREATE LIVE 
VIEW test.lv WITH TIMEOUT AS SELECT sum(a) FROM test.mt').read() - - q = queue.Queue() - p = send_query_in_process_group('WATCH test.lv') - thread = threading.Thread(target=read_lines_and_push_to_queue, args=(p.stdout, q)) - thread.start() - - line = q.get() - print(line) - assert (line == '0\t1') - - send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read() - line = q.get() - print(line) - assert (line == '6\t2') - - send_query('INSERT INTO test.mt VALUES (4),(5),(6)').read() - line = q.get() - print(line) - assert (line == '21\t3') - - # Send Ctrl+C to client. - os.killpg(os.getpgid(p.pid), signal.SIGINT) - # This insert shouldn't affect lv. - send_query('INSERT INTO test.mt VALUES (7),(8),(9)').read() - line = q.get() - print(line) - assert (line is None) - - send_query('DROP TABLE if exists test.lv').read() - send_query('DROP TABLE if exists test.lv').read() - - thread.join() - -test() diff --git a/tests/queries/0_stateless/00991_temporary_live_view_watch_live.reference b/tests/queries/0_stateless/00991_temporary_live_view_watch_live.reference deleted file mode 100644 index 1e94cdade41..00000000000 --- a/tests/queries/0_stateless/00991_temporary_live_view_watch_live.reference +++ /dev/null @@ -1,7 +0,0 @@ -0 1 -0 1 -6 2 -6 2 -21 3 -21 3 -None From 169c505603e2fa5c23f6cdbdee5551f331d129e2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 1 Oct 2022 20:46:50 +0200 Subject: [PATCH 205/266] Relax "too many parts" thresholds --- src/Interpreters/AsynchronousMetrics.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 53 ++++++++++++++-------- src/Storages/MergeTree/MergeTreeData.h | 8 +++- src/Storages/MergeTree/MergeTreeSettings.h | 1 + 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 23845e0424e..338ae1bbbfd 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -1420,7 +1420,7 @@ void 
AsynchronousMetrics::update(TimePoint update_time) { const auto & settings = getContext()->getSettingsRef(); - calculateMax(max_part_count_for_partition, table_merge_tree->getMaxPartsCountForPartition()); + calculateMax(max_part_count_for_partition, table_merge_tree->getMaxPartsCountAndSizeForPartition().first); total_number_of_bytes += table_merge_tree->totalBytes(settings).value(); total_number_of_rows += table_merge_tree->totalRows(settings).value(); total_number_of_parts += table_merge_tree->getPartsCount(); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 4a7d2b2dd63..58b38d8d3ee 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3442,42 +3442,49 @@ size_t MergeTreeData::getPartsCount() const } -size_t MergeTreeData::getMaxPartsCountForPartitionWithState(DataPartState state) const +std::pair MergeTreeData::getMaxPartsCountAndSizeForPartitionWithState(DataPartState state) const { auto lock = lockParts(); - size_t res = 0; - size_t cur_count = 0; + size_t cur_parts_count = 0; + size_t cur_parts_size = 0; + size_t max_parts_count = 0; + size_t argmax_parts_size = 0; + const String * cur_partition_id = nullptr; for (const auto & part : getDataPartsStateRange(state)) { - if (cur_partition_id && part->info.partition_id == *cur_partition_id) - { - ++cur_count; - } - else + if (!cur_partition_id || part->info.partition_id != *cur_partition_id) { cur_partition_id = &part->info.partition_id; - cur_count = 1; + cur_parts_count = 0; + cur_parts_size = 0; } - res = std::max(res, cur_count); + ++cur_parts_count; + cur_parts_size += part->getBytesOnDisk(); + + if (cur_parts_count > max_parts_count) + { + max_parts_count = cur_parts_count; + argmax_parts_size = cur_parts_size; + } } - return res; + return {max_parts_count, argmax_parts_size}; } -size_t MergeTreeData::getMaxPartsCountForPartition() const +std::pair MergeTreeData::getMaxPartsCountAndSizeForPartition() 
const { - return getMaxPartsCountForPartitionWithState(DataPartState::Active); + return getMaxPartsCountAndSizeForPartitionWithState(DataPartState::Active); } size_t MergeTreeData::getMaxInactivePartsCountForPartition() const { - return getMaxPartsCountForPartitionWithState(DataPartState::Outdated); + return getMaxPartsCountAndSizeForPartitionWithState(DataPartState::Outdated).first; } @@ -3507,7 +3514,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, ContextPtr q throw Exception("Too many parts (" + toString(parts_count_in_total) + ") in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", ErrorCodes::TOO_MANY_PARTS); } - size_t parts_count_in_partition = getMaxPartsCountForPartition(); + auto [parts_count_in_partition, size_of_partition] = getMaxPartsCountAndSizeForPartition(); ssize_t k_inactive = -1; if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0) { @@ -3526,13 +3533,17 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, ContextPtr q auto parts_to_delay_insert = query_settings.parts_to_delay_insert ? query_settings.parts_to_delay_insert : settings->parts_to_delay_insert; auto parts_to_throw_insert = query_settings.parts_to_throw_insert ? query_settings.parts_to_throw_insert : settings->parts_to_throw_insert; - if (parts_count_in_partition >= parts_to_throw_insert) + size_t average_part_size = size_of_partition / parts_count_in_partition; + bool parts_are_large_enough_in_average = settings->max_avg_part_size_for_too_many_parts + && average_part_size > settings->max_avg_part_size_for_too_many_parts; + + if (parts_count_in_partition >= parts_to_throw_insert && !parts_are_large_enough_in_average) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception( ErrorCodes::TOO_MANY_PARTS, - "Too many parts ({}). 
Merges are processing significantly slower than inserts", - parts_count_in_partition); + "Too many parts ({} with average size of {}). Merges are processing significantly slower than inserts", + parts_count_in_partition, ReadableSize(average_part_size)); } if (k_inactive < 0 && parts_count_in_partition < parts_to_delay_insert) @@ -3541,7 +3552,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, ContextPtr q const ssize_t k_active = ssize_t(parts_count_in_partition) - ssize_t(parts_to_delay_insert); size_t max_k; size_t k; - if (k_active > k_inactive) + if (k_active > k_inactive && !parts_are_large_enough_in_average) { max_k = parts_to_throw_insert - parts_to_delay_insert; k = k_active + 1; @@ -3558,7 +3569,8 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, ContextPtr q CurrentMetrics::Increment metric_increment(CurrentMetrics::DelayedInserts); - LOG_INFO(log, "Delaying inserting block by {} ms. because there are {} parts", delay_milliseconds, parts_count_in_partition); + LOG_INFO(log, "Delaying inserting block by {} ms. 
because there are {} parts and their average size is {}", + delay_milliseconds, parts_count_in_partition, ReadableSize(average_part_size)); if (until) until->tryWait(delay_milliseconds); @@ -3566,6 +3578,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, ContextPtr q std::this_thread::sleep_for(std::chrono::milliseconds(static_cast(delay_milliseconds))); } + MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart( const MergeTreePartInfo & part_info, MergeTreeData::DataPartState state, DataPartsLock & /*lock*/) const { diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c3a70a9893b..c4a5d66ccbe 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -520,8 +520,12 @@ public: size_t getTotalActiveSizeInRows() const; size_t getPartsCount() const; - size_t getMaxPartsCountForPartitionWithState(DataPartState state) const; - size_t getMaxPartsCountForPartition() const; + + /// Returns a pair with: max number of parts in partition across partitions; sum size of parts inside that partition. + /// (if there are multiple partitions with max number of parts, the sum size of parts is returned for arbitrary of them) + std::pair getMaxPartsCountAndSizeForPartitionWithState(DataPartState state) const; + std::pair getMaxPartsCountAndSizeForPartition() const; + size_t getMaxInactivePartsCountForPartition() const; /// Get min value of part->info.getDataVersion() for all active parts. 
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 8d7f057e720..2c9db61b26a 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -70,6 +70,7 @@ struct Settings; M(UInt64, inactive_parts_to_delay_insert, 0, "If table contains at least that many inactive parts in single partition, artificially slow down insert into table.", 0) \ M(UInt64, parts_to_throw_insert, 300, "If more than this number active parts in single partition, throw 'Too many parts ...' exception.", 0) \ M(UInt64, inactive_parts_to_throw_insert, 0, "If more than this number inactive parts in single partition, throw 'Too many inactive parts ...' exception.", 0) \ + M(UInt64, max_avg_part_size_for_too_many_parts, 10ULL * 1024 * 1024 * 1024, "The 'too many parts' accordingly to 'parts_to_delay_insert' and 'parts_to_throw_insert' will be active only if the average part size (in the relevant partition) is not larger than the specified threshold. If it is at larger than the specified threshold, the INSERTs will be neither delayed or rejected. This allows to have hundreds of terabytes in a single table on a single server if the parts are successfully merged to larger parts. This does not affect the thresholds on inactive parts or total parts.", 0) \ M(UInt64, max_delay_to_insert, 1, "Max delay of inserting data into MergeTree table in seconds, if there are a lot of unmerged parts in single partition.", 0) \ M(UInt64, max_parts_in_total, 100000, "If more than this number active parts in all partitions in total, throw 'Too many parts ...' 
exception.", 0) \ \ From a45f7c3023770fbb5867d6b89823e33066c11146 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 1 Oct 2022 20:56:20 +0200 Subject: [PATCH 206/266] Relax "too many parts" thresholds --- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 2c9db61b26a..a0db39a97f1 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -70,7 +70,7 @@ struct Settings; M(UInt64, inactive_parts_to_delay_insert, 0, "If table contains at least that many inactive parts in single partition, artificially slow down insert into table.", 0) \ M(UInt64, parts_to_throw_insert, 300, "If more than this number active parts in single partition, throw 'Too many parts ...' exception.", 0) \ M(UInt64, inactive_parts_to_throw_insert, 0, "If more than this number inactive parts in single partition, throw 'Too many inactive parts ...' exception.", 0) \ - M(UInt64, max_avg_part_size_for_too_many_parts, 10ULL * 1024 * 1024 * 1024, "The 'too many parts' accordingly to 'parts_to_delay_insert' and 'parts_to_throw_insert' will be active only if the average part size (in the relevant partition) is not larger than the specified threshold. If it is at larger than the specified threshold, the INSERTs will be neither delayed or rejected. This allows to have hundreds of terabytes in a single table on a single server if the parts are successfully merged to larger parts. This does not affect the thresholds on inactive parts or total parts.", 0) \ + M(UInt64, max_avg_part_size_for_too_many_parts, 10ULL * 1024 * 1024 * 1024, "The 'too many parts' check according to 'parts_to_delay_insert' and 'parts_to_throw_insert' will be active only if the average part size (in the relevant partition) is not larger than the specified threshold. 
If it is larger than the specified threshold, the INSERTs will be neither delayed or rejected. This allows to have hundreds of terabytes in a single table on a single server if the parts are successfully merged to larger parts. This does not affect the thresholds on inactive parts or total parts.", 0) \ M(UInt64, max_delay_to_insert, 1, "Max delay of inserting data into MergeTree table in seconds, if there are a lot of unmerged parts in single partition.", 0) \ M(UInt64, max_parts_in_total, 100000, "If more than this number active parts in all partitions in total, throw 'Too many parts ...' exception.", 0) \ \ From e4ac4a29a95c9c2a9bdc3289e3e3f051332a1d42 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 2 Oct 2022 01:31:44 +0200 Subject: [PATCH 207/266] Fix error --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 58b38d8d3ee..e6001905d03 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3533,7 +3533,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, ContextPtr q auto parts_to_delay_insert = query_settings.parts_to_delay_insert ? query_settings.parts_to_delay_insert : settings->parts_to_delay_insert; auto parts_to_throw_insert = query_settings.parts_to_throw_insert ? query_settings.parts_to_throw_insert : settings->parts_to_throw_insert; - size_t average_part_size = size_of_partition / parts_count_in_partition; + size_t average_part_size = parts_count_in_partition ? 
size_of_partition / parts_count_in_partition : 0; bool parts_are_large_enough_in_average = settings->max_avg_part_size_for_too_many_parts && average_part_size > settings->max_avg_part_size_for_too_many_parts; From 6680590abb6aa3da53278c51b257e57a18307247 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Oct 2022 00:42:44 +0200 Subject: [PATCH 208/266] Add a test --- .../02458_relax_too_many_parts.reference | 1 + .../02458_relax_too_many_parts.sql | 38 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 tests/queries/0_stateless/02458_relax_too_many_parts.reference create mode 100644 tests/queries/0_stateless/02458_relax_too_many_parts.sql diff --git a/tests/queries/0_stateless/02458_relax_too_many_parts.reference b/tests/queries/0_stateless/02458_relax_too_many_parts.reference new file mode 100644 index 00000000000..6d532a1e49a --- /dev/null +++ b/tests/queries/0_stateless/02458_relax_too_many_parts.reference @@ -0,0 +1 @@ +4 8000000 diff --git a/tests/queries/0_stateless/02458_relax_too_many_parts.sql b/tests/queries/0_stateless/02458_relax_too_many_parts.sql new file mode 100644 index 00000000000..417fd2dd982 --- /dev/null +++ b/tests/queries/0_stateless/02458_relax_too_many_parts.sql @@ -0,0 +1,38 @@ +-- Check that max_parts_to_throw_insert limit work + +DROP TABLE IF EXISTS test; +CREATE TABLE test (x UInt64, s String) ENGINE = MergeTree ORDER BY tuple() SETTINGS parts_to_throw_insert = 3; + +-- The "too many parts" threshold works: +SET max_block_size = 1, min_insert_block_size_rows = 1, min_insert_block_size_bytes = 1; +SYSTEM STOP MERGES test; +INSERT INTO test VALUES (1, 'a'); +INSERT INTO test VALUES (2, 'a'); +INSERT INTO test VALUES (3, 'a'); +INSERT INTO test VALUES (4, 'a'); -- { serverError TOO_MANY_PARTS } + +-- But it can be relaxed with a setting: +ALTER TABLE test MODIFY SETTING max_avg_part_size_for_too_many_parts = '1M'; + +-- It works in the same way if parts are small: +SYSTEM START MERGES test; +OPTIMIZE TABLE 
test FINAL; +SYSTEM STOP MERGES test; + +INSERT INTO test VALUES (5, 'a'); +INSERT INTO test VALUES (6, 'a'); +INSERT INTO test VALUES (7, 'a'); -- { serverError TOO_MANY_PARTS } + +-- But it allows having more parts if their average size is large: +SYSTEM START MERGES test; +OPTIMIZE TABLE test FINAL; +SYSTEM STOP MERGES test; + +SET max_block_size = 65000, min_insert_block_size_rows = 65000, min_insert_block_size_bytes = '1M'; +INSERT INTO test SELECT number, randomString(1000) FROM numbers(0, 10000); +INSERT INTO test SELECT number, randomString(1000) FROM numbers(10000, 10000); +INSERT INTO test SELECT number, randomString(1000) FROM numbers(20000, 10000); + +SELECT count(), round(avg(bytes), -6) FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active; + +DROP TABLE test; From ca20358a4c876209e620e0dc34b1a3735b6f4115 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Oct 2022 00:43:29 +0200 Subject: [PATCH 209/266] Add a test --- tests/queries/0_stateless/02458_relax_too_many_parts.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/02458_relax_too_many_parts.sql b/tests/queries/0_stateless/02458_relax_too_many_parts.sql index 417fd2dd982..a1f8e86fce5 100644 --- a/tests/queries/0_stateless/02458_relax_too_many_parts.sql +++ b/tests/queries/0_stateless/02458_relax_too_many_parts.sql @@ -1,5 +1,3 @@ --- Check that max_parts_to_throw_insert limit work - DROP TABLE IF EXISTS test; CREATE TABLE test (x UInt64, s String) ENGINE = MergeTree ORDER BY tuple() SETTINGS parts_to_throw_insert = 3; From 6bb62d4d03d66b6cf3c01ecc1a4e0e4ac8a0050f Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 8 Oct 2022 00:01:58 +0000 Subject: [PATCH 210/266] add test for PROXYv1 --- .../configs/config.xml | 6 ++++++ .../test_composable_protocols/test.py | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/tests/integration/test_composable_protocols/configs/config.xml 
b/tests/integration/test_composable_protocols/configs/config.xml index 553128d4386..35673c3e7e5 100644 --- a/tests/integration/test_composable_protocols/configs/config.xml +++ b/tests/integration/test_composable_protocols/configs/config.xml @@ -34,6 +34,12 @@ 9001 native protocol endpoint (tcp) + + proxy1 + tcp + 9100 + native protocol with PROXYv1 (tcp_proxy) + http 8123 diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py index c0c0e5e0a83..a2e30c4480b 100644 --- a/tests/integration/test_composable_protocols/test.py +++ b/tests/integration/test_composable_protocols/test.py @@ -5,6 +5,8 @@ import os from helpers.cluster import ClickHouseCluster from helpers.client import Client import urllib.request, urllib.parse +import subprocess +import socket SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -44,6 +46,19 @@ def execute_query_http(host, port, query): response = urllib.request.urlopen(request).read() return response.decode("utf-8") +def netcat(hostname, port, content): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.connect((hostname, port)) + s.sendall(content) + s.shutdown(socket.SHUT_WR) + data = [] + while 1: + d = s.recv(1024) + if len(d) == 0: + break + data.append(d) + s.close() + return b''.join(data) def test_connections(): @@ -67,3 +82,6 @@ def test_connections(): assert execute_query_https(server.ip_address, 8443, "SELECT 1") == "1\n" assert execute_query_https(server.ip_address, 8444, "SELECT 1") == "1\n" + + data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\vClickHouse \024\r\253\251\003\0\001\0\0\0\002\001\025SELECT \'Hello, world\'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" + assert netcat(server.ip_address, 9100, bytearray(data, 
"latin-1")).find(bytearray("Hello, world", "latin-1")) >= 0 From ec5a32f534bfb5a58f8c21b3120f479239adc62a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Oct 2022 02:05:36 +0200 Subject: [PATCH 211/266] Make thread_ids unique --- src/Common/ThreadStatus.h | 2 +- src/Interpreters/ThreadStatusExt.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index b414a9bccf5..67450d8c779 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -78,7 +78,7 @@ public: InternalProfileEventsQueueWeakPtr profile_queue_ptr; std::function fatal_error_callback; - std::vector thread_ids; + std::unordered_set thread_ids; std::unordered_set threads; /// The first thread created this thread group diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 3e7f08e9d9a..4810174e395 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -109,7 +109,7 @@ void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_) std::lock_guard lock(thread_group->mutex); /// NOTE: thread may be attached multiple times if it is reused from a thread pool. 
- thread_group->thread_ids.emplace_back(thread_id); + thread_group->thread_ids.insert(thread_id); thread_group->threads.insert(this); logs_queue_ptr = thread_group->logs_queue_ptr; From 3bed015a1f4994fb77dcc32385fae7c456e2f1e3 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 8 Oct 2022 00:09:24 +0000 Subject: [PATCH 212/266] Automatic style fix --- tests/integration/test_composable_protocols/test.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py index a2e30c4480b..d861af929c3 100644 --- a/tests/integration/test_composable_protocols/test.py +++ b/tests/integration/test_composable_protocols/test.py @@ -46,6 +46,7 @@ def execute_query_http(host, port, query): response = urllib.request.urlopen(request).read() return response.decode("utf-8") + def netcat(hostname, port, content): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect((hostname, port)) @@ -58,7 +59,8 @@ def netcat(hostname, port, content): break data.append(d) s.close() - return b''.join(data) + return b"".join(data) + def test_connections(): @@ -83,5 +85,10 @@ def test_connections(): assert execute_query_https(server.ip_address, 8444, "SELECT 1") == "1\n" - data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\vClickHouse \024\r\253\251\003\0\001\0\0\0\002\001\025SELECT \'Hello, world\'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" - assert netcat(server.ip_address, 9100, bytearray(data, "latin-1")).find(bytearray("Hello, world", "latin-1")) >= 0 + data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\vClickHouse 
\024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" + assert ( + netcat(server.ip_address, 9100, bytearray(data, "latin-1")).find( + bytearray("Hello, world", "latin-1") + ) + >= 0 + ) From 2f83d8790581dce0ffeec56c137b1d13160cfa7b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Oct 2022 02:42:41 +0200 Subject: [PATCH 213/266] Make thread_ids unique --- src/Interpreters/ProcessList.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 0c80b37a429..d5194a02513 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -533,7 +533,7 @@ QueryStatusInfo QueryStatus::getInfo(bool get_thread_list, bool get_profile_even if (get_thread_list) { std::lock_guard lock(thread_group->mutex); - res.thread_ids = thread_group->thread_ids; + res.thread_ids.assign(thread_group->thread_ids.begin(), thread_group->thread_ids.end()); } if (get_profile_events) From f764f4ce19af2031922c20b2d37501eb3fd58ae4 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Sat, 8 Oct 2022 08:20:17 +0200 Subject: [PATCH 214/266] Update src/Client/QueryFuzzer.cpp --- src/Client/QueryFuzzer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index b28cc89b2df..f0c4313e8a8 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -23,7 +23,6 @@ #include #include #include -#include "Parsers/IAST_fwd.h" #include #include #include From f4792c37eface463e68b03d4e8397373ba7cd279 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 8 Oct 2022 10:09:24 +0000 Subject: [PATCH 215/266] Fix #42185: DISTINCT in order fails with LOGICAL_ERROR ... 
if first column in sorting key contains function --- .../Optimizations/distinctReadInOrder.cpp | 6 ++++++ ...7_distinct_in_order_optimization.reference | 7 +++++++ .../02317_distinct_in_order_optimization.sql | 20 +++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp index 6ec7ee98d08..38cb6f3d3c5 100644 --- a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp @@ -44,6 +44,12 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node, QueryPlan::Nodes &) if (!read_from_merge_tree) return 0; + /// if reading from merge tree doesn't provide any output order, we can do nothing + /// it means that no ordering can provided or supported for a particular sorting key + /// for example, tuple() or sipHash(string) + if (read_from_merge_tree->getOutputStream().sort_description.empty()) + return 0; + /// find non-const columns in DISTINCT const ColumnsWithTypeAndName & distinct_columns = pre_distinct->getOutputStream().header.getColumnsWithTypeAndName(); std::set non_const_columns; diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference b/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference index 628c2fc0714..26232d1e281 100644 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference @@ -110,3 +110,10 @@ select distinct a, b, x, y from (select a, b, 1 as x, 2 as y from distinct_in_or 0 -- check that distinct in order WITHOUT order by and WITH filter returns the same result as ordinary distinct 0 +-- bug 42185, distinct in order and empty sort description +-- distinct in order, sorting key tuple() +1 +0 +-- distinct in order, sorting key contains function +2000-01-01 00:00:00 +2000-01-01 diff --git 
a/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql b/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql index a1e7d7340a3..a794709caba 100644 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql @@ -95,3 +95,23 @@ select count() as diff from (select distinct * from distinct_in_order except sel drop table if exists distinct_in_order; drop table if exists ordinary_distinct; drop table if exists distinct_cardinality_low; + +-- bug 42185 +drop table if exists sorting_key_empty_tuple; +drop table if exists sorting_key_contain_function; + +select '-- bug 42185, distinct in order and empty sort description'; +select '-- distinct in order, sorting key tuple()'; +create table sorting_key_empty_tuple (a int, b int) engine=MergeTree() order by tuple(); +insert into sorting_key_empty_tuple select number % 2, number % 5 from numbers(1,10); +select distinct a from sorting_key_empty_tuple; + +select '-- distinct in order, sorting key contains function'; +create table sorting_key_contain_function (datetime DateTime, a int) engine=MergeTree() order by (toDate(datetime)); +insert into sorting_key_contain_function values ('2000-01-01', 1); +insert into sorting_key_contain_function values ('2000-01-01', 2); +select distinct datetime from sorting_key_contain_function; +select distinct toDate(datetime) from sorting_key_contain_function; + +drop table sorting_key_empty_tuple; +drop table sorting_key_contain_function; From e07a3e1193cb1939e5c4c591cd0f983b0f451516 Mon Sep 17 00:00:00 2001 From: FArthur-cmd <613623@mail.ru> Date: Sat, 8 Oct 2022 10:33:37 +0000 Subject: [PATCH 216/266] add default --- src/Access/SettingsConstraints.cpp | 4 ++++ src/Core/Settings.cpp | 7 ++++++- src/Parsers/ParserSetQuery.cpp | 2 ++ tests/queries/0_stateless/02458_default_setting.reference | 3 +++ tests/queries/0_stateless/02458_default_setting.sql | 7 +++++++ 5 files changed, 22 
insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02458_default_setting.reference create mode 100644 tests/queries/0_stateless/02458_default_setting.sql diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index d97a78c78ab..2c284ff91fe 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -118,6 +118,10 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh bool cannot_cast; auto cast_value = [&](const Field & x) -> Field { + /// `Default` can't be cast to other types, so it should be checked separately + String setting_value; + if (x.tryGet(setting_value) && setting_value == "DEFAULT") + return {}; cannot_cast = false; if (reaction == THROW_ON_VIOLATION) return Settings::castValueUtil(setting_name, x); diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 7bac3f04fc6..e7dc389e38f 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -148,7 +148,12 @@ std::vector Settings::getAllRegisteredNames() const void Settings::set(std::string_view name, const Field & value) { - BaseSettings::set(name, value); + String setting_value; + if (value.tryGet(setting_value) && setting_value == "DEFAULT") { + resetToDefault(name); + } else { + BaseSettings::set(name, value); + } if (name == "compatibility") applyCompatibilitySetting(); diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 20de785ac1b..49abd3a0b44 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -109,6 +109,8 @@ bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & p value = std::make_shared(Field(static_cast(1))); else if (ParserKeyword("FALSE").ignore(pos, expected)) value = std::make_shared(Field(static_cast(0))); + else if (ParserKeyword("DEFAULT").ignore(pos, expected)) + value = std::make_shared(Field("DEFAULT")); else if (!value_p.parse(pos, value, expected)) return false; diff 
--git a/tests/queries/0_stateless/02458_default_setting.reference b/tests/queries/0_stateless/02458_default_setting.reference new file mode 100644 index 00000000000..9cbe5c8d87e --- /dev/null +++ b/tests/queries/0_stateless/02458_default_setting.reference @@ -0,0 +1,3 @@ +1048545 +100000 +1048545 diff --git a/tests/queries/0_stateless/02458_default_setting.sql b/tests/queries/0_stateless/02458_default_setting.sql new file mode 100644 index 00000000000..a341decdd1c --- /dev/null +++ b/tests/queries/0_stateless/02458_default_setting.sql @@ -0,0 +1,7 @@ +-- Tags: no-parallel + +SELECT value FROM system.settings where name='max_insert_block_size'; +SET max_insert_block_size=100000; +SELECT value FROM system.settings where name='max_insert_block_size'; +SET max_insert_block_size=DEFAULT; +SELECT value FROM system.settings where name='max_insert_block_size'; From 2bef2b09de57be3de1ac92f9e06aae7341c3b53e Mon Sep 17 00:00:00 2001 From: FArthur-cmd <613623@mail.ru> Date: Sat, 8 Oct 2022 11:07:46 +0000 Subject: [PATCH 217/266] improve test and simplify code --- src/Access/SettingsConstraints.cpp | 4 +--- src/Core/Settings.cpp | 3 +-- src/Parsers/ParserSetQuery.cpp | 2 +- tests/queries/0_stateless/02458_default_setting.reference | 2 ++ tests/queries/0_stateless/02458_default_setting.sql | 2 ++ 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index 2c284ff91fe..310b973e668 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -118,9 +118,7 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh bool cannot_cast; auto cast_value = [&](const Field & x) -> Field { - /// `Default` can't be cast to other types, so it should be checked separately - String setting_value; - if (x.tryGet(setting_value) && setting_value == "DEFAULT") + if (x.isNull()) return {}; cannot_cast = false; if (reaction == THROW_ON_VIOLATION) diff --git 
a/src/Core/Settings.cpp b/src/Core/Settings.cpp index e7dc389e38f..b2a81784dda 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -148,8 +148,7 @@ std::vector Settings::getAllRegisteredNames() const void Settings::set(std::string_view name, const Field & value) { - String setting_value; - if (value.tryGet(setting_value) && setting_value == "DEFAULT") { + if (value.isNull()) { resetToDefault(name); } else { BaseSettings::set(name, value); diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 49abd3a0b44..999c70e9c3d 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -110,7 +110,7 @@ bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & p else if (ParserKeyword("FALSE").ignore(pos, expected)) value = std::make_shared(Field(static_cast(0))); else if (ParserKeyword("DEFAULT").ignore(pos, expected)) - value = std::make_shared(Field("DEFAULT")); + value = std::make_shared(Field()); else if (!value_p.parse(pos, value, expected)) return false; diff --git a/tests/queries/0_stateless/02458_default_setting.reference b/tests/queries/0_stateless/02458_default_setting.reference index 9cbe5c8d87e..376553843ac 100644 --- a/tests/queries/0_stateless/02458_default_setting.reference +++ b/tests/queries/0_stateless/02458_default_setting.reference @@ -1,3 +1,5 @@ 1048545 100000 +1 1048545 +0 diff --git a/tests/queries/0_stateless/02458_default_setting.sql b/tests/queries/0_stateless/02458_default_setting.sql index a341decdd1c..712b5ad171b 100644 --- a/tests/queries/0_stateless/02458_default_setting.sql +++ b/tests/queries/0_stateless/02458_default_setting.sql @@ -3,5 +3,7 @@ SELECT value FROM system.settings where name='max_insert_block_size'; SET max_insert_block_size=100000; SELECT value FROM system.settings where name='max_insert_block_size'; +SELECT changed FROM system.settings where name='max_insert_block_size'; SET max_insert_block_size=DEFAULT; SELECT value FROM system.settings 
where name='max_insert_block_size'; +SELECT changed FROM system.settings where name='max_insert_block_size'; From 8b44e11430bcf2ac2e20d76f7fe0557e420cbb36 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sat, 8 Oct 2022 16:10:46 +0200 Subject: [PATCH 218/266] fix --- base/base/safeExit.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/base/base/safeExit.cpp b/base/base/safeExit.cpp index 44d92643e91..027ad4c8c4d 100644 --- a/base/base/safeExit.cpp +++ b/base/base/safeExit.cpp @@ -1,6 +1,7 @@ #if defined(OS_LINUX) # include #endif +#include #include #include #include From 023f11fb86bb68686596e90acc70a01902bc10ee Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Oct 2022 03:54:00 +0300 Subject: [PATCH 219/266] Update developer-instruction.md --- docs/en/development/developer-instruction.md | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 82cb4018625..69afb31e214 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -122,7 +122,7 @@ If you use Arch or Gentoo, you probably know it yourself how to install CMake. ## C++ Compiler {#c-compiler} -Compilers Clang starting from version 12 is supported for building ClickHouse. +Compilers Clang starting from version 15 is supported for building ClickHouse. Clang should be used instead of gcc. Though, our continuous integration (CI) platform runs checks for about a dozen of build combinations. @@ -146,7 +146,7 @@ While inside the `build` directory, configure your build by running CMake. Befor export CC=clang CXX=clang++ cmake .. -If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-14 CXX=clang++-14`. The clang version will be in the script output. 
+If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-15 CXX=clang++-15`. The clang version will be in the script output. The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building. @@ -178,7 +178,7 @@ If you get the message: `ninja: error: loading 'build.ninja': No such file or di Upon the successful start of the building process, youโ€™ll see the build progress - the number of processed tasks and the total number of tasks. -While building messages about protobuf files in libhdfs2 library like `libprotobuf WARNING` may show up. They affect nothing and are safe to be ignored. +While building messages about LLVM library may show up. They affect nothing and are safe to be ignored. Upon successful build you get an executable file `ClickHouse//programs/clickhouse`: @@ -272,15 +272,10 @@ Most probably some of the builds will fail at first times. This is due to the fa You can use the **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation, semantic highlighting, search and indexing. The code snapshot is updated daily. +You can use GitHub integrated code browser [here](https://github.dev/ClickHouse/ClickHouse). + Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual. -## Faster builds for development: Split build configuration {#split-build} - -ClickHouse is normally statically linked into a single static `clickhouse` binary with minimal dependencies. This is convenient for distribution, but it means that for every change the entire binary needs to be re-linked, which is slow and inconvenient for development. As an alternative, you can instead build dynamically linked shared libraries, allowing for faster incremental builds. 
To use it, add the following flags to your `cmake` invocation: -``` --DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -``` - If you are not interested in functionality provided by third-party libraries, you can further speed up the build using `cmake` options ``` -DENABLE_LIBRARIES=0 -DENABLE_EMBEDDED_COMPILER=0 From b74783455f0dddc64e227c1532a382f09ca4e9f0 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Sat, 8 Oct 2022 21:57:05 -0400 Subject: [PATCH 220/266] Update Settings.h fix dialect description --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2557e9ccfd8..257448a248a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -42,7 +42,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) */ #define COMMON_SETTINGS(M) \ - M(Dialect, dialect, Dialect::clickhouse, "Which SQL dialect will be used to parse query", 0)\ + M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ From cebc38fa6e8fe6e9a4bbc5db774fa75f558a76c2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Oct 2022 05:30:25 +0200 Subject: [PATCH 221/266] Fix a bug with projections and aggregate_functions_null_for_empty setting --- src/Storages/MergeTree/MergeTreeData.cpp | 3 ++- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 5 +++++ ...460_projections_and_aggregate_null_if_empty.reference | 1 + 
.../02460_projections_and_aggregate_null_if_empty.sh | 9 +++++++++ 4 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.reference create mode 100755 tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.sh diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b4961a1cdd9..e0dc37a571e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5676,7 +5676,8 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg { const auto & metadata_snapshot = storage_snapshot->metadata; const auto & settings = query_context->getSettingsRef(); - if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query) + if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query + || settings.aggregate_functions_null_for_empty /* projections don't work correctly with this setting */) return std::nullopt; // Currently projections don't support parallel replicas reading yet. 
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 91ecb3a37a0..a51aedbf1d6 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -176,6 +176,11 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( "No projection is used when allow_experimental_projection_optimization = 1 and force_optimize_projection = 1", ErrorCodes::PROJECTION_NOT_USED); + if (settings.force_optimize_projection && settings.aggregate_functions_null_for_empty) + throw Exception( + "Projections cannot be used with 'aggregate_functions_null_for_empty' setting enabled.", + ErrorCodes::PROJECTION_NOT_USED); + return plan; } diff --git a/tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.reference b/tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.reference new file mode 100644 index 00000000000..54f97aaa2e6 --- /dev/null +++ b/tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.reference @@ -0,0 +1 @@ +20220920 diff --git a/tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.sh b/tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.sh new file mode 100755 index 00000000000..6e96b9b8afc --- /dev/null +++ b/tests/queries/0_stateless/02460_projections_and_aggregate_null_if_empty.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: depends on bzip2 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --aggregate_functions_null_for_empty=1 --multiquery --query "create table test_date (date Int32) ENGINE = MergeTree ORDER BY (date) as select 20220920; SELECT max(date) FROM test_date"; From e27dbf43693e8e42a43749cf242dbfb46e7da69c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Oct 2022 18:27:47 +0300 Subject: [PATCH 222/266] Update MergeTreeDataSelectExecutor.cpp --- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a51aedbf1d6..91ecb3a37a0 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -176,11 +176,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( "No projection is used when allow_experimental_projection_optimization = 1 and force_optimize_projection = 1", ErrorCodes::PROJECTION_NOT_USED); - if (settings.force_optimize_projection && settings.aggregate_functions_null_for_empty) - throw Exception( - "Projections cannot be used with 'aggregate_functions_null_for_empty' setting enabled.", - ErrorCodes::PROJECTION_NOT_USED); - return plan; } From 8a2ba6cd816ded5718f403a4f9751390e2550649 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 10 Oct 2022 09:32:13 +0200 Subject: [PATCH 223/266] Add logging during merge tree startup (#42163) --- src/Storages/MergeTree/MergeTreeData.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b4961a1cdd9..c9e9a45dc67 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -954,6 +954,8 @@ void MergeTreeData::loadDataPartsFromDisk( /// Parallel loading of data parts. 
pool.setMaxThreads(std::min(static_cast(settings->max_part_loading_threads), num_parts)); size_t num_threads = pool.getMaxThreads(); + LOG_DEBUG(log, "Going to use {} threads to load parts", num_threads); + std::vector parts_per_thread(num_threads, num_parts / num_threads); for (size_t i = 0ul; i < num_parts % num_threads; ++i) ++parts_per_thread[i]; @@ -1016,6 +1018,8 @@ void MergeTreeData::loadDataPartsFromDisk( auto part_opt = MergeTreePartInfo::tryParsePartName(part_name, format_version); if (!part_opt) return; + + LOG_TRACE(log, "Loading part {} from disk {}", part_name, part_disk_ptr->getName()); const auto & part_info = *part_opt; auto single_disk_volume = std::make_shared("volume_" + part_name, part_disk_ptr, 0); auto data_part_storage = std::make_shared(single_disk_volume, relative_data_path, part_name); @@ -1119,6 +1123,7 @@ void MergeTreeData::loadDataPartsFromDisk( } addPartContributionToDataVolume(part); + LOG_TRACE(log, "Finished part {} load on disk {}", part_name, part_disk_ptr->getName()); }; std::mutex part_select_mutex; @@ -1311,8 +1316,10 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) size_t num_parts = 0; std::queue>> parts_queue; - for (auto & [_, disk_parts] : disk_part_map) + for (auto & [disk_name, disk_parts] : disk_part_map) { + LOG_INFO(log, "Found {} parts for disk '{}' to load", disk_parts.size(), disk_name); + if (disk_parts.empty()) continue; num_parts += disk_parts.size(); From dbed82e74c82814f0c2f1ee4ef18c0f6509b5300 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 10 Oct 2022 12:18:05 +0200 Subject: [PATCH 224/266] Fix LLVM build --- contrib/llvm-project-cmake/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index 43dfd5950eb..6b962a11e01 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -80,6 +80,7 @@ set(LLVM_ENABLE_LIBXML2 0 CACHE 
INTERNAL "") set(LLVM_ENABLE_LIBEDIT 0 CACHE INTERNAL "") set(LLVM_ENABLE_LIBPFM 0 CACHE INTERNAL "") set(LLVM_ENABLE_ZLIB 0 CACHE INTERNAL "") +set(LLVM_ENABLE_ZSTD 0 CACHE INTERNAL "") set(LLVM_ENABLE_Z3_SOLVER 0 CACHE INTERNAL "") set(LLVM_INCLUDE_TOOLS 0 CACHE INTERNAL "") set(LLVM_BUILD_TOOLS 0 CACHE INTERNAL "") From 33edab50429705c67feb21303f217364ae00b47f Mon Sep 17 00:00:00 2001 From: "Chun-Sheng, Li" Date: Mon, 10 Oct 2022 19:34:38 +0800 Subject: [PATCH 225/266] Adding cron config checking before running sed cmd (#42081) Co-authored-by: Nikita Mikhaylov --- packages/clickhouse-server.init | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/clickhouse-server.init b/packages/clickhouse-server.init index 13aeffe13a7..f215e52b6f3 100755 --- a/packages/clickhouse-server.init +++ b/packages/clickhouse-server.init @@ -120,7 +120,11 @@ use_cron() if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then return 1 fi - # 2. disabled by config + # 2. checking whether the config is existed + if [ ! -f "$CLICKHOUSE_CRONFILE" ]; then + return 1 + fi + # 3. 
disabled by config if [ -z "$CLICKHOUSE_CRONFILE" ]; then return 2 fi From ff03181d318258b44f9eb6f80261b2c9fff58c78 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 10 Oct 2022 15:01:03 +0300 Subject: [PATCH 226/266] Update safeExit.cpp --- base/base/safeExit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/base/safeExit.cpp b/base/base/safeExit.cpp index 027ad4c8c4d..e4f9e80759e 100644 --- a/base/base/safeExit.cpp +++ b/base/base/safeExit.cpp @@ -1,7 +1,7 @@ #if defined(OS_LINUX) # include #endif -#include +#include #include #include #include From f6b0dc168d6c18f8e68f4bdeae8b9c53260a9561 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 10 Oct 2022 14:10:04 +0200 Subject: [PATCH 227/266] Disable concurrent parts removal --- src/Storages/MergeTree/MergeTreeData.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c9e9a45dc67..46f4eef2e88 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1916,7 +1916,8 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t const auto settings = getSettings(); if (parts_to_remove.size() > 1 && settings->max_part_removal_threads > 1 - && parts_to_remove.size() > settings->concurrent_part_removal_threshold) + && parts_to_remove.size() > settings->concurrent_part_removal_threshold + && (!supportsReplication() || !settings->allow_remote_fs_zero_copy_replication)) /// parts must be removed in order for zero-copy replication { /// Parallel parts removal. 
size_t num_threads = std::min(settings->max_part_removal_threads, parts_to_remove.size()); From 4b371bd20c20b97d5c36f88c3d9870702113db7a Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 10 Oct 2022 16:38:35 +0200 Subject: [PATCH 228/266] fix --- src/Databases/DDLDependencyVisitor.cpp | 2 +- src/Interpreters/DatabaseCatalog.cpp | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index a9131635775..bedaec75565 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -187,7 +187,7 @@ void NormalizeAndEvaluateConstants::visit(const ASTFunctionWithKeyValueArguments for (auto & child : expr_list.children) { ASTPair * pair = child->as(); - if (const auto * func = pair->second->as()) + if (pair->second->as()) { auto ast_literal = evaluateConstantExpressionAsLiteral(pair->children[0], data.create_query_context); pair->replace(pair->second, ast_literal); diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index e2c4a9a5082..ab6f088ac69 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1088,12 +1088,6 @@ TableNamesSet DatabaseCatalog::tryRemoveLoadingDependenciesUnlocked(const Qualif TableNamesSet & dependent = it->second.dependent_database_objects; if (!dependent.empty()) { - if (check_dependencies && !is_drop_database) - throw Exception(ErrorCodes::HAVE_DEPENDENT_OBJECTS, "Cannot drop or rename {}, because some tables depend on it: {}", - removing_table, fmt::join(dependent, ", ")); - - /// For DROP DATABASE we should ignore dependent tables from the same database. 
- /// TODO unload tables in reverse topological order and remove this code if (check_dependencies) checkTableCanBeRemovedOrRenamedImpl(dependent, removing_table, is_drop_database); From 97d16fd6291d646f4a61deaee00944b14ff091d6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 11 Oct 2022 02:17:32 +0200 Subject: [PATCH 229/266] Add a test for #2389 --- ..._materialized_view_default_value.reference | 3 ++ .../02459_materialized_view_default_value.sql | 36 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 tests/queries/0_stateless/02459_materialized_view_default_value.reference create mode 100644 tests/queries/0_stateless/02459_materialized_view_default_value.sql diff --git a/tests/queries/0_stateless/02459_materialized_view_default_value.reference b/tests/queries/0_stateless/02459_materialized_view_default_value.reference new file mode 100644 index 00000000000..0cab6bd33bb --- /dev/null +++ b/tests/queries/0_stateless/02459_materialized_view_default_value.reference @@ -0,0 +1,3 @@ +2019-05-01 test +2019-05-01 test +2019-05-01 test diff --git a/tests/queries/0_stateless/02459_materialized_view_default_value.sql b/tests/queries/0_stateless/02459_materialized_view_default_value.sql new file mode 100644 index 00000000000..16a814233d1 --- /dev/null +++ b/tests/queries/0_stateless/02459_materialized_view_default_value.sql @@ -0,0 +1,36 @@ +DROP TABLE IF EXISTS session; +DROP TABLE IF EXISTS queue; +DROP TABLE IF EXISTS forward; + +CREATE TABLE session +( + `day` Date, + `uid` String, + `dummy` String DEFAULT '' +) +ENGINE = MergeTree +ORDER BY (day, uid); + +CREATE TABLE queue +( + `day` Date, + `uid` String +) +ENGINE = MergeTree +ORDER BY (day, uid); + +CREATE MATERIALIZED VIEW IF NOT EXISTS forward TO session AS +SELECT + day, + uid +FROM queue; + +insert into queue values ('2019-05-01', 'test'); + +SELECT * FROM queue; +SELECT * FROM session; +SELECT * FROM forward; + +DROP TABLE session; +DROP TABLE queue; +DROP TABLE forward; From 
15aa8a88dc1b865f5b204309d60f3817b205ff88 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Tue, 11 Oct 2022 05:04:47 -0700 Subject: [PATCH 230/266] Throw an exception in case of inconsistent state --- src/Interpreters/InterpreterCreateQuery.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 080f464cf08..32c0b439e62 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -727,9 +727,11 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti { return {}; } + else if (!create.storage || !create.storage->engine) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected application state. CREATE query is missing either its storage or engine."); /// We can have queries like "CREATE TABLE
ENGINE=" if /// supports schema inference (will determine table structure in it's constructor). - else if (create.storage && create.storage->engine && !StorageFactory::instance().checkIfStorageSupportsSchemaInterface(create.storage->engine->name)) // NOLINT + else if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(create.storage->engine->name)) throw Exception("Incorrect CREATE query: required list of column descriptions or AS section or SELECT.", ErrorCodes::INCORRECT_QUERY); /// Even if query has list of columns, canonicalize it (unfold Nested columns). From c08387eb0d8692096090f64f473fddf49d69981c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 11 Oct 2022 14:10:01 +0000 Subject: [PATCH 231/266] Minor cleanups of LLVM integration --- contrib/llvm-project | 2 +- contrib/llvm-project-cmake/CMakeLists.txt | 13 ++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/contrib/llvm-project b/contrib/llvm-project index c7f7cfc85e4..3a39038345a 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit c7f7cfc85e4b81c1c76cdd633dd8808d2dfd6114 +Subproject commit 3a39038345a400e7e767811b142a94355d511215 diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index 43dfd5950eb..fe30b61794c 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined" OR NOT USE_STATIC_LIBRARIES) +if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) else() set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) @@ -6,15 +6,16 @@ endif() option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) +# If USE_STATIC_LIBRARIES=0 was passed to CMake, we'll still build LLVM statically to keep complexity minimal. 
+ if (NOT ENABLE_EMBEDDED_COMPILER) message(STATUS "Not using LLVM") return() endif() -# TODO: Enable shared library build # TODO: Enable compilation on AArch64 -set (LLVM_VERSION "14.0.0bundled") +set (LLVM_VERSION "15.0.0bundled") set (LLVM_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm/include" "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm/include" @@ -62,9 +63,6 @@ set (REQUIRED_LLVM_LIBRARIES # list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) # endif () -# ld: unknown option: --color-diagnostics -# set (LINKER_SUPPORTS_COLOR_DIAGNOSTICS 0 CACHE INTERNAL "") - set (CMAKE_INSTALL_RPATH "ON") # Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind set (LLVM_COMPILER_CHECKED 1 CACHE INTERNAL "") # Skip internal compiler selection set (LLVM_ENABLE_EH 1 CACHE INTERNAL "") # With exception handling @@ -96,9 +94,6 @@ set(LLVM_INCLUDE_DOCS 0 CACHE INTERNAL "") set(LLVM_ENABLE_OCAMLDOC 0 CACHE INTERNAL "") set(LLVM_ENABLE_BINDINGS 0 CACHE INTERNAL "") -# C++20 is currently not supported due to ambiguous operator != etc. 
-set (CMAKE_CXX_STANDARD 17) - set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm") set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm") add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}") From b6304c46d4c018909eb1eb21378e52a7c5351dd0 Mon Sep 17 00:00:00 2001 From: FArthur-cmd <613623@mail.ru> Date: Tue, 11 Oct 2022 18:25:28 +0000 Subject: [PATCH 232/266] move default settings to different part of ast --- src/Access/SettingsConstraints.cpp | 2 -- src/Client/ClientBase.cpp | 2 ++ src/Core/Settings.cpp | 6 +--- src/Core/Settings.h | 2 ++ src/Interpreters/Context.cpp | 9 +++++ src/Interpreters/Context.h | 3 ++ src/Interpreters/InterpreterSetQuery.cpp | 2 ++ src/Parsers/ASTSetQuery.cpp | 11 ++++++ src/Parsers/ASTSetQuery.h | 2 ++ src/Parsers/ParserSetQuery.cpp | 45 +++++++++++++++++++++--- src/Parsers/ParserSetQuery.h | 1 + 11 files changed, 74 insertions(+), 11 deletions(-) diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index 310b973e668..d97a78c78ab 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -118,8 +118,6 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh bool cannot_cast; auto cast_value = [&](const Field & x) -> Field { - if (x.isNull()) - return {}; cannot_cast = false; if (reaction == THROW_ON_VIOLATION) return Settings::castValueUtil(setting_name, x); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index c27effe20a0..0a2fbcf9f46 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1498,6 +1498,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (!old_settings) old_settings.emplace(global_context->getSettingsRef()); global_context->applySettingsChanges(settings_ast.as()->changes); + global_context->resetSettingsToDefaultValue(settings_ast.as()->default_settings); }; const auto * insert = parsed_query->as(); @@ 
-1543,6 +1544,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin else global_context->applySettingChange(change); } + global_context->resetSettingsToDefaultValue(set_query->default_settings); } if (const auto * use_query = parsed_query->as()) { diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index b2a81784dda..7bac3f04fc6 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -148,11 +148,7 @@ std::vector Settings::getAllRegisteredNames() const void Settings::set(std::string_view name, const Field & value) { - if (value.isNull()) { - resetToDefault(name); - } else { - BaseSettings::set(name, value); - } + BaseSettings::set(name, value); if (name == "compatibility") applyCompatibilitySetting(); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d43dc0e8ea9..c9613c6360b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -890,6 +890,8 @@ struct Settings : public BaseSettings, public IHints<2, Settings void set(std::string_view name, const Field & value) override; + void setDefaultValue(const String & name) { resetToDefault(name); } + private: void applyCompatibilitySetting(); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 3c406058cb5..b08c2bab81c 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1374,6 +1374,15 @@ void Context::clampToSettingsConstraints(SettingsChanges & changes) const getSettingsConstraintsAndCurrentProfiles()->constraints.clamp(settings, changes); } +void Context::resetSettingsToDefaultValue(const std::vector & names) +{ + auto lock = getLock(); + for (const String & name: names) + { + settings.setDefaultValue(name); + } +} + std::shared_ptr Context::getSettingsConstraintsAndCurrentProfiles() const { auto lock = getLock(); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 6a417cbd3de..7711ea34dc7 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -647,6 +647,9 
@@ public: void checkSettingsConstraints(SettingsChanges & changes) const; void clampToSettingsConstraints(SettingsChanges & changes) const; + /// Reset settings to default value + void resetSettingsToDefaultValue(const std::vector & names); + /// Returns the current constraints (can return null). std::shared_ptr getSettingsConstraintsAndCurrentProfiles() const; diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index 2bd8d648040..2c8611ffb63 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -13,6 +13,7 @@ BlockIO InterpreterSetQuery::execute() auto session_context = getContext()->getSessionContext(); session_context->applySettingsChanges(ast.changes); session_context->addQueryParameters(ast.query_parameters); + session_context->resetSettingsToDefaultValue(ast.default_settings); return {}; } @@ -22,6 +23,7 @@ void InterpreterSetQuery::executeForCurrentContext() const auto & ast = query_ptr->as(); getContext()->checkSettingsConstraints(ast.changes); getContext()->applySettingsChanges(ast.changes); + getContext()->resetSettingsToDefaultValue(ast.default_settings); } } diff --git a/src/Parsers/ASTSetQuery.cpp b/src/Parsers/ASTSetQuery.cpp index 70e8877a391..26420f4988c 100644 --- a/src/Parsers/ASTSetQuery.cpp +++ b/src/Parsers/ASTSetQuery.cpp @@ -37,6 +37,17 @@ void ASTSetQuery::formatImpl(const FormatSettings & format, FormatState &, Forma format.ostr << " = " << applyVisitor(FieldVisitorToString(), change.value); } + for (const auto & setting_name : default_settings) + { + if (!first) + format.ostr << ", "; + else + first = false; + + formatSettingName(setting_name, format.ostr); + format.ostr << " = DEFAULT"; + } + for (const auto & [name, value] : query_parameters) { if (!first) diff --git a/src/Parsers/ASTSetQuery.h b/src/Parsers/ASTSetQuery.h index e716ae2de06..0e1933cbf1a 100644 --- a/src/Parsers/ASTSetQuery.h +++ b/src/Parsers/ASTSetQuery.h @@ -23,6 +23,8 @@ 
public: bool print_in_format = true; SettingsChanges changes; + /// settings that will be reset to default value + std::vector default_settings; NameToNameMap query_parameters; /** Get the text that identifies this element. */ diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 999c70e9c3d..c840fc8d2b8 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -109,8 +109,6 @@ bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & p value = std::make_shared(Field(static_cast(1))); else if (ParserKeyword("FALSE").ignore(pos, expected)) value = std::make_shared(Field(static_cast(0))); - else if (ParserKeyword("DEFAULT").ignore(pos, expected)) - value = std::make_shared(Field()); else if (!value_p.parse(pos, value, expected)) return false; @@ -120,6 +118,40 @@ bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & p return true; } +bool ParserSetQuery::parseNameValuePairWithDefault(SettingChange & change, String & default_settings, IParser::Pos & pos, Expected & expected) +{ + ParserCompoundIdentifier name_p; + ParserLiteralOrMap value_p; + ParserToken s_eq(TokenType::Equals); + + ASTPtr name; + ASTPtr value; + bool is_default = false; + + if (!name_p.parse(pos, name, expected)) + return false; + + if (!s_eq.ignore(pos, expected)) + return false; + + if (ParserKeyword("TRUE").ignore(pos, expected)) + value = std::make_shared(Field(static_cast(1))); + else if (ParserKeyword("FALSE").ignore(pos, expected)) + value = std::make_shared(Field(static_cast(0))); + else if (ParserKeyword("DEFAULT").ignore(pos, expected)) + is_default = true; + else if (!value_p.parse(pos, value, expected)) + return false; + + tryGetIdentifierNameInto(name, change.name); + if (is_default) + default_settings = change.name; + else + change.value = value->as().value; + + return true; +} + bool ParserSetQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -139,20 +171,24 @@ bool 
ParserSetQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) SettingsChanges changes; NameToNameMap query_parameters; + std::vector default_settings; while (true) { - if ((!changes.empty() || !query_parameters.empty()) && !s_comma.ignore(pos)) + if ((!changes.empty() || !query_parameters.empty() || !default_settings.empty()) && !s_comma.ignore(pos)) break; /// Either a setting or a parameter for prepared statement (if name starts with QUERY_PARAMETER_NAME_PREFIX) SettingChange current; + String name_of_default_setting; - if (!parseNameValuePair(current, pos, expected)) + if (!parseNameValuePairWithDefault(current, name_of_default_setting, pos, expected)) return false; if (current.name.starts_with(QUERY_PARAMETER_NAME_PREFIX)) query_parameters.emplace(convertToQueryParameter(std::move(current))); + else if (!name_of_default_setting.empty()) + default_settings.emplace_back(std::move(name_of_default_setting)); else changes.push_back(std::move(current)); } @@ -163,6 +199,7 @@ bool ParserSetQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->is_standalone = !parse_only_internals; query->changes = std::move(changes); query->query_parameters = std::move(query_parameters); + query->default_settings = std::move(default_settings); return true; } diff --git a/src/Parsers/ParserSetQuery.h b/src/Parsers/ParserSetQuery.h index 0bc1cec3093..0213667ad7a 100644 --- a/src/Parsers/ParserSetQuery.h +++ b/src/Parsers/ParserSetQuery.h @@ -17,6 +17,7 @@ class ParserSetQuery : public IParserBase public: explicit ParserSetQuery(bool parse_only_internals_ = false) : parse_only_internals(parse_only_internals_) {} static bool parseNameValuePair(SettingChange & change, IParser::Pos & pos, Expected & expected); + static bool parseNameValuePairWithDefault(SettingChange & change, String & default_settings, IParser::Pos & pos, Expected & expected); protected: const char * getName() const override { return "SET query"; } bool parseImpl(Pos & pos, ASTPtr & node, 
Expected & expected) override; From d77ccc8aa4c71bab71ec7366c1f3292a5daeb138 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 11 Oct 2022 23:49:46 +0200 Subject: [PATCH 233/266] Disable test for s3 storage --- .../queries/0_stateless/01810_max_part_removal_threads_long.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh index b1f30a41924..17aa1a1743b 100755 --- a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh +++ b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-s3-storage # NOTE: this done as not .sql since we need to Ordinary database # (to account threads in query_log for DROP TABLE query) From 3fcf188bcc5e9c3102a9a93becd9265d002e0b06 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 11 Oct 2022 23:50:02 +0200 Subject: [PATCH 234/266] Disable test for s3 storage --- tests/queries/0_stateless/01810_max_part_removal_threads_long.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh index 17aa1a1743b..85662438f33 100755 --- a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh +++ b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash # Tags: long, no-s3-storage +# Because parallel parts removal disabled for s3 storage # NOTE: this done as not .sql since we need to Ordinary database # (to account threads in query_log for DROP TABLE query) From bbd1196a96d71cda1d9c4460c1823857bd90e4c9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 12 Oct 2022 00:57:02 +0300 Subject: [PATCH 235/266] Update MergeTreeData.cpp --- src/Storages/MergeTree/MergeTreeData.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 46f4eef2e88..8cb8b0aacf0 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1699,7 +1699,13 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) if (!lock.try_lock()) return res; + /// Concurrent parts removal is disabled for "zero-copy replication" (a non-production feature), + /// because parts removal involves hard links and concurrent hard link operations don't work correctly + /// in the "zero-copy replication" (because it is a non-production feature). + /// Please don't use "zero-copy replication" (a non-production feature) in production. + /// It is not ready for production usage. Don't use it. bool need_remove_parts_in_order = supportsReplication() && getSettings()->allow_remote_fs_zero_copy_replication; + if (need_remove_parts_in_order) { bool has_zero_copy_disk = false; From 312afdc280fe5894488cb756d147d18eae0ad37e Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 12 Oct 2022 03:48:37 +0000 Subject: [PATCH 236/266] remove useless code --- src/Storages/StorageInMemoryMetadata.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index a9ab96909f4..eadce581334 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -77,15 +77,6 @@ struct StorageInMemoryMetadata /// Sets projections void setProjections(ProjectionsDescription projections_); - /// Set partition key for storage (methods below, are just wrappers for this struct). - void setPartitionKey(const KeyDescription & partition_key_); - /// Set sorting key for storage (methods below, are just wrappers for this struct). - void setSortingKey(const KeyDescription & sorting_key_); - /// Set primary key for storage (methods below, are just wrappers for this struct). 
- void setPrimaryKey(const KeyDescription & primary_key_); - /// Set sampling key for storage (methods below, are just wrappers for this struct). - void setSamplingKey(const KeyDescription & sampling_key_); - /// Set common table TTLs void setTableTTLs(const TTLTableDescription & table_ttl_); From 0178307c278ac90546a302fed89f34523ddb07b9 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 5 Oct 2022 16:35:10 +0000 Subject: [PATCH 237/266] Followup for TemporaryDataOnDisk --- .../settings.md | 15 ++++++ .../operations/settings/query-complexity.md | 15 ++++++ programs/server/Server.cpp | 8 +-- src/Common/CurrentMetrics.cpp | 1 + src/Disks/TemporaryFileOnDisk.cpp | 53 +++++++++++++++---- src/Disks/TemporaryFileOnDisk.h | 17 +++--- src/Interpreters/Aggregator.cpp | 4 +- src/Interpreters/MergeJoin.cpp | 2 +- src/Interpreters/SortedBlocksWriter.cpp | 2 +- src/Interpreters/TemporaryDataOnDisk.cpp | 28 ++++++---- src/Interpreters/TemporaryDataOnDisk.h | 31 ++++++++--- src/Processors/QueryPlan/SortingStep.cpp | 8 ++- .../Transforms/MergeSortingTransform.cpp | 6 +-- 13 files changed, 138 insertions(+), 52 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 4aad13f7230..dcda7536935 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1502,6 +1502,21 @@ If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored. - Policy should have exactly one volume with local disks. ::: +## max_temporary_data_on_disk_size {#max_temporary_data_on_disk_size} + +Limit the amount of disk space consumed by temporary files in `tmp_path` for the server. +Queries that exceed this limit will fail with an exception. + +Default value: `0`. 
+ +**See also** + +- [max_temporary_data_on_disk_size_for_user](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_user) +- [max_temporary_data_on_disk_size_for_query](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_query) +- [tmp_path](#tmp-path) +- [tmp_policy](#tmp-policy) +- [max_server_memory_usage](#max_server_memory_usage) + ## uncompressed_cache_size {#server-settings-uncompressed_cache_size} Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 597d524dd3f..ce374f0f1c8 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -313,4 +313,19 @@ When inserting data, ClickHouse calculates the number of partitions in the inser > โ€œToo many partitions for single INSERT block (more thanโ€ + toString(max_parts) + โ€œ). The limit is controlled by โ€˜max_partitions_per_insert_blockโ€™ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).โ€ +## max_temporary_data_on_disk_size_for_user {#settings_max_temporary_data_on_disk_size_for_user} + +The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running user queries. +Zero means unlimited. + +Default value: 0. 
+ + +## max_temporary_data_on_disk_size_for_query {#settings_max_temporary_data_on_disk_size_for_query} + +The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running queries. +Zero means unlimited. + +Default value: 0. + [Original article](https://clickhouse.com/docs/en/operations/settings/query_complexity/) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d1f1df24398..aed586a86f6 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -971,10 +971,10 @@ int Server::main(const std::vector & /*args*/) /// Storage with temporary data for processing of heavy queries. { - std::string tmp_path = config().getString("tmp_path", path / "tmp/"); - std::string tmp_policy = config().getString("tmp_policy", ""); - size_t tmp_max_size = config().getUInt64("tmp_max_size", 0); - const VolumePtr & volume = global_context->setTemporaryStorage(tmp_path, tmp_policy, tmp_max_size); + std::string temporary_path = config().getString("tmp_path", path / "tmp/"); + std::string temporary_policy = config().getString("tmp_policy", ""); + size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0); + const VolumePtr & volume = global_context->setTemporaryStorage(temporary_path, temporary_policy, max_size); for (const DiskPtr & disk : volume->getDisks()) setupTmpPath(log, disk->getPath()); } diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index eeb8b4e2832..6d9fd686765 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -33,6 +33,7 @@ M(TemporaryFilesForSort, "Number of temporary files created for external sorting") \ M(TemporaryFilesForAggregation, "Number of temporary files created for external aggregation") \ M(TemporaryFilesForJoin, "Number of temporary files created for JOIN") \ + M(TemporaryFilesUnknown, "Number of temporary files created without known purpose") \ M(Read, "Number of read (read, pread, io_getevents, etc.) 
syscalls in fly") \ M(Write, "Number of write (write, pwrite, io_getevents, etc.) syscalls in fly") \ M(NetworkReceive, "Number of threads receiving data from network. Only ClickHouse-related network interaction is included, not by 3rd party libraries.") \ diff --git a/src/Disks/TemporaryFileOnDisk.cpp b/src/Disks/TemporaryFileOnDisk.cpp index e63500d735f..4f348519037 100644 --- a/src/Disks/TemporaryFileOnDisk.cpp +++ b/src/Disks/TemporaryFileOnDisk.cpp @@ -1,17 +1,31 @@ #include #include #include +#include + +#include namespace ProfileEvents { extern const Event ExternalProcessingFilesTotal; } +namespace CurrentMetrics +{ + extern const Metric TotalTemporaryFiles; +} + + namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_) - : TemporaryFileOnDisk(disk_, disk_->getPath()) + : TemporaryFileOnDisk(disk_, "") {} TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Value metric_scope) @@ -20,33 +34,54 @@ TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics:: sub_metric_increment.emplace(metric_scope); } -TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix_) +TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix) : disk(disk_) + , metric_increment(CurrentMetrics::TotalTemporaryFiles) { - /// is is possible to use with disk other than DickLocal ? - disk->createDirectories(prefix_); + if (!disk) + throw Exception("Disk is not specified", ErrorCodes::LOGICAL_ERROR); + + if (fs::path prefix_path(prefix); prefix_path.has_parent_path()) + disk->createDirectories(prefix_path.parent_path()); ProfileEvents::increment(ProfileEvents::ExternalProcessingFilesTotal); /// Do not use default temporaty root path `/tmp/tmpXXXXXX`. /// The `dummy_prefix` is used to know what to replace with the real prefix. 
String dummy_prefix = "a/"; - filepath = Poco::TemporaryFile::tempName(dummy_prefix); + relative_path = Poco::TemporaryFile::tempName(dummy_prefix); dummy_prefix += "tmp"; /// a/tmpXXXXX -> XXXXX - assert(filepath.starts_with(dummy_prefix)); - filepath.replace(0, dummy_prefix.length(), prefix_); + assert(relative_path.starts_with(dummy_prefix)); + relative_path.replace(0, dummy_prefix.length(), prefix); + + if (relative_path.empty()) + throw Exception("Temporary file name is empty", ErrorCodes::LOGICAL_ERROR); +} + +String TemporaryFileOnDisk::getPath() const +{ + return std::filesystem::path(disk->getPath()) / relative_path; } TemporaryFileOnDisk::~TemporaryFileOnDisk() { try { - if (disk && !filepath.empty() && disk->exists(filepath)) - disk->removeRecursive(filepath); + if (!disk || relative_path.empty()) + return; + + if (!disk->exists(relative_path)) + { + LOG_WARNING(&Poco::Logger::get("TemporaryFileOnDisk"), "Temporary path '{}' does not exist in '{}'", relative_path, disk->getPath()); + return; + } + + disk->removeRecursive(relative_path); } catch (...) 
{ + tryLogCurrentException(__PRETTY_FUNCTION__); } } diff --git a/src/Disks/TemporaryFileOnDisk.h b/src/Disks/TemporaryFileOnDisk.h index de20481c939..9ba59c3eaf0 100644 --- a/src/Disks/TemporaryFileOnDisk.h +++ b/src/Disks/TemporaryFileOnDisk.h @@ -5,12 +5,6 @@ #include #include - -namespace CurrentMetrics -{ - extern const Metric TotalTemporaryFiles; -} - namespace DB { using DiskPtr = std::shared_ptr; @@ -24,20 +18,21 @@ class TemporaryFileOnDisk public: explicit TemporaryFileOnDisk(const DiskPtr & disk_); explicit TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Value metric_scope); - explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix_); + explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix); ~TemporaryFileOnDisk(); DiskPtr getDisk() const { return disk; } - const String & getPath() const { return filepath; } - const String & path() const { return filepath; } + String getPath() const; private: DiskPtr disk; - String filepath; + /// Relative path in disk to the temporary file or directory + String relative_path; + + CurrentMetrics::Increment metric_increment; - CurrentMetrics::Increment metric_increment{CurrentMetrics::TotalTemporaryFiles}; /// Specified if we know what for file is used (sort/aggregate/join). std::optional sub_metric_increment = {}; }; diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 3a838d373e0..b5d15b0927b 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -570,7 +570,7 @@ Aggregator::Aggregator(const Block & header_, const Params & params_) : header(header_) , keys_positions(calculateKeysPositions(header, params_)) , params(params_) - , tmp_data(params.tmp_data_scope ? std::make_unique(params.tmp_data_scope) : nullptr) + , tmp_data(params.tmp_data_scope ? 
std::make_unique(params.tmp_data_scope, CurrentMetrics::TemporaryFilesForAggregation) : nullptr) , min_bytes_for_prefetch(getMinBytesForPrefetch()) { /// Use query-level memory tracker @@ -1573,7 +1573,7 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, si Stopwatch watch; size_t rows = data_variants.size(); - auto & out_stream = tmp_data->createStream(getHeader(false), CurrentMetrics::TemporaryFilesForAggregation, max_temp_file_size); + auto & out_stream = tmp_data->createStream(getHeader(false), max_temp_file_size); ProfileEvents::increment(ProfileEvents::ExternalAggregationWritePart); LOG_DEBUG(log, "Writing part of aggregation data into temporary file {}", out_stream.path()); diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 8957cb9c694..bb9c7bf3f90 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -1033,7 +1033,7 @@ std::shared_ptr MergeJoin::loadRightBlock(size_t pos) const { auto load_func = [&]() -> std::shared_ptr { - TemporaryFileStreamLegacy input(flushed_right_blocks[pos]->path(), materializeBlock(right_sample_block)); + TemporaryFileStreamLegacy input(flushed_right_blocks[pos]->getPath(), materializeBlock(right_sample_block)); return std::make_shared(input.block_in->read()); }; diff --git a/src/Interpreters/SortedBlocksWriter.cpp b/src/Interpreters/SortedBlocksWriter.cpp index 16c0e6c2c2b..d8c42cba9c1 100644 --- a/src/Interpreters/SortedBlocksWriter.cpp +++ b/src/Interpreters/SortedBlocksWriter.cpp @@ -264,7 +264,7 @@ SortedBlocksWriter::SortedFiles SortedBlocksWriter::finishMerge(std::function(file->path(), materializeBlock(sample_block))); + return Pipe(std::make_shared(file->getPath(), materializeBlock(sample_block))); } diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index d603877a9e0..3eb93f1f20e 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ 
-41,7 +41,7 @@ void TemporaryDataOnDiskScope::deltaAllocAndCheck(int compressed_delta, int unco stat.uncompressed_size += uncompressed_delta; } -TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, CurrentMetrics::Value metric_scope, size_t max_file_size) +TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, size_t max_file_size) { DiskPtr disk; if (max_file_size > 0) @@ -56,7 +56,7 @@ TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, Cu disk = volume->getDisk(); } - auto tmp_file = std::make_unique(disk, metric_scope); + auto tmp_file = std::make_unique(disk, current_metric_scope); std::lock_guard lock(mutex); TemporaryFileStreamPtr & tmp_stream = streams.emplace_back(std::make_unique(std::move(tmp_file), header, this)); @@ -94,9 +94,9 @@ struct TemporaryFileStream::OutputWriter if (finalized) throw Exception("Cannot write to finalized stream", ErrorCodes::LOGICAL_ERROR); out_writer.write(block); + num_rows += block.rows(); } - void finalize() { if (finalized) @@ -127,6 +127,8 @@ struct TemporaryFileStream::OutputWriter CompressedWriteBuffer out_compressed_buf; NativeWriter out_writer; + std::atomic_size_t num_rows = 0; + bool finalized = false; }; @@ -157,7 +159,7 @@ TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const : parent(parent_) , header(header_) , file(std::move(file_)) - , out_writer(std::make_unique(file->path(), header)) + , out_writer(std::make_unique(file->getPath(), header)) { } @@ -172,6 +174,9 @@ void TemporaryFileStream::write(const Block & block) TemporaryFileStream::Stat TemporaryFileStream::finishWriting() { + if (isWriteFinished()) + return stat; + if (out_writer) { out_writer->finalize(); @@ -196,19 +201,19 @@ Block TemporaryFileStream::read() if (!isWriteFinished()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing has been not finished"); - if (isFinalized()) + if (isEof()) return {}; if (!in_reader) { - in_reader = 
std::make_unique(file->path(), header); + in_reader = std::make_unique(file->getPath(), header); } Block block = in_reader->read(); if (!block) { /// finalize earlier to release resources, do not wait for the destructor - this->finalize(); + this->release(); } return block; } @@ -223,20 +228,21 @@ void TemporaryFileStream::updateAllocAndCheck() { throw Exception(ErrorCodes::LOGICAL_ERROR, "Temporary file {} size decreased after write: compressed: {} -> {}, uncompressed: {} -> {}", - file->path(), new_compressed_size, stat.compressed_size, new_uncompressed_size, stat.uncompressed_size); + file->getPath(), new_compressed_size, stat.compressed_size, new_uncompressed_size, stat.uncompressed_size); } parent->deltaAllocAndCheck(new_compressed_size - stat.compressed_size, new_uncompressed_size - stat.uncompressed_size); stat.compressed_size = new_compressed_size; stat.uncompressed_size = new_uncompressed_size; + stat.num_rows = out_writer->num_rows; } -bool TemporaryFileStream::isFinalized() const +bool TemporaryFileStream::isEof() const { return file == nullptr; } -void TemporaryFileStream::finalize() +void TemporaryFileStream::release() { if (file) { @@ -258,7 +264,7 @@ TemporaryFileStream::~TemporaryFileStream() { try { - finalize(); + release(); } catch (...) { diff --git a/src/Interpreters/TemporaryDataOnDisk.h b/src/Interpreters/TemporaryDataOnDisk.h index 44ff20935af..81bd2067650 100644 --- a/src/Interpreters/TemporaryDataOnDisk.h +++ b/src/Interpreters/TemporaryDataOnDisk.h @@ -5,6 +5,13 @@ #include #include #include +#include + + +namespace CurrentMetrics +{ + extern const Metric TemporaryFilesUnknown; +} namespace DB { @@ -18,7 +25,6 @@ using TemporaryDataOnDiskPtr = std::unique_ptr; class TemporaryFileStream; using TemporaryFileStreamPtr = std::unique_ptr; - /* * Used to account amount of temporary data written to disk. * If limit is set, throws exception if limit is exceeded. 
@@ -65,15 +71,21 @@ protected: class TemporaryDataOnDisk : private TemporaryDataOnDiskScope { friend class TemporaryFileStream; /// to allow it to call `deltaAllocAndCheck` to account data + public: using TemporaryDataOnDiskScope::StatAtomic; explicit TemporaryDataOnDisk(TemporaryDataOnDiskScopePtr parent_) - : TemporaryDataOnDiskScope(std::move(parent_), 0) + : TemporaryDataOnDiskScope(std::move(parent_), /* limit_ = */ 0) + {} + + explicit TemporaryDataOnDisk(TemporaryDataOnDiskScopePtr parent_, CurrentMetrics::Value metric_scope) + : TemporaryDataOnDiskScope(std::move(parent_), /* limit_ = */ 0) + , current_metric_scope(metric_scope) {} /// If max_file_size > 0, then check that there's enough space on the disk and throw an exception in case of lack of free space - TemporaryFileStream & createStream(const Block & header, CurrentMetrics::Value metric_scope, size_t max_file_size = 0); + TemporaryFileStream & createStream(const Block & header, size_t max_file_size = 0); std::vector getStreams() const; bool empty() const; @@ -83,6 +95,8 @@ public: private: mutable std::mutex mutex; std::vector streams TSA_GUARDED_BY(mutex); + + typename CurrentMetrics::Value current_metric_scope = CurrentMetrics::TemporaryFilesUnknown; }; /* @@ -99,6 +113,7 @@ public: /// Non-atomic because we don't allow to `read` or `write` into single file from multiple threads size_t compressed_size = 0; size_t uncompressed_size = 0; + size_t num_rows = 0; }; TemporaryFileStream(TemporaryFileOnDiskHolder file_, const Block & header_, TemporaryDataOnDisk * parent_); @@ -109,17 +124,19 @@ public: Block read(); - const String & path() const { return file->getPath(); } + const String path() const { return file->getPath(); } Block getHeader() const { return header; } + /// Read finished and file released + bool isEof() const; + ~TemporaryFileStream(); private: void updateAllocAndCheck(); - /// Finalize everything, close reader and writer, delete file - void finalize(); - bool isFinalized() const; + 
/// Release everything, close reader and writer, delete file + void release(); TemporaryDataOnDisk * parent; diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index d5066f5987c..bb58eff2f13 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -9,6 +9,12 @@ #include #include + +namespace CurrentMetrics +{ + extern const Metric TemporaryFilesForSort; +} + namespace DB { @@ -197,7 +203,7 @@ void SortingStep::mergeSorting(QueryPipelineBuilder & pipeline, const SortDescri max_bytes_before_remerge / pipeline.getNumStreams(), remerge_lowered_memory_bytes_ratio, max_bytes_before_external_sort, - std::make_unique(tmp_data), + std::make_unique(tmp_data, CurrentMetrics::TemporaryFilesForSort), min_free_disk_space); }); } diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index c0717f6810e..b039109c3f5 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -22,10 +22,6 @@ namespace ProfileEvents extern const Event ExternalProcessingUncompressedBytesTotal; } -namespace CurrentMetrics -{ - extern const Metric TemporaryFilesForSort; -} namespace DB { @@ -171,7 +167,7 @@ void MergeSortingTransform::consume(Chunk chunk) { /// If there's less free disk space than reserve_size, an exception will be thrown size_t reserve_size = sum_bytes_in_blocks + min_free_disk_space; - auto & tmp_stream = tmp_data->createStream(header_without_constants, CurrentMetrics::TemporaryFilesForSort, reserve_size); + auto & tmp_stream = tmp_data->createStream(header_without_constants, reserve_size); merge_sorter = std::make_unique(header_without_constants, std::move(chunks), description, max_merged_block_size, limit); auto current_processor = std::make_shared(header_without_constants, tmp_stream, log); From 338f41913c04d0c81394c912399a58174fcf2e60 Mon Sep 17 00:00:00 2001 
From: lhuang09287750 Date: Tue, 20 Sep 2022 03:26:43 +0000 Subject: [PATCH 238/266] add a function to process floating point literal comparing with decimal type in IN operator --- src/Interpreters/convertFieldToType.cpp | 19 ++++++++++++ ...ecimal_in_floating_point_literal.reference | 20 ++++++++++++ ...2428_decimal_in_floating_point_literal.sql | 31 +++++++++++++++++++ 3 files changed, 70 insertions(+) create mode 100644 tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference create mode 100644 tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 4e7562ef451..762908c0de7 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -109,6 +109,22 @@ Field convertDecimalToDecimalType(const Field & from, const DataTypeDecimal & return DecimalField(value, type.getScale()); } +template +Field convertFloatToDecimalType(const Field & from, const DataTypeDecimal & type) +{ + From dValue = from.get(); + if (!type.canStoreWhole(dValue)) + throw Exception("Number is too big to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + std::stringstream stream; + stream<, DataTypeDecimal>(dValue, fromScale); + return DecimalField(scaled_value, fromScale); +} + template Field convertDecimalType(const Field & from, const To & type) { @@ -135,6 +151,9 @@ Field convertDecimalType(const Field & from, const To & type) if (from.getType() == Field::Types::Decimal128) return convertDecimalToDecimalType(from, type); + if (from.getType() == Field::Types::Float64) + return convertFloatToDecimalType(from, type); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch in IN or VALUES section. Expected: {}.
Got: {}", type.getName(), from.getType()); } diff --git a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference new file mode 100644 index 00000000000..3c58c27aef9 --- /dev/null +++ b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference @@ -0,0 +1,20 @@ +0 +1 +1 +0 +0 +1 +1 +0 +0 +1 +1 +0 +0 +1 +1 +0 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql new file mode 100644 index 00000000000..aeb0f9a2fba --- /dev/null +++ b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql @@ -0,0 +1,31 @@ +SELECT toDecimal32(1.555,3) IN (1.5551); +SELECT toDecimal32(1.555,3) IN (1.5551,1.555); +SELECT toDecimal32(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal32(1.555,3) IN (1.550,1.5); + +SELECT toDecimal64(1.555,3) IN (1.5551); +SELECT toDecimal64(1.555,3) IN (1.5551,1.555); +SELECT toDecimal64(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal64(1.555,3) IN (1.550,1.5); + +SELECT toDecimal128(1.555,3) IN (1.5551); +SELECT toDecimal128(1.555,3) IN (1.5551,1.555); +SELECT toDecimal128(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal128(1.555,3) IN (1.550,1.5); + +SELECT toDecimal256(1.555,3) IN (1.5551); +SELECT toDecimal256(1.555,3) IN (1.5551,1.555); +SELECT toDecimal256(1.555,3) IN (1.5551,1.555000); +SELECT toDecimal256(1.555,3) IN (1.550,1.5); + +DROP TABLE IF EXISTS decimal_in_float_test; + +CREATE TABLE decimal_in_float_test ( `a` Decimal(18, 0), `b` Decimal(36, 2) ) ENGINE = Memory; +INSERT INTO decimal_in_float_test VALUES ('33', '44.44'); + +SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33); +SELECT count() == 1 FROM decimal_in_float_test WHERE a NOT IN (33.333); +SELECT count() == 1 FROM decimal_in_float_test WHERE b IN (44.44); +SELECT count() == 1 FROM decimal_in_float_test WHERE b NOT IN (44.4,44.444); 
+ +DROP TABLE IF EXISTS decimal_in_float_test; From cab0291873cfbc8455d817ff3fc11ec015fa85eb Mon Sep 17 00:00:00 2001 From: lhuang09287750 Date: Wed, 21 Sep 2022 03:17:02 +0000 Subject: [PATCH 239/266] for checkstyle: use an internal function to get field string, instead of std::stringstream --- src/Interpreters/convertFieldToType.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 762908c0de7..6f161c0bd7a 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include #include #include @@ -116,13 +118,11 @@ Field convertFloatToDecimalType(const Field & from, const DataTypeDecimal & t if (!type.canStoreWhole(dValue)) throw Exception("Number is too big to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); - std::stringstream stream; - stream<, DataTypeDecimal>(dValue, fromScale); - return DecimalField(scaled_value, fromScale); + auto scaledValue = convertToDecimal, DataTypeDecimal>(dValue, fromScale); + return DecimalField(scaledValue, fromScale); } template From 4b0ac268e867b30beb849cb4762126340c507295 Mon Sep 17 00:00:00 2001 From: lhuang09287750 Date: Wed, 21 Sep 2022 03:29:20 +0000 Subject: [PATCH 240/266] delete an unused include --- src/Interpreters/convertFieldToType.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 6f161c0bd7a..b6c72340472 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -3,8 +3,6 @@ #include #include -#include - #include #include #include From 215acf5e5bc0f77e392c6dca7d002dcc5889beb0 Mon Sep 17 00:00:00 2001 From: lhuang09287750 Date: Sun, 9 Oct 2022 05:30:01 +0000 Subject: [PATCH 241/266] check the result of conversion for decimal IN float --- src/Interpreters/ActionsVisitor.cpp
| 24 +++++++++++++++++-- src/Interpreters/convertFieldToType.cpp | 13 +++++----- ...ecimal_in_floating_point_literal.reference | 1 + 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 54faf37f236..980207c7f9c 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -91,15 +91,35 @@ static size_t getTypeDepth(const DataTypePtr & type) return 0; } +template +static bool decimalEqualsFloat(Field field, Float64 float_value) +{ + auto decimal_field = field.get>(); + auto decimal_to_float = DecimalUtils::convertTo(decimal_field.getValue(), decimal_field.getScale()); + return decimal_to_float == float_value; +} + /// Applies stricter rules than convertFieldToType: /// Doesn't allow : -/// - loss of precision with `Decimals` +/// - loss of precision converting to Decimal static bool convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type, Field & result_value) { result_value = convertFieldToType(from_value, to_type); if (Field::isDecimal(from_value.getType()) && Field::isDecimal(result_value.getType())) return applyVisitor(FieldVisitorAccurateEquals{}, from_value, result_value); - + if (from_value.getType() == Field::Types::Float64 && Field::isDecimal(result_value.getType())) + { + /// Convert back to Float64 and compare + if (result_value.getType() == Field::Types::Decimal32) + return decimalEqualsFloat(result_value, from_value.get()); + if (result_value.getType() == Field::Types::Decimal64) + return decimalEqualsFloat(result_value, from_value.get()); + if (result_value.getType() == Field::Types::Decimal128) + return decimalEqualsFloat(result_value, from_value.get()); + if (result_value.getType() == Field::Types::Decimal256) + return decimalEqualsFloat(result_value, from_value.get()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown decimal type {}", result_value.getTypeName()); + } return true; } diff --git 
a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index b6c72340472..55156cde7be 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -112,15 +112,16 @@ Field convertDecimalToDecimalType(const Field & from, const DataTypeDecimal & template Field convertFloatToDecimalType(const Field & from, const DataTypeDecimal & type) { - From dValue = from.get(); - if (!type.canStoreWhole(dValue)) + From value = from.get(); + if (!type.canStoreWhole(value)) throw Exception("Number is too big to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); - String sValue = convertFieldToString(from); - int fromScale = sValue.length()- sValue.find('.') - 1; + //String sValue = convertFieldToString(from); + //int fromScale = sValue.length()- sValue.find('.') - 1; + UInt32 scale = type.getScale(); - auto scaledValue = convertToDecimal, DataTypeDecimal>(dValue, fromScale); - return DecimalField(scaledValue, fromScale); + auto scaled_value = convertToDecimal, DataTypeDecimal>(value, scale); + return DecimalField(scaled_value, scale); } template diff --git a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference index 3c58c27aef9..378b7d8cec4 100644 --- a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference +++ b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.reference @@ -18,3 +18,4 @@ 1 1 1 +1 From 23deba59cf06d3424176da1da23c72aac11a25a0 Mon Sep 17 00:00:00 2001 From: lhuang09287750 Date: Mon, 10 Oct 2022 01:52:39 +0000 Subject: [PATCH 242/266] add a case in test sql --- .../0_stateless/02428_decimal_in_floating_point_literal.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql index aeb0f9a2fba..579f468ee54 100644 --- 
a/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql +++ b/tests/queries/0_stateless/02428_decimal_in_floating_point_literal.sql @@ -24,6 +24,7 @@ CREATE TABLE decimal_in_float_test ( `a` Decimal(18, 0), `b` Decimal(36, 2) ) EN INSERT INTO decimal_in_float_test VALUES ('33', '44.44'); SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33); +SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33.0); SELECT count() == 1 FROM decimal_in_float_test WHERE a NOT IN (33.333); SELECT count() == 1 FROM decimal_in_float_test WHERE b IN (44.44); SELECT count() == 1 FROM decimal_in_float_test WHERE b NOT IN (44.4,44.444); From 9cb2052c7f881e8a9ecbe39b77d6eebff7feee89 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 11 Oct 2022 13:36:25 +0200 Subject: [PATCH 243/266] Consider Poco::Net::HostNotFoundException as ZCONNECTIONLOSS. --- src/Common/ZooKeeper/ZooKeeper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 55b793c2a70..347d8c53c77 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -97,7 +97,7 @@ void ZooKeeper::init(ZooKeeperArgs args_) if (dns_error) throw KeeperException("Cannot resolve any of provided ZooKeeper hosts due to DNS error", Coordination::Error::ZCONNECTIONLOSS); else - throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::Error::ZBADARGUMENTS); + throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::Error::ZCONNECTIONLOSS); } impl = std::make_unique(nodes, args, zk_log); From ea47ab1400913c98ecc2e67503b69ccc50e42599 Mon Sep 17 00:00:00 2001 From: nvartolomei Date: Wed, 12 Oct 2022 18:12:25 +0100 Subject: [PATCH 244/266] assert unused value in test_replicated_merge_tree_compatibility --- .../test_replicated_merge_tree_compatibility/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/integration/test_replicated_merge_tree_compatibility/test.py b/tests/integration/test_replicated_merge_tree_compatibility/test.py index eb2b14ffb1a..68f2776e955 100644 --- a/tests/integration/test_replicated_merge_tree_compatibility/test.py +++ b/tests/integration/test_replicated_merge_tree_compatibility/test.py @@ -73,4 +73,4 @@ def test_replicated_merge_tree_defaults_compatibility(started_cluster): node2.restart_with_latest_version() node1.query(create_query.format(replica=1)) - node1.query("EXISTS TABLE test.table") == "1\n" + assert node1.query("EXISTS TABLE test.table") == "1\n" From 5b6be92042dc1ec200f2e50a372b9e042c0503db Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 13 Oct 2022 05:31:50 +0300 Subject: [PATCH 245/266] Update build.md --- docs/en/development/build.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/development/build.md b/docs/en/development/build.md index 8712aa3e2bc..f397dc0d037 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -38,13 +38,13 @@ For other Linux distribution - check the availability of the [prebuild packages] #### Use the latest clang for Builds ``` bash -export CC=clang-14 -export CXX=clang++-14 +export CC=clang-15 +export CXX=clang++-15 ``` -In this example we use version 14 that is the latest as of Feb 2022. +In this example we use version 15 that is the latest as of Sept 2022. -Gcc can also be used though it is discouraged. +Gcc cannot be used. 
### Checkout ClickHouse Sources {#checkout-clickhouse-sources} From 6bbbbda868a12de303f1df09fcb386c2f3ca98ea Mon Sep 17 00:00:00 2001 From: Dom Del Nano Date: Wed, 12 Oct 2022 21:46:51 -0700 Subject: [PATCH 246/266] Update cctz to pull in upstream 2022e changes --- contrib/cctz | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/cctz b/contrib/cctz index 49c656c62fb..05ec08ce61e 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 49c656c62fbd36a1bc20d64c476853bdb7cf7bb9 +Subproject commit 05ec08ce61e4b5c44692cc2f1ce4b6d8596679bf From 56ef17e083d59a2a9800cbee350cb9ddb0c830a9 Mon Sep 17 00:00:00 2001 From: zimv <472953586@qq.com> Date: Thu, 13 Oct 2022 14:57:35 +0800 Subject: [PATCH 247/266] Docs: Add "node-clickhouse-orm" to the client-libraries.md --- docs/en/interfaces/third-party/client-libraries.md | 1 + docs/ru/interfaces/third-party/client-libraries.md | 1 + docs/zh/interfaces/third-party/client-libraries.md | 3 +++ 3 files changed, 5 insertions(+) diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index e085566aa7e..c26532c98cb 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -41,6 +41,7 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasnโ€™t don - [node-clickhouse](https://github.com/apla/node-clickhouse) - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse) - [clickhouse-client](https://github.com/depyronick/clickhouse-client) + - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm) - Perl - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse) - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse) diff --git a/docs/ru/interfaces/third-party/client-libraries.md b/docs/ru/interfaces/third-party/client-libraries.md index ce9f94d5d74..b000208b53b 100644 --- 
a/docs/ru/interfaces/third-party/client-libraries.md +++ b/docs/ru/interfaces/third-party/client-libraries.md @@ -34,6 +34,7 @@ sidebar_label: "ะšะปะธะตะฝั‚ัะบะธะต ะฑะธะฑะปะธะพั‚ะตะบะธ ะพั‚ ัั‚ะพั€ะพะฝะฝะธ - [node-clickhouse](https://github.com/apla/node-clickhouse) - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse) - [clickhouse-client](https://github.com/depyronick/clickhouse-client) + - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm) - Perl - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse) - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse) diff --git a/docs/zh/interfaces/third-party/client-libraries.md b/docs/zh/interfaces/third-party/client-libraries.md index d4959e37668..1b7bff02b1a 100644 --- a/docs/zh/interfaces/third-party/client-libraries.md +++ b/docs/zh/interfaces/third-party/client-libraries.md @@ -35,6 +35,9 @@ Yandex**ๆฒกๆœ‰**็ปดๆŠคไธ‹้ขๅˆ—ๅ‡บ็š„ๅบ“๏ผŒไนŸๆฒกๆœ‰ๅš่ฟ‡ไปปไฝ•ๅนฟๆณ›็š„ๆต‹่ฏ• - NodeJs - [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse) - [node-clickhouse](https://github.com/apla/node-clickhouse) + - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse) + - [clickhouse-client](https://github.com/depyronick/clickhouse-client) + - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm) - Perl - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse) - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse) From 2e59d671fc9d59c9684ca0e202b9723a2c1b94c5 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 24 Sep 2022 23:24:39 +0200 Subject: [PATCH 248/266] Split UserDefinedSQLObjectsLoader to interface and implementation. 
--- programs/local/LocalServer.cpp | 16 +- programs/server/Server.cpp | 22 +- src/CMakeLists.txt | 1 + ...alUserDefinedExecutableFunctionsLoader.cpp | 4 +- ...rnalUserDefinedExecutableFunctionsLoader.h | 2 +- .../IUserDefinedSQLObjectsLoader.h | 47 +++ .../UserDefinedExecutableFunction.cpp | 0 .../UserDefinedExecutableFunction.h | 0 .../UserDefinedExecutableFunctionFactory.cpp | 2 +- .../UserDefinedExecutableFunctionFactory.h | 0 .../UserDefinedSQLFunctionFactory.cpp | 301 ++++++++++++++++++ .../UserDefinedSQLFunctionFactory.h | 70 ++++ .../UserDefinedSQLFunctionVisitor.cpp | 2 +- .../UserDefinedSQLFunctionVisitor.h | 0 .../UserDefined/UserDefinedSQLObjectType.h | 12 + .../UserDefinedSQLObjectsBackup.cpp | 103 ++++++ .../UserDefined/UserDefinedSQLObjectsBackup.h | 25 ++ .../UserDefinedSQLObjectsLoaderFromDisk.cpp | 265 +++++++++++++++ .../UserDefinedSQLObjectsLoaderFromDisk.h | 46 +++ .../createUserDefinedSQLObjectsLoader.cpp | 21 ++ .../createUserDefinedSQLObjectsLoader.h | 12 + src/Interpreters/ActionsVisitor.cpp | 2 +- src/Interpreters/Context.cpp | 45 +-- src/Interpreters/Context.h | 16 +- .../InterpreterCreateFunctionQuery.cpp | 84 +---- .../InterpreterCreateFunctionQuery.h | 16 +- .../InterpreterDropFunctionQuery.cpp | 21 +- src/Interpreters/InterpreterFactory.cpp | 2 +- src/Interpreters/InterpreterSystemQuery.cpp | 2 +- src/Interpreters/TreeOptimizer.cpp | 2 +- src/Interpreters/TreeRewriter.cpp | 5 +- .../UserDefinedSQLFunctionFactory.cpp | 168 ---------- .../UserDefinedSQLFunctionFactory.h | 54 ---- .../UserDefinedSQLObjectsLoader.cpp | 184 ----------- .../UserDefinedSQLObjectsLoader.h | 37 --- .../System/StorageSystemFunctions.cpp | 73 +---- 36 files changed, 995 insertions(+), 667 deletions(-) rename src/{Interpreters => Functions/UserDefined}/ExternalUserDefinedExecutableFunctionsLoader.cpp (98%) rename src/{Interpreters => Functions/UserDefined}/ExternalUserDefinedExecutableFunctionsLoader.h (94%) create mode 100644 
src/Functions/UserDefined/IUserDefinedSQLObjectsLoader.h rename src/{Interpreters => Functions/UserDefined}/UserDefinedExecutableFunction.cpp (100%) rename src/{Interpreters => Functions/UserDefined}/UserDefinedExecutableFunction.h (100%) rename src/{Interpreters => Functions/UserDefined}/UserDefinedExecutableFunctionFactory.cpp (99%) rename src/{Interpreters => Functions/UserDefined}/UserDefinedExecutableFunctionFactory.h (100%) create mode 100644 src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp create mode 100644 src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h rename src/{Interpreters => Functions/UserDefined}/UserDefinedSQLFunctionVisitor.cpp (98%) rename src/{Interpreters => Functions/UserDefined}/UserDefinedSQLFunctionVisitor.h (100%) create mode 100644 src/Functions/UserDefined/UserDefinedSQLObjectType.h create mode 100644 src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp create mode 100644 src/Functions/UserDefined/UserDefinedSQLObjectsBackup.h create mode 100644 src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp create mode 100644 src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h create mode 100644 src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.cpp create mode 100644 src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.h delete mode 100644 src/Interpreters/UserDefinedSQLFunctionFactory.cpp delete mode 100644 src/Interpreters/UserDefinedSQLFunctionFactory.h delete mode 100644 src/Interpreters/UserDefinedSQLObjectsLoader.cpp delete mode 100644 src/Interpreters/UserDefinedSQLObjectsLoader.h diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 2b9d819f5eb..a3f7ed8300f 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -32,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -602,8 +602,6 @@ void 
LocalServer::processConfig() global_context->setCurrentDatabase(default_database); applyCmdOptions(global_context); - bool enable_objects_loader = false; - if (config().has("path")) { String path = global_context->getPath(); @@ -611,12 +609,6 @@ void LocalServer::processConfig() /// Lock path directory before read status.emplace(fs::path(path) / "status", StatusFile::write_full_info); - LOG_DEBUG(log, "Loading user defined objects from {}", path); - Poco::File(path + "user_defined/").createDirectories(); - UserDefinedSQLObjectsLoader::instance().loadObjects(global_context); - enable_objects_loader = true; - LOG_DEBUG(log, "Loaded user defined objects."); - LOG_DEBUG(log, "Loading metadata from {}", path); loadMetadataSystem(global_context); attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE)); @@ -630,6 +622,9 @@ void LocalServer::processConfig() DatabaseCatalog::instance().loadDatabases(); } + /// For ClickHouse local if path is not set the loader will be disabled. 
+ global_context->getUserDefinedSQLObjectsLoader().loadObjects(); + LOG_DEBUG(log, "Loaded metadata."); } else if (!config().has("no-system-tables")) @@ -639,9 +634,6 @@ void LocalServer::processConfig() attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); } - /// Persist SQL user defined objects only if user_defined folder was created - UserDefinedSQLObjectsLoader::instance().enable(enable_objects_loader); - server_display_name = config().getString("display_name", getFQDNOrHostName()); prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", "{display_name} :) "); std::map prompt_substitutions{{"display_name", server_display_name}}; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8a0ce75ca70..52043c1e86f 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -53,7 +53,6 @@ #include #include #include -#include #include #include #include @@ -62,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -1007,12 +1007,6 @@ int Server::main(const std::vector & /*args*/) fs::create_directories(user_scripts_path); } - { - std::string user_defined_path = config().getString("user_defined_path", path / "user_defined/"); - global_context->setUserDefinedPath(user_defined_path); - fs::create_directories(user_defined_path); - } - /// top_level_domains_lists { const std::string & top_level_domains_path = config().getString("top_level_domains_path", path / "top_level_domains/"); @@ -1551,18 +1545,6 @@ int Server::main(const std::vector & /*args*/) /// system logs may copy global context. global_context->setCurrentDatabaseNameInGlobalContext(default_database); - LOG_INFO(log, "Loading user defined objects from {}", path_str); - try - { - UserDefinedSQLObjectsLoader::instance().loadObjects(global_context); - } - catch (...) 
- { - tryLogCurrentException(log, "Caught exception while loading user defined objects"); - throw; - } - LOG_DEBUG(log, "Loaded user defined objects"); - LOG_INFO(log, "Loading metadata from {}", path_str); try @@ -1590,6 +1572,8 @@ int Server::main(const std::vector & /*args*/) database_catalog.loadDatabases(); /// After loading validate that default database exists database_catalog.assertDatabaseExists(default_database); + /// Load user-defined SQL functions. + global_context->getUserDefinedSQLObjectsLoader().loadObjects(); } catch (...) { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3dc42746d67..4c42f29b0ea 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -284,6 +284,7 @@ add_object_library(clickhouse_processors_ttl Processors/TTL) add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms) add_object_library(clickhouse_processors_queryplan Processors/QueryPlan) add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations) +add_object_library(clickhouse_user_defined_functions Functions/UserDefined) if (TARGET ch_contrib::nuraft) add_object_library(clickhouse_coordination Coordination) diff --git a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp similarity index 98% rename from src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp rename to src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp index 8c7220a85da..d4ecbf66987 100644 --- a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp +++ b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp @@ -5,8 +5,8 @@ #include -#include -#include +#include +#include #include #include diff --git a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.h b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.h similarity index 94% rename from 
src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.h rename to src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.h index 4d4843e8677..1a62175eb0c 100644 --- a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.h +++ b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.h @@ -4,7 +4,7 @@ #include #include -#include +#include namespace DB { diff --git a/src/Functions/UserDefined/IUserDefinedSQLObjectsLoader.h b/src/Functions/UserDefined/IUserDefinedSQLObjectsLoader.h new file mode 100644 index 00000000000..4c7850951b5 --- /dev/null +++ b/src/Functions/UserDefined/IUserDefinedSQLObjectsLoader.h @@ -0,0 +1,47 @@ +#pragma once + +#include + + +namespace DB +{ +class IAST; +struct Settings; +enum class UserDefinedSQLObjectType; + +/// Interface for a loader of user-defined SQL objects. +/// Implementations: UserDefinedSQLLoaderFromDisk, UserDefinedSQLLoaderFromZooKeeper +class IUserDefinedSQLObjectsLoader +{ +public: + virtual ~IUserDefinedSQLObjectsLoader() = default; + + /// Whether this loader can replicate SQL objects to another node. + virtual bool isReplicated() const { return false; } + virtual String getReplicationID() const { return ""; } + + /// Loads all objects. Can be called once - if objects are already loaded the function does nothing. + virtual void loadObjects() = 0; + + /// Stops watching. + virtual void stopWatching() {} + + /// Immediately reloads all objects, throws an exception if failed. + virtual void reloadObjects() = 0; + + /// Immediately reloads a specified object only. + virtual void reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) = 0; + + /// Stores an object (must be called only by UserDefinedSQLFunctionFactory::registerFunction). 
+ virtual bool storeObject( + UserDefinedSQLObjectType object_type, + const String & object_name, + const IAST & create_object_query, + bool throw_if_exists, + bool replace_if_exists, + const Settings & settings) = 0; + + /// Removes an object (must be called only by UserDefinedSQLFunctionFactory::unregisterFunction). + virtual bool removeObject(UserDefinedSQLObjectType object_type, const String & object_name, bool throw_if_not_exists) = 0; +}; +} diff --git a/src/Interpreters/UserDefinedExecutableFunction.cpp b/src/Functions/UserDefined/UserDefinedExecutableFunction.cpp similarity index 100% rename from src/Interpreters/UserDefinedExecutableFunction.cpp rename to src/Functions/UserDefined/UserDefinedExecutableFunction.cpp diff --git a/src/Interpreters/UserDefinedExecutableFunction.h b/src/Functions/UserDefined/UserDefinedExecutableFunction.h similarity index 100% rename from src/Interpreters/UserDefinedExecutableFunction.h rename to src/Functions/UserDefined/UserDefinedExecutableFunction.h diff --git a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp b/src/Functions/UserDefined/UserDefinedExecutableFunctionFactory.cpp similarity index 99% rename from src/Interpreters/UserDefinedExecutableFunctionFactory.cpp rename to src/Functions/UserDefined/UserDefinedExecutableFunctionFactory.cpp index 18784609397..3f3cfc4c8e3 100644 --- a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp +++ b/src/Functions/UserDefined/UserDefinedExecutableFunctionFactory.cpp @@ -12,9 +12,9 @@ #include #include +#include #include #include -#include #include #include diff --git a/src/Interpreters/UserDefinedExecutableFunctionFactory.h b/src/Functions/UserDefined/UserDefinedExecutableFunctionFactory.h similarity index 100% rename from src/Interpreters/UserDefinedExecutableFunctionFactory.h rename to src/Functions/UserDefined/UserDefinedExecutableFunctionFactory.h diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp 
b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp new file mode 100644 index 00000000000..622854b3508 --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp @@ -0,0 +1,301 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int FUNCTION_ALREADY_EXISTS; + extern const int UNKNOWN_FUNCTION; + extern const int CANNOT_DROP_FUNCTION; + extern const int CANNOT_CREATE_RECURSIVE_FUNCTION; + extern const int UNSUPPORTED_METHOD; +} + + +namespace +{ + void validateFunctionRecursiveness(const IAST & node, const String & function_to_create) + { + for (const auto & child : node.children) + { + auto function_name_opt = tryGetFunctionName(child); + if (function_name_opt && function_name_opt.value() == function_to_create) + throw Exception(ErrorCodes::CANNOT_CREATE_RECURSIVE_FUNCTION, "You cannot create recursive function"); + + validateFunctionRecursiveness(*child, function_to_create); + } + } + + void validateFunction(ASTPtr function, const String & name) + { + ASTFunction * lambda_function = function->as(); + + if (!lambda_function) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Expected function, got: {}", function->formatForErrorMessage()); + + auto & lambda_function_expression_list = lambda_function->arguments->children; + + if (lambda_function_expression_list.size() != 2) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have arguments and body"); + + const ASTFunction * tuple_function_arguments = lambda_function_expression_list[0]->as(); + + if (!tuple_function_arguments || !tuple_function_arguments->arguments) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have valid arguments"); + + std::unordered_set arguments; + + for (const auto & argument : tuple_function_arguments->arguments->children) + { + const auto * argument_identifier = argument->as(); + 
+ if (!argument_identifier) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda argument must be identifier"); + + const auto & argument_name = argument_identifier->name(); + auto [_, inserted] = arguments.insert(argument_name); + if (!inserted) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Identifier {} already used as function parameter", argument_name); + } + + ASTPtr function_body = lambda_function_expression_list[1]; + if (!function_body) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have valid function body"); + + validateFunctionRecursiveness(*function_body, name); + } + + ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query) + { + auto ptr = create_function_query.clone(); + auto & res = typeid_cast(*ptr); + res.if_not_exists = false; + res.or_replace = false; + FunctionNameNormalizer().visit(res.function_core.get()); + return ptr; + } +} + + +UserDefinedSQLFunctionFactory & UserDefinedSQLFunctionFactory::instance() +{ + static UserDefinedSQLFunctionFactory result; + return result; +} + +void UserDefinedSQLFunctionFactory::checkCanBeRegistered(const ContextPtr & context, const String & function_name, const IAST & create_function_query) +{ + if (FunctionFactory::instance().hasNameOrAlias(function_name)) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The function '{}' already exists", function_name); + + if (AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The aggregate function '{}' already exists", function_name); + + if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User defined executable function '{}' already exists", function_name); + + validateFunction(assert_cast(create_function_query).function_core, function_name); +} + +void UserDefinedSQLFunctionFactory::checkCanBeUnregistered(const ContextPtr & context, const String & function_name) 
+{ + if (FunctionFactory::instance().hasNameOrAlias(function_name) || + AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) + throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop system function '{}'", function_name); + + if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) + throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop user defined executable function '{}'", function_name); +} + +bool UserDefinedSQLFunctionFactory::registerFunction(const ContextMutablePtr & context, const String & function_name, ASTPtr create_function_query, bool throw_if_exists, bool replace_if_exists) +{ + checkCanBeRegistered(context, function_name, *create_function_query); + create_function_query = normalizeCreateFunctionQuery(*create_function_query); + + std::lock_guard lock{mutex}; + auto it = function_name_to_create_query_map.find(function_name); + if (it != function_name_to_create_query_map.end()) + { + if (throw_if_exists) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User-defined function '{}' already exists", function_name); + else if (!replace_if_exists) + return false; + } + + try + { + auto & loader = context->getUserDefinedSQLObjectsLoader(); + bool stored = loader.storeObject(UserDefinedSQLObjectType::Function, function_name, *create_function_query, throw_if_exists, replace_if_exists, context->getSettingsRef()); + if (!stored) + return false; + } + catch (Exception & exception) + { + exception.addMessage(fmt::format("while storing user defined function {}", backQuote(function_name))); + throw; + } + + function_name_to_create_query_map[function_name] = create_function_query; + return true; +} + +bool UserDefinedSQLFunctionFactory::unregisterFunction(const ContextMutablePtr & context, const String & function_name, bool throw_if_not_exists) +{ + checkCanBeUnregistered(context, function_name); + + std::lock_guard lock(mutex); + auto it = function_name_to_create_query_map.find(function_name); + if (it == 
function_name_to_create_query_map.end()) + { + if (throw_if_not_exists) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "User-defined function '{}' doesn't exist", function_name); + else + return false; + } + + try + { + auto & loader = context->getUserDefinedSQLObjectsLoader(); + bool removed = loader.removeObject(UserDefinedSQLObjectType::Function, function_name, throw_if_not_exists); + if (!removed) + return false; + } + catch (Exception & exception) + { + exception.addMessage(fmt::format("while removing user defined function {}", backQuote(function_name))); + throw; + } + + function_name_to_create_query_map.erase(function_name); + return true; +} + +ASTPtr UserDefinedSQLFunctionFactory::get(const String & function_name) const +{ + std::lock_guard lock(mutex); + + auto it = function_name_to_create_query_map.find(function_name); + if (it == function_name_to_create_query_map.end()) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, + "The function name '{}' is not registered", + function_name); + + return it->second; +} + +ASTPtr UserDefinedSQLFunctionFactory::tryGet(const std::string & function_name) const +{ + std::lock_guard lock(mutex); + + auto it = function_name_to_create_query_map.find(function_name); + if (it == function_name_to_create_query_map.end()) + return nullptr; + + return it->second; +} + +bool UserDefinedSQLFunctionFactory::has(const String & function_name) const +{ + return tryGet(function_name) != nullptr; +} + +std::vector UserDefinedSQLFunctionFactory::getAllRegisteredNames() const +{ + std::vector registered_names; + + std::lock_guard lock(mutex); + registered_names.reserve(function_name_to_create_query_map.size()); + + for (const auto & [name, _] : function_name_to_create_query_map) + registered_names.emplace_back(name); + + return registered_names; +} + +bool UserDefinedSQLFunctionFactory::empty() const +{ + std::lock_guard lock(mutex); + return function_name_to_create_query_map.empty(); +} + +void 
UserDefinedSQLFunctionFactory::backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup) const +{ + backupUserDefinedSQLObjects(backup_entries_collector, data_path_in_backup, UserDefinedSQLObjectType::Function, getAllFunctions()); +} + +void UserDefinedSQLFunctionFactory::restore(RestorerFromBackup & restorer, const String & data_path_in_backup) +{ + auto restored_functions = restoreUserDefinedSQLObjects(restorer, data_path_in_backup, UserDefinedSQLObjectType::Function); + const auto & restore_settings = restorer.getRestoreSettings(); + bool throw_if_exists = (restore_settings.create_function == RestoreUDFCreationMode::kCreate); + bool replace_if_exists = (restore_settings.create_function == RestoreUDFCreationMode::kReplace); + auto context = restorer.getContext(); + for (const auto & [function_name, create_function_query] : restored_functions) + registerFunction(context, function_name, create_function_query, throw_if_exists, replace_if_exists); +} + +void UserDefinedSQLFunctionFactory::setAllFunctions(const std::vector> & new_functions) +{ + std::unordered_map normalized_functions; + for (const auto & [function_name, create_query] : new_functions) + normalized_functions[function_name] = normalizeCreateFunctionQuery(*create_query); + + std::lock_guard lock(mutex); + function_name_to_create_query_map = std::move(normalized_functions); +} + +std::vector> UserDefinedSQLFunctionFactory::getAllFunctions() const +{ + std::lock_guard lock{mutex}; + std::vector> all_functions; + all_functions.reserve(function_name_to_create_query_map.size()); + std::copy(function_name_to_create_query_map.begin(), function_name_to_create_query_map.end(), std::back_inserter(all_functions)); + return all_functions; +} + +void UserDefinedSQLFunctionFactory::setFunction(const String & function_name, const IAST & create_function_query) +{ + std::lock_guard lock(mutex); + function_name_to_create_query_map[function_name] = 
normalizeCreateFunctionQuery(create_function_query); +} + +void UserDefinedSQLFunctionFactory::removeFunction(const String & function_name) +{ + std::lock_guard lock(mutex); + function_name_to_create_query_map.erase(function_name); +} + +void UserDefinedSQLFunctionFactory::removeAllFunctionsExcept(const Strings & function_names_to_keep) +{ + boost::container::flat_set names_set_to_keep{function_names_to_keep.begin(), function_names_to_keep.end()}; + std::lock_guard lock(mutex); + for (auto it = function_name_to_create_query_map.begin(); it != function_name_to_create_query_map.end();) + { + auto current = it++; + if (!names_set_to_keep.contains(current->first)) + function_name_to_create_query_map.erase(current); + } +} + +std::unique_lock UserDefinedSQLFunctionFactory::getLock() const +{ + return std::unique_lock{mutex}; +} + +} diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h new file mode 100644 index 00000000000..45196759d3b --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h @@ -0,0 +1,70 @@ +#pragma once + +#include +#include + +#include + +#include +#include + + +namespace DB +{ +class BackupEntriesCollector; +class RestorerFromBackup; + +/// Factory for SQLUserDefinedFunctions +class UserDefinedSQLFunctionFactory : public IHints<1, UserDefinedSQLFunctionFactory> +{ +public: + static UserDefinedSQLFunctionFactory & instance(); + + /// Register function for function_name in factory for specified create_function_query. + bool registerFunction(const ContextMutablePtr & context, const String & function_name, ASTPtr create_function_query, bool throw_if_exists, bool replace_if_exists); + + /// Unregister function for function_name. + bool unregisterFunction(const ContextMutablePtr & context, const String & function_name, bool throw_if_not_exists); + + /// Get function create query for function_name. 
If no function registered with function_name throws exception. + ASTPtr get(const String & function_name) const; + + /// Get function create query for function_name. If no function registered with function_name return nullptr. + ASTPtr tryGet(const String & function_name) const; + + /// Check if function with function_name registered. + bool has(const String & function_name) const; + + /// Get all user defined functions registered names. + std::vector getAllRegisteredNames() const override; + + /// Check whether any UDFs have been registered + bool empty() const; + + /// Makes backup entries for all user-defined SQL functions. + void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup) const; + + /// Restores user-defined SQL functions from the backup. + void restore(RestorerFromBackup & restorer, const String & data_path_in_backup); + +private: + friend class UserDefinedSQLObjectsLoaderFromDisk; + friend class UserDefinedSQLObjectsLoaderFromZooKeeper; + + /// Checks that a specified function can be registered, throws an exception if not. + static void checkCanBeRegistered(const ContextPtr & context, const String & function_name, const IAST & create_function_query); + static void checkCanBeUnregistered(const ContextPtr & context, const String & function_name); + + /// The following functions must be called only by the loader. 
+ void setAllFunctions(const std::vector> & new_functions); + std::vector> getAllFunctions() const; + void setFunction(const String & function_name, const IAST & create_function_query); + void removeFunction(const String & function_name); + void removeAllFunctionsExcept(const Strings & function_names_to_keep); + std::unique_lock getLock() const; + + std::unordered_map function_name_to_create_query_map; + mutable std::recursive_mutex mutex; +}; + +} diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp similarity index 98% rename from src/Interpreters/UserDefinedSQLFunctionVisitor.cpp rename to src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp index 1adb3d5819a..9bb0abc6369 100644 --- a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.h b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.h similarity index 100% rename from src/Interpreters/UserDefinedSQLFunctionVisitor.h rename to src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.h diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectType.h b/src/Functions/UserDefined/UserDefinedSQLObjectType.h new file mode 100644 index 00000000000..f7e6fff5cad --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLObjectType.h @@ -0,0 +1,12 @@ +#pragma once + + +namespace DB +{ + +enum class UserDefinedSQLObjectType +{ + Function +}; + +} diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp new file mode 100644 index 00000000000..60f0219e92d --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp @@ -0,0 +1,103 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_RESTORE_TABLE; +} + +void backupUserDefinedSQLObjects( + BackupEntriesCollector & backup_entries_collector, + const String & data_path_in_backup, + UserDefinedSQLObjectType /* object_type */, + const std::vector> & objects) +{ + std::vector> backup_entries; + backup_entries.reserve(objects.size()); + for (const auto & [function_name, create_function_query] : objects) + backup_entries.emplace_back( + escapeForFileName(function_name) + ".sql", std::make_shared(queryToString(create_function_query))); + + fs::path data_path_in_backup_fs{data_path_in_backup}; + for (const auto & entry : backup_entries) + backup_entries_collector.addBackupEntry(data_path_in_backup_fs / entry.first, entry.second); +} + + +std::vector> +restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_path_in_backup, UserDefinedSQLObjectType object_type) +{ + auto context = restorer.getContext(); + auto backup = restorer.getBackup(); + fs::path data_path_in_backup_fs{data_path_in_backup}; + + Strings filenames = backup->listFiles(data_path_in_backup); + if (filenames.empty()) + return {}; /// Nothing to restore. 
+ + for (const auto & filename : filenames) + { + if (!filename.ends_with(".sql")) + { + throw Exception( + ErrorCodes::CANNOT_RESTORE_TABLE, + "Cannot restore user-defined SQL objects: File name {} doesn't have the extension .sql", + String{data_path_in_backup_fs / filename}); + } + } + + std::vector> res; + + for (const auto & filename : filenames) + { + String escaped_function_name = filename.substr(0, filename.length() - strlen(".sql")); + String function_name = unescapeForFileName(escaped_function_name); + + String filepath = data_path_in_backup_fs / filename; + auto backup_entry = backup->readFile(filepath); + auto in = backup_entry->getReadBuffer(); + String statement_def; + readStringUntilEOF(statement_def, *in); + + ASTPtr ast; + + switch (object_type) + { + case UserDefinedSQLObjectType::Function: + { + ParserCreateFunctionQuery parser; + ast = parseQuery( + parser, + statement_def.data(), + statement_def.data() + statement_def.size(), + "in file " + filepath + " from backup " + backup->getName(), + 0, + context->getSettingsRef().max_parser_depth); + break; + } + } + + res.emplace_back(std::move(function_name), ast); + } + + return res; +} + +} diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.h b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.h new file mode 100644 index 00000000000..a1e970d8af5 --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class BackupEntriesCollector; +class RestorerFromBackup; +enum class UserDefinedSQLObjectType; +class IBackupEntry; +using BackupEntryPtr = std::shared_ptr; + +/// Makes backup entries to backup user-defined SQL objects. +void backupUserDefinedSQLObjects( + BackupEntriesCollector & backup_entries_collector, + const String & data_path_in_backup, + UserDefinedSQLObjectType object_type, + const std::vector> & objects); + +/// Restores user-defined SQL objects from the backup. 
+std::vector> +restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_path_in_backup, UserDefinedSQLObjectType object_type); +} diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp new file mode 100644 index 00000000000..93466be54fb --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp @@ -0,0 +1,265 @@ +#include "Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h" + +#include "Functions/UserDefined/UserDefinedSQLFunctionFactory.h" +#include "Functions/UserDefined/UserDefinedSQLObjectType.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include + +#include + +namespace fs = std::filesystem; + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int DIRECTORY_DOESNT_EXIST; + extern const int FUNCTION_ALREADY_EXISTS; + extern const int UNKNOWN_FUNCTION; +} + + +namespace +{ + /// Converts a path to an absolute path and append it with a separator. 
+ String makeDirectoryPathCanonical(const String & directory_path) + { + auto canonical_directory_path = std::filesystem::weakly_canonical(directory_path); + if (canonical_directory_path.has_filename()) + canonical_directory_path += std::filesystem::path::preferred_separator; + return canonical_directory_path; + } +} + +UserDefinedSQLObjectsLoaderFromDisk::UserDefinedSQLObjectsLoaderFromDisk(const ContextPtr & global_context_, const String & dir_path_) + : global_context(global_context_) + , dir_path{makeDirectoryPathCanonical(dir_path_)} + , log{&Poco::Logger::get("UserDefinedSQLObjectsLoaderFromDisk")} +{ + createDirectory(); +} + + +ASTPtr UserDefinedSQLObjectsLoaderFromDisk::tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name) +{ + return tryLoadObject(object_type, object_name, getFilePath(object_type, object_name), /* check_file_exists= */ true); +} + + +ASTPtr UserDefinedSQLObjectsLoaderFromDisk::tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name, const String & path, bool check_file_exists) +{ + LOG_DEBUG(log, "Loading user defined object {} from file {}", backQuote(object_name), path); + + try + { + if (check_file_exists && !fs::exists(path)) + return nullptr; + + /// There is .sql file with user defined object creation statement. + ReadBufferFromFile in(path); + + String object_create_query; + readStringUntilEOF(object_create_query, in); + + switch (object_type) + { + case UserDefinedSQLObjectType::Function: + { + ParserCreateFunctionQuery parser; + ASTPtr ast = parseQuery( + parser, + object_create_query.data(), + object_create_query.data() + object_create_query.size(), + "", + 0, + global_context->getSettingsRef().max_parser_depth); + UserDefinedSQLFunctionFactory::checkCanBeRegistered(global_context, object_name, *ast); + return ast; + } + } + } + catch (...) 
+ { + tryLogCurrentException(log, fmt::format("while loading user defined SQL object {} from path {}", backQuote(object_name), path)); + return nullptr; /// Failed to load this sql object, will ignore it + } +} + + +void UserDefinedSQLObjectsLoaderFromDisk::loadObjects() +{ + if (!objects_loaded) + loadObjectsImpl(); +} + + +void UserDefinedSQLObjectsLoaderFromDisk::reloadObjects() +{ + loadObjectsImpl(); +} + + +void UserDefinedSQLObjectsLoaderFromDisk::loadObjectsImpl() +{ + LOG_INFO(log, "Loading user defined objects from {}", dir_path); + createDirectory(); + + std::vector> function_names_and_queries; + + Poco::DirectoryIterator dir_end; + for (Poco::DirectoryIterator it(dir_path); it != dir_end; ++it) + { + if (it->isDirectory()) + continue; + + const String & file_name = it.name(); + if (!startsWith(file_name, "function_") || !endsWith(file_name, ".sql")) + continue; + + size_t prefix_length = strlen("function_"); + size_t suffix_length = strlen(".sql"); + String function_name = unescapeForFileName(file_name.substr(prefix_length, file_name.length() - prefix_length - suffix_length)); + + if (function_name.empty()) + continue; + + ASTPtr ast = tryLoadObject(UserDefinedSQLObjectType::Function, function_name, dir_path + it.name(), /* check_file_exists= */ false); + if (ast) + function_names_and_queries.emplace_back(function_name, ast); + } + + UserDefinedSQLFunctionFactory::instance().setAllFunctions(function_names_and_queries); + objects_loaded = true; + + LOG_DEBUG(log, "User defined objects loaded"); +} + + +void UserDefinedSQLObjectsLoaderFromDisk::reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) +{ + createDirectory(); + auto ast = tryLoadObject(object_type, object_name); + auto & factory = UserDefinedSQLFunctionFactory::instance(); + if (ast) + factory.setFunction(object_name, *ast); + else + factory.removeFunction(object_name); +} + + +void UserDefinedSQLObjectsLoaderFromDisk::createDirectory() +{ + std::error_code 
create_dir_error_code; + fs::create_directories(dir_path, create_dir_error_code); + if (!fs::exists(dir_path) || !fs::is_directory(dir_path) || create_dir_error_code) + throw Exception("Couldn't create directory " + dir_path + " reason: '" + create_dir_error_code.message() + "'", ErrorCodes::DIRECTORY_DOESNT_EXIST); +} + + +bool UserDefinedSQLObjectsLoaderFromDisk::storeObject( + UserDefinedSQLObjectType object_type, + const String & object_name, + const IAST & create_object_query, + bool throw_if_exists, + bool replace_if_exists, + const Settings & settings) +{ + String file_path = getFilePath(object_type, object_name); + LOG_DEBUG(log, "Storing user-defined object {} to file {}", backQuote(object_name), file_path); + + if (fs::exists(file_path)) + { + if (throw_if_exists) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User-defined function '{}' already exists", object_name); + else if (!replace_if_exists) + return false; + } + + WriteBufferFromOwnString create_statement_buf; + formatAST(create_object_query, create_statement_buf, false); + writeChar('\n', create_statement_buf); + String create_statement = create_statement_buf.str(); + + String temp_file_path = file_path + ".tmp"; + + try + { + WriteBufferFromFile out(temp_file_path, create_statement.size()); + writeString(create_statement, out); + out.next(); + if (settings.fsync_metadata) + out.sync(); + out.close(); + + if (replace_if_exists) + fs::rename(temp_file_path, file_path); + else + renameNoReplace(temp_file_path, file_path); + } + catch (...) 
+ { + fs::remove(temp_file_path); + throw; + } + + LOG_TRACE(log, "Object {} stored", backQuote(object_name)); + return true; +} + + +bool UserDefinedSQLObjectsLoaderFromDisk::removeObject( + UserDefinedSQLObjectType object_type, const String & object_name, bool throw_if_not_exists) +{ + String file_path = getFilePath(object_type, object_name); + LOG_DEBUG(log, "Removing user defined object {} stored in file {}", backQuote(object_name), file_path); + + bool existed = fs::remove(file_path); + + if (!existed) + { + if (throw_if_not_exists) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "User-defined function '{}' doesn't exist", object_name); + else + return false; + } + + LOG_TRACE(log, "Object {} removed", backQuote(object_name)); + return true; +} + + +String UserDefinedSQLObjectsLoaderFromDisk::getFilePath(UserDefinedSQLObjectType object_type, const String & object_name) const +{ + String file_path; + switch (object_type) + { + case UserDefinedSQLObjectType::Function: + { + file_path = dir_path + "function_" + escapeForFileName(object_name) + ".sql"; + break; + } + } + return file_path; +} + +} diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h b/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h new file mode 100644 index 00000000000..7b0bb291f42 --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +/// Loads user-defined sql objects from a specified folder. 
+class UserDefinedSQLObjectsLoaderFromDisk : public IUserDefinedSQLObjectsLoader +{ +public: + UserDefinedSQLObjectsLoaderFromDisk(const ContextPtr & global_context_, const String & dir_path_); + + void loadObjects() override; + + void reloadObjects() override; + + void reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) override; + + bool storeObject( + UserDefinedSQLObjectType object_type, + const String & object_name, + const IAST & create_object_query, + bool throw_if_exists, + bool replace_if_exists, + const Settings & settings) override; + + bool removeObject(UserDefinedSQLObjectType object_type, const String & object_name, bool throw_if_not_exists) override; + +private: + void createDirectory(); + void loadObjectsImpl(); + ASTPtr tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name); + ASTPtr tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name, const String & file_path, bool check_file_exists); + String getFilePath(UserDefinedSQLObjectType object_type, const String & object_name) const; + + ContextPtr global_context; + String dir_path; + Poco::Logger * log; + std::atomic objects_loaded = false; +}; + +} diff --git a/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.cpp b/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.cpp new file mode 100644 index 00000000000..9d0137328d1 --- /dev/null +++ b/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.cpp @@ -0,0 +1,21 @@ +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + + +namespace DB +{ + +std::unique_ptr createUserDefinedSQLObjectsLoader(const ContextMutablePtr & global_context) +{ + const auto & config = global_context->getConfigRef(); + String default_path = fs::path{global_context->getPath()} / "user_defined/"; + String path = config.getString("user_defined_path", default_path); + return std::make_unique(global_context, path); +} + +} diff --git 
a/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.h b/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.h new file mode 100644 index 00000000000..b3a4623dba3 --- /dev/null +++ b/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.h @@ -0,0 +1,12 @@ +#pragma once + +#include + + +namespace DB +{ +class IUserDefinedSQLObjectsLoader; + +std::unique_ptr createUserDefinedSQLObjectsLoader(const ContextMutablePtr & global_context); + +} diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 54faf37f236..58397fc4ada 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -52,7 +52,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d69878e6af0..4ab3261cb70 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -51,7 +51,9 @@ #include #include #include -#include +#include +#include +#include #include #include #include @@ -180,7 +182,6 @@ struct ContextSharedPart : boost::noncopyable String user_files_path; /// Path to the directory with user provided files, usable by 'file' table function. String dictionaries_lib_path; /// Path to the directory with user provided binaries and libraries for external dictionaries. String user_scripts_path; /// Path to the directory with user provided scripts. - String user_defined_path; /// Path to the directory with user defined objects. ConfigurationPtr config; /// Global configuration settings. String tmp_path; /// Path to the temporary files that occur when processing the request. @@ -188,16 +189,18 @@ struct ContextSharedPart : boost::noncopyable mutable std::unique_ptr embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization. 
mutable std::unique_ptr external_dictionaries_loader; - mutable std::unique_ptr external_user_defined_executable_functions_loader; scope_guard models_repository_guard; ExternalLoaderXMLConfigRepository * external_dictionaries_config_repository = nullptr; scope_guard dictionaries_xmls; + mutable std::unique_ptr external_user_defined_executable_functions_loader; ExternalLoaderXMLConfigRepository * user_defined_executable_functions_config_repository = nullptr; scope_guard user_defined_executable_functions_xmls; + mutable std::unique_ptr user_defined_sql_objects_loader; + #if USE_NLP mutable std::optional synonyms_extensions; mutable std::optional lemmatizers; @@ -355,6 +358,8 @@ struct ContextSharedPart : boost::noncopyable external_dictionaries_loader->enablePeriodicUpdates(false); if (external_user_defined_executable_functions_loader) external_user_defined_executable_functions_loader->enablePeriodicUpdates(false); + if (user_defined_sql_objects_loader) + user_defined_sql_objects_loader->stopWatching(); Session::shutdownNamedSessions(); @@ -385,6 +390,7 @@ struct ContextSharedPart : boost::noncopyable std::unique_ptr delete_embedded_dictionaries; std::unique_ptr delete_external_dictionaries_loader; std::unique_ptr delete_external_user_defined_executable_functions_loader; + std::unique_ptr delete_user_defined_sql_objects_loader; std::unique_ptr delete_buffer_flush_schedule_pool; std::unique_ptr delete_schedule_pool; std::unique_ptr delete_distributed_schedule_pool; @@ -423,6 +429,7 @@ struct ContextSharedPart : boost::noncopyable delete_embedded_dictionaries = std::move(embedded_dictionaries); delete_external_dictionaries_loader = std::move(external_dictionaries_loader); delete_external_user_defined_executable_functions_loader = std::move(external_user_defined_executable_functions_loader); + delete_user_defined_sql_objects_loader = std::move(user_defined_sql_objects_loader); delete_buffer_flush_schedule_pool = std::move(buffer_flush_schedule_pool); 
delete_schedule_pool = std::move(schedule_pool); delete_distributed_schedule_pool = std::move(distributed_schedule_pool); @@ -450,6 +457,7 @@ struct ContextSharedPart : boost::noncopyable delete_embedded_dictionaries.reset(); delete_external_dictionaries_loader.reset(); delete_external_user_defined_executable_functions_loader.reset(); + delete_user_defined_sql_objects_loader.reset(); delete_ddl_worker.reset(); delete_buffer_flush_schedule_pool.reset(); delete_schedule_pool.reset(); @@ -592,12 +600,6 @@ String Context::getUserScriptsPath() const return shared->user_scripts_path; } -String Context::getUserDefinedPath() const -{ - auto lock = getLock(); - return shared->user_defined_path; -} - Strings Context::getWarnings() const { Strings common_warnings; @@ -643,9 +645,6 @@ void Context::setPath(const String & path) if (shared->user_scripts_path.empty()) shared->user_scripts_path = shared->path + "user_scripts/"; - - if (shared->user_defined_path.empty()) - shared->user_defined_path = shared->path + "user_defined/"; } VolumePtr Context::setTemporaryStorage(const String & path, const String & policy_name) @@ -700,12 +699,6 @@ void Context::setUserScriptsPath(const String & path) shared->user_scripts_path = path; } -void Context::setUserDefinedPath(const String & path) -{ - auto lock = getLock(); - shared->user_defined_path = path; -} - void Context::addWarningMessage(const String & msg) const { auto lock = getLock(); @@ -1539,6 +1532,22 @@ void Context::loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::Abstr shared->user_defined_executable_functions_xmls = external_user_defined_executable_functions_loader.addConfigRepository(std::move(repository)); } +const IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader() const +{ + auto lock = getLock(); + if (!shared->user_defined_sql_objects_loader) + shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext()); + return *shared->user_defined_sql_objects_loader; 
+} + +IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader() +{ + auto lock = getLock(); + if (!shared->user_defined_sql_objects_loader) + shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext()); + return *shared->user_defined_sql_objects_loader; +} + #if USE_NLP SynonymsExtensions & Context::getSynonymsExtensions() const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 67cf584d5a7..df94b416904 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -54,6 +54,7 @@ enum class RowPolicyFilterType; class EmbeddedDictionaries; class ExternalDictionariesLoader; class ExternalUserDefinedExecutableFunctionsLoader; +class IUserDefinedSQLObjectsLoader; class InterserverCredentials; using InterserverCredentialsPtr = std::shared_ptr; class InterserverIOHandler; @@ -430,7 +431,6 @@ public: String getUserFilesPath() const; String getDictionariesLibPath() const; String getUserScriptsPath() const; - String getUserDefinedPath() const; /// A list of warnings about server configuration to place in `system.warnings` table. Strings getWarnings() const; @@ -442,7 +442,6 @@ public: void setUserFilesPath(const String & path); void setDictionariesLibPath(const String & path); void setUserScriptsPath(const String & path); - void setUserDefinedPath(const String & path); void addWarningMessage(const String & msg) const; @@ -642,16 +641,19 @@ public: /// Returns the current constraints (can return null). 
std::shared_ptr getSettingsConstraintsAndCurrentProfiles() const; - const EmbeddedDictionaries & getEmbeddedDictionaries() const; const ExternalDictionariesLoader & getExternalDictionariesLoader() const; - const ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader() const; - EmbeddedDictionaries & getEmbeddedDictionaries(); ExternalDictionariesLoader & getExternalDictionariesLoader(); ExternalDictionariesLoader & getExternalDictionariesLoaderUnlocked(); - ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader(); - ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoaderUnlocked(); + const EmbeddedDictionaries & getEmbeddedDictionaries() const; + EmbeddedDictionaries & getEmbeddedDictionaries(); void tryCreateEmbeddedDictionaries(const Poco::Util::AbstractConfiguration & config) const; void loadOrReloadDictionaries(const Poco::Util::AbstractConfiguration & config); + + const ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader() const; + ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader(); + ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoaderUnlocked(); + const IUserDefinedSQLObjectsLoader & getUserDefinedSQLObjectsLoader() const; + IUserDefinedSQLObjectsLoader & getUserDefinedSQLObjectsLoader(); void loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::AbstractConfiguration & config); #if USE_NLP diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index dfd18ad28de..d56b5029e41 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -1,16 +1,11 @@ #include #include +#include +#include #include -#include -#include -#include -#include -#include #include #include -#include 
-#include


namespace DB


namespace ErrorCodes
{
-    extern const int CANNOT_CREATE_RECURSIVE_FUNCTION;
-    extern const int UNSUPPORTED_METHOD;
+    extern const int INCORRECT_QUERY;
}

BlockIO InterpreterCreateFunctionQuery::execute()
{
-    FunctionNameNormalizer().visit(query_ptr.get());
    ASTCreateFunctionQuery & create_function_query = query_ptr->as();

    AccessRightsElements access_rights_elements;
@@ -33,80 +26,27 @@ BlockIO InterpreterCreateFunctionQuery::execute()
    if (create_function_query.or_replace)
        access_rights_elements.emplace_back(AccessType::DROP_FUNCTION);

+    auto current_context = getContext();
+
    if (!create_function_query.cluster.empty())
    {
+        if (current_context->getUserDefinedSQLObjectsLoader().isReplicated())
+            throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because user-defined functions are replicated automatically");
+
        DDLQueryOnClusterParams params;
        params.access_to_check = std::move(access_rights_elements);
-        return executeDDLQueryOnCluster(query_ptr, getContext(), params);
+        return executeDDLQueryOnCluster(query_ptr, current_context, params);
    }

-    auto current_context = getContext();
    current_context->checkAccess(access_rights_elements);

-    auto & user_defined_function_factory = UserDefinedSQLFunctionFactory::instance();
-
    auto function_name = create_function_query.getFunctionName();
+    bool throw_if_exists = !create_function_query.if_not_exists && !create_function_query.or_replace;
+    bool replace_if_exists = create_function_query.or_replace;

-    bool if_not_exists = create_function_query.if_not_exists;
-    bool replace = create_function_query.or_replace;
-
-    create_function_query.if_not_exists = false;
-    create_function_query.or_replace = false;
-
-    validateFunction(create_function_query.function_core, function_name);
-    user_defined_function_factory.registerFunction(current_context, function_name, query_ptr, replace, if_not_exists, persist_function);
+    
UserDefinedSQLFunctionFactory::instance().registerFunction(current_context, function_name, query_ptr, throw_if_exists, replace_if_exists); return {}; } -void InterpreterCreateFunctionQuery::validateFunction(ASTPtr function, const String & name) -{ - ASTFunction * lambda_function = function->as(); - - if (!lambda_function) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Expected function, got: {}", function->formatForErrorMessage()); - - auto & lambda_function_expression_list = lambda_function->arguments->children; - - if (lambda_function_expression_list.size() != 2) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have arguments and body"); - - const ASTFunction * tuple_function_arguments = lambda_function_expression_list[0]->as(); - - if (!tuple_function_arguments || !tuple_function_arguments->arguments) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have valid arguments"); - - std::unordered_set arguments; - - for (const auto & argument : tuple_function_arguments->arguments->children) - { - const auto * argument_identifier = argument->as(); - - if (!argument_identifier) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda argument must be identifier"); - - const auto & argument_name = argument_identifier->name(); - auto [_, inserted] = arguments.insert(argument_name); - if (!inserted) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Identifier {} already used as function parameter", argument_name); - } - - ASTPtr function_body = lambda_function_expression_list[1]; - if (!function_body) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have valid function body"); - - validateFunctionRecursiveness(function_body, name); -} - -void InterpreterCreateFunctionQuery::validateFunctionRecursiveness(ASTPtr node, const String & function_to_create) -{ - for (const auto & child : node->children) - { - auto function_name_opt = tryGetFunctionName(child); - if (function_name_opt && function_name_opt.value() == 
function_to_create) - throw Exception(ErrorCodes::CANNOT_CREATE_RECURSIVE_FUNCTION, "You cannot create recursive function"); - - validateFunctionRecursiveness(child, function_to_create); - } -} } diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.h b/src/Interpreters/InterpreterCreateFunctionQuery.h index a67fdb9605d..d5fedd5ca6b 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.h +++ b/src/Interpreters/InterpreterCreateFunctionQuery.h @@ -8,24 +8,18 @@ namespace DB class Context; -class InterpreterCreateFunctionQuery : public IInterpreter, WithContext +class InterpreterCreateFunctionQuery : public IInterpreter, WithMutableContext { public: - InterpreterCreateFunctionQuery(const ASTPtr & query_ptr_, ContextPtr context_, bool persist_function_) - : WithContext(context_) - , query_ptr(query_ptr_) - , persist_function(persist_function_) {} + InterpreterCreateFunctionQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) + : WithMutableContext(context_), query_ptr(query_ptr_) + { + } BlockIO execute() override; - void setInternal(bool internal_); - private: - static void validateFunction(ASTPtr function, const String & name); - static void validateFunctionRecursiveness(ASTPtr node, const String & function_to_create); - ASTPtr query_ptr; - bool persist_function; }; } diff --git a/src/Interpreters/InterpreterDropFunctionQuery.cpp b/src/Interpreters/InterpreterDropFunctionQuery.cpp index bb2032f355a..df81ae661c7 100644 --- a/src/Interpreters/InterpreterDropFunctionQuery.cpp +++ b/src/Interpreters/InterpreterDropFunctionQuery.cpp @@ -1,17 +1,22 @@ #include #include +#include +#include #include #include #include -#include -#include #include namespace DB { +namespace ErrorCodes +{ + extern const int INCORRECT_QUERY; +} + BlockIO InterpreterDropFunctionQuery::execute() { FunctionNameNormalizer().visit(query_ptr.get()); @@ -20,17 +25,23 @@ BlockIO InterpreterDropFunctionQuery::execute() AccessRightsElements access_rights_elements; 
access_rights_elements.emplace_back(AccessType::DROP_FUNCTION);

+    auto current_context = getContext();
+
    if (!drop_function_query.cluster.empty())
    {
+        if (current_context->getUserDefinedSQLObjectsLoader().isReplicated())
+            throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because user-defined functions are replicated automatically");
+
        DDLQueryOnClusterParams params;
        params.access_to_check = std::move(access_rights_elements);
-        return executeDDLQueryOnCluster(query_ptr, getContext(), params);
+        return executeDDLQueryOnCluster(query_ptr, current_context, params);
    }

-    auto current_context = getContext();
    current_context->checkAccess(access_rights_elements);

-    UserDefinedSQLFunctionFactory::instance().unregisterFunction(current_context, drop_function_query.function_name, drop_function_query.if_exists);
+    bool throw_if_not_exists = !drop_function_query.if_exists;
+
+    UserDefinedSQLFunctionFactory::instance().unregisterFunction(current_context, drop_function_query.function_name, throw_if_not_exists);

    return {};
}
diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp
index 170f3c463b4..ca0a59c0c1a 100644
--- a/src/Interpreters/InterpreterFactory.cpp
+++ b/src/Interpreters/InterpreterFactory.cpp
@@ -296,7 +296,7 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut
    }
    else if (query->as())
    {
-        return std::make_unique(query, context, true /*persist_function*/);
+        return std::make_unique(query, context);
    }
    else if (query->as())
    {
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index 56e87d6a4fb..e3c219c15d1 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -12,7 +12,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp
index 74f084df40b..e4301bad1e8 100644
--- 
a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -35,6 +34,7 @@ #include #include +#include #include #include diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index d31a1ca7a8a..0b8a01fc862 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -25,13 +25,14 @@ #include #include #include -#include -#include #include #include #include #include +#include +#include + #include #include #include diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp b/src/Interpreters/UserDefinedSQLFunctionFactory.cpp deleted file mode 100644 index 2f876f00cc3..00000000000 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp +++ /dev/null @@ -1,168 +0,0 @@ -#include "UserDefinedSQLFunctionFactory.h" - -#include - -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int FUNCTION_ALREADY_EXISTS; - extern const int UNKNOWN_FUNCTION; - extern const int CANNOT_DROP_FUNCTION; -} - -UserDefinedSQLFunctionFactory & UserDefinedSQLFunctionFactory::instance() -{ - static UserDefinedSQLFunctionFactory result; - return result; -} - -void UserDefinedSQLFunctionFactory::registerFunction(ContextPtr context, const String & function_name, ASTPtr create_function_query, bool replace, bool if_not_exists, bool persist) -{ - if (FunctionFactory::instance().hasNameOrAlias(function_name)) - { - if (if_not_exists) - return; - - throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The function '{}' already exists", function_name); - } - - if (AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) - { - if (if_not_exists) - return; - - throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The aggregate function '{}' already exists", function_name); - } - - if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) - { - if 
(if_not_exists) - return; - - throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User defined executable function '{}' already exists", function_name); - } - - std::lock_guard lock(mutex); - - auto [it, inserted] = function_name_to_create_query.emplace(function_name, create_function_query); - - if (!inserted) - { - if (if_not_exists) - return; - - if (replace) - it->second = create_function_query; - else - throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, - "The function name '{}' is not unique", - function_name); - } - - if (persist) - { - try - { - UserDefinedSQLObjectsLoader::instance().storeObject(context, UserDefinedSQLObjectType::Function, function_name, *create_function_query, replace); - } - catch (Exception & exception) - { - function_name_to_create_query.erase(it); - exception.addMessage(fmt::format("while storing user defined function {} on disk", backQuote(function_name))); - throw; - } - } -} - -void UserDefinedSQLFunctionFactory::unregisterFunction(ContextPtr context, const String & function_name, bool if_exists) -{ - if (FunctionFactory::instance().hasNameOrAlias(function_name) || - AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) - throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop system function '{}'", function_name); - - if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) - throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop user defined executable function '{}'", function_name); - - std::lock_guard lock(mutex); - - auto it = function_name_to_create_query.find(function_name); - if (it == function_name_to_create_query.end()) - { - if (if_exists) - return; - - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, - "The function name '{}' is not registered", - function_name); - } - - try - { - UserDefinedSQLObjectsLoader::instance().removeObject(context, UserDefinedSQLObjectType::Function, function_name); - } - catch (Exception & exception) - { - 
exception.addMessage(fmt::format("while removing user defined function {} from disk", backQuote(function_name))); - throw; - } - - function_name_to_create_query.erase(it); -} - -ASTPtr UserDefinedSQLFunctionFactory::get(const String & function_name) const -{ - std::lock_guard lock(mutex); - - auto it = function_name_to_create_query.find(function_name); - if (it == function_name_to_create_query.end()) - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, - "The function name '{}' is not registered", - function_name); - - return it->second; -} - -ASTPtr UserDefinedSQLFunctionFactory::tryGet(const std::string & function_name) const -{ - std::lock_guard lock(mutex); - - auto it = function_name_to_create_query.find(function_name); - if (it == function_name_to_create_query.end()) - return nullptr; - - return it->second; -} - -bool UserDefinedSQLFunctionFactory::has(const String & function_name) const -{ - return tryGet(function_name) != nullptr; -} - -std::vector UserDefinedSQLFunctionFactory::getAllRegisteredNames() const -{ - std::vector registered_names; - - std::lock_guard lock(mutex); - registered_names.reserve(function_name_to_create_query.size()); - - for (const auto & [name, _] : function_name_to_create_query) - registered_names.emplace_back(name); - - return registered_names; -} - -bool UserDefinedSQLFunctionFactory::empty() const -{ - std::lock_guard lock(mutex); - return function_name_to_create_query.empty(); -} -} diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.h b/src/Interpreters/UserDefinedSQLFunctionFactory.h deleted file mode 100644 index db43bb7298e..00000000000 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include -#include - -#include - -#include -#include - - -namespace DB -{ - -/// Factory for SQLUserDefinedFunctions -class UserDefinedSQLFunctionFactory : public IHints<1, UserDefinedSQLFunctionFactory> -{ -public: - static UserDefinedSQLFunctionFactory & instance(); - - /** Register 
function for function_name in factory for specified create_function_query. - * If function exists and if_not_exists = false and replace = false throws exception. - * If replace = true and sql user defined function with function_name already exists replace it with create_function_query. - * If persist = true persist function on disk. - */ - void registerFunction(ContextPtr context, const String & function_name, ASTPtr create_function_query, bool replace, bool if_not_exists, bool persist); - - /** Unregister function for function_name. - * If if_exists = true then do not throw exception if function is not registered. - * If if_exists = false then throw exception if function is not registered. - */ - void unregisterFunction(ContextPtr context, const String & function_name, bool if_exists); - - /// Get function create query for function_name. If no function registered with function_name throws exception. - ASTPtr get(const String & function_name) const; - - /// Get function create query for function_name. If no function registered with function_name return nullptr. - ASTPtr tryGet(const String & function_name) const; - - /// Check if function with function_name registered. - bool has(const String & function_name) const; - - /// Get all user defined functions registered names. 
- std::vector getAllRegisteredNames() const override; - - /// Check whether any UDFs have been registered - bool empty() const; - -private: - std::unordered_map function_name_to_create_query; - mutable std::mutex mutex; -}; - -} diff --git a/src/Interpreters/UserDefinedSQLObjectsLoader.cpp b/src/Interpreters/UserDefinedSQLObjectsLoader.cpp deleted file mode 100644 index c6f50fc4a0a..00000000000 --- a/src/Interpreters/UserDefinedSQLObjectsLoader.cpp +++ /dev/null @@ -1,184 +0,0 @@ -#include "UserDefinedSQLObjectsLoader.h" - -#include - -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int OBJECT_ALREADY_STORED_ON_DISK; - extern const int OBJECT_WAS_NOT_STORED_ON_DISK; -} - -UserDefinedSQLObjectsLoader & UserDefinedSQLObjectsLoader::instance() -{ - static UserDefinedSQLObjectsLoader ret; - return ret; -} - -UserDefinedSQLObjectsLoader::UserDefinedSQLObjectsLoader() - : log(&Poco::Logger::get("UserDefinedSQLObjectsLoader")) -{} - -void UserDefinedSQLObjectsLoader::loadUserDefinedObject(ContextPtr context, UserDefinedSQLObjectType object_type, std::string_view name, const String & path) -{ - auto name_ref = StringRef(name.data(), name.size()); - LOG_DEBUG(log, "Loading user defined object {} from file {}", backQuote(name_ref), path); - - /// There is .sql file with user defined object creation statement. 
- ReadBufferFromFile in(path); - - String object_create_query; - readStringUntilEOF(object_create_query, in); - - try - { - switch (object_type) - { - case UserDefinedSQLObjectType::Function: - { - ParserCreateFunctionQuery parser; - ASTPtr ast = parseQuery( - parser, - object_create_query.data(), - object_create_query.data() + object_create_query.size(), - "in file " + path, - 0, - context->getSettingsRef().max_parser_depth); - - InterpreterCreateFunctionQuery interpreter(ast, context, false /*persist_function*/); - interpreter.execute(); - } - } - } - catch (Exception & e) - { - e.addMessage(fmt::format("while loading user defined objects {} from path {}", backQuote(name_ref), path)); - throw; - } -} - -void UserDefinedSQLObjectsLoader::loadObjects(ContextPtr context) -{ - if (unlikely(!enable_persistence)) - return; - - LOG_DEBUG(log, "Loading user defined objects"); - - String dir_path = context->getUserDefinedPath(); - Poco::DirectoryIterator dir_end; - for (Poco::DirectoryIterator it(dir_path); it != dir_end; ++it) - { - if (it->isDirectory()) - continue; - - const std::string & file_name = it.name(); - - /// For '.svn', '.gitignore' directory and similar. 
- if (file_name.at(0) == '.') - continue; - - if (!startsWith(file_name, "function_") || !endsWith(file_name, ".sql")) - continue; - - std::string_view object_name = file_name; - - object_name.remove_prefix(strlen("function_")); - object_name.remove_suffix(strlen(".sql")); - - if (object_name.empty()) - continue; - - loadUserDefinedObject(context, UserDefinedSQLObjectType::Function, object_name, dir_path + it.name()); - } -} - -void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast, bool replace) -{ - if (unlikely(!enable_persistence)) - return; - - String dir_path = context->getUserDefinedPath(); - String file_path; - - switch (object_type) - { - case UserDefinedSQLObjectType::Function: - { - file_path = dir_path + "function_" + escapeForFileName(object_name) + ".sql"; - } - } - - if (!replace && std::filesystem::exists(file_path)) - throw Exception(ErrorCodes::OBJECT_ALREADY_STORED_ON_DISK, "User defined object {} already stored on disk", backQuote(file_path)); - - LOG_DEBUG(log, "Storing object {} to file {}", backQuote(object_name), file_path); - - WriteBufferFromOwnString create_statement_buf; - formatAST(ast, create_statement_buf, false); - writeChar('\n', create_statement_buf); - String create_statement = create_statement_buf.str(); - - WriteBufferFromFile out(file_path, create_statement.size()); - writeString(create_statement, out); - out.next(); - if (context->getSettingsRef().fsync_metadata) - out.sync(); - out.close(); - - LOG_DEBUG(log, "Stored object {}", backQuote(object_name)); -} - -void UserDefinedSQLObjectsLoader::removeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name) -{ - if (unlikely(!enable_persistence)) - return; - - String dir_path = context->getUserDefinedPath(); - LOG_DEBUG(log, "Removing file for user defined object {} from {}", backQuote(object_name), dir_path); - - std::filesystem::path file_path; - - 
switch (object_type) - { - case UserDefinedSQLObjectType::Function: - { - file_path = dir_path + "function_" + escapeForFileName(object_name) + ".sql"; - } - } - - if (!std::filesystem::exists(file_path)) - throw Exception(ErrorCodes::OBJECT_WAS_NOT_STORED_ON_DISK, "User defined object {} was not stored on disk", backQuote(file_path.string())); - - std::filesystem::remove(file_path); -} - -void UserDefinedSQLObjectsLoader::enable(bool enable_persistence_) -{ - enable_persistence = enable_persistence_; -} - -} diff --git a/src/Interpreters/UserDefinedSQLObjectsLoader.h b/src/Interpreters/UserDefinedSQLObjectsLoader.h deleted file mode 100644 index 9dfba1181c1..00000000000 --- a/src/Interpreters/UserDefinedSQLObjectsLoader.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include -#include - -#include - - -namespace DB -{ - -enum class UserDefinedSQLObjectType -{ - Function -}; - -class UserDefinedSQLObjectsLoader : private boost::noncopyable -{ -public: - static UserDefinedSQLObjectsLoader & instance(); - UserDefinedSQLObjectsLoader(); - - void loadObjects(ContextPtr context); - void storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast, bool replace); - void removeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name); - - /// For ClickHouse local if path is not set we can disable loader. 
- void enable(bool enable_persistence); - -private: - - void loadUserDefinedObject(ContextPtr context, UserDefinedSQLObjectType object_type, std::string_view object_name, const String & file_path); - Poco::Logger * log; - bool enable_persistence = true; -}; - -} diff --git a/src/Storages/System/StorageSystemFunctions.cpp b/src/Storages/System/StorageSystemFunctions.cpp index db6b51cb4f1..a0a406a974c 100644 --- a/src/Storages/System/StorageSystemFunctions.cpp +++ b/src/Storages/System/StorageSystemFunctions.cpp @@ -6,18 +6,9 @@ #include #include #include -#include -#include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include - -namespace fs = std::filesystem; namespace DB @@ -30,11 +21,6 @@ enum class FunctionOrigin : Int8 EXECUTABLE_USER_DEFINED = 2 }; -namespace ErrorCodes -{ - extern const int CANNOT_RESTORE_TABLE; -} - namespace { template @@ -134,63 +120,12 @@ void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr c void StorageSystemFunctions::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - const auto & user_defined_sql_functions_factory = UserDefinedSQLFunctionFactory::instance(); - const auto & user_defined_sql_functions_names = user_defined_sql_functions_factory.getAllRegisteredNames(); - fs::path data_path_in_backup_fs{data_path_in_backup}; - for (const auto & function_name : user_defined_sql_functions_names) - { - auto ast = user_defined_sql_functions_factory.tryGet(function_name); - if (!ast) - continue; - backup_entries_collector.addBackupEntry( - data_path_in_backup_fs / (escapeForFileName(function_name) + ".sql"), - std::make_shared(queryToString(ast))); - } + UserDefinedSQLFunctionFactory::instance().backup(backup_entries_collector, data_path_in_backup); } void StorageSystemFunctions::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const 
std::optional & /* partitions */) { - auto backup = restorer.getBackup(); - fs::path data_path_in_backup_fs{data_path_in_backup}; - - Strings filenames = backup->listFiles(data_path_in_backup); - for (const auto & filename : filenames) - { - if (!filename.ends_with(".sql")) - { - throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File name {} doesn't have the extension .sql", - getStorageID().getFullTableName(), String{data_path_in_backup_fs / filename}); - } - } - - auto & user_defined_sql_functions_factory = UserDefinedSQLFunctionFactory::instance(); - const auto & restore_settings = restorer.getRestoreSettings(); - auto context = restorer.getContext(); - - for (const auto & filename : filenames) - { - String escaped_function_name = filename.substr(0, filename.length() - strlen(".sql")); - String function_name = unescapeForFileName(escaped_function_name); - - String filepath = data_path_in_backup_fs / filename; - auto function_def_entry = backup->readFile(filepath); - auto function_def_in = function_def_entry->getReadBuffer(); - String function_def; - readStringUntilEOF(function_def, *function_def_in); - - ParserCreateFunctionQuery parser; - ASTPtr ast = parseQuery( - parser, - function_def.data(), - function_def.data() + function_def.size(), - "in file " + filepath + " from backup " + backup->getName(), - 0, - context->getSettingsRef().max_parser_depth); - - bool replace = (restore_settings.create_function == RestoreUDFCreationMode::kReplace); - bool if_not_exists = (restore_settings.create_function == RestoreUDFCreationMode::kCreateIfNotExists); - user_defined_sql_functions_factory.registerFunction(context, function_name, ast, replace, if_not_exists, true); - } + UserDefinedSQLFunctionFactory::instance().restore(restorer, data_path_in_backup); } } From a901faeb84fe67dbfd13052f38081f8fcf7e3581 Mon Sep 17 00:00:00 2001 From: sperlingxx Date: Thu, 13 Oct 2022 18:00:54 +0800 Subject: [PATCH 249/266] Fix add/minus Date Interval over Date32 
type Signed-off-by: sperlingxx --- src/Functions/FunctionBinaryArithmetic.h | 10 ++-- ..._time_intervals_months_underflow.reference | 60 +++++++++++++++++++ .../00524_time_intervals_months_underflow.sql | 14 ++++- 3 files changed, 77 insertions(+), 7 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 174e98dd81f..baa3c65537d 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -659,8 +659,8 @@ class FunctionBinaryArithmetic : public IFunction static FunctionOverloadResolverPtr getFunctionForIntervalArithmetic(const DataTypePtr & type0, const DataTypePtr & type1, ContextPtr context) { - bool first_is_date_or_datetime = isDate(type0) || isDateTime(type0) || isDateTime64(type0); - bool second_is_date_or_datetime = isDate(type1) || isDateTime(type1) || isDateTime64(type1); + bool first_is_date_or_datetime = isDateOrDate32(type0) || isDateTime(type0) || isDateTime64(type0); + bool second_is_date_or_datetime = isDateOrDate32(type1) || isDateTime(type1) || isDateTime64(type1); /// Exactly one argument must be Date or DateTime if (first_is_date_or_datetime == second_is_date_or_datetime) @@ -699,7 +699,7 @@ class FunctionBinaryArithmetic : public IFunction } else { - if (isDate(type_time)) + if (isDateOrDate32(type_time)) function_name = is_plus ? "addDays" : "subtractDays"; else function_name = is_plus ? "addSeconds" : "subtractSeconds"; @@ -895,7 +895,7 @@ class FunctionBinaryArithmetic : public IFunction ColumnsWithTypeAndName new_arguments = arguments; /// Interval argument must be second. 
- if (isDate(arguments[1].type) || isDateTime(arguments[1].type) || isDateTime64(arguments[1].type)) + if (isDateOrDate32(arguments[1].type) || isDateTime(arguments[1].type) || isDateTime64(arguments[1].type)) std::swap(new_arguments[0], new_arguments[1]); /// Change interval argument type to its representation @@ -1099,7 +1099,7 @@ public: new_arguments[i].type = arguments[i]; /// Interval argument must be second. - if (isDate(new_arguments[1].type) || isDateTime(new_arguments[1].type) || isDateTime64(new_arguments[1].type)) + if (isDateOrDate32(new_arguments[1].type) || isDateTime(new_arguments[1].type) || isDateTime64(new_arguments[1].type)) std::swap(new_arguments[0], new_arguments[1]); /// Change interval argument to its representation diff --git a/tests/queries/0_stateless/00524_time_intervals_months_underflow.reference b/tests/queries/0_stateless/00524_time_intervals_months_underflow.reference index 6e5555b0df8..9c6bb9d0b91 100644 --- a/tests/queries/0_stateless/00524_time_intervals_months_underflow.reference +++ b/tests/queries/0_stateless/00524_time_intervals_months_underflow.reference @@ -166,3 +166,63 @@ 2005-01-01 2004-01-01 2003-01-01 +2216-09-23 +2216-10-13 +2216-11-02 +2216-11-22 +2216-12-12 +2217-01-01 +2217-01-21 +2217-02-10 +2217-03-02 +2217-03-22 +2217-04-11 +2217-03-22 +2217-03-02 +2217-02-10 +2217-01-21 +2217-01-01 +2216-12-12 +2216-11-22 +2216-11-02 +2216-10-13 +2215-05-01 +2215-09-01 +2216-01-01 +2216-05-01 +2216-09-01 +2217-01-01 +2217-05-01 +2217-09-01 +2218-01-01 +2218-05-01 +2218-09-01 +2218-05-01 +2218-01-01 +2217-09-01 +2217-05-01 +2217-01-01 +2216-09-01 +2216-05-01 +2216-01-01 +2215-09-01 +2197-01-01 +2201-01-01 +2205-01-01 +2209-01-01 +2213-01-01 +2217-01-01 +2221-01-01 +2225-01-01 +2229-01-01 +2233-01-01 +2237-01-01 +2233-01-01 +2229-01-01 +2225-01-01 +2221-01-01 +2217-01-01 +2213-01-01 +2209-01-01 +2205-01-01 +2201-01-01 diff --git a/tests/queries/0_stateless/00524_time_intervals_months_underflow.sql 
b/tests/queries/0_stateless/00524_time_intervals_months_underflow.sql index 6b8ecc3a9fb..09c1ce9bf6b 100644 --- a/tests/queries/0_stateless/00524_time_intervals_months_underflow.sql +++ b/tests/queries/0_stateless/00524_time_intervals_months_underflow.sql @@ -53,8 +53,18 @@ SELECT toDate('2017-01-01') - INTERVAL 1 YEAR AS x; SELECT toDate('2017-01-01') - INTERVAL -1 YEAR AS x; -SELECT toDate('2017-01-01') + INTERVAL number - 15 MONTH AS x FROM system.numbers LIMIT 30; +SELECT INTERVAL number - 15 MONTH + toDate('2017-01-01') AS x FROM system.numbers LIMIT 30; SELECT toDate('2017-01-01') - INTERVAL number - 15 MONTH AS x FROM system.numbers LIMIT 30; -SELECT toDate('2017-01-01') + INTERVAL number - 15 YEAR AS x FROM system.numbers LIMIT 30; +SELECT INTERVAL number - 15 YEAR + toDate('2017-01-01') AS x FROM system.numbers LIMIT 30; SELECT toDate('2017-01-01') - INTERVAL number - 15 YEAR AS x FROM system.numbers LIMIT 30; + + +SELECT toDate32('2217-01-01') + INTERVAL number * 20 - 100 DAY AS x FROM system.numbers LIMIT 10; +SELECT INTERVAL 100 - number * 20 DAY + toDate32('2217-01-01') AS x FROM system.numbers LIMIT 10; + +SELECT INTERVAL number * 4 - 20 MONTH + toDate32('2217-01-01') AS x FROM system.numbers LIMIT 10; +SELECT toDate32('2217-01-01') - INTERVAL number * 4 - 20 MONTH AS x FROM system.numbers LIMIT 10; + +SELECT INTERVAL number * 4 - 20 YEAR + toDate32('2217-01-01') AS x FROM system.numbers LIMIT 10; +SELECT toDate32('2217-01-01') - INTERVAL number * 4 - 20 YEAR AS x FROM system.numbers LIMIT 10; From 51e55db617ce668040fac9f6bed41661e397c93a Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Fri, 14 Oct 2022 05:41:58 +0200 Subject: [PATCH 250/266] Fix log-level --- programs/disks/DisksApp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index b662921a3b1..749ccb3e503 100644 --- a/programs/disks/DisksApp.cpp +++ 
b/programs/disks/DisksApp.cpp @@ -58,7 +58,7 @@ void DisksApp::addOptions( ("disk", po::value(), "Set disk name") ("command_name", po::value(), "Name for command to do") ("send-logs", "Send logs") - ("log-level", "Logging level") + ("log-level", po::value(), "Logging level") ; positional_options_description.add("command_name", 1); From 4e1a10fb341c2dbd837c05933e44e6cbd9dd33ac Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 14 Oct 2022 13:32:08 +0200 Subject: [PATCH 251/266] better check --- src/Storages/MergeTree/MergeTreeData.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8cb8b0aacf0..410c136c753 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1920,10 +1920,19 @@ void MergeTreeData::clearPartsFromFilesystem(const DataPartsVector & parts, bool void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_to_remove, NameSet * part_names_succeed) { const auto settings = getSettings(); + bool has_zero_copy_parts = false; + if (supportsReplication() && settings->allow_remote_fs_zero_copy_replication) + { + has_zero_copy_parts = std::any_of( + parts_to_remove.begin(), parts_to_remove.end(), + [] (const auto & data_part) { return data_part->isStoredOnRemoteDiskWithZeroCopySupport(); } + ); + } + if (parts_to_remove.size() > 1 && settings->max_part_removal_threads > 1 && parts_to_remove.size() > settings->concurrent_part_removal_threshold - && (!supportsReplication() || !settings->allow_remote_fs_zero_copy_replication)) /// parts must be removed in order for zero-copy replication + && !has_zero_copy_parts) /// parts must be removed in order for zero-copy replication { /// Parallel parts removal. 
size_t num_threads = std::min(settings->max_part_removal_threads, parts_to_remove.size()); From d43b5bbcd015f8aca8fc1229675bda13fb62acaf Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 14 Oct 2022 15:47:46 +0200 Subject: [PATCH 252/266] Remove forgotten debug logging --- src/Interpreters/executeQuery.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 63641d4bdcb..86686b3eb13 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -549,15 +549,9 @@ static std::tuple executeQueryImpl( if (insert_query) { if (insert_query->table_id) - { insert_query->table_id = context->resolveStorageID(insert_query->table_id); - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "2) database: {}", insert_query->table_id.getDatabaseName()); - } else if (auto table = insert_query->getTable(); !table.empty()) - { insert_query->table_id = context->resolveStorageID(StorageID{insert_query->getDatabase(), table}); - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "2) database: {}", insert_query->table_id.getDatabaseName()); - } } if (insert_query && insert_query->select) From 6cdb7b5a9414b175238f44ed8d6d71e455850b32 Mon Sep 17 00:00:00 2001 From: HarryLeeIBM Date: Fri, 14 Oct 2022 07:16:18 -0700 Subject: [PATCH 253/266] Fix Codec T64 on s390x --- src/Compression/CompressionCodecT64.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp index 9ed37c2d676..887c8b9e9d2 100644 --- a/src/Compression/CompressionCodecT64.cpp +++ b/src/Compression/CompressionCodecT64.cpp @@ -307,7 +307,19 @@ void reverseTransposeBytes(const UInt64 * matrix, UInt32 col, T & value) template void load(const char * src, T * buf, UInt32 tail = 64) { - memcpy(buf, src, tail * sizeof(T)); + if constexpr (std::endian::native == std::endian::little) + { + 
memcpy(buf, src, tail * sizeof(T)); + } + else + { + /// Since the algorithm uses little-endian integers, data is loaded + /// as little-endian types on big-endian machine(s390x, etc.) + for (UInt32 i = 0; i < tail; i++) + { + buf[i] = unalignedLoadLE(src + i * sizeof(T)); + } + } } template From edfc388b7c55841ac80aaebbd960ddee3c7dc483 Mon Sep 17 00:00:00 2001 From: jferroal Date: Fri, 14 Oct 2022 22:29:03 +0800 Subject: [PATCH 254/266] Doc: add zh translation (#42312) --- .../example-datasets/uk-price-paid.mdx | 448 +++++++++++++++++- 1 file changed, 444 insertions(+), 4 deletions(-) diff --git a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx index 1583af60843..3a14a3ce55d 100644 --- a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx +++ b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx @@ -1,10 +1,450 @@ --- slug: /zh/getting-started/example-datasets/uk-price-paid -sidebar_label: UK Property Price Paid +sidebar_label: ่‹ฑๅ›ฝๆˆฟๅœฐไบงๆ”ฏไป˜ไปทๆ ผ sidebar_position: 1 -title: "UK Property Price Paid" +title: "่‹ฑๅ›ฝๆˆฟๅœฐไบงๆ”ฏไป˜ไปทๆ ผ" --- -import Content from '@site/docs/en/getting-started/example-datasets/uk-price-paid.md'; +่ฏฅๆ•ฐๆฎ้›†ๅŒ…ๅซ่‡ช 1995 ๅนดไปฅๆฅๆœ‰ๅ…ณ่‹ฑๆ ผๅ…ฐๅ’Œๅจๅฐ”ๅฃซๆˆฟๅœฐไบงไปทๆ ผ็š„ๆ•ฐๆฎใ€‚ๆœชๅŽ‹็ผฉ็š„ๅคงๅฐ็บฆไธบ 4 GiB๏ผŒๅœจ ClickHouse ไธญๅคง็บฆ้œ€่ฆ 278 MiBใ€‚ - +ๆฅๆบ๏ผšhttps://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads +ๅญ—ๆฎต่ฏดๆ˜Ž๏ผšhttps://www.gov.uk/guidance/about-the-price-data + +ๅŒ…ๅซ HM Land Registry data ยฉ Crown copyright and database right 2021.ใ€‚ๆญคๆ•ฐๆฎ้›†้œ€ๅœจ Open Government License v3.0 ็š„่ฎธๅฏไธ‹ไฝฟ็”จใ€‚ + +## ๅˆ›ๅปบ่กจ {#create-table} + +```sql +CREATE TABLE uk_price_paid +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0), + is_new UInt8, + 
duration Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2); +``` + +## ้ข„ๅค„็†ๅ’Œๆ’ๅ…ฅๆ•ฐๆฎ {#preprocess-import-data} + +ๆˆ‘ไปฌๅฐ†ไฝฟ็”จ `url` ๅ‡ฝๆ•ฐๅฐ†ๆ•ฐๆฎๆตๅผไผ ่พ“ๅˆฐ ClickHouseใ€‚ๆˆ‘ไปฌ้œ€่ฆ้ฆ–ๅ…ˆ้ข„ๅค„็†ไธ€ไบ›ไผ ๅ…ฅ็š„ๆ•ฐๆฎ๏ผŒๅ…ถไธญๅŒ…ๆ‹ฌ๏ผš + +- ๅฐ†`postcode` ๆ‹†ๅˆ†ไธบไธคไธชไธๅŒ็š„ๅˆ— - `postcode1` ๅ’Œ `postcode2`๏ผŒๅ› ไธบ่ฟ™ๆ›ด้€‚ๅˆๅญ˜ๅ‚จๅ’ŒๆŸฅ่ฏข +- ๅฐ†`time` ๅญ—ๆฎต่ฝฌๆขไธบๆ—ฅๆœŸไธบๅฎƒๅชๅŒ…ๅซ 00:00 ๆ—ถ้—ด +- ๅฟฝ็•ฅ [UUid](../../sql-reference/data-types/uuid.md) ๅญ—ๆฎต๏ผŒๅ› ไธบๆˆ‘ไปฌไธ้œ€่ฆๅฎƒ่ฟ›่กŒๅˆ†ๆž +- ไฝฟ็”จ [transform](../../sql-reference/functions/other-functions.md#transform) ๅ‡ฝๆ•ฐๅฐ† `Enum` ๅญ—ๆฎต `type` ๅ’Œ `duration` ่ฝฌๆขไธบๆ›ดๆ˜“่ฏป็š„ `Enum` ๅญ—ๆฎต +- ๅฐ† `is_new` ๅญ—ๆฎตไปŽๅ•ๅญ—็ฌฆไธฒ๏ผˆ` Y`/`N`) ๅˆฐ [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64 -int128-int256) ๅญ—ๆฎตไธบ 0 ๆˆ– 1 +- ๅˆ ้™คๆœ€ๅŽไธคๅˆ—๏ผŒๅ› ไธบๅฎƒไปฌ้ƒฝๅ…ทๆœ‰็›ธๅŒ็š„ๅ€ผ๏ผˆๅณ 0๏ผ‰ + +`url` ๅ‡ฝๆ•ฐๅฐ†ๆฅ่‡ช็ฝ‘็ปœๆœๅŠกๅ™จ็š„ๆ•ฐๆฎๆตๅผไผ ่พ“ๅˆฐ ClickHouse ่กจไธญใ€‚ไปฅไธ‹ๅ‘ฝไปคๅฐ† 500 ไธ‡่กŒๆ’ๅ…ฅๅˆฐ `uk_price_paid` ่กจไธญ๏ผš + +```sql +INSERT INTO uk_price_paid +WITH + splitByChar(' ', postcode) AS p +SELECT + toUInt32(price_string) AS price, + parseDateTimeBestEffortUS(time) AS date, + p[1] AS postcode1, + p[2] AS postcode2, + transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type, + b = 'Y' AS is_new, + transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration, + addr1, + addr2, + street, + locality, + town, + district, + county +FROM url( + 'http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv', + 'CSV', + 
'uuid_string String, + price_string String, + time String, + postcode String, + a String, + b String, + c String, + addr1 String, + addr2 String, + street String, + locality String, + town String, + district String, + county String, + d String, + e String' +) SETTINGS max_http_get_redirects=10; +``` + +้œ€่ฆ็ญ‰ๅพ…ไธ€ไธคๅˆ†้’Ÿไปฅไพฟๆ•ฐๆฎๆ’ๅ…ฅ๏ผŒๅ…ทไฝ“ๆ—ถ้—ดๅ–ๅ†ณไบŽ็ฝ‘็ปœ้€Ÿๅบฆใ€‚ + +## ้ชŒ่ฏๆ•ฐๆฎ {#validate-data} + +่ฎฉๆˆ‘ไปฌ้€š่ฟ‡ๆŸฅ็œ‹ๆ’ๅ…ฅไบ†ๅคšๅฐ‘่กŒๆฅ้ชŒ่ฏๅฎƒๆ˜ฏๅฆๆœ‰ๆ•ˆ๏ผš + +```sql +SELECT count() +FROM uk_price_paid +``` + +ๅœจๆ‰ง่กŒๆญคๆŸฅ่ฏขๆ—ถ๏ผŒๆ•ฐๆฎ้›†ๆœ‰ 27,450,499 ่กŒใ€‚่ฎฉๆˆ‘ไปฌ็œ‹็œ‹ ClickHouse ไธญ่กจ็š„ๅคงๅฐๆ˜ฏๅคšๅฐ‘๏ผš + +```sql +SELECT formatReadableSize(total_bytes) +FROM system.tables +WHERE name = 'uk_price_paid' +``` + +่ฏทๆณจๆ„๏ผŒ่กจ็š„ๅคงๅฐไป…ไธบ 221.43 MiB๏ผ + +## ่ฟ่กŒไธ€ไบ›ๆŸฅ่ฏข {#run-queries} + +่ฎฉๆˆ‘ไปฌ่ฟ่กŒไธ€ไบ›ๆŸฅ่ฏขๆฅๅˆ†ๆžๆ•ฐๆฎ๏ผš + +### ๆŸฅ่ฏข 1. ๆฏๅนดๅนณๅ‡ไปทๆ ผ {#average-price} + +```sql +SELECT + toYear(date) AS year, + round(avg(price)) AS price, + bar(price, 0, 1000000, 80 +) +FROM uk_price_paid +GROUP BY year +ORDER BY year +``` + +็ป“ๆžœๅฆ‚ไธ‹ๆ‰€็คบ๏ผš + +```response +โ”Œโ”€yearโ”€โ”ฌโ”€โ”€priceโ”€โ”ฌโ”€bar(round(avg(price)), 0, 1000000, 80)โ”€โ” +โ”‚ 1995 โ”‚ 67934 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 1996 โ”‚ 71508 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 1997 โ”‚ 78536 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž โ”‚ +โ”‚ 1998 โ”‚ 85441 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 1999 โ”‚ 96038 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 2000 โ”‚ 107487 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ 2001 โ”‚ 118888 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ 2002 โ”‚ 137948 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ 2003 โ”‚ 155893 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 2004 โ”‚ 178888 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž โ”‚ +โ”‚ 2005 โ”‚ 189359 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 2006 โ”‚ 203532 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž โ”‚ +โ”‚ 2007 โ”‚ 219375 โ”‚ 
โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ 2008 โ”‚ 217056 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž โ”‚ +โ”‚ 2009 โ”‚ 213419 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ 2010 โ”‚ 236110 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ 2011 โ”‚ 232805 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ 2012 โ”‚ 238381 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ 2013 โ”‚ 256927 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ 2014 โ”‚ 280008 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 2015 โ”‚ 297263 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 2016 โ”‚ 313518 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ 2017 โ”‚ 346371 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 2018 โ”‚ 350556 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ 2019 โ”‚ 352184 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 2020 โ”‚ 375808 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ 2021 โ”‚ 381105 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 2022 โ”‚ 362572 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +### ๆŸฅ่ฏข 2. 
ไผฆๆ•ฆๆฏๅนด็š„ๅนณๅ‡ไปทๆ ผ {#average-price-london} + +```sql +SELECT + toYear(date) AS year, + round(avg(price)) AS price, + bar(price, 0, 2000000, 100 +) +FROM uk_price_paid +WHERE town = 'LONDON' +GROUP BY year +ORDER BY year +``` + +็ป“ๆžœๅฆ‚ไธ‹ๆ‰€็คบ๏ผš + +```response +โ”Œโ”€yearโ”€โ”ฌโ”€โ”€โ”€priceโ”€โ”ฌโ”€bar(round(avg(price)), 0, 2000000, 100)โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ 1995 โ”‚ 109110 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 1996 โ”‚ 118659 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ 1997 โ”‚ 136526 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 1998 โ”‚ 153002 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 1999 โ”‚ 180633 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ 2000 โ”‚ 215849 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 2001 โ”‚ 232987 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 2002 โ”‚ 263668 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 2003 โ”‚ 278424 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ 2004 โ”‚ 304664 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 2005 โ”‚ 322887 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 2006 โ”‚ 356195 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 2007 โ”‚ 404062 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 2008 โ”‚ 420741 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ 2009 โ”‚ 427754 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 2010 โ”‚ 480322 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ 2011 โ”‚ 496278 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 2012 โ”‚ 519482 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ 2013 โ”‚ 616195 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 2014 โ”‚ 724121 โ”‚ 
โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 2015 โ”‚ 792101 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ 2016 โ”‚ 843589 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 2017 โ”‚ 983523 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ 2018 โ”‚ 1016753 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 2019 โ”‚ 1041673 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ 2020 โ”‚ 1060027 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ 2021 โ”‚ 958249 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ 2022 โ”‚ 902596 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +2020 ๅนดๆˆฟไปทๅ‡บไบ‹ไบ†๏ผไฝ†่ฟ™ๅนถไธไปคไบบๆ„ๅค–โ€ฆโ€ฆ + +### ๆŸฅ่ฏข 3. 
ๆœ€ๆ˜‚่ดต็š„็คพๅŒบ {#most-expensive-neighborhoods} + +```sql +SELECT + town, + district, + count() AS c, + round(avg(price)) AS price, + bar(price, 0, 5000000, 100) +FROM uk_price_paid +WHERE date >= '2020-01-01' +GROUP BY + town, + district +HAVING c >= 100 +ORDER BY price DESC +LIMIT 100 +``` + +็ป“ๆžœๅฆ‚ไธ‹ๆ‰€็คบ๏ผš + +```response +โ”Œโ”€townโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€districtโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€cโ”€โ”ฌโ”€โ”€โ”€priceโ”€โ”ฌโ”€bar(round(avg(price)), 0, 5000000, 100)โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ LONDON โ”‚ CITY OF LONDON โ”‚ 578 โ”‚ 3149590 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ LONDON โ”‚ CITY OF WESTMINSTER โ”‚ 7083 โ”‚ 2903794 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ LONDON โ”‚ KENSINGTON AND CHELSEA โ”‚ 4986 โ”‚ 2333782 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ LEATHERHEAD โ”‚ ELMBRIDGE โ”‚ 203 โ”‚ 2071595 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ VIRGINIA WATER โ”‚ RUNNYMEDE โ”‚ 308 โ”‚ 1939465 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ LONDON โ”‚ CAMDEN โ”‚ 5750 โ”‚ 1673687 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ WINDLESHAM โ”‚ SURREY HEATH โ”‚ 182 โ”‚ 1428358 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ 
NORTHWOOD โ”‚ THREE RIVERS โ”‚ 112 โ”‚ 1404170 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ BARNET โ”‚ ENFIELD โ”‚ 259 โ”‚ 1338299 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ LONDON โ”‚ ISLINGTON โ”‚ 5504 โ”‚ 1275520 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ LONDON โ”‚ RICHMOND UPON THAMES โ”‚ 1345 โ”‚ 1261935 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ COBHAM โ”‚ ELMBRIDGE โ”‚ 727 โ”‚ 1251403 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ BEACONSFIELD โ”‚ BUCKINGHAMSHIRE โ”‚ 680 โ”‚ 1199970 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ LONDON โ”‚ TOWER HAMLETS โ”‚ 10012 โ”‚ 1157827 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ LONDON โ”‚ HOUNSLOW โ”‚ 1278 โ”‚ 1144389 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ BURFORD โ”‚ WEST OXFORDSHIRE โ”‚ 182 โ”‚ 1139393 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ RICHMOND โ”‚ RICHMOND UPON THAMES โ”‚ 1649 โ”‚ 1130076 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ KINGSTON UPON THAMES โ”‚ RICHMOND UPON THAMES โ”‚ 147 โ”‚ 1126111 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ ASCOT โ”‚ WINDSOR AND MAIDENHEAD โ”‚ 773 โ”‚ 1106109 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ LONDON โ”‚ HAMMERSMITH AND FULHAM โ”‚ 6162 โ”‚ 1056198 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ RADLETT โ”‚ HERTSMERE โ”‚ 513 โ”‚ 1045758 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ LEATHERHEAD โ”‚ GUILDFORD โ”‚ 354 โ”‚ 1045175 โ”‚ 
โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ WEYBRIDGE โ”‚ ELMBRIDGE โ”‚ 1275 โ”‚ 1036702 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ FARNHAM โ”‚ EAST HAMPSHIRE โ”‚ 107 โ”‚ 1033682 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ ESHER โ”‚ ELMBRIDGE โ”‚ 915 โ”‚ 1032753 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ FARNHAM โ”‚ HART โ”‚ 102 โ”‚ 1002692 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ GERRARDS CROSS โ”‚ BUCKINGHAMSHIRE โ”‚ 845 โ”‚ 983639 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ CHALFONT ST GILES โ”‚ BUCKINGHAMSHIRE โ”‚ 286 โ”‚ 973993 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ SALCOMBE โ”‚ SOUTH HAMS โ”‚ 215 โ”‚ 965724 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž โ”‚ +โ”‚ SURBITON โ”‚ ELMBRIDGE โ”‚ 181 โ”‚ 960346 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ BROCKENHURST โ”‚ NEW FOREST โ”‚ 226 โ”‚ 951278 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ SUTTON COLDFIELD โ”‚ LICHFIELD โ”‚ 110 โ”‚ 930757 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ EAST MOLESEY โ”‚ ELMBRIDGE โ”‚ 372 โ”‚ 927026 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ LLANGOLLEN โ”‚ WREXHAM โ”‚ 127 โ”‚ 925681 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ OXFORD โ”‚ SOUTH OXFORDSHIRE โ”‚ 638 โ”‚ 923830 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ LONDON โ”‚ MERTON โ”‚ 4383 โ”‚ 923194 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ GUILDFORD โ”‚ WAVERLEY โ”‚ 261 โ”‚ 905733 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ TEDDINGTON โ”‚ RICHMOND UPON THAMES โ”‚ 1147 โ”‚ 894856 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ HARPENDEN โ”‚ ST ALBANS โ”‚ 
1271 โ”‚ 893079 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ HENLEY-ON-THAMES โ”‚ SOUTH OXFORDSHIRE โ”‚ 1042 โ”‚ 887557 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ POTTERS BAR โ”‚ WELWYN HATFIELD โ”‚ 314 โ”‚ 863037 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž โ”‚ +โ”‚ LONDON โ”‚ WANDSWORTH โ”‚ 13210 โ”‚ 857318 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ BILLINGSHURST โ”‚ CHICHESTER โ”‚ 255 โ”‚ 856508 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ LONDON โ”‚ SOUTHWARK โ”‚ 7742 โ”‚ 843145 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ LONDON โ”‚ HACKNEY โ”‚ 6656 โ”‚ 839716 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ LUTTERWORTH โ”‚ HARBOROUGH โ”‚ 1096 โ”‚ 836546 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ KINGSTON UPON THAMES โ”‚ KINGSTON UPON THAMES โ”‚ 1846 โ”‚ 828990 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ LONDON โ”‚ EALING โ”‚ 5583 โ”‚ 820135 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ INGATESTONE โ”‚ CHELMSFORD โ”‚ 120 โ”‚ 815379 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž โ”‚ +โ”‚ MARLOW โ”‚ BUCKINGHAMSHIRE โ”‚ 718 โ”‚ 809943 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ EAST GRINSTEAD โ”‚ TANDRIDGE โ”‚ 105 โ”‚ 809461 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ CHIGWELL โ”‚ EPPING FOREST โ”‚ 484 โ”‚ 809338 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ EGHAM โ”‚ RUNNYMEDE โ”‚ 989 โ”‚ 807858 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ HASLEMERE โ”‚ CHICHESTER โ”‚ 223 โ”‚ 804173 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ PETWORTH โ”‚ CHICHESTER โ”‚ 288 โ”‚ 803206 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ TWICKENHAM โ”‚ RICHMOND UPON THAMES โ”‚ 2194 โ”‚ 802616 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ WEMBLEY 
โ”‚ BRENT โ”‚ 1698 โ”‚ 801733 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ HINDHEAD โ”‚ WAVERLEY โ”‚ 233 โ”‚ 801482 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ LONDON โ”‚ BARNET โ”‚ 8083 โ”‚ 792066 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ WOKING โ”‚ GUILDFORD โ”‚ 343 โ”‚ 789360 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ STOCKBRIDGE โ”‚ TEST VALLEY โ”‚ 318 โ”‚ 777909 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ BERKHAMSTED โ”‚ DACORUM โ”‚ 1049 โ”‚ 776138 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ MAIDENHEAD โ”‚ BUCKINGHAMSHIRE โ”‚ 236 โ”‚ 775572 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ SOLIHULL โ”‚ STRATFORD-ON-AVON โ”‚ 142 โ”‚ 770727 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ GREAT MISSENDEN โ”‚ BUCKINGHAMSHIRE โ”‚ 431 โ”‚ 764493 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž โ”‚ +โ”‚ TADWORTH โ”‚ REIGATE AND BANSTEAD โ”‚ 920 โ”‚ 757511 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ LONDON โ”‚ BRENT โ”‚ 4124 โ”‚ 757194 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ THAMES DITTON โ”‚ ELMBRIDGE โ”‚ 470 โ”‚ 750828 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ LONDON โ”‚ LAMBETH โ”‚ 10431 โ”‚ 750532 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ RICKMANSWORTH โ”‚ THREE RIVERS โ”‚ 1500 โ”‚ 747029 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ KINGS LANGLEY โ”‚ DACORUM โ”‚ 281 โ”‚ 746536 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ HARLOW โ”‚ EPPING FOREST โ”‚ 172 โ”‚ 739423 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ TONBRIDGE โ”‚ SEVENOAKS โ”‚ 103 โ”‚ 738740 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ BELVEDERE โ”‚ BEXLEY โ”‚ 686 โ”‚ 736385 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ CRANBROOK โ”‚ TUNBRIDGE WELLS โ”‚ 769 โ”‚ 734328 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ 
SOLIHULL โ”‚ WARWICK โ”‚ 116 โ”‚ 733286 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ ALDERLEY EDGE โ”‚ CHESHIRE EAST โ”‚ 357 โ”‚ 732882 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ”‚ WELWYN โ”‚ WELWYN HATFIELD โ”‚ 404 โ”‚ 730281 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ CHISLEHURST โ”‚ BROMLEY โ”‚ 870 โ”‚ 730279 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ LONDON โ”‚ HARINGEY โ”‚ 6488 โ”‚ 726715 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ AMERSHAM โ”‚ BUCKINGHAMSHIRE โ”‚ 965 โ”‚ 725426 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ SEVENOAKS โ”‚ SEVENOAKS โ”‚ 2183 โ”‚ 725102 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ โ”‚ +โ”‚ BOURNE END โ”‚ BUCKINGHAMSHIRE โ”‚ 269 โ”‚ 724595 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ NORTHWOOD โ”‚ HILLINGDON โ”‚ 568 โ”‚ 722436 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ PURFLEET โ”‚ THURROCK โ”‚ 143 โ”‚ 722205 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ SLOUGH โ”‚ BUCKINGHAMSHIRE โ”‚ 832 โ”‚ 721529 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ INGATESTONE โ”‚ BRENTWOOD โ”‚ 301 โ”‚ 718292 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž โ”‚ +โ”‚ EPSOM โ”‚ REIGATE AND BANSTEAD โ”‚ 315 โ”‚ 709264 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ ASHTEAD โ”‚ MOLE VALLEY โ”‚ 524 โ”‚ 708646 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ BETCHWORTH โ”‚ MOLE VALLEY โ”‚ 155 โ”‚ 708525 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ OXTED โ”‚ TANDRIDGE โ”‚ 645 โ”‚ 706946 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– โ”‚ +โ”‚ READING โ”‚ SOUTH OXFORDSHIRE โ”‚ 593 โ”‚ 705466 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ FELTHAM โ”‚ HOUNSLOW โ”‚ 1536 โ”‚ 703815 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ TUNBRIDGE WELLS โ”‚ WEALDEN โ”‚ 207 โ”‚ 703296 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ LEWES โ”‚ WEALDEN โ”‚ 116 โ”‚ 
701349 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ OXFORD โ”‚ OXFORD โ”‚ 3656 โ”‚ 700813 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚ +โ”‚ MAYFIELD โ”‚ WEALDEN โ”‚ 177 โ”‚ 698158 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ PINNER โ”‚ HARROW โ”‚ 997 โ”‚ 697876 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ LECHLADE โ”‚ COTSWOLD โ”‚ 155 โ”‚ 696262 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š โ”‚ +โ”‚ WALTON-ON-THAMES โ”‚ ELMBRIDGE โ”‚ 1850 โ”‚ 690102 โ”‚ โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +## ไฝฟ็”จ Projection ๅŠ ้€ŸๆŸฅ่ฏข {#speedup-with-projections} + +[Projections](../../sql-reference/statements/alter/projection.md) ๅ…่ฎธๆˆ‘ไปฌ้€š่ฟ‡ๅญ˜ๅ‚จไปปๆ„ๆ ผๅผ็š„้ข„ๅ…ˆ่šๅˆ็š„ๆ•ฐๆฎๆฅๆ้ซ˜ๆŸฅ่ฏข้€Ÿๅบฆใ€‚ๅœจๆญค็คบไพ‹ไธญ๏ผŒๆˆ‘ไปฌๅˆ›ๅปบไบ†ไธ€ไธชๆŒ‰ๅนดไปฝใ€ๅœฐๅŒบๅ’ŒๅŸŽ้•‡ๅˆ†็ป„็š„ๆˆฟไบง็š„ๅนณๅ‡ไปทๆ ผใ€ๆ€ปไปทๆ ผๅ’Œๆ•ฐ้‡็š„ Projectionใ€‚ๅœจๆ‰ง่กŒๆ—ถ๏ผŒๅฆ‚ๆžœ ClickHouse ่ฎคไธบ Projection ๅฏไปฅๆ้ซ˜ๆŸฅ่ฏข็š„ๆ€ง่ƒฝ๏ผŒๅฎƒๅฐ†ไฝฟ็”จ Projection๏ผˆไฝ•ๆ—ถไฝฟ็”จ็”ฑ ClickHouse ๅ†ณๅฎš๏ผ‰ใ€‚ + +### ๆž„ๅปบๆŠ•ๅฝฑ{#build-projection} + +่ฎฉๆˆ‘ไปฌ้€š่ฟ‡็ปดๅบฆ `toYear(date)`ใ€`district` ๅ’Œ `town` ๅˆ›ๅปบไธ€ไธช่šๅˆ Projection๏ผš + +```sql +ALTER TABLE uk_price_paid + ADD PROJECTION projection_by_year_district_town + ( + SELECT + toYear(date), + district, + town, + avg(price), + sum(price), + count() + GROUP BY + toYear(date), + district, + town + ) +``` + +ๅกซๅ……็Žฐๆœ‰ๆ•ฐๆฎ็š„ Projectionใ€‚ ๏ผˆๅฆ‚ๆžœไธ่ฟ›่กŒ materialize ๆ“ไฝœ๏ผŒๅˆ™ ClickHouse ๅชไผšไธบๆ–ฐๆ’ๅ…ฅ็š„ๆ•ฐๆฎๅˆ›ๅปบ Projection๏ผ‰๏ผš + +```sql +ALTER TABLE uk_price_paid + 
MATERIALIZE PROJECTION projection_by_year_district_town +SETTINGS mutations_sync = 1 +``` + +## Test Performance {#test-performance} + +่ฎฉๆˆ‘ไปฌๅ†ๆฌก่ฟ่กŒ็›ธๅŒ็š„ 3 ไธชๆŸฅ่ฏข๏ผš + +### ๆŸฅ่ฏข 1. ๆฏๅนดๅนณๅ‡ไปทๆ ผ {#average-price-projections} + +```sql +SELECT + toYear(date) AS year, + round(avg(price)) AS price, + bar(price, 0, 1000000, 80) +FROM uk_price_paid +GROUP BY year +ORDER BY year ASC +``` + +็ป“ๆžœๆ˜ฏไธ€ๆ ท็š„๏ผŒไฝ†ๆ˜ฏๆ€ง่ƒฝๆ›ดๅฅฝ๏ผ +```response +No projection: 28 rows in set. Elapsed: 1.775 sec. Processed 27.45 million rows, 164.70 MB (15.47 million rows/s., 92.79 MB/s.) +With projection: 28 rows in set. Elapsed: 0.665 sec. Processed 87.51 thousand rows, 3.21 MB (131.51 thousand rows/s., 4.82 MB/s.) +``` + + +### ๆŸฅ่ฏข 2. ไผฆๆ•ฆๆฏๅนด็š„ๅนณๅ‡ไปทๆ ผ {#average-price-london-projections} + +```sql +SELECT + toYear(date) AS year, + round(avg(price)) AS price, + bar(price, 0, 2000000, 100) +FROM uk_price_paid +WHERE town = 'LONDON' +GROUP BY year +ORDER BY year ASC +``` + +Same result, but notice the improvement in query performance: + +```response +No projection: 28 rows in set. Elapsed: 0.720 sec. Processed 27.45 million rows, 46.61 MB (38.13 million rows/s., 64.74 MB/s.) +With projection: 28 rows in set. Elapsed: 0.015 sec. Processed 87.51 thousand rows, 3.51 MB (5.74 million rows/s., 230.24 MB/s.) +``` + +### ๆŸฅ่ฏข 3. ๆœ€ๆ˜‚่ดต็š„็คพๅŒบ {#most-expensive-neighborhoods-projections} + +ๆณจๆ„๏ผš้œ€่ฆไฟฎๆ”น (date >= '2020-01-01') ไปฅไฝฟๅ…ถไธŽ Projection ๅฎšไน‰็š„็ปดๅบฆ (`toYear(date) >= 2020)` ๅŒน้…๏ผš + +```sql +SELECT + town, + district, + count() AS c, + round(avg(price)) AS price, + bar(price, 0, 5000000, 100) +FROM uk_price_paid +WHERE toYear(date) >= 2020 +GROUP BY + town, + district +HAVING c >= 100 +ORDER BY price DESC +LIMIT 100 +``` + +ๅŒๆ ท๏ผŒ็ป“ๆžœๆ˜ฏ็›ธๅŒ็š„๏ผŒไฝ†่ฏทๆณจๆ„ๆŸฅ่ฏขๆ€ง่ƒฝ็š„ๆ”น่ฟ›๏ผš + +```response +No projection: 100 rows in set. Elapsed: 0.928 sec. 
Processed 27.45 million rows, 103.80 MB (29.56 million rows/s., 111.80 MB/s.) +With projection: 100 rows in set. Elapsed: 0.336 sec. Processed 17.32 thousand rows, 1.23 MB (51.61 thousand rows/s., 3.65 MB/s.) +``` + +### ๅœจ Playground ไธŠๆต‹่ฏ•{#playground} + +ไนŸๅฏไปฅๅœจ [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==) ไธŠๆ‰พๅˆฐๆญคๆ•ฐๆฎ้›†ใ€‚ From b18c6fd8e62e32cb3c327eb5ad5422b89a0a91ab Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 14 Oct 2022 14:52:26 +0000 Subject: [PATCH 255/266] Fix an invalid type of a column after attach and alter. --- src/Storages/MergeTree/MutateTask.cpp | 31 ++++-- .../MergeTree/ReplicatedMergeTreeSink.cpp | 7 +- ...ate_respect_part_column_type_bug.reference | 9 ++ ...er_update_respect_part_column_type_bug.sql | 94 +++++++++++++++++++ 4 files changed, 134 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.reference create mode 100644 tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.sql diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 9f3c3100349..3d964e60798 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -220,8 +220,11 @@ getColumnsForNewDataPart( if (!isWidePart(source_part)) return {updated_header.getNamesAndTypesList(), new_serialization_infos}; - Names source_column_names = source_part->getColumns().getNames(); - NameSet source_columns_name_set(source_column_names.begin(), source_column_names.end()); + const auto & source_columns = source_part->getColumns(); + std::unordered_map source_columns_name_to_type; + for (const auto & it : 
source_columns) + source_columns_name_to_type[it.name] = it.type; + for (auto it = storage_columns.begin(); it != storage_columns.end();) { if (updated_header.has(it->name)) @@ -233,14 +236,25 @@ getColumnsForNewDataPart( } else { - if (!source_columns_name_set.contains(it->name)) + auto source_col = source_columns_name_to_type.find(it->name); + if (source_col == source_columns_name_to_type.end()) { /// Source part doesn't have column but some other column /// was renamed to it's name. auto renamed_it = renamed_columns_to_from.find(it->name); - if (renamed_it != renamed_columns_to_from.end() - && source_columns_name_set.contains(renamed_it->second)) - ++it; + if (renamed_it != renamed_columns_to_from.end()) + { + source_col = source_columns_name_to_type.find(renamed_it->second); + if (source_col == source_columns_name_to_type.end()) + it = storage_columns.erase(it); + else + { + /// Take a type from source part column. + /// It may differ from column type in storage. + it->type = source_col->second; + ++it; + } + } else it = storage_columns.erase(it); } @@ -262,7 +276,12 @@ getColumnsForNewDataPart( if (!renamed_columns_to_from.contains(it->name) && (was_renamed || was_removed)) it = storage_columns.erase(it); else + { + /// Take a type from source part column. + /// It may differ from column type in storage. + it->type = source_col->second; ++it; + } } } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index b9bd027cde2..0abea5977c3 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -315,7 +315,12 @@ void ReplicatedMergeTreeSink::commitPart( DataPartStorageBuilderPtr builder, size_t replicas_num) { - metadata_snapshot->check(part->getColumns()); + /// It is possible that we alter a part with different types of source columns. 
+ /// In this case, if column was not altered, the result type will be different with what we have in metadata. + /// For now, consider it is ok. See 02461_alter_update_respect_part_column_type_bug for an example. + /// + /// metadata_snapshot->check(part->getColumns()); + assertSessionIsNotExpired(zookeeper); String temporary_part_relative_path = part->data_part_storage->getPartDirectory(); diff --git a/tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.reference b/tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.reference new file mode 100644 index 00000000000..99a39410cae --- /dev/null +++ b/tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.reference @@ -0,0 +1,9 @@ +1 one test1 +one one test1 +one one test +one one test +----- +1 one test1 +one one test1 +one one test +one one test diff --git a/tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.sql b/tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.sql new file mode 100644 index 00000000000..7f48b41aa1e --- /dev/null +++ b/tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.sql @@ -0,0 +1,94 @@ +drop table if exists src; +create table src( A Int64, B String, C String) Engine=MergeTree order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values(1, 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column B Nullable(String); +alter table src attach partition tuple(); + +alter table src update C = 'test1' where 1 settings mutations_sync=2; +select * from src; + + +drop table if exists src; +create table src( A String, B String, C String) Engine=MergeTree order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values('one', 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column A LowCardinality(String); +alter table src attach partition tuple(); + +alter table src update C = 'test1' 
where 1 settings mutations_sync=2; +select * from src; + + +drop table if exists src; +create table src( A String, B String, C String) Engine=MergeTree order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values('one', 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column A LowCardinality(String); +alter table src attach partition tuple(); + +alter table src modify column C LowCardinality(String); +select * from src; + +drop table if exists src; +create table src( A String, B String, C String) Engine=MergeTree order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values('one', 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column B Nullable(String); +alter table src attach partition tuple(); + +alter table src rename column B to D; +select * from src; + +select '-----'; + +drop table if exists src; +create table src( A Int64, B String, C String) Engine=ReplicatedMergeTree('/clickhouse/{database}/test/src1', '1') order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values(1, 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column B Nullable(String); +alter table src attach partition tuple(); + +alter table src update C = 'test1' where 1 settings mutations_sync=2; +select * from src; + + +drop table if exists src; +create table src( A String, B String, C String) Engine=ReplicatedMergeTree('/clickhouse/{database}/test/src2', '1') order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values('one', 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column A LowCardinality(String); +alter table src attach partition tuple(); + +alter table src update C = 'test1' where 1 settings mutations_sync=2; +select * from src; + + +drop table if exists src; +create table src( A String, B String, C String) Engine=ReplicatedMergeTree('/clickhouse/{database}/test/src3', '1') order by A SETTINGS 
min_bytes_for_wide_part=0; +insert into src values('one', 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column A LowCardinality(String); +alter table src attach partition tuple(); + +alter table src modify column C LowCardinality(String); +select * from src; + +drop table if exists src; +create table src( A String, B String, C String) Engine=ReplicatedMergeTree('/clickhouse/{database}/test/src4', '1') order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values('one', 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column B Nullable(String); +alter table src attach partition tuple(); + +alter table src rename column B to D; +select * from src; + From cd9afdcb7c10cffd59cde8bfb401712222f387c5 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Fri, 14 Oct 2022 17:19:14 +0200 Subject: [PATCH 256/266] Increase request_timeout_ms for s3 disks. --- src/Disks/ObjectStorages/S3/diskSettings.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index b5efc11db8b..1635cb5c552 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -125,7 +125,7 @@ std::unique_ptr getClient(const Poco::Util::AbstractConfigura throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS); client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 10000); - client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 5000); + client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 30000); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100); client_configuration.endpointOverride = uri.endpoint; From 
7d9f097d3b8e432329d31c35fdc2e69c850e4807 Mon Sep 17 00:00:00 2001 From: nvartolomei Date: Fri, 14 Oct 2022 18:19:14 +0100 Subject: [PATCH 257/266] Fix bad copy & paste when loading background_merges_mutations_concurrency_ratio setting (#42315) --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b08c2bab81c..99be9d8739a 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3410,7 +3410,7 @@ void Context::initializeBackgroundExecutorsIfNeeded() size_t background_merges_mutations_concurrency_ratio = 2; if (config.has("background_merges_mutations_concurrency_ratio")) background_merges_mutations_concurrency_ratio = config.getUInt64("background_merges_mutations_concurrency_ratio"); - else if (config.has("profiles.default.background_pool_size")) + else if (config.has("profiles.default.background_merges_mutations_concurrency_ratio")) background_merges_mutations_concurrency_ratio = config.getUInt64("profiles.default.background_merges_mutations_concurrency_ratio"); size_t background_move_pool_size = 8; From bd88f0b1d9e35403ec20b737fdc0a6dd79fa52d1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 14 Oct 2022 17:28:57 +0000 Subject: [PATCH 258/266] Update test. 
--- .../02012_zookeeper_changed_enum_type_incompatible.reference | 4 ++++ .../02012_zookeeper_changed_enum_type_incompatible.sql | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02012_zookeeper_changed_enum_type_incompatible.reference b/tests/queries/0_stateless/02012_zookeeper_changed_enum_type_incompatible.reference index e69de29bb2d..338e1edb715 100644 --- a/tests/queries/0_stateless/02012_zookeeper_changed_enum_type_incompatible.reference +++ b/tests/queries/0_stateless/02012_zookeeper_changed_enum_type_incompatible.reference @@ -0,0 +1,4 @@ +one 1 +two 1 +one 1 +two 1 diff --git a/tests/queries/0_stateless/02012_zookeeper_changed_enum_type_incompatible.sql b/tests/queries/0_stateless/02012_zookeeper_changed_enum_type_incompatible.sql index e236e6d2767..b83f02dc79d 100644 --- a/tests/queries/0_stateless/02012_zookeeper_changed_enum_type_incompatible.sql +++ b/tests/queries/0_stateless/02012_zookeeper_changed_enum_type_incompatible.sql @@ -11,5 +11,6 @@ alter table enum_alter_issue detach partition id 'all'; alter table enum_alter_issue modify column a Enum8('one' = 1, 'two' = 2, 'three' = 3); insert into enum_alter_issue values ('one', 1), ('two', 1); -alter table enum_alter_issue attach partition id 'all'; -- {serverError TYPE_MISMATCH} +alter table enum_alter_issue attach partition id 'all'; +select * from enum_alter_issue; drop table enum_alter_issue; From fb637818eabe384b3d17771c85f65731d9d630d4 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 14 Oct 2022 18:47:21 +0000 Subject: [PATCH 259/266] Make test better --- ...02267_file_globs_schema_inference.reference | 1 + .../02267_file_globs_schema_inference.sh | 18 ++++++++++++++++++ .../02267_file_globs_schema_inference.sql | 11 ----------- 3 files changed, 19 insertions(+), 11 deletions(-) create mode 100755 tests/queries/0_stateless/02267_file_globs_schema_inference.sh delete mode 100644 tests/queries/0_stateless/02267_file_globs_schema_inference.sql diff --git 
a/tests/queries/0_stateless/02267_file_globs_schema_inference.reference b/tests/queries/0_stateless/02267_file_globs_schema_inference.reference index 98da2074df6..ad94d5181ef 100644 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.reference +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.reference @@ -1,2 +1,3 @@ 1 \N +OK diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sh b/tests/queries/0_stateless/02267_file_globs_schema_inference.sh new file mode 100755 index 00000000000..701e18a0259 --- /dev/null +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.jsonl') select NULL as x SETTINGS engine_file_truncate_on_insert = 1"; +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.jsonl') select * from numbers(0) SETTINGS engine_file_truncate_on_insert = 1"; +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data4.jsonl') select 1 as x SETTINGS engine_file_truncate_on_insert = 1"; + +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.jsonl') order by x"; + +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data4.jsonl', 'TSV') select 1 as x"; +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.jsonl', 'TSV') select [1,2,3] as x SETTINGS engine_file_truncate_on_insert = 1"; + +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.jsonl') settings schema_inference_use_cache_for_file=0" 2>&1 | grep -F -q "INCORRECT_DATA" && echo "OK" || echo "FAIL"; + diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql 
b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql deleted file mode 100644 index b2a2997beab..00000000000 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql +++ /dev/null @@ -1,11 +0,0 @@ --- Tags: no-fasttest, no-parallel - -insert into function file('02267_data2.jsonl') select NULL as x SETTINGS engine_file_truncate_on_insert = 1; -insert into function file('02267_data3.jsonl') select * from numbers(0) SETTINGS engine_file_truncate_on_insert = 1; -insert into function file('02267_data4.jsonl') select 1 as x SETTINGS engine_file_truncate_on_insert = 1; -select * from file('02267_data*.jsonl') order by x; - -insert into function file('02267_data4.jsonl', 'TSV') select 1 as x; -insert into function file('02267_data1.jsonl', 'TSV') select [1,2,3] as x SETTINGS engine_file_truncate_on_insert = 1; - -select * from file('02267_data*.jsonl') settings schema_inference_use_cache_for_file=0; --{serverError INCORRECT_DATA} From 87296eb90bf0c376fc2d68b45871803331a8dc76 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Oct 2022 22:01:26 +0200 Subject: [PATCH 260/266] Update CCTZ to 2022e. 
--- contrib/cctz | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/cctz b/contrib/cctz index 05ec08ce61e..7a454c25c7d 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 05ec08ce61e4b5c44692cc2f1ce4b6d8596679bf +Subproject commit 7a454c25c7d16053bcd327cdd16329212a08fa4a From 4608b70ddab757db44bb467549c1aa93dbaf5ad0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Oct 2022 00:31:33 +0300 Subject: [PATCH 261/266] Update CompressionCodecT64.cpp --- src/Compression/CompressionCodecT64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp index 887c8b9e9d2..cbc1750ed59 100644 --- a/src/Compression/CompressionCodecT64.cpp +++ b/src/Compression/CompressionCodecT64.cpp @@ -315,7 +315,7 @@ void load(const char * src, T * buf, UInt32 tail = 64) { /// Since the algorithm uses little-endian integers, data is loaded /// as little-endian types on big-endian machine(s390x, etc.) - for (UInt32 i = 0; i < tail; i++) + for (UInt32 i = 0; i < tail; ++i) { buf[i] = unalignedLoadLE(src + i * sizeof(T)); } From bfd8811a4d5cc05a65010b336d0d6169e25acc9e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Oct 2022 00:32:37 +0300 Subject: [PATCH 262/266] Update CompressionCodecT64.cpp --- src/Compression/CompressionCodecT64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp index cbc1750ed59..bfcebad9676 100644 --- a/src/Compression/CompressionCodecT64.cpp +++ b/src/Compression/CompressionCodecT64.cpp @@ -314,7 +314,7 @@ void load(const char * src, T * buf, UInt32 tail = 64) else { /// Since the algorithm uses little-endian integers, data is loaded - /// as little-endian types on big-endian machine(s390x, etc.) + /// as little-endian types on big-endian machine (s390x, etc). 
for (UInt32 i = 0; i < tail; ++i) { buf[i] = unalignedLoadLE(src + i * sizeof(T)); From ec81d61f4874c47b98d42a299ca40c0666a9d82c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sun, 16 Oct 2022 12:26:38 +0000 Subject: [PATCH 263/266] better logging for async insert --- src/Interpreters/executeQuery.cpp | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 86686b3eb13..abca563de55 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -582,10 +582,28 @@ static std::tuple executeQueryImpl( std::shared_ptr quota; std::unique_ptr interpreter; + bool async_insert = false; auto * queue = context->getAsynchronousInsertQueue(); - const bool async_insert = queue - && insert_query && !insert_query->select - && insert_query->hasInlinedData() && settings.async_insert; + + if (insert_query && settings.async_insert) + { + String reason; + + if (!queue) + reason = "asynchronous insert queue is not configured"; + else if (insert_query->select) + reason = "insert query has select"; + else if (!insert_query->hasInlinedData()) + reason = "insert query doesn't have inlined data"; + else + async_insert = true; + + if (!async_insert) + { + LOG_DEBUG(&Poco::Logger::get("executeQuery"), + "Setting async_insert=1, but INSERT query will be executed synchronously (reason: {})", reason); + } + } if (async_insert) { From b3aea48b9de84d4f4389a430b521d2f66343f216 Mon Sep 17 00:00:00 2001 From: jferroal Date: Mon, 17 Oct 2022 09:22:46 +0800 Subject: [PATCH 264/266] Doc: zh-CN translation docs/zh/getting-started/example-datasets/brown-benchmark --- .../example-datasets/brown-benchmark.mdx | 462 +++++++++++++++++- 1 file changed, 456 insertions(+), 6 deletions(-) diff --git a/docs/zh/getting-started/example-datasets/brown-benchmark.mdx b/docs/zh/getting-started/example-datasets/brown-benchmark.mdx index c35e96718b1..621d31a6673 100644 --- 
a/docs/zh/getting-started/example-datasets/brown-benchmark.mdx +++ b/docs/zh/getting-started/example-datasets/brown-benchmark.mdx @@ -1,10 +1,460 @@ --- -slug: /zh/getting-started/example-datasets/brown-benchmark -sidebar_label: Brown University Benchmark -description: A new analytical benchmark for machine-generated log data -title: "Brown University Benchmark" +slug: /en/getting-started/example-datasets/brown-benchmark +sidebar_label: ๅธƒๆœ—ๅคงๅญฆๅŸบๅ‡† +description: ๆœบๅ™จ็”Ÿๆˆๆ—ฅๅฟ—ๆ•ฐๆฎ็š„ๆ–ฐๅˆ†ๆžๅŸบๅ‡† +title: "ๅธƒๆœ—ๅคงๅญฆๅŸบๅ‡†" --- -import Content from '@site/docs/en/getting-started/example-datasets/brown-benchmark.md'; +`MgBench` ๆ˜ฏๆœบๅ™จ็”Ÿๆˆ็š„ๆ—ฅๅฟ—ๆ•ฐๆฎ็š„ๆ–ฐๅˆ†ๆžๅŸบๅ‡†๏ผŒ[Andrew Crotty](http://cs.brown.edu/people/acrotty/)ใ€‚ - +ไธ‹่ฝฝๆ•ฐๆฎ๏ผš + +```bash +wget https://datasets.clickhouse.com/mgbench{1..3}.csv.xz +``` + +่งฃๅŽ‹ๆ•ฐๆฎ๏ผš + +```bash +xz -v -d mgbench{1..3}.csv.xz +``` + +ๅˆ›ๅปบๆ•ฐๆฎๅบ“ๅ’Œ่กจ๏ผš + +```sql +CREATE DATABASE mgbench; +``` + +```sql +USE mgbench; +``` + +```sql +CREATE TABLE mgbench.logs1 ( + log_time DateTime, + machine_name LowCardinality(String), + machine_group LowCardinality(String), + cpu_idle Nullable(Float32), + cpu_nice Nullable(Float32), + cpu_system Nullable(Float32), + cpu_user Nullable(Float32), + cpu_wio Nullable(Float32), + disk_free Nullable(Float32), + disk_total Nullable(Float32), + part_max_used Nullable(Float32), + load_fifteen Nullable(Float32), + load_five Nullable(Float32), + load_one Nullable(Float32), + mem_buffers Nullable(Float32), + mem_cached Nullable(Float32), + mem_free Nullable(Float32), + mem_shared Nullable(Float32), + swap_free Nullable(Float32), + bytes_in Nullable(Float32), + bytes_out Nullable(Float32) +) +ENGINE = MergeTree() +ORDER BY (machine_group, machine_name, log_time); +``` + + +```sql +CREATE TABLE mgbench.logs2 ( + log_time DateTime, + client_ip IPv4, + request String, + status_code UInt16, + object_size UInt64 +) +ENGINE = MergeTree() +ORDER BY log_time; +``` + + 
+```sql +CREATE TABLE mgbench.logs3 ( + log_time DateTime64, + device_id FixedString(15), + device_name LowCardinality(String), + device_type LowCardinality(String), + device_floor UInt8, + event_type LowCardinality(String), + event_unit FixedString(1), + event_value Nullable(Float32) +) +ENGINE = MergeTree() +ORDER BY (event_type, log_time); +``` + +ๆ’ๅ…ฅๆ•ฐๆฎ๏ผš + +``` +clickhouse-client --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" < mgbench1.csv +clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbench2.csv +clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv +``` + +## ่ฟ่กŒๅŸบๅ‡†ๆŸฅ่ฏข๏ผš + +```sql +USE mgbench; +``` + +```sql +-- Q1.1: ่‡ชๅˆๅคœไปฅๆฅๆฏไธช Web ๆœๅŠกๅ™จ็š„ CPU/็ฝ‘็ปœๅˆฉ็”จ็Ž‡ๆ˜ฏๅคšๅฐ‘๏ผŸ + +SELECT machine_name, + MIN(cpu) AS cpu_min, + MAX(cpu) AS cpu_max, + AVG(cpu) AS cpu_avg, + MIN(net_in) AS net_in_min, + MAX(net_in) AS net_in_max, + AVG(net_in) AS net_in_avg, + MIN(net_out) AS net_out_min, + MAX(net_out) AS net_out_max, + AVG(net_out) AS net_out_avg +FROM ( + SELECT machine_name, + COALESCE(cpu_user, 0.0) AS cpu, + COALESCE(bytes_in, 0.0) AS net_in, + COALESCE(bytes_out, 0.0) AS net_out + FROM logs1 + WHERE machine_name IN ('anansi','aragog','urd') + AND log_time >= TIMESTAMP '2017-01-11 00:00:00' +) AS r +GROUP BY machine_name; +``` + + +```sql +-- Q1.2๏ผšๆœ€่ฟ‘ไธ€ๅคฉๆœ‰ๅ“ชไบ›ๆœบๆˆฟ็š„ๆœบๅ™จ็ฆป็บฟ๏ผŸ + +SELECT machine_name, + log_time +FROM logs1 +WHERE (machine_name LIKE 'cslab%' OR + machine_name LIKE 'mslab%') + AND load_one IS NULL + AND log_time >= TIMESTAMP '2017-01-10 00:00:00' +ORDER BY machine_name, + log_time; +``` + +```sql +-- Q1.3๏ผš็‰นๅฎšๅทฅไฝœ็ซ™่ฟ‡ๅŽป 10 ๅคฉ็š„ๆฏๅฐๆ—ถ็š„ๅนณๅ‡ๆŒ‡ๆ ‡ๆ˜ฏๅคšๅฐ‘๏ผŸ + +SELECT dt, + hr, + AVG(load_fifteen) AS load_fifteen_avg, + AVG(load_five) AS load_five_avg, + AVG(load_one) AS load_one_avg, + AVG(mem_free) AS mem_free_avg, + AVG(swap_free) AS swap_free_avg +FROM ( + SELECT CAST(log_time AS DATE) AS dt, + 
EXTRACT(HOUR FROM log_time) AS hr, + load_fifteen, + load_five, + load_one, + mem_free, + swap_free + FROM logs1 + WHERE machine_name = 'babbage' + AND load_fifteen IS NOT NULL + AND load_five IS NOT NULL + AND load_one IS NOT NULL + AND mem_free IS NOT NULL + AND swap_free IS NOT NULL + AND log_time >= TIMESTAMP '2017-01-01 00:00:00' +) AS r +GROUP BY dt, + hr +ORDER BY dt, + hr; +``` + +```sql +-- Q1.4: 1 ไธชๆœˆๅ†…๏ผŒๆฏๅฐๆœๅŠกๅ™จ็š„็ฃ็›˜ I/O ้˜ปๅกž็š„้ข‘็Ž‡ๆ˜ฏๅคšๅฐ‘๏ผŸ + +SELECT machine_name, + COUNT(*) AS spikes +FROM logs1 +WHERE machine_group = 'Servers' + AND cpu_wio > 0.99 + AND log_time >= TIMESTAMP '2016-12-01 00:00:00' + AND log_time < TIMESTAMP '2017-01-01 00:00:00' +GROUP BY machine_name +ORDER BY spikes DESC +LIMIT 10; +``` + +```sql +-- Q1.5๏ผšๅ“ชไบ›ๅค–้ƒจๅฏ่ฎฟ้—ฎ็š„่™šๆ‹Ÿๆœบ็š„่ฟ่กŒๅ†…ๅญ˜ไธ่ถณ๏ผŸ + +SELECT machine_name, + dt, + MIN(mem_free) AS mem_free_min +FROM ( + SELECT machine_name, + CAST(log_time AS DATE) AS dt, + mem_free + FROM logs1 + WHERE machine_group = 'DMZ' + AND mem_free IS NOT NULL +) AS r +GROUP BY machine_name, + dt +HAVING MIN(mem_free) < 10000 +ORDER BY machine_name, + dt; +``` + +```sql +-- Q1.6: ๆฏๅฐๆ—ถๆ‰€ๆœ‰ๆ–‡ไปถๆœๅŠกๅ™จ็š„ๆ€ป็ฝ‘็ปœๆต้‡ๆ˜ฏๅคšๅฐ‘๏ผŸ + +SELECT dt, + hr, + SUM(net_in) AS net_in_sum, + SUM(net_out) AS net_out_sum, + SUM(net_in) + SUM(net_out) AS both_sum +FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(HOUR FROM log_time) AS hr, + COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in, + COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out + FROM logs1 + WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon', + 'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey', + 'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps', + 'poprocks','razzles','runts','smarties','smuggler','spree','stride', + 'tootsie','trident','wrigley','york') +) AS r +GROUP BY dt, + hr +ORDER BY both_sum DESC +LIMIT 10; +``` + +```sql +-- Q2.1๏ผš่ฟ‡ๅŽป 2 
ๅ‘จๅ†…ๅ“ชไบ›่ฏทๆฑ‚ๅฏผ่‡ดไบ†ๆœๅŠกๅ™จ้”™่ฏฏ๏ผŸ + +SELECT * +FROM logs2 +WHERE status_code >= 500 + AND log_time >= TIMESTAMP '2012-12-18 00:00:00' +ORDER BY log_time; +``` + +```sql +-- Q2.2๏ผšๅœจ็‰นๅฎš็š„ๆŸ 2 ๅ‘จๅ†…๏ผŒ็”จๆˆทๅฏ†็ ๆ–‡ไปถๆ˜ฏๅฆ่ขซๆณ„้œฒไบ†๏ผŸ + +SELECT * +FROM logs2 +WHERE status_code >= 200 + AND status_code < 300 + AND request LIKE '%/etc/passwd%' + AND log_time >= TIMESTAMP '2012-05-06 00:00:00' + AND log_time < TIMESTAMP '2012-05-20 00:00:00'; +``` + + +```sql +-- Q2.3๏ผš่ฟ‡ๅŽปไธ€ไธชๆœˆ้กถ็บง่ฏทๆฑ‚็š„ๅนณๅ‡่ทฏๅพ„ๆทฑๅบฆๆ˜ฏๅคšๅฐ‘๏ผŸ + +SELECT top_level, + AVG(LENGTH(request) - LENGTH(REPLACE(request, '/', ''))) AS depth_avg +FROM ( + SELECT SUBSTRING(request FROM 1 FOR len) AS top_level, + request + FROM ( + SELECT POSITION(SUBSTRING(request FROM 2), '/') AS len, + request + FROM logs2 + WHERE status_code >= 200 + AND status_code < 300 + AND log_time >= TIMESTAMP '2012-12-01 00:00:00' + ) AS r + WHERE len > 0 +) AS s +WHERE top_level IN ('/about','/courses','/degrees','/events', + '/grad','/industry','/news','/people', + '/publications','/research','/teaching','/ugrad') +GROUP BY top_level +ORDER BY top_level; +``` + + +```sql +-- Q2.4๏ผšๅœจ่ฟ‡ๅŽป็š„ 3 ไธชๆœˆ้‡Œ๏ผŒๅ“ชไบ›ๅฎขๆˆท็ซฏๅ‘ๅ‡บไบ†่ฟ‡ๅคš็š„่ฏทๆฑ‚๏ผŸ + +SELECT client_ip, + COUNT(*) AS num_requests +FROM logs2 +WHERE log_time >= TIMESTAMP '2012-10-01 00:00:00' +GROUP BY client_ip +HAVING COUNT(*) >= 100000 +ORDER BY num_requests DESC; +``` + + +```sql +-- Q2.5๏ผšๆฏๅคฉ็š„็‹ฌ็ซ‹่ฎฟ้—ฎ่€…ๆ•ฐ้‡ๆ˜ฏๅคšๅฐ‘๏ผŸ + +SELECT dt, + COUNT(DISTINCT client_ip) +FROM ( + SELECT CAST(log_time AS DATE) AS dt, + client_ip + FROM logs2 +) AS r +GROUP BY dt +ORDER BY dt; +``` + + +```sql +-- Q2.6๏ผšๅนณๅ‡ๅ’Œๆœ€ๅคงๆ•ฐๆฎไผ ่พ“้€Ÿ็Ž‡๏ผˆGbps๏ผ‰ๆ˜ฏๅคšๅฐ‘๏ผŸ + +SELECT AVG(transfer) / 125000000.0 AS transfer_avg, + MAX(transfer) / 125000000.0 AS transfer_max +FROM ( + SELECT log_time, + SUM(object_size) AS transfer + FROM logs2 + GROUP BY log_time +) AS r; +``` + + +```sql +-- Q3.1๏ผš่‡ช 2019/11/29 17:00 
ไปฅๆฅ๏ผŒๅฎคๆธฉๆ˜ฏๅฆ่พพๅˆฐ่ฟ‡ๅ†ฐ็‚น๏ผŸ + +SELECT * +FROM logs3 +WHERE event_type = 'temperature' + AND event_value <= 32.0 + AND log_time >= '2019-11-29 17:00:00.000'; +``` + + +```sql +-- Q3.4๏ผšๅœจ่ฟ‡ๅŽป็š„ 6 ไธชๆœˆ้‡Œ๏ผŒๆฏๆ‰‡้—จๆ‰“ๅผ€็š„้ข‘็Ž‡ๆ˜ฏๅคšๅฐ‘๏ผŸ + +SELECT device_name, + device_floor, + COUNT(*) AS ct +FROM logs3 +WHERE event_type = 'door_open' + AND log_time >= '2019-06-01 00:00:00.000' +GROUP BY device_name, + device_floor +ORDER BY ct DESC; +``` + +ไธ‹้ข็š„ๆŸฅ่ฏข 3.5 ไฝฟ็”จไบ† UNION ๅ…ณ้”ฎ่ฏใ€‚่ฎพ็ฝฎ่ฏฅๆจกๅผไปฅไพฟ็ป„ๅˆ SELECT ็š„ๆŸฅ่ฏข็ป“ๆžœใ€‚่ฏฅ่ฎพ็ฝฎไป…ๅœจๆœชๆ˜Ž็กฎๆŒ‡ๅฎš UNION ALL ๆˆ– UNION DISTINCT ไฝ†ไฝฟ็”จไบ† UNION ่ฟ›่กŒๅ…ฑไบซๆ—ถไฝฟ็”จใ€‚ + +```sql +SET union_default_mode = 'DISTINCT' +``` + +```sql +-- Q3.5: ๅœจๅ†ฌๅญฃๅ’Œๅคๅญฃ๏ผŒๅปบ็ญ‘็‰ฉๅ†…ๅ“ชไบ›ๅœฐๆ–นไผšๅ‡บ็Žฐ่พƒๅคง็š„ๆธฉๅบฆๅ˜ๅŒ–๏ผŸ + +WITH temperature AS ( + SELECT dt, + device_name, + device_type, + device_floor + FROM ( + SELECT dt, + hr, + device_name, + device_type, + device_floor, + AVG(event_value) AS temperature_hourly_avg + FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(HOUR FROM log_time) AS hr, + device_name, + device_type, + device_floor, + event_value + FROM logs3 + WHERE event_type = 'temperature' + ) AS r + GROUP BY dt, + hr, + device_name, + device_type, + device_floor + ) AS s + GROUP BY dt, + device_name, + device_type, + device_floor + HAVING MAX(temperature_hourly_avg) - MIN(temperature_hourly_avg) >= 25.0 +) +SELECT DISTINCT device_name, + device_type, + device_floor, + 'WINTER' +FROM temperature +WHERE dt >= DATE '2018-12-01' + AND dt < DATE '2019-03-01' +UNION +SELECT DISTINCT device_name, + device_type, + device_floor, + 'SUMMER' +FROM temperature +WHERE dt >= DATE '2019-06-01' + AND dt < DATE '2019-09-01'; +``` + + +```sql +-- Q3.6๏ผšๅฏนไบŽๆฏ็ง็ฑปๅˆซ็š„่ฎพๅค‡๏ผŒๆฏๆœˆ็š„ๅŠŸ่€—ๆŒ‡ๆ ‡ๆ˜ฏไป€ไนˆ๏ผŸ + +SELECT yr, + mo, + SUM(coffee_hourly_avg) AS coffee_monthly_sum, + AVG(coffee_hourly_avg) AS coffee_monthly_avg, + SUM(printer_hourly_avg) AS 
printer_monthly_sum, + AVG(printer_hourly_avg) AS printer_monthly_avg, + SUM(projector_hourly_avg) AS projector_monthly_sum, + AVG(projector_hourly_avg) AS projector_monthly_avg, + SUM(vending_hourly_avg) AS vending_monthly_sum, + AVG(vending_hourly_avg) AS vending_monthly_avg +FROM ( + SELECT dt, + yr, + mo, + hr, + AVG(coffee) AS coffee_hourly_avg, + AVG(printer) AS printer_hourly_avg, + AVG(projector) AS projector_hourly_avg, + AVG(vending) AS vending_hourly_avg + FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(YEAR FROM log_time) AS yr, + EXTRACT(MONTH FROM log_time) AS mo, + EXTRACT(HOUR FROM log_time) AS hr, + CASE WHEN device_name LIKE 'coffee%' THEN event_value END AS coffee, + CASE WHEN device_name LIKE 'printer%' THEN event_value END AS printer, + CASE WHEN device_name LIKE 'projector%' THEN event_value END AS projector, + CASE WHEN device_name LIKE 'vending%' THEN event_value END AS vending + FROM logs3 + WHERE device_type = 'meter' + ) AS r + GROUP BY dt, + yr, + mo, + hr +) AS s +GROUP BY yr, + mo +ORDER BY yr, + mo; +``` + +ๆญคๆ•ฐๆฎ้›†ๅฏๅœจ [Playground](https://play.clickhouse.com/play?user=play) ไธญ่ฟ›่กŒไบคไบ’ๅผ็š„่ฏทๆฑ‚, 
[example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). From 2376df7b984bb2767f4ef53cf92c5b041ca639d3 Mon Sep 17 00:00:00 2001 From: jferroal Date: Mon, 17 Oct 2022 09:27:05 +0800 Subject: [PATCH 265/266] Fix: page slug --- docs/zh/getting-started/example-datasets/brown-benchmark.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/getting-started/example-datasets/brown-benchmark.mdx b/docs/zh/getting-started/example-datasets/brown-benchmark.mdx index 621d31a6673..6db4982f50f 100644 --- a/docs/zh/getting-started/example-datasets/brown-benchmark.mdx +++ b/docs/zh/getting-started/example-datasets/brown-benchmark.mdx @@ -1,5 +1,5 @@ --- -slug: /en/getting-started/example-datasets/brown-benchmark +slug: /zh/getting-started/example-datasets/brown-benchmark sidebar_label: ๅธƒๆœ—ๅคงๅญฆๅŸบๅ‡† description: ๆœบๅ™จ็”Ÿๆˆๆ—ฅๅฟ—ๆ•ฐๆฎ็š„ๆ–ฐๅˆ†ๆžๅŸบๅ‡† title: "ๅธƒๆœ—ๅคงๅญฆๅŸบๅ‡†" From 9af817bb438c8ae0cd47655cb81a73bf9d3bd2bf Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Mon, 17 Oct 2022 20:25:31 +0800 Subject: [PATCH 266/266] Fix read from buffer with read in order (#42236) --- src/Storages/StorageBuffer.cpp | 10 ++++++++++ 
.../0_stateless/02459_read_in_order_bufer.reference | 5 +++++ .../0_stateless/02459_read_in_order_bufer.sql | 13 +++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 tests/queries/0_stateless/02459_read_in_order_bufer.reference create mode 100644 tests/queries/0_stateless/02459_read_in_order_bufer.sql diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 3fc00a79bbe..f6b397950ed 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include #include @@ -334,6 +336,14 @@ void StorageBuffer::read( pipes_from_buffers.emplace_back(std::make_shared(column_names, buf, storage_snapshot)); pipe_from_buffers = Pipe::unitePipes(std::move(pipes_from_buffers)); + if (query_info.getInputOrderInfo()) + { + /// Each buffer has one block, and it not guaranteed that rows in each block are sorted by order keys + pipe_from_buffers.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, query_info.getInputOrderInfo()->sort_description_for_merging, 0); + }); + } } if (pipe_from_buffers.empty()) diff --git a/tests/queries/0_stateless/02459_read_in_order_bufer.reference b/tests/queries/0_stateless/02459_read_in_order_bufer.reference new file mode 100644 index 00000000000..b040bdf6167 --- /dev/null +++ b/tests/queries/0_stateless/02459_read_in_order_bufer.reference @@ -0,0 +1,5 @@ +9 +8 +7 +6 +5 diff --git a/tests/queries/0_stateless/02459_read_in_order_bufer.sql b/tests/queries/0_stateless/02459_read_in_order_bufer.sql new file mode 100644 index 00000000000..5a6e0a3dbc6 --- /dev/null +++ b/tests/queries/0_stateless/02459_read_in_order_bufer.sql @@ -0,0 +1,13 @@ +CREATE TABLE mytable_stored (`a` UInt8) ENGINE = MergeTree ORDER BY a; +CREATE TABLE mytable (`a` UInt8) ENGINE = Buffer(currentDatabase(), 'mytable_stored', 4, 600, 3600, 10, 100, 10000, 10000000); +INSERT INTO mytable VALUES (0); +INSERT INTO mytable VALUES 
(1); +INSERT INTO mytable VALUES (2); +INSERT INTO mytable VALUES (3); +INSERT INTO mytable VALUES (4); +INSERT INTO mytable VALUES (5); +INSERT INTO mytable VALUES (6); +INSERT INTO mytable VALUES (7); +INSERT INTO mytable VALUES (8); +INSERT INTO mytable VALUES (9); +SELECT a FROM mytable ORDER BY a DESC LIMIT 5;