From 1f85889f889bfa35c8c7bc27fade1762b20906dd Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 29 Mar 2024 18:53:03 -0400 Subject: [PATCH 001/141] FuzzQuery table function --- src/Client/ClientBase.h | 2 +- src/{Client => Common}/QueryFuzzer.cpp | 50 +++-- src/{Client => Common}/QueryFuzzer.h | 1 + src/Storages/StorageFuzzQuery.cpp | 174 ++++++++++++++++++ src/Storages/StorageFuzzQuery.h | 89 +++++++++ src/Storages/registerStorages.cpp | 2 + src/TableFunctions/TableFunctionFuzzQuery.cpp | 54 ++++++ src/TableFunctions/TableFunctionFuzzQuery.h | 42 +++++ src/TableFunctions/registerTableFunctions.cpp | 1 + src/TableFunctions/registerTableFunctions.h | 1 + .../03031_table_function_fuzzquery.reference | 11 ++ .../03031_table_function_fuzzquery.sql | 18 ++ 12 files changed, 426 insertions(+), 19 deletions(-) rename src/{Client => Common}/QueryFuzzer.cpp (97%) rename src/{Client => Common}/QueryFuzzer.h (99%) create mode 100644 src/Storages/StorageFuzzQuery.cpp create mode 100644 src/Storages/StorageFuzzQuery.h create mode 100644 src/TableFunctions/TableFunctionFuzzQuery.cpp create mode 100644 src/TableFunctions/TableFunctionFuzzQuery.h create mode 100644 tests/queries/0_stateless/03031_table_function_fuzzquery.reference create mode 100644 tests/queries/0_stateless/03031_table_function_fuzzquery.sql diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 9ec87ababfc..c0188253904 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -6,13 +6,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include diff --git a/src/Client/QueryFuzzer.cpp b/src/Common/QueryFuzzer.cpp similarity index 97% rename from src/Client/QueryFuzzer.cpp rename to src/Common/QueryFuzzer.cpp index 7be01686258..137d545f82f 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Common/QueryFuzzer.cpp @@ -68,22 +68,21 @@ Field QueryFuzzer::getRandomField(int type) { case 0: { - return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) - / sizeof(*bad_int64_values))]; + return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)]; } case 1: { static constexpr double values[] = {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999, 1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20, - FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))]; + FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)]; } case 2: { static constexpr UInt64 scales[] = {0, 1, 2, 10}; return DecimalField( - bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))], - static_cast(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))]) + bad_int64_values[fuzz_rand() % std::size(bad_int64_values)], + static_cast(scales[fuzz_rand() % std::size(scales)]) ); } default: @@ -165,7 +164,8 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - std::cerr << "erased\n"; + if (debug_output) + std::cerr << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -174,12 +174,14 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + if (debug_output) + std::cerr << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + if (debug_output) + std::cerr << "inserted (0)\n"; } } @@ -197,7 +199,9 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - std::cerr << "erased\n"; + + if (debug_output) + std::cerr << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -206,12 +210,16 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + + if (debug_output) + std::cerr << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + + if (debug_output) + std::cerr << "inserted (0)\n"; } } @@ -344,7 +352,8 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) } else { - std::cerr << "No random column.\n"; + if (debug_output) + std::cerr << "No random column.\n"; } } @@ -378,7 +387,8 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) if (col) impl->children.insert(pos, col); else - std::cerr << "No random column.\n"; + if (debug_output) + std::cerr << "No random column.\n"; } // We don't have to recurse here to fuzz the children, this is handled by @@ -1360,11 +1370,15 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) collectFuzzInfoMain(ast); fuzz(ast); - std::cout << std::endl; - WriteBufferFromOStream ast_buf(std::cout, 4096); - formatAST(*ast, ast_buf, false /*highlight*/); - ast_buf.finalize(); - std::cout << std::endl << std::endl; + if (debug_output) + { + std::cout << std::endl; + + WriteBufferFromOStream ast_buf(std::cout, 4096); + formatAST(*ast, ast_buf, false /*highlight*/); + ast_buf.finalize(); + std::cout << std::endl << std::endl; + } } } diff --git a/src/Client/QueryFuzzer.h b/src/Common/QueryFuzzer.h similarity index 99% rename from src/Client/QueryFuzzer.h rename to src/Common/QueryFuzzer.h index 6165e589cae..8a83934b620 100644 --- a/src/Client/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -38,6 +38,7 @@ struct ASTWindowDefinition; struct QueryFuzzer { pcg64 fuzz_rand{randomSeed()}; + bool debug_output = true; // We add elements to expression lists with fixed probability. Some elements // are so large, that the expected number of elements we add to them is diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp new file mode 100644 index 00000000000..c29986c7a7a --- /dev/null +++ b/src/Storages/StorageFuzzQuery.cpp @@ -0,0 +1,174 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; + extern const int INCORRECT_DATA; +} + +ColumnPtr FuzzQuerySource::createColumn() +{ + auto column = ColumnString::create(); + ColumnString::Chars & data_to = column->getChars(); + ColumnString::Offsets & offsets_to = column->getOffsets(); + + offsets_to.resize(block_size); + IColumn::Offset offset = 0; + + for (size_t row_num = 0; row_num < block_size; ++row_num) + { + ASTPtr new_query = query->clone(); + fuzzer.fuzzMain(new_query); + + WriteBufferFromOwnString out; + formatAST(*new_query, out, false); + auto data = out.str(); + size_t data_len = data.size(); + + IColumn::Offset next_offset = offset + data_len + 1; + data_to.resize(next_offset); + + std::copy(data.begin(), data.end(), &data_to[offset]); + + data_to[offset + data_len] = 0; + offsets_to[row_num] = next_offset; + + offset = next_offset; + } + + return column; +} + +StorageFuzzQuery::StorageFuzzQuery( + const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_) + : IStorage(table_id_), config(config_) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment_); + setInMemoryMetadata(storage_metadata); +} + +Pipe StorageFuzzQuery::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & /*query_info*/, + ContextPtr /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t num_streams) +{ + storage_snapshot->check(column_names); + + Pipes pipes; + pipes.reserve(num_streams); + + const ColumnsDescription & our_columns = storage_snapshot->metadata->getColumns(); + Block block_header; + for (const auto & name : column_names) + { + const auto & name_type = our_columns.get(name); + MutableColumnPtr column = name_type.type->createColumn(); + block_header.insert({std::move(column), name_type.type, name_type.name}); + } + + const char * begin = config.query.data(); + const char * end = begin + config.query.size(); + + ParserQuery parser(end, 0); + auto query = parseQuery(parser, begin, end, "", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + for (UInt64 i = 0; i < num_streams; ++i) + pipes.emplace_back(std::make_shared(max_block_size, block_header, config, query)); + + return Pipe::unitePipes(std::move(pipes)); +} + +static constexpr std::array optional_configuration_keys = {"query_str", "random_seed"}; + +void StorageFuzzQuery::processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection) +{ + validateNamedCollection( + collection, + std::unordered_set(), + std::unordered_set(optional_configuration_keys.begin(), optional_configuration_keys.end())); + + if (collection.has("query")) + configuration.query = collection.get("query"); + + if (collection.has("random_seed")) + configuration.random_seed = collection.get("random_seed"); +} + +StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine_args, ContextPtr local_context) +{ + StorageFuzzQuery::Configuration configuration{}; + + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + { + StorageFuzzQuery::processNamedCollectionResult(configuration, *named_collection); + } + else + { + // Supported signatures: + // + // FuzzQuery('query') + // FuzzQuery('query', 'random_seed') + if (engine_args.empty() || engine_args.size() > 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 2 arguments: query, random_seed"); + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); + + auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); + configuration.query = std::move(first_arg); + + if (engine_args.size() == 2) + { + const auto & literal = engine_args[1]->as(); + if (!literal.value.isNull()) + configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); + } + } + return configuration; +} + +void registerStorageFuzzQuery(StorageFactory & factory) +{ + factory.registerStorage( + "FuzzQuery", + [](const StorageFactory::Arguments & args) -> std::shared_ptr + { + ASTs & engine_args = args.engine_args; + + if (engine_args.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage FuzzQuery must have arguments."); + + StorageFuzzQuery::Configuration configuration = StorageFuzzQuery::getConfiguration(engine_args, args.getLocalContext()); + + for (const auto& col : args.columns) + if (col.type->getTypeId() != TypeIndex::String) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'StorageFuzzQuery' supports only columns of String type, got {}.", col.type->getName()); + + return std::make_shared(args.table_id, args.columns, args.comment, configuration); + }); +} + +} diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h new file mode 100644 index 00000000000..47142a81f16 --- /dev/null +++ b/src/Storages/StorageFuzzQuery.h @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include + +#include "config.h" + +namespace DB +{ + +class NamedCollection; + +class StorageFuzzQuery final : public IStorage +{ +public: + struct Configuration : public StatelessTableEngineConfiguration + { + String query = ""; + UInt64 random_seed = randomSeed(); + }; + + StorageFuzzQuery( + const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_); + + std::string getName() const override { return "FuzzQuery"; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection); + + static StorageFuzzQuery::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); + +private: + const Configuration config; +}; + + +class FuzzQuerySource : public ISource +{ +public: + FuzzQuerySource( + UInt64 block_size_, Block block_header_, const StorageFuzzQuery::Configuration & config_, ASTPtr query_) + : ISource(block_header_) + , block_size(block_size_) + , block_header(std::move(block_header_)) + , config(config_) + , query(query_) + { + fuzzer.fuzz_rand = config_.random_seed; + } + + String getName() const override { return "FuzzQuery"; } + +protected: + Chunk generate() override + { + Columns columns; + columns.reserve(block_header.columns()); + for (const auto & col : block_header) + { + chassert(col.type->getTypeId() == TypeIndex::String); + columns.emplace_back(createColumn()); + } + + return {std::move(columns), block_size}; + } + +private: + ColumnPtr createColumn(); + + UInt64 block_size; + Block block_header; + + StorageFuzzQuery::Configuration config; + ASTPtr query; + + QueryFuzzer fuzzer; +}; + +} diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index dea9feaf28b..f66d3ec3bfc 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -25,6 +25,7 @@ void registerStorageLiveView(StorageFactory & factory); void registerStorageGenerateRandom(StorageFactory & factory); void registerStorageExecutable(StorageFactory & factory); void registerStorageWindowView(StorageFactory & factory); +void registerStorageFuzzQuery(StorageFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerStorageFuzzJSON(StorageFactory & factory); #endif @@ -126,6 +127,7 @@ void registerStorages() registerStorageGenerateRandom(factory); registerStorageExecutable(factory); registerStorageWindowView(factory); + registerStorageFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerStorageFuzzJSON(factory); #endif diff --git a/src/TableFunctions/TableFunctionFuzzQuery.cpp b/src/TableFunctions/TableFunctionFuzzQuery.cpp new file mode 100644 index 00000000000..224f6666556 --- /dev/null +++ b/src/TableFunctions/TableFunctionFuzzQuery.cpp @@ -0,0 +1,54 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +void TableFunctionFuzzQuery::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + ASTs & args_func = ast_function->children; + + if (args_func.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments", getName()); + + auto args = args_func.at(0)->children; + configuration = StorageFuzzQuery::getConfiguration(args, context); +} + +StoragePtr TableFunctionFuzzQuery::executeImpl( + const ASTPtr & /*ast_function*/, + ContextPtr context, + const std::string & table_name, + ColumnsDescription /*cached_columns*/, + bool is_insert_query) const +{ + ColumnsDescription columns = getActualTableStructure(context, is_insert_query); + auto res = std::make_shared( + StorageID(getDatabaseName(), table_name), + columns, + /* comment */ String{}, + configuration); + res->startup(); + return res; +} + +void registerTableFunctionFuzzQuery(TableFunctionFactory & factory) +{ + factory.registerFunction( + {.documentation + = {.description = "Perturbs a query string with random variations.", + .returned_value = "A table object with a single column containing perturbed query strings."}, + .allow_readonly = true}); +} + +} diff --git a/src/TableFunctions/TableFunctionFuzzQuery.h b/src/TableFunctions/TableFunctionFuzzQuery.h new file mode 100644 index 00000000000..22d10341c4d --- /dev/null +++ b/src/TableFunctions/TableFunctionFuzzQuery.h @@ -0,0 +1,42 @@ +#pragma once + +#include + +#include +#include +#include + +#include "config.h" + +namespace DB +{ + +class TableFunctionFuzzQuery : public ITableFunction +{ +public: + static constexpr auto name = "fuzzQuery"; + std::string getName() const override { return name; } + + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + + ColumnsDescription getActualTableStructure(ContextPtr /* context */, bool /* is_insert_query */) const override + { + return ColumnsDescription{{"query", std::make_shared()}}; + } + +private: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return "fuzzQuery"; } + + String source; + std::optional random_seed; + StorageFuzzQuery::Configuration configuration; +}; + +} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 927457ff9f6..2952d0b7b70 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -25,6 +25,7 @@ void registerTableFunctions() registerTableFunctionMongoDB(factory); registerTableFunctionRedis(factory); registerTableFunctionMergeTreeIndex(factory); + registerTableFunctionFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerTableFunctionFuzzJSON(factory); #endif diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 296af146faf..eef262490bf 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -22,6 +22,7 @@ void registerTableFunctionGenerate(TableFunctionFactory & factory); void registerTableFunctionMongoDB(TableFunctionFactory & factory); void registerTableFunctionRedis(TableFunctionFactory & factory); void registerTableFunctionMergeTreeIndex(TableFunctionFactory & factory); +void registerTableFunctionFuzzQuery(TableFunctionFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerTableFunctionFuzzJSON(TableFunctionFactory & factory); #endif diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference new file mode 100644 index 00000000000..d598037127f --- /dev/null +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference @@ -0,0 +1,11 @@ +SELECT 1 +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC +SELECT\n item_id,\n *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], toNullable(\'Array(String)\')) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(materialize(toLowCardinality(3)))\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString(count(), (number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC NULLS FIRST,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n [toString((number % 2) * 2)],\n CAST([toString(number % 2)], toNullable(\'Array(LowCardinality(String))\')) AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE (number % 2) * toUInt128(2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC +SELECT\n toString((number % 2) * 2),\n *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql new file mode 100644 index 00000000000..5f5bb4b23e4 --- /dev/null +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql @@ -0,0 +1,18 @@ + +SELECT * FROM fuzzQuery('SELECT 1', 8956) LIMIT 1; + +SELECT * FROM fuzzQuery('SELECT * +FROM ( + SELECT + ([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id, + count() + FROM numbers(3) + GROUP BY item_id WITH TOTALS +) AS l FULL JOIN ( + SELECT + ([toString((number % 2) * 2)] :: Array(String)) AS item_id + FROM numbers(3) +) AS r +ON l.item_id = r.item_id +ORDER BY 1,2,3; +', 8956) LIMIT 10; From 77e28e29e9b2550204ea8d60647115587f36673e Mon Sep 17 00:00:00 2001 From: pufit Date: Sat, 30 Mar 2024 14:40:18 -0400 Subject: [PATCH 002/141] fix style --- src/Storages/StorageFuzzQuery.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index c29986c7a7a..56b6a4de2a6 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -19,8 +19,6 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int LOGICAL_ERROR; - extern const int INCORRECT_DATA; } ColumnPtr FuzzQuerySource::createColumn() From 6e611f7e81fe9977dd88f81cc4c24f3e1620ae8d Mon Sep 17 00:00:00 2001 From: pufit Date: Mon, 8 Apr 2024 16:18:29 +0200 Subject: [PATCH 003/141] fix review --- .../table-functions/fuzzQuery.md | 35 +++++++++ src/Common/QueryFuzzer.cpp | 40 +++++----- src/Common/QueryFuzzer.h | 37 ++++++--- src/Storages/StorageFuzzQuery.cpp | 75 +++++++++---------- src/Storages/StorageFuzzQuery.h | 4 +- .../03031_table_function_fuzzquery.reference | 18 ++--- 6 files changed, 124 insertions(+), 85 deletions(-) create mode 100644 docs/en/sql-reference/table-functions/fuzzQuery.md diff --git a/docs/en/sql-reference/table-functions/fuzzQuery.md b/docs/en/sql-reference/table-functions/fuzzQuery.md new file mode 100644 index 00000000000..ff8cfd1cd3b --- /dev/null +++ b/docs/en/sql-reference/table-functions/fuzzQuery.md @@ -0,0 +1,35 @@ +--- +slug: /en/sql-reference/table-functions/fuzzQuery +sidebar_position: 75 +sidebar_label: fuzzQuery +--- + +# fuzzQuery + +Perturbs the given query string with random variations. + +``` sql +fuzzQuery(query[, random_seed]) +``` + +**Arguments** + +- `query` (String) - The source query to perform the fuzzing on. +- `random_seed` (UInt64) - A random seed for producing stable results. + +**Returned Value** + +A table object with a single column containing perturbed query strings. + +## Usage Example + +``` sql +SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2; +``` + +``` + ┌─query──────────────────────────────────────────────────────────┐ +1. │ SELECT 'a' AS key GROUP BY key │ +2. │ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │ + └────────────────────────────────────────────────────────────────┘ +``` diff --git a/src/Common/QueryFuzzer.cpp b/src/Common/QueryFuzzer.cpp index 137d545f82f..6dd51033e3c 100644 --- a/src/Common/QueryFuzzer.cpp +++ b/src/Common/QueryFuzzer.cpp @@ -164,8 +164,8 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - if (debug_output) - std::cerr << "erased\n"; + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -174,14 +174,14 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - if (debug_output) - std::cerr << fmt::format("inserted (pos {})\n", pos); + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - if (debug_output) - std::cerr << "inserted (0)\n"; + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -200,8 +200,8 @@ Field QueryFuzzer::fuzzField(Field field) size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - if (debug_output) - std::cerr << "erased\n"; + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -211,15 +211,15 @@ Field QueryFuzzer::fuzzField(Field field) size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - if (debug_output) - std::cerr << fmt::format("inserted (pos {})\n", pos); + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - if (debug_output) - std::cerr << "inserted (0)\n"; + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -352,8 +352,8 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) } else { - if (debug_output) - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } } @@ -387,8 +387,8 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) if (col) impl->children.insert(pos, col); else - if (debug_output) - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } // We don't have to recurse here to fuzz the children, this is handled by @@ -1370,14 +1370,14 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) collectFuzzInfoMain(ast); fuzz(ast); - if (debug_output) + if (out_stream) { - std::cout << std::endl; + *out_stream << std::endl; - WriteBufferFromOStream ast_buf(std::cout, 4096); + WriteBufferFromOStream ast_buf(*out_stream, 4096); formatAST(*ast, ast_buf, false /*highlight*/); ast_buf.finalize(); - std::cout << std::endl << std::endl; + *out_stream << std::endl << std::endl; } } diff --git a/src/Common/QueryFuzzer.h b/src/Common/QueryFuzzer.h index 8a83934b620..3cf0381e044 100644 --- a/src/Common/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -35,10 +35,32 @@ struct ASTWindowDefinition; * queries, so you want to feed it a lot of queries to get some interesting mix * of them. Normally we feed SQL regression tests to it. */ -struct QueryFuzzer +class QueryFuzzer { - pcg64 fuzz_rand{randomSeed()}; - bool debug_output = true; +public: + + QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = &std::cout, std::ostream * debug_stream_ = &std::cerr) + : fuzz_rand(fuzz_rand_) + , out_stream(out_stream_) + , debug_stream(debug_stream_) + { + } + + // This is the only function you have to call -- it will modify the passed + // ASTPtr to point to new AST with some random changes. + void fuzzMain(ASTPtr & ast); + + ASTs getInsertQueriesForFuzzedTables(const String & full_query); + ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); + void notifyQueryFailed(ASTPtr ast); + + static bool isSuitableForFuzzing(const ASTCreateQuery & create); + +private: + pcg64 fuzz_rand; + + std::ostream * out_stream = nullptr; + std::ostream * debug_stream = nullptr; // We add elements to expression lists with fixed probability. Some elements // are so large, that the expected number of elements we add to them is @@ -67,10 +89,6 @@ struct QueryFuzzer std::unordered_map index_of_fuzzed_table; std::set created_tables_hashes; - // This is the only function you have to call -- it will modify the passed - // ASTPtr to point to new AST with some random changes. - void fuzzMain(ASTPtr & ast); - // Various helper functions follow, normally you shouldn't have to call them. Field getRandomField(int type); Field fuzzField(Field field); @@ -78,9 +96,6 @@ struct QueryFuzzer ASTPtr getRandomExpressionList(); DataTypePtr fuzzDataType(DataTypePtr type); DataTypePtr getRandomType(); - ASTs getInsertQueriesForFuzzedTables(const String & full_query); - ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); - void notifyQueryFailed(ASTPtr ast); void replaceWithColumnLike(ASTPtr & ast); void replaceWithTableLike(ASTPtr & ast); void fuzzOrderByElement(ASTOrderByElement * elem); @@ -103,8 +118,6 @@ struct QueryFuzzer void addTableLike(ASTPtr ast); void addColumnLike(ASTPtr ast); void collectFuzzInfoRecurse(ASTPtr ast); - - static bool isSuitableForFuzzing(const ASTCreateQuery & create); }; } diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index 56b6a4de2a6..e2b836c98b9 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -30,16 +30,29 @@ ColumnPtr FuzzQuerySource::createColumn() offsets_to.resize(block_size); IColumn::Offset offset = 0; - for (size_t row_num = 0; row_num < block_size; ++row_num) + auto fuzz_base = query; + size_t row_num = 0; + + while (row_num < block_size) { - ASTPtr new_query = query->clone(); + ASTPtr new_query = fuzz_base->clone(); + + auto base_before_fuzz = fuzz_base->formatForErrorMessage(); fuzzer.fuzzMain(new_query); + auto fuzzed_text = new_query->formatForErrorMessage(); WriteBufferFromOwnString out; formatAST(*new_query, out, false); auto data = out.str(); size_t data_len = data.size(); + /// AST is too long, will start from the original query. + if (data_len > 500) + { + fuzz_base = query; + continue; + } + IColumn::Offset next_offset = offset + data_len + 1; data_to.resize(next_offset); @@ -49,6 +62,8 @@ ColumnPtr FuzzQuerySource::createColumn() offsets_to[row_num] = next_offset; offset = next_offset; + fuzz_base = new_query; + ++row_num; } return column; @@ -99,52 +114,30 @@ Pipe StorageFuzzQuery::read( return Pipe::unitePipes(std::move(pipes)); } -static constexpr std::array optional_configuration_keys = {"query_str", "random_seed"}; - -void StorageFuzzQuery::processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection( - collection, - std::unordered_set(), - std::unordered_set(optional_configuration_keys.begin(), optional_configuration_keys.end())); - - if (collection.has("query")) - configuration.query = collection.get("query"); - - if (collection.has("random_seed")) - configuration.random_seed = collection.get("random_seed"); -} - StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine_args, ContextPtr local_context) { StorageFuzzQuery::Configuration configuration{}; - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + // Supported signatures: + // + // FuzzQuery('query') + // FuzzQuery('query', 'random_seed') + if (engine_args.empty() || engine_args.size() > 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 2 arguments: query, random_seed"); + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); + + auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); + configuration.query = std::move(first_arg); + + if (engine_args.size() == 2) { - StorageFuzzQuery::processNamedCollectionResult(configuration, *named_collection); + const auto & literal = engine_args[1]->as(); + if (!literal.value.isNull()) + configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); } - else - { - // Supported signatures: - // - // FuzzQuery('query') - // FuzzQuery('query', 'random_seed') - if (engine_args.empty() || engine_args.size() > 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 2 arguments: query, random_seed"); - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); - configuration.query = std::move(first_arg); - - if (engine_args.size() == 2) - { - const auto & literal = engine_args[1]->as(); - if (!literal.value.isNull()) - configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); - } - } return configuration; } diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h index 47142a81f16..e948d0b2acf 100644 --- a/src/Storages/StorageFuzzQuery.h +++ b/src/Storages/StorageFuzzQuery.h @@ -35,8 +35,6 @@ public: size_t max_block_size, size_t num_streams) override; - static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection); - static StorageFuzzQuery::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); private: @@ -54,8 +52,8 @@ public: , block_header(std::move(block_header_)) , config(config_) , query(query_) + , fuzzer(config_.random_seed, /* out_stream= */ nullptr, /* debug_stream= */ nullptr) { - fuzzer.fuzz_rand = config_.random_seed; } String getName() const override { return "FuzzQuery"; } diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference index d598037127f..c5b92291207 100644 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference @@ -1,11 +1,11 @@ SELECT 1 SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC -SELECT\n item_id,\n *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], toNullable(\'Array(String)\')) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(materialize(toLowCardinality(3)))\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString(count(), (number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC NULLS FIRST,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n [toString((number % 2) * 2)],\n CAST([toString(number % 2)], toNullable(\'Array(LowCardinality(String))\')) AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE (number % 2) * toUInt128(2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC -SELECT\n toString((number % 2) * 2),\n *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(materialize(toLowCardinality(3)))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC NULLS LAST +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\'),\n CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC NULLS FIRST +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\'),\n CAST([toString(multiply(number % 2, item_id, 2))]) AS item_id\n FROM numbers(3)\n WHERE \'Array(LowCardinality(String))\'\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 DESC NULLS FIRST +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC From 4e1f98ee7b78d48362a6788109658c8de859abd5 Mon Sep 17 00:00:00 2001 From: pufit Date: Tue, 9 Apr 2024 14:53:28 +0200 Subject: [PATCH 004/141] removed cout,cerr from src --- src/Client/ClientBase.h | 2 +- src/Common/QueryFuzzer.h | 3 +-- src/Storages/StorageFuzzQuery.h | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index c0188253904..2b05878c176 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -298,7 +298,7 @@ protected: bool send_external_tables = false; NameToNameMap query_parameters; /// Dictionary with query parameters for prepared statements. - QueryFuzzer fuzzer; + QueryFuzzer fuzzer{randomSeed(), &std::cout, &std::cerr}; int query_fuzzer_runs = 0; int create_query_fuzzer_runs = 0; diff --git a/src/Common/QueryFuzzer.h b/src/Common/QueryFuzzer.h index 3cf0381e044..bf87bdfb24e 100644 --- a/src/Common/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -38,8 +38,7 @@ struct ASTWindowDefinition; class QueryFuzzer { public: - - QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = &std::cout, std::ostream * debug_stream_ = &std::cerr) + QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) : fuzz_rand(fuzz_rand_) , out_stream(out_stream_) , debug_stream(debug_stream_) diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h index e948d0b2acf..40833190895 100644 --- a/src/Storages/StorageFuzzQuery.h +++ b/src/Storages/StorageFuzzQuery.h @@ -52,7 +52,7 @@ public: , block_header(std::move(block_header_)) , config(config_) , query(query_) - , fuzzer(config_.random_seed, /* out_stream= */ nullptr, /* debug_stream= */ nullptr) + , fuzzer(config_.random_seed) { } From 3bbf86d34506b74a92ca53e857d4c35b9406a08b Mon Sep 17 00:00:00 2001 From: pufit Date: Tue, 9 Apr 2024 14:54:58 +0200 Subject: [PATCH 005/141] update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index ee3ef1ae795..9d9bea11b82 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1581,6 +1581,7 @@ fuzzBits fuzzJSON fuzzer fuzzers +fuzzQuery gRPC gccMurmurHash gcem From 6329dc812462363ff6edf0388239f6bead2efacd Mon Sep 17 00:00:00 2001 From: pufit Date: Tue, 9 Apr 2024 15:05:49 +0200 Subject: [PATCH 006/141] fix --- programs/client/Client.h | 5 ++++- src/Client/ClientBase.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/programs/client/Client.h b/programs/client/Client.h index 11d9dec97b1..122b8e5ab3f 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -9,7 +9,10 @@ namespace DB class Client : public ClientBase { public: - Client() = default; + Client() + { + fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr); + } void initialize(Poco::Util::Application & self) override; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 2b05878c176..c0188253904 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -298,7 +298,7 @@ protected: bool send_external_tables = false; NameToNameMap query_parameters; /// Dictionary with query parameters for prepared statements. - QueryFuzzer fuzzer{randomSeed(), &std::cout, &std::cerr}; + QueryFuzzer fuzzer; int query_fuzzer_runs = 0; int create_query_fuzzer_runs = 0; From 76415ba3523921de138feb39fca7fbc65b8f6dc5 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 10 Apr 2024 15:27:53 +0200 Subject: [PATCH 007/141] add `explicit`, more stable tests --- src/Common/QueryFuzzer.h | 2 +- .../03031_table_function_fuzzquery.reference | 13 ++----------- .../0_stateless/03031_table_function_fuzzquery.sql | 4 ++-- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/src/Common/QueryFuzzer.h b/src/Common/QueryFuzzer.h index bf87bdfb24e..35d088809f2 100644 --- a/src/Common/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -38,7 +38,7 @@ struct ASTWindowDefinition; class QueryFuzzer { public: - QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) + explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) : fuzz_rand(fuzz_rand_) , out_stream(out_stream_) , debug_stream(debug_stream_) diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference index c5b92291207..202e4557a33 100644 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference @@ -1,11 +1,2 @@ -SELECT 1 -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(materialize(toLowCardinality(3)))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC NULLS LAST -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\'),\n CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC NULLS FIRST -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\'),\n CAST([toString(multiply(number % 2, item_id, 2))]) AS item_id\n FROM numbers(3)\n WHERE \'Array(LowCardinality(String))\'\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 DESC NULLS FIRST -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC +query +String diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql index 5f5bb4b23e4..5821e2e5111 100644 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql @@ -1,5 +1,5 @@ -SELECT * FROM fuzzQuery('SELECT 1', 8956) LIMIT 1; +SELECT * FROM fuzzQuery('SELECT 1', 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; SELECT * FROM fuzzQuery('SELECT * FROM ( @@ -15,4 +15,4 @@ FROM ( ) AS r ON l.item_id = r.item_id ORDER BY 1,2,3; -', 8956) LIMIT 10; +', 8956) LIMIT 10 FORMAT NULL; From 4df3cf3b151f407c4daaa7f8df80a6fddb836280 Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 11 Apr 2024 16:57:45 +0200 Subject: [PATCH 008/141] Update StorageFuzzQuery.h --- src/Storages/StorageFuzzQuery.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h index 40833190895..3ae506fdfb8 100644 --- a/src/Storages/StorageFuzzQuery.h +++ b/src/Storages/StorageFuzzQuery.h @@ -17,7 +17,7 @@ class StorageFuzzQuery final : public IStorage public: struct Configuration : public StatelessTableEngineConfiguration { - String query = ""; + String query; UInt64 random_seed = randomSeed(); }; From 562d76ccab6ffc53c9059139fead3b2e41ff4725 Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 12 Apr 2024 11:19:44 +0200 Subject: [PATCH 009/141] Update StorageFuzzQuery.cpp --- src/Storages/StorageFuzzQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index e2b836c98b9..5e29a04427b 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -105,7 +105,7 @@ Pipe StorageFuzzQuery::read( const char * begin = config.query.data(); const char * end = begin + config.query.size(); - ParserQuery parser(end, 0); + ParserQuery parser(end, false); auto query = parseQuery(parser, begin, end, "", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); for (UInt64 i = 0; i < num_streams; ++i) From 9ed3acce8223485798b22bcffcd7bd8d595cd025 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 10 May 2024 18:22:23 +0000 Subject: [PATCH 010/141] refactoring near azure blob storage --- src/Backups/BackupIO_AzureBlobStorage.cpp | 96 +++--- src/Backups/BackupIO_AzureBlobStorage.h | 15 +- .../registerBackupEngineAzureBlobStorage.cpp | 66 ++-- src/Core/Settings.h | 31 +- .../IO/WriteBufferFromAzureBlobStorage.cpp | 4 +- .../IO/WriteBufferFromAzureBlobStorage.h | 2 +- .../AzureBlobStorage/AzureBlobStorageAuth.h | 58 ---- ...ageAuth.cpp => AzureBlobStorageCommon.cpp} | 276 +++++++++------- .../AzureBlobStorage/AzureBlobStorageCommon.h | 139 ++++++++ .../AzureBlobStorage/AzureObjectStorage.cpp | 37 ++- .../AzureBlobStorage/AzureObjectStorage.h | 75 +---- .../Cached/CachedObjectStorage.h | 2 +- src/Disks/ObjectStorages/IObjectStorage.h | 2 +- .../ObjectStorages/ObjectStorageFactory.cpp | 17 +- .../copyAzureBlobStorageFile.cpp | 9 +- .../copyAzureBlobStorageFile.h | 4 +- src/Storages/StorageAzureBlob.cpp | 308 +++++------------- src/Storages/StorageAzureBlob.h | 26 +- src/Storages/StorageAzureBlobCluster.cpp | 5 +- .../TableFunctionAzureBlobStorage.cpp | 65 ++-- .../TableFunctionAzureBlobStorageCluster.cpp | 17 +- 21 files changed, 604 insertions(+), 650 deletions(-) delete mode 100644 src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h rename src/Disks/ObjectStorages/AzureBlobStorage/{AzureBlobStorageAuth.cpp => AzureBlobStorageCommon.cpp} (53%) create mode 100644 src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index a3998431674..3f60ed5c0b4 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -30,21 +31,21 @@ namespace ErrorCodes } BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( - StorageAzureBlob::Configuration configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} - , configuration(configuration_) + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.endpoint.container_name, false, false} + , connection_params(connection_params_) + , blob_path(blob_path_) { - auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); - client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); - + auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); object_storage = std::make_unique("BackupReaderAzureBlobStorage", std::move(client_ptr), - StorageAzureBlob::createSettings(context_), - configuration_.container); + AzureBlobStorage::getRequestSettings(context_->getSettingsRef()), + connection_params.endpoint.container_name); client = object_storage->getAzureBlobStorageClient(); settings = object_storage->getSettings(); } @@ -53,20 +54,20 @@ BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; bool BackupReaderAzureBlobStorage::fileExists(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); return object_metadata.size_bytes; } std::unique_ptr BackupReaderAzureBlobStorage::readFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ -81,23 +82,23 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, && (destination_data_source_description.is_encrypted == encrypted_in_backup)) { LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName()); - auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional &) -> size_t + auto write_blob_function = [&](const Strings & dst_blob_path, WriteMode mode, const std::optional &) -> size_t { /// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files. - if (blob_path.size() != 2 || mode != WriteMode::Rewrite) + if (dst_blob_path.size() != 2 || mode != WriteMode::Rewrite) throw Exception(ErrorCodes::LOGICAL_ERROR, "Blob writing function called with unexpected blob_path.size={} or mode={}", - blob_path.size(), mode); + dst_blob_path.size(), mode); copyAzureBlobStorageFile( client, destination_disk->getObjectStorage()->getAzureBlobStorageClient(), - configuration.container, - fs::path(configuration.blob_path) / path_in_backup, + connection_params.endpoint.container_name, + fs::path(blob_path) / path_in_backup, 0, file_size, - /* dest_container */ blob_path[1], - /* dest_path */ blob_path[0], + /* dest_container */ dst_blob_path[1], + /* dest_path */ dst_blob_path[0], settings, read_settings, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupRDAzure")); @@ -115,22 +116,25 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( - StorageAzureBlob::Configuration configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} - , configuration(configuration_) + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.endpoint.container_name, false, false} + , connection_params(connection_params_) + , blob_path(blob_path_) { - auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container); - client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); + if (!attempt_to_create_container) + connection_params.endpoint.container_already_exists = true; + auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); object_storage = std::make_unique("BackupWriterAzureBlobStorage", std::move(client_ptr), - StorageAzureBlob::createSettings(context_), - configuration_.container); + AzureBlobStorage::getRequestSettings(context_->getSettingsRef()), + connection_params.endpoint.container_name); client = object_storage->getAzureBlobStorageClient(); settings = object_storage->getSettings(); } @@ -144,18 +148,18 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu { /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage container. /// In this case we can't use the native copy. - if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2) + if (auto src_blob_path = src_disk->getBlobPath(src_path); src_blob_path.size() == 2) { LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorag", src_path, src_disk->getName()); copyAzureBlobStorageFile( src_disk->getObjectStorage()->getAzureBlobStorageClient(), client, - /* src_container */ blob_path[1], - /* src_path */ blob_path[0], + /* src_container */ src_blob_path[1], + /* src_path */ src_blob_path[0], start_pos, length, - configuration.container, - fs::path(configuration.blob_path) / path_in_backup, + connection_params.endpoint.container_name, + fs::path(blob_path) / path_in_backup, settings, read_settings, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); @@ -173,11 +177,11 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St copyAzureBlobStorageFile( client, client, - configuration.container, - fs::path(configuration.blob_path)/ source, + connection_params.endpoint.container_name, + fs::path(blob_path)/ source, 0, size, - /* dest_container */ configuration.container, + /* dest_container */ connection_params.endpoint.container_name, /* dest_path */ destination, settings, read_settings, @@ -186,21 +190,29 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { - copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, fs::path(configuration.blob_path) / path_in_backup, settings, - threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); + copyDataToAzureBlobStorageFile( + create_read_buffer, + start_pos, + length, + client, + connection_params.endpoint.container_name, + fs::path(blob_path) / path_in_backup, + settings, + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), + "BackupWRAzure")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; bool BackupWriterAzureBlobStorage::fileExists(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; RelativePathsWithMetadata children; object_storage->listObjects(key,children,/*max_keys*/0); if (children.empty()) @@ -210,7 +222,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ -218,7 +230,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, @@ -230,7 +242,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin void BackupWriterAzureBlobStorage::removeFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; StoredObject object(key); object_storage->removeObjectIfExists(object); } @@ -239,7 +251,7 @@ void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(fs::path(configuration.blob_path) / file_name); + objects.emplace_back(fs::path(blob_path) / file_name); object_storage->removeObjectsIfExist(objects); @@ -249,7 +261,7 @@ void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(fs::path(configuration.blob_path) / file_name); + objects.emplace_back(fs::path(blob_path) / file_name); object_storage->removeObjectsIfExist(objects); } diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index f0b9aace4d4..0829c3258c9 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -1,5 +1,6 @@ #pragma once +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -16,7 +17,7 @@ namespace DB class BackupReaderAzureBlobStorage : public BackupReaderDefault { public: - BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupReaderAzureBlobStorage(const AzureBlobStorage::ConnectionParams & connection_params_, const String & blob_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); ~BackupReaderAzureBlobStorage() override; bool fileExists(const String & file_name) override; @@ -29,15 +30,16 @@ public: private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlob::Configuration configuration; + AzureBlobStorage::ConnectionParams connection_params; + String blob_path; std::unique_ptr object_storage; - std::shared_ptr settings; + std::shared_ptr settings; }; class BackupWriterAzureBlobStorage : public BackupWriterDefault { public: - BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container); + BackupWriterAzureBlobStorage(const AzureBlobStorage::ConnectionParams & connection_params_, const String & blob_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container); ~BackupWriterAzureBlobStorage() override; bool fileExists(const String & file_name) override; @@ -58,9 +60,10 @@ private: void removeFilesBatch(const Strings & file_names); const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlob::Configuration configuration; + AzureBlobStorage::ConnectionParams connection_params; + String blob_path; std::unique_ptr object_storage; - std::shared_ptr settings; + std::shared_ptr settings; }; } diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 1b9545fc455..6974d16e2f6 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -1,3 +1,4 @@ +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #include @@ -49,7 +50,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) const String & id_arg = params.backup_info.id_arg; const auto & args = params.backup_info.args; - StorageAzureBlob::Configuration configuration; + String blob_path; + AzureBlobStorage::ConnectionParams connection_params; + auto request_settings = AzureBlobStorage::getRequestSettings(params.context->getSettingsRef()); if (!id_arg.empty()) { @@ -59,54 +62,41 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (!config.has(config_prefix)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg); - if (config.has(config_prefix + ".connection_string")) + connection_params = { - configuration.connection_url = config.getString(config_prefix + ".connection_string"); - configuration.is_connection_string = true; - configuration.container = config.getString(config_prefix + ".container"); - } - else - { - configuration.connection_url = config.getString(config_prefix + ".storage_account_url"); - configuration.is_connection_string = false; - configuration.container = config.getString(config_prefix + ".container"); - configuration.account_name = config.getString(config_prefix + ".account_name"); - configuration.account_key = config.getString(config_prefix + ".account_key"); - - if (config.has(config_prefix + ".account_name") && config.has(config_prefix + ".account_key")) - { - configuration.account_name = config.getString(config_prefix + ".account_name"); - configuration.account_key = config.getString(config_prefix + ".account_key"); - } - } + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true), + }; if (args.size() > 1) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]"); if (args.size() == 1) - configuration.blob_path = args[0].safeGet(); - + blob_path = args[0].safeGet(); } else { if (args.size() == 3) { - configuration.connection_url = args[0].safeGet(); - configuration.is_connection_string = !configuration.connection_url.starts_with("http"); + auto connection_url = args[0].safeGet(); + auto container_name = args[1].safeGet(); + blob_path = args[2].safeGet(); - configuration.container = args[1].safeGet(); - configuration.blob_path = args[2].safeGet(); + AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true); } else if (args.size() == 5) { - configuration.connection_url = args[0].safeGet(); - configuration.is_connection_string = false; + connection_params.endpoint.storage_account_url = args[0].safeGet(); + connection_params.endpoint.container_name = args[1].safeGet(); + blob_path = args[2].safeGet(); - configuration.container = args[1].safeGet(); - configuration.blob_path = args[2].safeGet(); - configuration.account_name = args[3].safeGet(); - configuration.account_key = args[4].safeGet(); + auto account_name = args[3].safeGet(); + auto account_key = args[4].safeGet(); + connection_params.auth_method = std::make_shared(account_name, account_key); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true); } else { @@ -116,12 +106,12 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } BackupImpl::ArchiveParams archive_params; - if (hasRegisteredArchiveFileExtension(configuration.blob_path)) + if (hasRegisteredArchiveFileExtension(blob_path)) { if (params.is_internal_backup) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled"); - archive_params.archive_name = removeFileNameFromURL(configuration.blob_path); + archive_params.archive_name = removeFileNameFromURL(blob_path); archive_params.compression_method = params.compression_method; archive_params.compression_level = params.compression_level; archive_params.password = params.password; @@ -135,7 +125,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (params.open_mode == IBackup::OpenMode::READ) { - auto reader = std::make_shared(configuration, + auto reader = std::make_shared( + connection_params, + blob_path, params.read_settings, params.write_settings, params.context); @@ -150,7 +142,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } else { - auto writer = std::make_shared(configuration, + auto writer = std::make_shared( + connection_params, + blob_path, params.read_settings, params.write_settings, params.context, diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 04029983d84..e09fef794d7 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -79,23 +79,13 @@ class IColumn; M(UInt64, distributed_connections_pool_size, 1024, "Maximum number of connections with one remote server in the pool.", 0) \ M(UInt64, connections_with_failover_max_tries, 3, "The maximum number of attempts to connect to replicas.", 0) \ M(UInt64, s3_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to S3 (some implementations does not supports variable size parts).", 0) \ - M(UInt64, azure_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to Azure blob storage.", 0) \ M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \ - M(UInt64, azure_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage.", 0) \ - M(UInt64, azure_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage.", 0) \ M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \ M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \ - M(UInt64, azure_upload_part_size_multiply_factor, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage.", 0) \ - M(UInt64, azure_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor.", 0) \ M(UInt64, s3_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited.", 0) \ - M(UInt64, azure_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited.", 0) \ M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ - M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \ - M(UInt64, azure_max_single_part_copy_size, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage.", 0) \ M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ - M(UInt64, azure_max_single_read_retries, 4, "The maximum number of retries during single Azure blob storage read.", 0) \ - M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \ M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ @@ -105,20 +95,33 @@ class IColumn; M(UInt64, s3_max_put_burst, 0, "Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_put_rps`", 0) \ M(UInt64, s3_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ M(Bool, s3_use_adaptive_timeouts, true, "When adaptive timeouts are enabled first two attempts are made with low receive and send timeout", 0) \ - M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ - M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ M(Bool, s3_skip_empty_files, false, "Allow to skip empty files in s3 table engine", 0) \ - M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \ M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \ - M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \ M(UInt64, s3_connect_timeout_ms, 1000, "Connection timeout for host from s3 disks.", 0) \ + M(UInt64, azure_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to Azure blob storage.", 0) \ + M(UInt64, azure_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage.", 0) \ + M(UInt64, azure_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage.", 0) \ + M(UInt64, azure_upload_part_size_multiply_factor, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage.", 0) \ + M(UInt64, azure_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor.", 0) \ + M(UInt64, azure_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited.", 0) \ + M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \ + M(UInt64, azure_max_single_part_copy_size, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage.", 0) \ + M(UInt64, azure_max_single_read_retries, 4, "The maximum number of retries during single Azure blob storage read.", 0) \ + M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \ + M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ + M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff beetween retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff beetween retries in azure sdk", 0) \ + M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \ + M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \ + M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \ M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \ M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \ M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. The value can be overwritten per table with table settings", 0) \ diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 2c90e3a9003..cadae33e23e 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -26,7 +26,7 @@ struct WriteBufferFromAzureBlobStorage::PartData std::string block_id; }; -BufferAllocationPolicyPtr createBufferAllocationPolicy(const AzureObjectStorageSettings & settings) +BufferAllocationPolicyPtr createBufferAllocationPolicy(const AzureBlobStorage::RequestSettings & settings) { BufferAllocationPolicy::Settings allocation_settings; allocation_settings.strict_size = settings.strict_upload_part_size; @@ -44,7 +44,7 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( const String & blob_path_, size_t buf_size_, const WriteSettings & write_settings_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) , log(getLogger("WriteBufferFromAzureBlobStorage")) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 96ba6acefff..f47ba92beab 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -35,7 +35,7 @@ public: const String & blob_path_, size_t buf_size_, const WriteSettings & write_settings_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); ~WriteBufferFromAzureBlobStorage() override; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h deleted file mode 100644 index e4775a053c1..00000000000 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include - -namespace DB -{ - -struct AzureBlobStorageEndpoint -{ - const String storage_account_url; - const String account_name; - const String container_name; - const String prefix; - const std::optional container_already_exists; - - String getEndpoint() - { - String url = storage_account_url; - if (url.ends_with('/')) - url.pop_back(); - - if (!account_name.empty()) - url += "/" + account_name; - - if (!container_name.empty()) - url += "/" + container_name; - - if (!prefix.empty()) - url += "/" + prefix; - - return url; - } - - String getEndpointWithoutContainer() - { - String url = storage_account_url; - - if (!account_name.empty()) - url += "/" + account_name; - - return url; - } -}; - -std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); - -} - -#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp similarity index 53% rename from src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp rename to src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index a535b007541..76054efff19 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -1,4 +1,8 @@ -#include +#include +#include +#include +#include +#include #if USE_AZURE_BLOB_STORAGE @@ -7,13 +11,9 @@ #include #include #include -#include #include #include -using namespace Azure::Storage::Blobs; - - namespace DB { @@ -22,8 +22,10 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace AzureBlobStorage +{ -void validateStorageAccountUrl(const String & storage_account_url) +static void validateStorageAccountUrl(const String & storage_account_url) { const auto * storage_account_url_pattern_str = R"(http(()|s)://[a-z0-9-.:]+(()|/)[a-z0-9]*(()|/))"; static const RE2 storage_account_url_pattern(storage_account_url_pattern_str); @@ -33,8 +35,7 @@ void validateStorageAccountUrl(const String & storage_account_url) "Blob Storage URL is not valid, should follow the format: {}, got: {}", storage_account_url_pattern_str, storage_account_url); } - -void validateContainerName(const String & container_name) +static void validateContainerName(const String & container_name) { auto len = container_name.length(); if (len < 3 || len > 64) @@ -50,13 +51,51 @@ void validateContainerName(const String & container_name) container_name_pattern_str, container_name); } +static bool isConnectionString(const std::string & candidate) +{ + return !candidate.starts_with("http"); +} -AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +String ConnectionParams::getConnectionURL() const +{ + if (std::holds_alternative(auth_method)) + { + auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(endpoint.storage_account_url); + return parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl(); + } + + return endpoint.storage_account_url; +} + +std::unique_ptr ConnectionParams::createForService() const +{ + return std::visit([this](const T & auth) + { + if constexpr (std::is_same_v) + return std::make_unique(ServiceClient::CreateFromConnectionString(auth.toUnderType(), client_options)); + else + return std::make_unique(endpoint.getEndpointWithoutContainer(), auth, client_options); + }, auth_method); +} + +std::unique_ptr ConnectionParams::createForContainer() const +{ + return std::visit([this](const T & auth) + { + if constexpr (std::is_same_v) + return std::make_unique(ContainerClient::CreateFromConnectionString(auth.toUnderType(), endpoint.container_name, client_options)); + else + return std::make_unique(endpoint.getEndpoint(), auth, client_options); + }, auth_method); +} + +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { String storage_url; String account_name; String container_name; String prefix; + if (config.has(config_prefix + ".endpoint")) { String endpoint = config.getString(config_prefix + ".endpoint"); @@ -71,48 +110,48 @@ AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::Abstr if (endpoint_contains_account_name) { - size_t acc_pos_begin = endpoint.find('/', pos+2); + size_t acc_pos_begin = endpoint.find('/', pos + 2); if (acc_pos_begin == std::string::npos) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected account_name in endpoint"); - storage_url = endpoint.substr(0,acc_pos_begin); - size_t acc_pos_end = endpoint.find('/',acc_pos_begin+1); + storage_url = endpoint.substr(0, acc_pos_begin); + size_t acc_pos_end = endpoint.find('/', acc_pos_begin + 1); if (acc_pos_end == std::string::npos) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - account_name = endpoint.substr(acc_pos_begin+1,(acc_pos_end-acc_pos_begin)-1); + account_name = endpoint.substr(acc_pos_begin + 1, acc_pos_end - acc_pos_begin - 1); - size_t cont_pos_end = endpoint.find('/', acc_pos_end+1); + size_t cont_pos_end = endpoint.find('/', acc_pos_end + 1); if (cont_pos_end != std::string::npos) { - container_name = endpoint.substr(acc_pos_end+1,(cont_pos_end-acc_pos_end)-1); - prefix = endpoint.substr(cont_pos_end+1); + container_name = endpoint.substr(acc_pos_end + 1, cont_pos_end - acc_pos_end - 1); + prefix = endpoint.substr(cont_pos_end + 1); } else { - container_name = endpoint.substr(acc_pos_end+1); + container_name = endpoint.substr(acc_pos_end + 1); } } else { - size_t cont_pos_begin = endpoint.find('/', pos+2); + size_t cont_pos_begin = endpoint.find('/', pos + 2); if (cont_pos_begin == std::string::npos) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - storage_url = endpoint.substr(0,cont_pos_begin); - size_t cont_pos_end = endpoint.find('/',cont_pos_begin+1); + storage_url = endpoint.substr(0, cont_pos_begin); + size_t cont_pos_end = endpoint.find('/', cont_pos_begin + 1); if (cont_pos_end != std::string::npos) { - container_name = endpoint.substr(cont_pos_begin+1,(cont_pos_end-cont_pos_begin)-1); - prefix = endpoint.substr(cont_pos_end+1); + container_name = endpoint.substr(cont_pos_begin + 1,cont_pos_end - cont_pos_begin - 1); + prefix = endpoint.substr(cont_pos_end + 1); } else { - container_name = endpoint.substr(cont_pos_begin+1); + container_name = endpoint.substr(cont_pos_begin + 1); } } } @@ -132,122 +171,117 @@ AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::Abstr if (!container_name.empty()) validateContainerName(container_name); + std::optional container_already_exists {}; if (config.has(config_prefix + ".container_already_exists")) container_already_exists = {config.getBool(config_prefix + ".container_already_exists")}; - return {storage_url, account_name, container_name, prefix, container_already_exists}; + + return {storage_url, account_name, container_name, prefix, "", container_already_exists}; } - -template -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name, const BlobClientOptions & client_options) = delete; - -template<> -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & /*container_name*/, const BlobClientOptions & client_options) +void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method) { - return std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_str, client_options)); -} + endpoint.container_name = container_name; -template<> -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name, const BlobClientOptions & client_options) -{ - return std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_str, container_name, client_options)); -} - -template -std::unique_ptr getAzureBlobStorageClientWithAuth( - const String & url, - const String & container_name, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - const Azure::Storage::Blobs::BlobClientOptions & client_options) -{ - std::string connection_str; - if (config.has(config_prefix + ".connection_string")) - connection_str = config.getString(config_prefix + ".connection_string"); - - if (!connection_str.empty()) - return getClientWithConnectionString(connection_str, container_name, client_options); - - if (config.has(config_prefix + ".account_key") && config.has(config_prefix + ".account_name")) + if (isConnectionString(url)) { - auto storage_shared_key_credential = std::make_shared( - config.getString(config_prefix + ".account_name"), - config.getString(config_prefix + ".account_key") - ); - return std::make_unique(url, storage_shared_key_credential, client_options); + endpoint.storage_account_url = url; + auth_method = ConnectionString{url}; + return; } - if (config.getBool(config_prefix + ".use_workload_identity", false)) - { - auto workload_identity_credential = std::make_shared(); - return std::make_unique(url, workload_identity_credential); - } + size_t pos = url.find('?'); - auto managed_identity_credential = std::make_shared(); - return std::make_unique(url, managed_identity_credential, client_options); + /// If conneciton_url does not have '?', then its not SAS + if (pos == std::string::npos) + { + endpoint.storage_account_url = url; + auth_method = std::make_shared(); + } + else + { + endpoint.storage_account_url = url.substr(0, pos); + endpoint.sas_auth = url.substr(pos + 1); + auth_method = std::make_shared(); + } } -Azure::Storage::Blobs::BlobClientOptions getAzureBlobClientOptions(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +std::unique_ptr getContainerClient(const ConnectionParams & params, bool readonly) { - Azure::Core::Http::Policies::RetryOptions retry_options; - retry_options.MaxRetries = config.getUInt(config_prefix + ".max_tries", 10); - retry_options.RetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10)); - retry_options.MaxRetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000)); - - using CurlOptions = Azure::Core::Http::CurlTransportOptions; - CurlOptions curl_options; - curl_options.NoSignal = true; - - if (config.has(config_prefix + ".curl_ip_resolve")) - { - auto value = config.getString(config_prefix + ".curl_ip_resolve"); - if (value == "ipv4") - curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V4; - else if (value == "ipv6") - curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V6; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}. Expected one of 'ipv4' or 'ipv6'", value); - } - - Azure::Storage::Blobs::BlobClientOptions client_options; - client_options.Retry = retry_options; - client_options.Transport.Transport = std::make_shared(curl_options); - - client_options.ClickhouseOptions = Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}; - - return client_options; -} - -std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - auto endpoint = processAzureBlobStorageEndpoint(config, config_prefix); - auto container_name = endpoint.container_name; - auto final_url = endpoint.getEndpoint(); - auto client_options = getAzureBlobClientOptions(config, config_prefix); - - if (endpoint.container_already_exists.value_or(false)) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); - - auto blob_service_client = getAzureBlobStorageClientWithAuth(endpoint.getEndpointWithoutContainer(), container_name, config, config_prefix, client_options); + if (params.endpoint.container_already_exists.value_or(false) || readonly) + return params.createForContainer(); try { - return std::make_unique(blob_service_client->CreateBlobContainer(container_name).Value); + auto service_client = params.createForService(); + return std::make_unique(service_client->CreateBlobContainer(params.endpoint.container_name).Value); } catch (const Azure::Storage::StorageException & e) { /// If container_already_exists is not set (in config), ignore already exists error. /// (Conflict - The specified container already exists) - if (!endpoint.container_already_exists.has_value() && e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); + if (!params.endpoint.container_already_exists.has_value() && e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) + return params.createForContainer(); throw; } } -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { - std::unique_ptr settings = std::make_unique(); + if (config.has(config_prefix + ".account_key") && config.has(config_prefix + ".account_name")) + { + return std::make_shared( + config.getString(config_prefix + ".account_name"), + config.getString(config_prefix + ".account_key") + ); + } + + if (config.has(config_prefix + ".connection_string")) + return ConnectionString{config.getString(config_prefix + ".connection_string")}; + + if (config.getBool(config_prefix + ".use_workload_identity", false)) + return std::make_shared(); + + return std::make_shared(); +} + +BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk) +{ + Azure::Core::Http::Policies::RetryOptions retry_options; + retry_options.MaxRetries = static_cast(settings.sdk_max_retries); + retry_options.RetryDelay = std::chrono::milliseconds(settings.sdk_retry_initial_backoff_ms); + retry_options.MaxRetryDelay = std::chrono::milliseconds(settings.sdk_retry_max_backoff_ms); + + Azure::Core::Http::CurlTransportOptions curl_options; + curl_options.NoSignal = true; + curl_options.IPResolve = settings.curl_ip_resolve; + + Azure::Storage::Blobs::BlobClientOptions client_options; + client_options.Retry = retry_options; + client_options.Transport.Transport = std::make_shared(curl_options); + client_options.ClickhouseOptions = Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=for_disk}; + + return client_options; +} + +std::unique_ptr getRequestSettings(const Settings & query_settings) +{ + auto settings_ptr = std::make_unique(); + + settings_ptr->max_single_part_upload_size = query_settings.azure_max_single_part_upload_size; + settings_ptr->max_single_read_retries = query_settings.azure_max_single_read_retries; + settings_ptr->list_object_keys_size = static_cast(query_settings.azure_list_object_keys_size); + settings_ptr->sdk_max_retries = query_settings.azure_sdk_max_retries; + settings_ptr->sdk_retry_initial_backoff_ms = query_settings.azure_sdk_retry_initial_backoff_ms; + settings_ptr->sdk_retry_max_backoff_ms = query_settings.azure_sdk_retry_max_backoff_ms; + + return settings_ptr; +} + +std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +{ + auto settings = std::make_unique(); + settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", context->getSettings().azure_max_single_part_upload_size); settings->min_bytes_for_seek = config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024); settings->max_single_read_retries = config.getInt(config_prefix + ".max_single_read_retries", 3); @@ -262,10 +296,28 @@ std::unique_ptr getAzureBlobStorageSettings(const Po settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", context->getSettings().azure_strict_upload_part_size); settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", context->getSettings().azure_upload_part_size_multiply_factor); settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", context->getSettings().azure_upload_part_size_multiply_parts_count_threshold); + settings->sdk_max_retries = config.getUInt(config_prefix + ".max_tries", 10); + settings->sdk_retry_initial_backoff_ms = config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10); + settings->sdk_retry_max_backoff_ms = config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000); + + if (config.has(config_prefix + ".curl_ip_resolve")) + { + using CurlOptions = Azure::Core::Http::CurlTransportOptions; + + auto value = config.getString(config_prefix + ".curl_ip_resolve"); + if (value == "ipv4") + settings->curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_V4; + else if (value == "ipv6") + settings->curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_V6; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}. Expected one of 'ipv4' or 'ipv6'", value); + } return settings; } } +} + #endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h new file mode 100644 index 00000000000..7e716adf4d0 --- /dev/null +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ -0,0 +1,139 @@ +#pragma once + +#include +#include +#include "base/strong_typedef.h" +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace AzureBlobStorage +{ + +using ServiceClient = Azure::Storage::Blobs::BlobServiceClient; +using ContainerClient = Azure::Storage::Blobs::BlobContainerClient; +using BlobClient = Azure::Storage::Blobs::BlobClient; +using BlobClientOptions = Azure::Storage::Blobs::BlobClientOptions; + +struct RequestSettings +{ + RequestSettings() = default; + + size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset + uint64_t min_bytes_for_seek = 1024 * 1024; + size_t max_single_read_retries = 3; + size_t max_single_download_retries = 3; + int list_object_keys_size = 1000; + size_t min_upload_part_size = 16 * 1024 * 1024; + size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; + size_t max_single_part_copy_size = 256 * 1024 * 1024; + bool use_native_copy = false; + size_t max_unexpected_write_error_retries = 4; + size_t max_inflight_parts_for_one_file = 20; + size_t strict_upload_part_size = 0; + size_t upload_part_size_multiply_factor = 2; + size_t upload_part_size_multiply_parts_count_threshold = 500; + size_t sdk_max_retries = 10; + size_t sdk_retry_initial_backoff_ms = 10; + size_t sdk_retry_max_backoff_ms = 1000; + + using CurlOptions = Azure::Core::Http::CurlTransportOptions; + CurlOptions::CurlOptIPResolve curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_WHATEVER; +}; + +struct Endpoint +{ + String storage_account_url; + String account_name; + String container_name; + String prefix; + String sas_auth; + std::optional container_already_exists; + + String getEndpoint() const + { + String url = storage_account_url; + if (url.ends_with('/')) + url.pop_back(); + + if (!account_name.empty()) + url += "/" + account_name; + + if (!container_name.empty()) + url += "/" + container_name; + + if (!prefix.empty()) + url += "/" + prefix; + + if (!sas_auth.empty()) + url += "?" + sas_auth; + + return url; + } + + String getEndpointWithoutContainer() const + { + String url = storage_account_url; + + if (!account_name.empty()) + url += "/" + account_name; + + if (!sas_auth.empty()) + url += "?" + sas_auth; + + return url; + } +}; + +using ConnectionString = StrongTypedef; + +using AuthMethod = std::variant< + ConnectionString, + std::shared_ptr, + std::shared_ptr, + std::shared_ptr>; + +struct ConnectionParams +{ + Endpoint endpoint; + AuthMethod auth_method; + BlobClientOptions client_options; + + String getContainer() const { return endpoint.container_name; } + String getConnectionURL() const; + + std::unique_ptr createForService() const; + std::unique_ptr createForContainer() const; +}; + +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method); + +std::unique_ptr getContainerClient(const ConnectionParams & params, bool readonly); + +BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk); +AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +std::unique_ptr getRequestSettings(const Settings & query_settings); +std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + +} + +} + +#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 36225b13ee8..d0f39beb3ca 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include @@ -105,7 +105,7 @@ private: AzureObjectStorage::AzureObjectStorage( const String & name_, - AzureClientPtr && client_, + ClientPtr && client_, SettingsPtr && settings_, const String & object_namespace_) : name(name_) @@ -397,20 +397,37 @@ void AzureObjectStorage::copyObject( /// NOLINT void AzureObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { - auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context); + auto new_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + bool is_client_for_disk = client.get()->GetClickhouseOptions().IsClientForDisk; + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*new_settings, is_client_for_disk), + }; + + auto new_client = AzureBlobStorage::getContainerClient(params, /*readonly=*/ true); + settings.set(std::move(new_settings)); - /// We don't update client + client.set(std::move(new_client)); } std::unique_ptr AzureObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { - return std::make_unique( - name, - getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context), - object_namespace - ); + auto new_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + bool is_client_for_disk = client.get()->GetClickhouseOptions().IsClientForDisk; + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*new_settings, is_client_for_disk), + }; + + auto new_client = AzureBlobStorage::getContainerClient(params, /*readonly=*/ true); + return std::make_unique(name, std::move(new_client), std::move(new_settings), object_namespace); } } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index f52ab803012..d8440453852 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -1,4 +1,5 @@ #pragma once +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -7,6 +8,7 @@ #include #include #include +#include namespace Poco { @@ -16,70 +18,15 @@ class Logger; namespace DB { -struct AzureObjectStorageSettings -{ - AzureObjectStorageSettings( - uint64_t max_single_part_upload_size_, - uint64_t min_bytes_for_seek_, - int max_single_read_retries_, - int max_single_download_retries_, - int list_object_keys_size_, - size_t min_upload_part_size_, - size_t max_upload_part_size_, - size_t max_single_part_copy_size_, - bool use_native_copy_, - size_t max_unexpected_write_error_retries_, - size_t max_inflight_parts_for_one_file_, - size_t strict_upload_part_size_, - size_t upload_part_size_multiply_factor_, - size_t upload_part_size_multiply_parts_count_threshold_) - : max_single_part_upload_size(max_single_part_upload_size_) - , min_bytes_for_seek(min_bytes_for_seek_) - , max_single_read_retries(max_single_read_retries_) - , max_single_download_retries(max_single_download_retries_) - , list_object_keys_size(list_object_keys_size_) - , min_upload_part_size(min_upload_part_size_) - , max_upload_part_size(max_upload_part_size_) - , max_single_part_copy_size(max_single_part_copy_size_) - , use_native_copy(use_native_copy_) - , max_unexpected_write_error_retries(max_unexpected_write_error_retries_) - , max_inflight_parts_for_one_file(max_inflight_parts_for_one_file_) - , strict_upload_part_size(strict_upload_part_size_) - , upload_part_size_multiply_factor(upload_part_size_multiply_factor_) - , upload_part_size_multiply_parts_count_threshold(upload_part_size_multiply_parts_count_threshold_) - { - } - - AzureObjectStorageSettings() = default; - - size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset - uint64_t min_bytes_for_seek = 1024 * 1024; - size_t max_single_read_retries = 3; - size_t max_single_download_retries = 3; - int list_object_keys_size = 1000; - size_t min_upload_part_size = 16 * 1024 * 1024; - size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; - size_t max_single_part_copy_size = 256 * 1024 * 1024; - bool use_native_copy = false; - size_t max_unexpected_write_error_retries = 4; - size_t max_inflight_parts_for_one_file = 20; - size_t strict_upload_part_size = 0; - size_t upload_part_size_multiply_factor = 2; - size_t upload_part_size_multiply_parts_count_threshold = 500; -}; - -using AzureClient = Azure::Storage::Blobs::BlobContainerClient; -using AzureClientPtr = std::unique_ptr; - class AzureObjectStorage : public IObjectStorage { public: - - using SettingsPtr = std::unique_ptr; + using ClientPtr = std::unique_ptr; + using SettingsPtr = std::unique_ptr; AzureObjectStorage( const String & name_, - AzureClientPtr && client_, + ClientPtr && client_, SettingsPtr && settings_, const String & object_namespace_); @@ -156,12 +103,8 @@ public: bool isRemote() const override { return true; } - std::shared_ptr getSettings() { return settings.get(); } - - std::shared_ptr getAzureBlobStorageClient() override - { - return client.get(); - } + std::shared_ptr getSettings() const { return settings.get(); } + std::shared_ptr getAzureBlobStorageClient() const override{ return client.get(); } private: using SharedAzureClientPtr = std::shared_ptr; @@ -169,8 +112,8 @@ private: const String name; /// client used to access the files in the Blob Storage cloud - MultiVersion client; - MultiVersion settings; + MultiVersion client; + MultiVersion settings; const String object_namespace; /// container + prefix LoggerPtr log; diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 961c2709efc..60818933dec 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -120,7 +120,7 @@ public: const FileCacheSettings & getCacheSettings() const { return cache_settings; } #if USE_AZURE_BLOB_STORAGE - std::shared_ptr getAzureBlobStorageClient() override + std::shared_ptr getAzureBlobStorageClient() const override { return object_storage->getAzureBlobStorageClient(); } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index eae31af9d44..27a58053752 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -238,7 +238,7 @@ public: virtual void setKeysGenerator(ObjectStorageKeysGeneratorPtr) { } #if USE_AZURE_BLOB_STORAGE - virtual std::shared_ptr getAzureBlobStorageClient() + virtual std::shared_ptr getAzureBlobStorageClient() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for AzureBlobStorage"); } diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 7b949db268b..f8c1c564191 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -13,7 +13,7 @@ #endif #if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) #include -#include +#include #endif #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include @@ -293,12 +293,19 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) const ContextPtr & context, bool /* skip_access_check */) -> ObjectStoragePtr { - AzureBlobStorageEndpoint endpoint = processAzureBlobStorageEndpoint(config, config_prefix); + auto azure_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*azure_settings, /*for_disk=*/ true), + }; + return createObjectStorage( ObjectStorageType::Azure, config, config_prefix, name, - getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context), - endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix); + AzureBlobStorage::getContainerClient(params, /*readonly=*/ false), std::move(azure_settings), + params.endpoint.prefix.empty() ? params.endpoint.container_name : params.endpoint.container_name + "/" + params.endpoint.prefix); }; factory.registerObjectStorageType("azure_blob_storage", creator); factory.registerObjectStorageType("azure", creator); diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 769f1a184f6..d648796b5df 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -44,7 +44,7 @@ namespace size_t total_size_, const String & dest_container_for_logging_, const String & dest_blob_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_, const Poco::Logger * log_) : create_read_buffer(create_read_buffer_) @@ -69,7 +69,7 @@ namespace size_t total_size; const String & dest_container_for_logging; const String & dest_blob; - std::shared_ptr settings; + std::shared_ptr settings; ThreadPoolCallbackRunnerUnsafe schedule; const Poco::Logger * log; size_t max_single_part_upload_size; @@ -265,7 +265,7 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr dest_client, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule) { UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; @@ -282,11 +282,10 @@ void copyAzureBlobStorageFile( size_t size, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule) { - if (settings->use_native_copy) { ProfileEvents::increment(ProfileEvents::AzureCopyObject); diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 6ad54923ab5..73b91191b96 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -29,7 +29,7 @@ void copyAzureBlobStorageFile( size_t src_size, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); @@ -46,7 +46,7 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr client, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index f2e2833dad4..d8fd5cbf05a 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1,4 +1,6 @@ +#include #include +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #if USE_AZURE_BLOB_STORAGE #include @@ -95,43 +97,62 @@ const std::unordered_set optional_configuration_keys = { "storage_account_url", }; -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); } -} - -void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection) +void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection, const ContextPtr & local_context) { validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + String connection_url; + String container_name; + std::optional account_name; + std::optional account_key; + if (collection.has("connection_string")) - { - configuration.connection_url = collection.get("connection_string"); - configuration.is_connection_string = true; - } + connection_url = collection.get("connection_string"); + else if (collection.has("storage_account_url")) + connection_url = collection.get("storage_account_url"); - if (collection.has("storage_account_url")) - { - configuration.connection_url = collection.get("storage_account_url"); - configuration.is_connection_string = false; - } - - configuration.container = collection.get("container"); + container_name = collection.get("container"); configuration.blob_path = collection.get("blob_path"); if (collection.has("account_name")) - configuration.account_name = collection.get("account_name"); + account_name = collection.get("account_name"); if (collection.has("account_key")) - configuration.account_key = collection.get("account_key"); + account_key = collection.get("account_key"); configuration.structure = collection.getOrDefault("structure", "auto"); configuration.format = collection.getOrDefault("format", configuration.format); configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + configuration.connection_params = getConnectionParams(connection_url, container_name, account_name, account_key, local_context); } +AzureBlobStorage::ConnectionParams StorageAzureBlob::getConnectionParams( + const String & connection_url, + const String & container_name, + const std::optional & account_name, + const std::optional & account_key, + const ContextPtr & local_context) +{ + AzureBlobStorage::ConnectionParams connection_params; + auto request_settings = AzureBlobStorage::getRequestSettings(local_context->getSettingsRef()); + + if (account_name && account_key) + { + connection_params.endpoint.storage_account_url = connection_url; + connection_params.endpoint.container_name = container_name; + connection_params.auth_method = std::make_shared(*account_name, *account_key); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ false); + } + else + { + AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ false); + } + + return connection_params; +} StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine_args, const ContextPtr & local_context) { @@ -144,8 +165,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) { - processNamedCollectionResult(configuration, *named_collection); - + processNamedCollectionResult(configuration, *named_collection, local_context); configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") @@ -164,11 +184,12 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine std::unordered_map engine_args_to_idx; - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); + String connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + String container = checkAndGetLiteralArgument(engine_args[1], "container"); + configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blob_path"); - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); + std::optional account_name; + std::optional account_key; auto is_format_arg = [] (const std::string & s) -> bool { @@ -198,8 +219,8 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine } else { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); } } else if (engine_args.size() == 6) @@ -211,12 +232,13 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine } else { - configuration.account_name = fourth_arg; + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (!is_format_arg(sixth_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + configuration.format = sixth_arg; } } @@ -229,17 +251,20 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine } else { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (!is_format_arg(sixth_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + configuration.format = sixth_arg; configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); } } configuration.blobs_paths = {configuration.blob_path}; + configuration.connection_params = getConnectionParams(connection_url, container, account_name, account_key, local_context); if (configuration.format == "auto") configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); @@ -247,18 +272,6 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine return configuration; } - -AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(const ContextPtr & local_context) -{ - const auto & context_settings = local_context->getSettingsRef(); - auto settings_ptr = std::make_unique(); - settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); - - return settings_ptr; -} - void registerStorageAzureBlob(StorageFactory & factory) { factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) @@ -268,7 +281,8 @@ void registerStorageAzureBlob(StorageFactory & factory) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); auto configuration = StorageAzureBlob::getConfiguration(engine_args, args.getLocalContext()); - auto client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); + auto client = AzureBlobStorage::getContainerClient(configuration.connection_params, /*readonly=*/ false); + // Use format settings from global server context + settings from // the SETTINGS clause of the create query. Settings from current // session and user are ignored. @@ -299,11 +313,11 @@ void registerStorageAzureBlob(StorageFactory & factory) if (args.storage_def->partition_by) partition_by = args.storage_def->partition_by->clone(); - auto settings = StorageAzureBlob::createSettings(args.getContext()); + auto azure_settings = AzureBlobStorage::getRequestSettings(args.getContext()->getSettingsRef()); return std::make_shared( - std::move(configuration), - std::make_unique("AzureBlobStorage", std::move(client), std::move(settings),configuration.container), + configuration, + std::make_unique("AzureBlobStorage", std::move(client), std::move(azure_settings), configuration.connection_params.getContainer()), args.getContext(), args.table_id, args.columns, @@ -321,177 +335,6 @@ void registerStorageAzureBlob(StorageFactory & factory) }); } -static bool containerExists(std::unique_ptr &blob_service_client, std::string container_name) -{ - Azure::Storage::Blobs::ListBlobContainersOptions options; - options.Prefix = container_name; - options.PageSizeHint = 1; - - auto containers_list_response = blob_service_client->ListBlobContainers(options); - auto containers_list = containers_list_response.BlobContainers; - - for (const auto & container : containers_list) - { - if (container_name == container.Name) - return true; - } - return false; -} - -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container) -{ - AzureClientPtr result; - - if (configuration.is_connection_string) - { - std::shared_ptr managed_identity_credential = std::make_shared(); - std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); - result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - - if (attempt_to_create_container) - { - bool container_exists = containerExists(blob_service_client,configuration.container); - if (!container_exists) - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - - try - { - result->CreateIfNotExists(); - } - catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) - { - throw; - } - } - } - } - } - else - { - std::shared_ptr storage_shared_key_credential; - if (configuration.account_name.has_value() && configuration.account_key.has_value()) - { - storage_shared_key_credential - = std::make_shared(*configuration.account_name, *configuration.account_key); - } - - std::unique_ptr blob_service_client; - size_t pos = configuration.connection_url.find('?'); - std::shared_ptr managed_identity_credential; - if (storage_shared_key_credential) - { - blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); - } - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, workload_identity_credential); - } - else - { - managed_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, managed_identity_credential); - } - } - - std::string final_url; - if (pos != std::string::npos) - { - auto url_without_sas = configuration.connection_url.substr(0, pos); - final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + configuration.container - + configuration.connection_url.substr(pos); - } - else - final_url - = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; - - if (!attempt_to_create_container) - { - if (storage_shared_key_credential) - return std::make_unique(final_url, storage_shared_key_credential); - else - return std::make_unique(final_url, managed_identity_credential); - } - - bool container_exists = containerExists(blob_service_client,configuration.container); - if (container_exists) - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - try - { - result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.") - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - throw; - } - } - } - } - - return result; -} - -Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const -{ - if (!is_connection_string) - return Poco::URI(connection_url); - - auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); - return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); -} - - StorageAzureBlob::StorageAzureBlob( const Configuration & configuration_, std::unique_ptr && object_storage_, @@ -513,7 +356,8 @@ StorageAzureBlob::StorageAzureBlob( { if (configuration.format != "auto") FormatFactory::instance().checkFormatName(configuration.format); - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.getConnectionURL()); + + context->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI(configuration.connection_params.getConnectionURL())); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) @@ -850,13 +694,13 @@ void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate) { /// Iterate through disclosed globs and make a source for each file iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blob_path, + storage->object_storage.get(), configuration.connection_params.getContainer(), configuration.blob_path, predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); } else { iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blobs_paths, + storage->object_storage.get(), configuration.connection_params.getContainer(), configuration.blobs_paths, predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); } } @@ -879,8 +723,8 @@ void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, cons max_block_size, configuration.compression_method, storage->object_storage.get(), - configuration.container, - configuration.connection_url, + configuration.connection_params.getContainer(), + configuration.connection_params.endpoint.storage_account_url, iterator_wrapper, need_only_count)); } @@ -1455,7 +1299,8 @@ namespace if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) return; - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; + const auto & params = configuration.connection_params; + String source = fs::path(params.getConnectionURL()) / params.getContainer() / current_path_with_metadata.relative_path; auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -1466,7 +1311,8 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) return; - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; + const auto & params = configuration.connection_params; + String source = fs::path(params.getConnectionURL()) / params.getContainer() / current_path_with_metadata.relative_path; auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); } @@ -1477,7 +1323,9 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) return; - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; + const auto & params = configuration.connection_params; + auto host_and_bucket = params.getConnectionURL() + '/' + params.getContainer(); + Strings sources; sources.reserve(read_keys.size()); std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); @@ -1520,8 +1368,10 @@ namespace return std::nullopt; }; - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; + const auto & params = configuration.connection_params; + auto host_and_bucket = params.getConnectionURL() + '/' + params.getContainer(); String source = host_and_bucket + '/' + it->relative_path; + if (format) { auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); @@ -1573,12 +1423,12 @@ std::pair StorageAzureBlob::getTableStructureAndForm if (configuration.withGlobs()) { file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); + object_storage, configuration.connection_params.getContainer(), configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); } else { file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); + object_storage, configuration.connection_params.getContainer(), configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); } ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, format, configuration, format_settings, read_keys, ctx); diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 20e7f4a6c90..affa02928b6 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -1,5 +1,7 @@ #pragma once +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" +#include "Interpreters/Context_fwd.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -37,21 +39,13 @@ public: bool withWildcard() const { - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; + static constexpr auto PARTITION_ID_WILDCARD = "{_partition_id}"; return blobs_paths.back().find(PARTITION_ID_WILDCARD) != String::npos; } - Poco::URI getConnectionURL() const; - - std::string connection_url; - bool is_connection_string; - - std::optional account_name; - std::optional account_key; - - std::string container; std::string blob_path; std::vector blobs_paths; + AzureBlobStorage::ConnectionParams connection_params; }; StorageAzureBlob( @@ -67,16 +61,10 @@ public: ASTPtr partition_by_); static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context); - static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container = true); + static AzureBlobStorage::ConnectionParams getConnectionParams(const String & connection_url, const String & container_name, const std::optional & account_name, const std::optional & account_key, const ContextPtr & local_context); + static void processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection, const ContextPtr & local_context); - static AzureObjectStorage::SettingsPtr createSettings(const ContextPtr & local_context); - - static void processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection); - - String getName() const override - { - return name; - } + String getName() const override { return name; } void read( QueryPlan & query_plan, diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp index a80d121567a..6f6ae8763fd 100644 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ b/src/Storages/StorageAzureBlobCluster.cpp @@ -1,4 +1,5 @@ #include "Storages/StorageAzureBlobCluster.h" +#include #include "config.h" @@ -41,7 +42,7 @@ StorageAzureBlobCluster::StorageAzureBlobCluster( , configuration{configuration_} , object_storage(std::move(object_storage_)) { - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); + context->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI(configuration_.connection_params.getConnectionURL())); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) @@ -79,7 +80,7 @@ void StorageAzureBlobCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, cons RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { auto iterator = std::make_shared( - object_storage.get(), configuration.container, configuration.blob_path, + object_storage.get(), configuration.connection_params.getContainer(), configuration.blob_path, predicate, getVirtualsList(), context, nullptr); auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next().relative_path; }); diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index 275cd2a9cbb..c471a72d8c7 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -35,16 +35,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -namespace -{ - -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) { /// Supported signatures: @@ -54,7 +44,7 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) { - StorageAzureBlob::processNamedCollectionResult(configuration, *named_collection); + StorageAzureBlob::processNamedCollectionResult(configuration, *named_collection, local_context); configuration.blobs_paths = {configuration.blob_path}; @@ -74,14 +64,14 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const std::unordered_map engine_args_to_idx; - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); + String connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + String container = checkAndGetLiteralArgument(engine_args[1], "container"); configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; + std::optional account_name; + std::optional account_key; + + auto is_format_arg = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; if (engine_args.size() == 4) { @@ -105,8 +95,8 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const } else { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); } } else if (engine_args.size() == 6) @@ -120,8 +110,9 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const } else { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name/structure"); if (is_format_arg(sixth_arg)) configuration.format = sixth_arg; @@ -132,28 +123,33 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const else if (engine_args.size() == 7) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (!is_format_arg(sixth_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + configuration.format = sixth_arg; configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); } else if (engine_args.size() == 8) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (!is_format_arg(sixth_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + configuration.format = sixth_arg; configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); configuration.structure = checkAndGetLiteralArgument(engine_args[7], "structure"); } configuration.blobs_paths = {configuration.blob_path}; + configuration.connection_params = StorageAzureBlob::getConnectionParams(connection_url, container, account_name, account_key, local_context); if (configuration.format == "auto") configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); @@ -330,12 +326,19 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex if (configuration.structure == "auto") { context->checkAccess(getSourceAccessType()); - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container); + auto client = AzureBlobStorage::getContainerClient(configuration.connection_params, !is_insert_query); + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); + + auto object_storage = std::make_unique( + "AzureBlobStorageTableFunction", + std::move(client), + std::move(settings), + configuration.connection_params.getContainer()); + if (configuration.format == "auto") return StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, std::nullopt, context).first; + return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context); } @@ -354,8 +357,8 @@ std::unordered_set TableFunctionAzureBlobStorage::getVirtualsToCheckBefo StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); + auto client = AzureBlobStorage::getContainerClient(configuration.connection_params, !is_insert_query); + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); ColumnsDescription columns; if (configuration.structure != "auto") @@ -365,7 +368,7 @@ StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct StoragePtr storage = std::make_shared( configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.connection_params.getContainer()), context, StorageID(getDatabaseName(), table_name), columns, diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp index 04dddca7672..fb311c74657 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -1,3 +1,4 @@ +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -31,30 +32,30 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( columns = structure_hint; } - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); + auto client = AzureBlobStorage::getContainerClient(configuration.connection_params, !is_insert_query); if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) { /// On worker node this filename won't contains globs storage = std::make_shared( configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.connection_params.getContainer()), context, StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, - /* comment */String{}, - /* format_settings */std::nullopt, /// No format_settings - /* distributed_processing */ true, - /*partition_by_=*/nullptr); + /*comment=*/ String{}, + /*format_settings=*/ std::nullopt, /// No format_settings + /*distributed_processing=*/ true, + /*partition_by=*/ nullptr); } else { storage = std::make_shared( cluster_name, configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.connection_params.getContainer()), StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, From de3d95a7f05156330dc3cbad3dee7265a027b074 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sat, 11 May 2024 21:00:08 +0000 Subject: [PATCH 011/141] fix style check --- src/Core/Settings.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a04d7f54884..1776163688e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -117,8 +117,8 @@ class IColumn; M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \ M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \ - M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff beetween retries in azure sdk", 0) \ - M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff beetween retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff between retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff between retries in azure sdk", 0) \ M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \ M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \ M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \ From 9ef86e948ecb1408d6ce8df2e2602584d591252c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 13 May 2024 16:07:28 +0000 Subject: [PATCH 012/141] fix tests --- src/Backups/BackupIO_AzureBlobStorage.cpp | 1 - src/Backups/BackupIO_AzureBlobStorage.h | 5 +---- .../registerBackupEngineAzureBlobStorage.cpp | 3 +-- .../AzureBlobStorageCommon.cpp | 4 ---- .../AzureBlobStorage/AzureBlobStorageCommon.h | 6 +----- src/Storages/StorageAzureBlob.cpp | 19 +++++++------------ src/Storages/StorageAzureBlob.h | 3 --- .../TableFunctionAzureBlobStorageCluster.cpp | 4 ---- .../test_storage_azure_blob_storage/test.py | 2 +- 9 files changed, 11 insertions(+), 36 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 3f60ed5c0b4..6ae67ad5dfc 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 0829c3258c9..8f0a6e8fb5d 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -1,13 +1,10 @@ #pragma once - -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE #include #include -#include -#include +#include namespace DB diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 6974d16e2f6..98920d80662 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -1,4 +1,3 @@ -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #include @@ -6,7 +5,7 @@ #if USE_AZURE_BLOB_STORAGE #include -#include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index 76054efff19..a39cc89b93b 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -1,8 +1,4 @@ -#include -#include -#include #include -#include #if USE_AZURE_BLOB_STORAGE diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h index 7e716adf4d0..5f9f280ad4a 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ -1,8 +1,4 @@ #pragma once - -#include -#include -#include "base/strong_typedef.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -15,10 +11,10 @@ #include #include -#include #include #include #include +#include namespace DB { diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 2341b8dc94e..a82de72af6d 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1,6 +1,4 @@ -#include #include -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #if USE_AZURE_BLOB_STORAGE #include @@ -44,6 +42,7 @@ #include #include +#include #include @@ -1307,8 +1306,7 @@ namespace if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) return; - const auto & params = configuration.connection_params; - String source = fs::path(params.getConnectionURL()) / params.getContainer() / current_path_with_metadata.relative_path; + String source = fs::path(configuration.connection_params.endpoint.getEndpoint()) / current_path_with_metadata.relative_path; auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -1319,8 +1317,7 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) return; - const auto & params = configuration.connection_params; - String source = fs::path(params.getConnectionURL()) / params.getContainer() / current_path_with_metadata.relative_path; + String source = fs::path(configuration.connection_params.endpoint.getEndpoint()) / current_path_with_metadata.relative_path; auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); } @@ -1331,12 +1328,11 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) return; - const auto & params = configuration.connection_params; - auto host_and_bucket = params.getConnectionURL() + '/' + params.getContainer(); + auto endpoint = fs::path(configuration.connection_params.endpoint.getEndpoint()); Strings sources; sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); + std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem) { return endpoint / elem.relative_path; }); auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } @@ -1376,9 +1372,8 @@ namespace return std::nullopt; }; - const auto & params = configuration.connection_params; - auto host_and_bucket = params.getConnectionURL() + '/' + params.getContainer(); - String source = host_and_bucket + '/' + it->relative_path; + auto endpoint = fs::path(configuration.connection_params.endpoint.getEndpoint()); + String source = endpoint / it->relative_path; if (format) { diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 936f32c3cb0..396934b4212 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -1,7 +1,4 @@ #pragma once - -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" -#include "Interpreters/Context_fwd.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp index fb311c74657..d72735bb47b 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -1,4 +1,3 @@ -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -7,11 +6,8 @@ #include #include #include - #include "registerTableFunctions.h" -#include - namespace DB { diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 78aaf26a2a7..c3204808d6f 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -789,7 +789,7 @@ def test_read_subcolumns(cluster): def test_read_from_not_existing_container(cluster): node = cluster.instances["node"] query = ( - f"select * from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont_not_exists', 'test_table.csv', " + f"select * from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont-not-exists', 'test_table.csv', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')" ) expected_err_msg = "container does not exist" From 35038f2458f2642700ee685a5c4f43729228d8a0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 14 May 2024 14:23:24 +0000 Subject: [PATCH 013/141] rename parameter --- .../test_backup_restore_azure_blob_storage/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 1a1458cb68e..6765a519a6d 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -30,12 +30,12 @@ def generate_cluster_def(port): DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:{port}/devstoreaccount1; - cont + cont CSV http://azurite1:{port}/devstoreaccount1 - cont + cont CSV devstoreaccount1 Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== From 29250418cb5d666b81043695a1df09e4df94e539 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 17 May 2024 20:43:40 +0000 Subject: [PATCH 014/141] fix backward incompatibility --- .../AzureBlobStorage/AzureBlobStorageCommon.cpp | 15 +++++++++++++-- .../test.py | 4 ++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index a39cc89b93b..11253d25e3d 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -92,6 +92,17 @@ Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String container_name; String prefix; + auto get_container_name = [&] + { + if (config.has(config_prefix + ".container_name")) + return config.getString(config_prefix + ".container_name"); + + if (config.has(config_prefix + ".container")) + return config.getString(config_prefix + ".container"); + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `container` or `container_name` parameter in config"); + }; + if (config.has(config_prefix + ".endpoint")) { String endpoint = config.getString(config_prefix + ".endpoint"); @@ -154,13 +165,13 @@ Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const else if (config.has(config_prefix + ".connection_string")) { storage_url = config.getString(config_prefix + ".connection_string"); - container_name = config.getString(config_prefix + ".container_name"); + container_name = get_container_name(); } else if (config.has(config_prefix + ".storage_account_url")) { storage_url = config.getString(config_prefix + ".storage_account_url"); validateStorageAccountUrl(storage_url); - container_name = config.getString(config_prefix + ".container_name"); + container_name = get_container_name(); } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `storage_account_url` or `connection_string` or `endpoint` in config"); diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 6765a519a6d..1a1458cb68e 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -30,12 +30,12 @@ def generate_cluster_def(port): DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:{port}/devstoreaccount1; - cont + cont CSV http://azurite1:{port}/devstoreaccount1 - cont + cont CSV devstoreaccount1 Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== From 47c7b7fccf38f9ae3180e1a456262388bd39e129 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 19 Mar 2024 16:01:48 +0100 Subject: [PATCH 015/141] add tests for non replicated mt --- src/Core/Settings.h | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 18 ++- .../Transforms/buildPushingToViewsChain.cpp | 6 + src/Storages/MergeTree/MergeTreeDataWriter.h | 2 + src/Storages/MergeTree/MergeTreeSink.cpp | 26 ++++ .../MergeTree/MergedBlockOutputStream.cpp | 5 + .../MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- ..._non_replicated_deduplication_mv.reference | 0 .../03008_non_replicated_deduplication_mv.sql | 93 ++++++++++++++ ...eduplication_mv_collision_in_dst.reference | 0 ...ated_deduplication_mv_collision_in_dst.sql | 113 +++++++++++++++++ ...lision_in_dst_from_different_src.reverence | 0 ...mv_collision_in_dst_from_different_src.sql | 119 ++++++++++++++++++ ...eduplication_mv_collision_in_src.reference | 0 ...ated_deduplication_mv_collision_in_src.sql | 76 +++++++++++ 15 files changed, 456 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c555b5cb208..491e888e3e0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -34,7 +34,7 @@ class IColumn; M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ - M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ + M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \ M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \ M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \ M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \ diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 12677c422b8..d1a9ead480e 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -306,6 +306,9 @@ Chain InterpreterInsertQuery::buildSink( ThreadGroupPtr running_group, std::atomic_uint64_t * elapsed_counter_ms) { + LOG_DEBUG(getLogger("InsertQuery"), + "called InterpreterInsertQuery::buildSink() engine {} table name {}.{}", table->getName(), table->getStorageID().database_name, table->getStorageID().table_name); + ThreadStatus * thread_status = current_thread; if (!thread_status_holder) @@ -465,16 +468,17 @@ BlockIO InterpreterInsertQuery::execute() * to avoid unnecessary squashing. */ + LOG_DEBUG(getLogger("InsertQuery"), + "execute() is_trivial_insert_select=true prefersLargeBlocks={}", table->prefersLargeBlocks()); + Settings new_settings = getContext()->getSettings(); new_settings.max_threads = std::max(1, settings.max_insert_threads); if (table->prefersLargeBlocks()) { - if (settings.min_insert_block_size_rows) - new_settings.max_block_size = settings.min_insert_block_size_rows; - if (settings.min_insert_block_size_bytes) - new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; + new_settings.max_block_size = std::max(settings.min_insert_block_size_rows, settings.max_block_size); + new_settings.preferred_block_size_bytes = std::max(settings.min_insert_block_size_bytes, settings.preferred_block_size_bytes); } auto new_context = Context::createCopy(context); @@ -527,6 +531,7 @@ BlockIO InterpreterInsertQuery::execute() /// Deduplication when passing insert_deduplication_token breaks if using more than one thread if (!settings.insert_deduplication_token.toString().empty()) { + /// TODO! LOG_DEBUG( getLogger("InsertQuery"), "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); @@ -566,8 +571,13 @@ BlockIO InterpreterInsertQuery::execute() running_group = std::make_shared(getContext()); for (size_t i = 0; i < sink_streams_size; ++i) { + LOG_DEBUG(getLogger("InsertQuery"), + "call buildSink table name {}.{}, stream {}/{}", + table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams_size); + auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, running_group, /* elapsed_counter_ms= */ nullptr); + sink_chains.emplace_back(std::move(out)); } for (size_t i = 0; i < pre_streams_size; ++i) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 5e8ecdca95e..66264a46d9d 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -223,6 +223,8 @@ std::optional generateViewChain( else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && !insert_settings.insert_deduplication_token.value.empty()) { + + /// TODO! /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle * deduplication in complex INSERT flows. * @@ -252,6 +254,8 @@ std::optional generateViewChain( else insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); + LOG_DEBUG(getLogger("PushingToViews"), "insert_deduplication_token {}", insert_deduplication_token); + insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); } @@ -483,6 +487,8 @@ Chain buildPushingToViewsChain( for (const auto & view_id : views) { + LOG_ERROR(&Poco::Logger::get("PushingToViews"), "dependent view: {}.{}", view_id.database_name, view_id.table_name); + try { auto out = generateViewChain( diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 863c951d957..3e47e3705b9 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -47,6 +47,8 @@ public: : data(data_) , log(getLogger(data.getLogName() + " (Writer)")) { + LOG_WARNING(log, "MergeTreeDataWriter() called from:\n{}", StackTrace().toString()); + } /** Split the block to blocks, each of them must be written as separate part. diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index b7dede3cb00..f0eb56aea13 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -40,6 +40,8 @@ MergeTreeSink::MergeTreeSink( , context(context_) , storage_snapshot(storage.getStorageSnapshotWithoutData(metadata_snapshot, context_)) { + LOG_INFO(storage.log, "MergeTreeSink() called for {}.{}", + storage_.getStorageID().database_name, storage_.getStorageID().getTableName()); } void MergeTreeSink::onStart() @@ -56,6 +58,10 @@ void MergeTreeSink::onFinish() void MergeTreeSink::consume(Chunk chunk) { + LOG_INFO(storage.log, "consume() called num_blocks_processed {}, chunks: rows {} columns {} bytes {}", + num_blocks_processed, + chunk.getNumRows(), chunk.getNumColumns(), chunk.bytes()); + if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); @@ -65,6 +71,8 @@ void MergeTreeSink::consume(Chunk chunk) auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context); + LOG_INFO(storage.log, "consume() called part_blocks.count {}", part_blocks.size()); + using DelayedPartitions = std::vector; DelayedPartitions partitions; @@ -121,8 +129,16 @@ void MergeTreeSink::consume(Chunk chunk) else max_insert_delayed_streams_for_parallel_write = 0; + LOG_INFO(storage.log, "consume() called for {}.{} " + "streams {} + {} -> {}, " + "max {} support_parallel_write {}", + storage.getStorageID().database_name, storage.getStorageID().getTableName(), + streams, temp_part.streams.size(), streams + temp_part.streams.size(), + max_insert_delayed_streams_for_parallel_write, support_parallel_write); + /// In case of too much columns/parts in block, flush explicitly. streams += temp_part.streams.size(); + if (streams > max_insert_delayed_streams_for_parallel_write) { finishDelayedChunk(); @@ -156,8 +172,12 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; + LOG_INFO(storage.log, "finishDelayedChunk() called partitions count {}", delayed_chunk->partitions.size()); + for (auto & partition : delayed_chunk->partitions) { + LOG_INFO(storage.log, "finishDelayedChunk() part name {} dedup_token {}", partition.temp_part.part->name, partition.block_dedup_token); + ProfileEventsScope scoped_attach(&partition.part_counters); partition.temp_part.finalize(); @@ -174,9 +194,15 @@ void MergeTreeSink::finishDelayedChunk() storage.fillNewPartName(part, lock); auto * deduplication_log = storage.getDeduplicationLog(); + + LOG_INFO(storage.log, "finishDelayedChunk() has dedup log {}", bool(deduplication_log)); + if (deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); + + LOG_INFO(storage.log, "finishDelayedChunk() block_dedup_token={}, block_id={}", partition.block_dedup_token, block_id); + auto res = deduplication_log->addPart(block_id, part->info); if (!res.second) { diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index d8555d69788..12bc284f68c 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -50,6 +50,8 @@ MergedBlockOutputStream::MergedBlockOutputStream( data_part->storeVersionMetadata(); writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, statistics, default_codec, writer_settings, computed_index_granularity); + + LOG_WARNING(getLogger("MergedBlockOutputStream()"), "called c-tor"); } /// If data is pre-sorted. @@ -329,6 +331,9 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Permutation * permutation) { + LOG_WARNING(getLogger("MergedBlockOutputStream()"), "writeImpl block rows {} size {} getPartDirectory {}", + block.rows(), block.bytes(), data_part_storage->getPartDirectory()); + block.checkNumberOfRows(); size_t rows = block.rows(); if (!rows) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 4b4f4c33e7d..2bb9aad1e53 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -535,7 +535,7 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl ProfileEventsScope profile_events_scope; String original_part_dir = part->getDataPartStorage().getPartDirectory(); - auto try_rollback_part_rename = [this, &part, &original_part_dir]() + auto try_rollback_part_rename = [this, &part, &original_part_dir] () { if (original_part_dir == part->getDataPartStorage().getPartDirectory()) return; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql new file mode 100644 index 00000000000..8f718508ee8 --- /dev/null +++ b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql @@ -0,0 +1,93 @@ +DROP TABLE IF EXISTS table_a_b; +DROP TABLE IF EXISTS table_when_b_even; +DROP TABLE IF EXISTS mv_b_even; + + +SET max_insert_threads=1; +SET update_insert_deduplication_token_in_dependent_materialized_views=1; +SET deduplicate_blocks_in_dependent_materialized_views=1; + +SET max_block_size=3; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + + +CREATE TABLE table_a_b + ( + a String, + b UInt64, + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_a_b; + +CREATE TABLE table_when_b_even_wo_dedup + ( + a String, + b UInt64, + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=0; +SYSTEM STOP MERGES table_when_b_even; + +CREATE MATERIALIZED VIEW mv_b_even_wo_dedup +TO table_when_b_even_wo_dedup +AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + +CREATE TABLE table_when_b_even_dedup + ( + a String, + b UInt64, + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_when_b_even; + +CREATE MATERIALIZED VIEW mv_b_even_dedup +TO table_when_b_even_dedup +AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + + +SELECT 'first insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT toString(number DIV 2), number +FROM numbers(5) +SETTINGS send_logs_level='trace'; + + +SELECT 'second insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT toString(number DIV 2), number +FROM numbers(5) +SETTINGS send_logs_level='trace'; + + +SELECT 'table_a_b'; +SELECT 'count', count() FROM table_a_b; +SELECT _part, count() FROM table_a_b GROUP BY _part; + +SELECT 'table_when_b_even_wo_dedup'; +SELECT 'count', count() FROM table_when_b_even_wo_dedup; +SELECT _part, count() FROM table_when_b_even_wo_dedup GROUP BY _part; + +SELECT 'table_when_b_even_dedup'; +SELECT 'count', count() FROM table_when_b_even_dedup; +SELECT _part, count() FROM table_when_b_even_dedup GROUP BY _part; + + +DROP TABLE mv_b_even; +DROP TABLE table_when_b_even; +DROP TABLE table_a_b; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql new file mode 100644 index 00000000000..46b9bd52144 --- /dev/null +++ b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql @@ -0,0 +1,113 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE table_for_join_with + ( + a_join String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a_join, b); + +INSERT INTO table_for_join_with + SELECT 'joined_' || toString(number), number + FROM numbers(10); +SELECT 'table_for_join_with'; +SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; + + +CREATE TABLE table_a_b + ( + a_src String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a_src, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_a_b; + +CREATE TABLE table_when_b_even_dedup + ( + a_src String CODEC(NONE), + a_join String CODEC(NONE), + b UInt64 CODEC(NONE) + ) + ENGINE = MergeTree() + ORDER BY (a_src, a_join, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_when_b_even_dedup; + +CREATE MATERIALIZED VIEW mv_b_even_dedup + TO table_when_b_even_dedup + AS + SELECT a_src, a_join, b + FROM table_a_b + FULL OUTER JOIN table_for_join_with + ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 + ORDER BY a_src, a_join, b; + +CREATE TABLE table_when_b_even_wo_dedup + ( + a_src String CODEC(NONE), + a_join String CODEC(NONE), + b UInt64 CODEC(NONE) + ) + ENGINE = MergeTree() + ORDER BY (a_src, a_join, b) + SETTINGS non_replicated_deduplication_window=0; +SYSTEM STOP MERGES table_when_b_even_wo_dedup; + +CREATE MATERIALIZED VIEW mv_b_even_wo_dedup + TO table_when_b_even_wo_dedup + AS + SELECT a_src, a_join, b + FROM table_a_b + FULL OUTER JOIN table_for_join_with + ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 + ORDER BY a_src, a_join, b; + + +SET max_insert_threads=1; +SET update_insert_deduplication_token_in_dependent_materialized_views=1; +SET deduplicate_blocks_in_dependent_materialized_views=1; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + + +SELECT 'first insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT 'source_' || toString(number), number +FROM numbers(5) +SETTINGS send_logs_level='trace'; + + +SELECT 'second insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT 'source_' || toString(number), number +FROM numbers(5) +SETTINGS send_logs_level='trace'; + + +SELECT 'table_a_b'; +SELECT 'count', count() FROM table_a_b; +SELECT _part, count() FROM table_a_b GROUP BY _part; + +SELECT 'table_when_b_even_dedup, here the result if join is deduplicated inside one request, it is not correct'; +SELECT 'count', count() FROM table_when_b_even_dedup; +SELECT _part, count() FROM table_when_b_even_dedup GROUP BY _part; + +SELECT 'table_when_b_even_wo_dedup'; +SELECT 'count', count() FROM table_when_b_even_wo_dedup; +SELECT _part, count() FROM table_when_b_even_wo_dedup GROUP BY _part ORDER BY _part; + + +DROP TABLE mv_b_even_dedup; +DROP TABLE table_when_b_even_dedup; +DROP TABLE mv_b_even_wo_dedup; +DROP TABLE table_when_b_even_wo_dedup; +DROP TABLE table_a_b; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql new file mode 100644 index 00000000000..02546af69dc --- /dev/null +++ b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql @@ -0,0 +1,119 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE table_source + ( + a String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_source; + +CREATE TABLE table_dst_dedup + ( + a String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_dst_dedup; + +CREATE MATERIALIZED VIEW mv_b_even_dedup + TO table_dst_dedup + AS + SELECT a, b + FROM table_source + WHERE b % 2 = 0; + +CREATE MATERIALIZED VIEW mv_b_even_even_dedup + TO table_dst_dedup + AS + SELECT a, b + FROM table_source + WHERE b % 4 = 0; + +CREATE TABLE table_dst_wo_dedup + ( + a String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=0; +SYSTEM STOP MERGES table_dst_wo_dedup; + +CREATE MATERIALIZED VIEW mv_b_even_wo_dedup + TO table_dst_wo_dedup + AS + SELECT a, b + FROM table_source + WHERE b % 2 = 0; + +CREATE MATERIALIZED VIEW mv_b_even_wo_even_dedup + TO table_dst_wo_dedup + AS + SELECT a, b + FROM table_source + WHERE b % 4 = 0; + + +SET max_insert_threads=1; +SET update_insert_deduplication_token_in_dependent_materialized_views=1; +SET deduplicate_blocks_in_dependent_materialized_views=1; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + + +SELECT 'first insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_source +SELECT 'source_' || toString(number), number +FROM numbers(8) +SETTINGS send_logs_level='trace'; + +SELECT 'table_source'; +SELECT 'count', count() FROM table_source; +SELECT _part, count() FROM table_source GROUP BY _part ORDER BY _part; + +SELECT 'table_dst_dedup'; +SELECT 'count', count() FROM table_dst_dedup; +SELECT _part, count() FROM table_dst_dedup GROUP BY _part ORDER BY _part; + +SELECT 'table_dst_wo_dedup'; +SELECT 'count', count() FROM table_dst_wo_dedup; +SELECT _part, count() FROM table_dst_wo_dedup GROUP BY _part ORDER BY _part; + + +SELECT 'second insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_source +SELECT 'source_' || toString(number), number +FROM numbers(8) +SETTINGS send_logs_level='trace'; + +SELECT 'table_source'; +SELECT 'count', count() FROM table_source; +SELECT _part, count() FROM table_source GROUP BY _part ORDER BY _part; + +SELECT 'table_dst_dedup, block from different mv is deduplicated, it is wrong'; +SELECT 'count', count() FROM table_dst_dedup; +SELECT _part, count() FROM table_dst_dedup GROUP BY _part ORDER BY _part; + +SELECT 'table_dst_wo_dedup'; +SELECT 'count', count() FROM table_dst_wo_dedup; +SELECT _part, count() FROM table_dst_wo_dedup GROUP BY _part ORDER BY _part; + + +DROP TABLE mv_b_even_dedup; +DROP TABLE mv_b_even_even_dedup; +DROP TABLE mv_b_even_wo_dedup; +DROP TABLE mv_b_even_even_wo_dedup; +DROP TABLE table_dst_dedup; +DROP TABLE table_dst_wo_dedup; +DROP TABLE table_source; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql new file mode 100644 index 00000000000..213b449dd73 --- /dev/null +++ b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql @@ -0,0 +1,76 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE table_a_b + ( + a String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_a_b; + +CREATE TABLE table_when_b_even + ( + a String CODEC(NONE), + b UInt64 CODEC(NONE) + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_when_b_even; + +CREATE MATERIALIZED VIEW mv_b_even + TO table_when_b_even + AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + + +SET max_insert_threads=1; +SET update_insert_deduplication_token_in_dependent_materialized_views=1; +SET deduplicate_blocks_in_dependent_materialized_views=1; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + + +SELECT 'first insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT 'source_' || toString(1), 1 +FROM numbers(5) +SETTINGS send_logs_level='trace'; + +SELECT 'table_a_b, it deduplicates rows within one insert, it is wrong'; +SELECT 'count', count() FROM table_a_b; +SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part; + +SELECT 'table_when_b_even'; +SELECT 'count', count() FROM table_when_b_even; +SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part; + + +SELECT 'second insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT 'source_' || toString(1), 1 +FROM numbers(5) +SETTINGS send_logs_level='trace'; + +SELECT 'table_a_b'; +SELECT 'count', count() FROM table_a_b; +SELECT _part, count() FROM table_a_b GROUP BY _part; + +SELECT 'table_when_b_even'; +SELECT 'count', count() FROM table_when_b_even; +SELECT _part, count() FROM table_when_b_even GROUP BY _part; + + +DROP TABLE mv_b_even; +DROP TABLE table_when_b_even; +DROP TABLE table_a_b; From 8b7563040c39dc11647606cdd9f8d77dc6e4cc84 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 19 Mar 2024 19:06:42 +0100 Subject: [PATCH 016/141] non replicated inserts with deduplication user token --- src/Interpreters/InterpreterInsertQuery.cpp | 32 +- .../Transforms/buildPushingToViewsChain.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataWriter.h | 3 +- .../MergeTree/MergedBlockOutputStream.cpp | 4 +- .../0_stateless/03008_deduplication.python | 561 +++++++++++ ...uplication_insert_several_blocks.reference | 870 ++++++++++++++++++ ...008_deduplication_insert_several_blocks.sh | 92 ++ ...tion_mv_generates_several_blocks.reference | 814 ++++++++++++++++ ...duplication_mv_generates_several_blocks.sh | 98 ++ ...cation_several_mv_into_one_table.reference | 590 ++++++++++++ ...deduplication_several_mv_into_one_table.sh | 106 +++ ..._non_replicated_deduplication_mv.reference | 0 .../03008_non_replicated_deduplication_mv.sql | 93 -- ...eduplication_mv_collision_in_dst.reference | 0 ...ated_deduplication_mv_collision_in_dst.sql | 113 --- ...lision_in_dst_from_different_src.reverence | 0 ...mv_collision_in_dst_from_different_src.sql | 119 --- ...eduplication_mv_collision_in_src.reference | 0 ...ated_deduplication_mv_collision_in_src.sql | 76 -- 19 files changed, 3151 insertions(+), 422 deletions(-) create mode 100644 tests/queries/0_stateless/03008_deduplication.python create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index d1a9ead480e..a5396be9b76 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -519,29 +519,29 @@ BlockIO InterpreterInsertQuery::execute() if (settings.max_insert_threads > 1) { - auto table_id = table->getStorageID(); - auto views = DatabaseCatalog::instance().getDependentViews(table_id); + pre_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); - /// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them. - /// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts. - const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert(); - pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads - : std::min(settings.max_insert_threads, pipeline.getNumStreams()); - /// Deduplication when passing insert_deduplication_token breaks if using more than one thread - if (!settings.insert_deduplication_token.toString().empty()) - { - /// TODO! - LOG_DEBUG( - getLogger("InsertQuery"), - "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); - pre_streams_size = 1; - } +// /// Deduplication when passing insert_deduplication_token breaks if using more than one thread +// if (!settings.insert_deduplication_token.toString().empty()) +// { +// /// TODO! +// LOG_DEBUG( +// getLogger("InsertQuery"), +// "Insert-select query using insert_deduplication_token, setting streams from {} to 1 to avoid deduplication issues, pipeline.getNumStreams() {}", +// pre_streams_size, pipeline.getNumStreams()); +// pre_streams_size = 1; +// } if (table->supportsParallelInsert()) sink_streams_size = pre_streams_size; } + LOG_DEBUG( + getLogger("InsertQuery"), + "pre_streams_size {}, pipeline.getNumStreams() {}", + pre_streams_size, pipeline.getNumStreams()); + pipeline.resize(pre_streams_size); /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 66264a46d9d..70f30faa5b1 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -487,7 +487,7 @@ Chain buildPushingToViewsChain( for (const auto & view_id : views) { - LOG_ERROR(&Poco::Logger::get("PushingToViews"), "dependent view: {}.{}", view_id.database_name, view_id.table_name); + LOG_DEBUG(&Poco::Logger::get("PushingToViews"), "dependent view: {}.{}", view_id.database_name, view_id.table_name); try { diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 3e47e3705b9..a9a44813545 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -47,8 +47,7 @@ public: : data(data_) , log(getLogger(data.getLogName() + " (Writer)")) { - LOG_WARNING(log, "MergeTreeDataWriter() called from:\n{}", StackTrace().toString()); - + LOG_DEBUG(log, "MergeTreeDataWriter() called from:\n{}", StackTrace().toString()); } /** Split the block to blocks, each of them must be written as separate part. diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 12bc284f68c..fd2b05f615e 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -51,7 +51,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, statistics, default_codec, writer_settings, computed_index_granularity); - LOG_WARNING(getLogger("MergedBlockOutputStream()"), "called c-tor"); + LOG_DEBUG(getLogger("MergedBlockOutputStream()"), "called c-tor"); } /// If data is pre-sorted. @@ -331,7 +331,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Permutation * permutation) { - LOG_WARNING(getLogger("MergedBlockOutputStream()"), "writeImpl block rows {} size {} getPartDirectory {}", + LOG_DEBUG(getLogger("MergedBlockOutputStream()"), "writeImpl block rows {} size {} getPartDirectory {}", block.rows(), block.bytes(), data_part_storage->getPartDirectory()); block.checkNumberOfRows(); diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python new file mode 100644 index 00000000000..3cd29247910 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -0,0 +1,561 @@ +#!/usr/bin/env python3 + +import os +import sys +import argparse +import string + + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) + + +def __format(template, **params): + field_names = [v[1] for v in string.Formatter().parse(template) if v[1] is not None] + kv_args = {} + for field in field_names: + if field in params: + kv_args[field] = params[field] + else: + kv_args[field] = "" + + return template.format(**kv_args) + + +def instance_create_statement(table_name, table_columns, table_keys, table_engine, with_deduplication, no_merges=True): + template = """ + CREATE TABLE {table_name} + {table_columns} + ENGINE = {table_engine} + ORDER BY {table_keys} + {table_settings}; + {table_no_merges} + """ + + params = dict() + params["table_name"] = table_name + params["table_columns"] = table_columns + params["table_keys"] = table_keys + params["table_no_merges"] = f"SYSTEM STOP MERGES {table_name};" if no_merges else "" + params["table_engine"] = "MergeTree()" if table_engine == "MergeTree" else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" + + deduplication_window_setting_name = "non_replicated_deduplication_window" if table_engine == "MergeTree" else "replicated_deduplication_window" + deduplication_window_setting_value = 1000 if with_deduplication else 0 + + settings = list() + settings += [f"{deduplication_window_setting_name}={deduplication_window_setting_value}"] + params["table_settings"] = "SETTINGS " + ",".join(settings) + + return __format(template, **params) + + +def instance_insert_statement(table_name, count, insert_unique_blocks, use_insert_token): + template = """ + INSERT INTO {table_name} + SELECT {insert_columns} + FROM numbers({count}) {insert_settings}; + """ + return __format( + template, + table_name=table_name, + count=count, + insert_columns="'src_4', 4" if not insert_unique_blocks else "'src_' || toString(number), number", + insert_settings="" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'", + ) + + +def get_drop_tables_statements(tables): + return "".join([f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]]) + + +def get_logs_statement(args): + if args.get_logs: + return "SET send_logs_level='test';" + return "" + +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +class ArgsFactory: + def __init__(self, parser): + self.__parser = parser + + def add_opt_engine(self): + self.__parser.add_argument( + "--table-engine", choices=["ReplicatedMergeTree", "MergeTree"], default="MergeTree") + + def add_opt_user_token(self): + self.__parser.add_argument("--use-insert-token", type=str2bool, nargs='?', const=True, default=False) + + def add_opt_single_thread(self): + self.__parser.add_argument("--single-thread", type=str2bool, nargs='?', const=True, default=True) + + def add_opt_dedup_src(self): + self.__parser.add_argument("--deduplicate-src-table", type=str2bool, nargs='?', const=True, default=True) + + def add_opt_dedup_dst(self): + self.__parser.add_argument("--deduplicate-dst-table", type=str2bool, nargs='?', const=True, default=True) + + def add_opt_get_logs(self): + self.__parser.add_argument("--get-logs", type=str2bool, nargs='?', const=True, default=False) + + def add_opt_uniq_blocks(self): + self.__parser.add_argument("--insert-unique-blocks", type=str2bool, nargs='?', const=True, default=True) + + def add_all(self): + self.add_opt_engine() + self.add_opt_user_token() + self.add_opt_single_thread() + self.add_opt_dedup_src() + self.add_opt_dedup_dst() + self.add_opt_get_logs() + self.add_opt_uniq_blocks() + + +def test_insert_several_blocks(parser): + ArgsFactory(parser).add_all() + + def calle(args): + create_table_a_b_statement = instance_create_statement( + table_name="table_a_b", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_when_b_even_statement = instance_create_statement( + table_name="table_when_b_even", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + create_mv_statement = """ + CREATE MATERIALIZED VIEW mv_b_even + TO table_when_b_even + AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + """ + + drop_tables_statements = get_drop_tables_statements( ["table_a_b", "table_when_b_even", "mv_b_even"] ) + + insert_statement = instance_insert_statement( + "table_a_b", 10, args.insert_unique_blocks, args.use_insert_token + ) + + print_details_statements = f""" + SELECT 'table_a_b'; + SELECT 'count', count() FROM table_a_b; + {"" if not args.get_logs else "SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part;"} + + SELECT 'table_when_b_even'; + SELECT 'count', count() FROM table_when_b_even; + {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} + """ + + + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 10 ) + FROM table_a_b; + SELECT throwIf( count() != 5 ) + FROM table_when_b_even; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {5 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = """ + SELECT throwIf( count() != 10 ) + FROM table_a_b; + SELECT throwIf( count() != 10 ) + FROM table_when_b_even; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + {create_table_a_b_statement} + + {create_table_when_b_even_statement} + + {create_mv_statement} + + -- first insert + {insert_statement} + + {print_details_statements} + + {assert_first_insert_statements} + + -- second insert, it is retry + {insert_statement} + + {print_details_statements} + + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def test_mv_generates_several_blocks(parser): + ArgsFactory(parser).add_all() + + def calle(args): + tables = ["table_for_join_with", "table_a_b", "table_when_b_even_and_joined", "mv_b_even"] + drop_tables_statements = get_drop_tables_statements(tables) + + details_print_for_table_for_join_with = "" + if args.get_logs: + details_print_for_table_for_join_with = """ + SELECT 'table_for_join_with'; + SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; + """ + + create_table_a_b_statement = instance_create_statement( + table_name="table_a_b", + table_columns="(a_src String, b UInt64)", + table_keys="(a_src, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_when_b_even_and_joined_statement = instance_create_statement( + table_name="table_when_b_even_and_joined", + table_columns="(a_src String, a_join String, b UInt64)", + table_keys="(a_src, a_join, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + insert_statement = instance_insert_statement( + "table_a_b", 5, args.insert_unique_blocks, args.use_insert_token + ) + + details_print_statements = f""" + SELECT 'table_a_b'; + SELECT 'count', count() FROM table_a_b; + + SELECT 'table_when_b_even_and_joined'; + SELECT 'count', count() FROM table_when_b_even_and_joined; + {"" if not args.get_logs else "SELECT _part, a_src, a_join, b FROM table_when_b_even_and_joined ORDER BY _part;"} + """ + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 5 ) + FROM table_a_b; + + SELECT throwIf( count() != 47 ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {47 if args.deduplicate_dst_table else 94} ) + FROM table_when_b_even_and_joined; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) + FROM table_a_b; + + SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 45} ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 90} ) + FROM table_when_b_even_and_joined; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) + FROM table_a_b; + + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 45} ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 90} ) + FROM table_when_b_even_and_joined; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + CREATE TABLE table_for_join_with + (a_join String, b UInt64) + ENGINE = MergeTree() + ORDER BY (a_join, b); + INSERT INTO table_for_join_with + SELECT 'joined_' || toString(number), number + FROM numbers(9); + {details_print_for_table_for_join_with} + + {create_table_a_b_statement} + SYSTEM STOP MERGES table_a_b; + + {create_table_when_b_even_and_joined_statement} + SYSTEM STOP MERGES table_when_b_even_and_joined; + + CREATE MATERIALIZED VIEW mv_b_even + TO table_when_b_even_and_joined + AS + SELECT a_src, a_join, table_for_join_with.b as b + FROM table_a_b + FULL OUTER JOIN table_for_join_with + ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 + ORDER BY a_src, a_join, b; + + -- first insert + {insert_statement} + + {details_print_statements} + + -- first assertion + {assert_first_insert_statements} + + -- second insert + {insert_statement} + + {details_print_statements} + + -- second assertion + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def test_several_mv_into_one_table(parser): + ArgsFactory(parser).add_all() + + def calle(args): + tables = ["table_src", "table_dst", "mv_b_even", "mv_b_even_even"] + drop_tables_statements = get_drop_tables_statements(tables) + + create_table_src_statement = instance_create_statement( + table_name="table_src", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_dst_statement = instance_create_statement( + table_name="table_dst", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + insert_statement = instance_insert_statement( + "table_src", 8, args.insert_unique_blocks, args.use_insert_token + ) + + details_print_statements = f""" + SELECT 'table_src count', count() FROM table_src; + + SELECT 'table_dst count', count() FROM table_dst; + {"" if not args.get_logs else "SELECT _part, count() FROM table_dst GROUP BY _part ORDER BY _part;"} + """ + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 8 ) + FROM table_src; + + SELECT throwIf( count() != 6 ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {6 if args.deduplicate_dst_table else 12} ) + FROM table_dst; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 8} ) + FROM table_src; + + SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 16} ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 32} ) + FROM table_dst; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 8} ) + FROM table_src; + + SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 16} ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 32} ) + FROM table_dst; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + {create_table_src_statement} + + {create_table_dst_statement} + + CREATE MATERIALIZED VIEW mv_b_even + TO table_dst + AS + SELECT a, b + FROM table_src + WHERE b % 2 = 0; + + CREATE MATERIALIZED VIEW mv_b_even_even + TO table_dst + AS + SELECT a, b + FROM table_src + WHERE b % 4 = 0; + + -- first insert + {insert_statement} + + {details_print_statements} + + {assert_first_insert_statements} + + -- second insert, retry + {insert_statement} + + {details_print_statements} + + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def parse_args(): + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(dest="test") + test_insert_several_blocks( + subparsers.add_parser("insert_several_blocks_into_table") + ) + test_mv_generates_several_blocks( + subparsers.add_parser("mv_generates_several_blocks") + ) + test_several_mv_into_one_table( + subparsers.add_parser("several_mv_into_one_table") + ) + args = parser.parse_args() + if args.test is None: + parser.print_help() + return args + + +def main(): + args = parse_args() + if args.test is not None: + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference new file mode 100644 index 00000000000..35b2642a4d2 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference @@ -0,0 +1,870 @@ + +Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even +count 1 +EXPECTED_TO_FAIL + +Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +EXPECTED_TO_FAIL + +Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even +count 5 +EXPECTED_TO_FAIL + +Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +EXPECTED_TO_FAIL + +Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 1 +0 +EXPECTED_TO_FAIL + +Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +EXPECTED_TO_FAIL + +Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even +count 1 +EXPECTED_TO_FAIL + +Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +EXPECTED_TO_FAIL + +Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even +count 5 +EXPECTED_TO_FAIL + +Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +EXPECTED_TO_FAIL + +Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 1 +0 +EXPECTED_TO_FAIL + +Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +EXPECTED_TO_FAIL + +Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh new file mode 100755 index 00000000000..5b07f6033ad --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +# fails, it is a error. Several blocks in scr table with the same user token are processed in parallel and deduplicated + +# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" +# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False" +# fails, it is a error. The same situation as first one, but on dst table. + +RUN_ONLY="" +#RUN_ONLY="" + +KNOWN_ERRORS=(8 9 10 11 12 13) + +function is_known_error() +{ + n=$1 + for e in "${KNOWN_ERRORS[@]}"; do + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + return 0 + fi + done + return 1 +} + +i=0 +for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference new file mode 100644 index 00000000000..eccdbd52f37 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference @@ -0,0 +1,814 @@ + +Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +EXPECTED_TO_FAIL + +Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +EXPECTED_TO_FAIL + +Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even_and_joined +count 47 +EXPECTED_TO_FAIL + +Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +EXPECTED_TO_FAIL + +Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +EXPECTED_TO_FAIL + +Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +EXPECTED_TO_FAIL + +Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +EXPECTED_TO_FAIL + +Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +EXPECTED_TO_FAIL + +Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even_and_joined +count 47 +EXPECTED_TO_FAIL + +Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +EXPECTED_TO_FAIL + +Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +EXPECTED_TO_FAIL + +Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +EXPECTED_TO_FAIL + +Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh new file mode 100755 index 00000000000..1dd648583c6 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +# failed due to race in multi thread insertion, blocks are deduplicated in different threads + +# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +# the same as first but for dst table + +# Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# dst table deduplicates all incoming blocks from one insert because not uniq hash + +RUN_ONLY="" +#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +KNOWN_ERRORS=(8 9 10 11 12 13 16 20 24 28) + +function is_known_error() +{ + n=$1 + for e in "${KNOWN_ERRORS[@]}"; do + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + return 0 + fi + done + return 1 +} + +i=0 +for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference new file mode 100644 index 00000000000..12eea604e3a --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference @@ -0,0 +1,590 @@ + +Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 1 +table_dst count 2 +EXPECTED_TO_FAIL + +Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +EXPECTED_TO_FAIL + +Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 1 +table_dst count 6 +EXPECTED_TO_FAIL + +Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +EXPECTED_TO_FAIL + +Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 2 +0 +EXPECTED_TO_FAIL + +Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +EXPECTED_TO_FAIL + +Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 1 +table_dst count 2 +EXPECTED_TO_FAIL + +Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +EXPECTED_TO_FAIL + +Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 1 +table_dst count 6 +EXPECTED_TO_FAIL + +Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +EXPECTED_TO_FAIL + +Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 2 +0 +EXPECTED_TO_FAIL + +Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +EXPECTED_TO_FAIL + +Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh new file mode 100755 index 00000000000..487b3ac5f88 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +# race condition on insert into src table + +# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +# race condition on insert into dst table + +# Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# dst deduplicates blocks from one inserts from different materialized view + +# Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +# Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +# dst deduplicates blocks from different inserts by hash + +KNOWN_ERRORS=(8 9 10 11 12 13 16 20 24 28 17 21 25 29) + +function is_known_error() +{ + n=$1 + for e in "${KNOWN_ERRORS[@]}"; do + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + return 0 + fi + done + return 1 +} + +RUN_ONLY="" +#RUN_ONLY="Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done + done + done + done + done +done + +echo +echo "All cases executed" + + diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql deleted file mode 100644 index 8f718508ee8..00000000000 --- a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql +++ /dev/null @@ -1,93 +0,0 @@ -DROP TABLE IF EXISTS table_a_b; -DROP TABLE IF EXISTS table_when_b_even; -DROP TABLE IF EXISTS mv_b_even; - - -SET max_insert_threads=1; -SET update_insert_deduplication_token_in_dependent_materialized_views=1; -SET deduplicate_blocks_in_dependent_materialized_views=1; - -SET max_block_size=3; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - - -CREATE TABLE table_a_b - ( - a String, - b UInt64, - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_a_b; - -CREATE TABLE table_when_b_even_wo_dedup - ( - a String, - b UInt64, - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=0; -SYSTEM STOP MERGES table_when_b_even; - -CREATE MATERIALIZED VIEW mv_b_even_wo_dedup -TO table_when_b_even_wo_dedup -AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - -CREATE TABLE table_when_b_even_dedup - ( - a String, - b UInt64, - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_when_b_even; - -CREATE MATERIALIZED VIEW mv_b_even_dedup -TO table_when_b_even_dedup -AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - - -SELECT 'first insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT toString(number DIV 2), number -FROM numbers(5) -SETTINGS send_logs_level='trace'; - - -SELECT 'second insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT toString(number DIV 2), number -FROM numbers(5) -SETTINGS send_logs_level='trace'; - - -SELECT 'table_a_b'; -SELECT 'count', count() FROM table_a_b; -SELECT _part, count() FROM table_a_b GROUP BY _part; - -SELECT 'table_when_b_even_wo_dedup'; -SELECT 'count', count() FROM table_when_b_even_wo_dedup; -SELECT _part, count() FROM table_when_b_even_wo_dedup GROUP BY _part; - -SELECT 'table_when_b_even_dedup'; -SELECT 'count', count() FROM table_when_b_even_dedup; -SELECT _part, count() FROM table_when_b_even_dedup GROUP BY _part; - - -DROP TABLE mv_b_even; -DROP TABLE table_when_b_even; -DROP TABLE table_a_b; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql deleted file mode 100644 index 46b9bd52144..00000000000 --- a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql +++ /dev/null @@ -1,113 +0,0 @@ -DROP TABLE IF EXISTS test; - -CREATE TABLE table_for_join_with - ( - a_join String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a_join, b); - -INSERT INTO table_for_join_with - SELECT 'joined_' || toString(number), number - FROM numbers(10); -SELECT 'table_for_join_with'; -SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; - - -CREATE TABLE table_a_b - ( - a_src String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a_src, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_a_b; - -CREATE TABLE table_when_b_even_dedup - ( - a_src String CODEC(NONE), - a_join String CODEC(NONE), - b UInt64 CODEC(NONE) - ) - ENGINE = MergeTree() - ORDER BY (a_src, a_join, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_when_b_even_dedup; - -CREATE MATERIALIZED VIEW mv_b_even_dedup - TO table_when_b_even_dedup - AS - SELECT a_src, a_join, b - FROM table_a_b - FULL OUTER JOIN table_for_join_with - ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 - ORDER BY a_src, a_join, b; - -CREATE TABLE table_when_b_even_wo_dedup - ( - a_src String CODEC(NONE), - a_join String CODEC(NONE), - b UInt64 CODEC(NONE) - ) - ENGINE = MergeTree() - ORDER BY (a_src, a_join, b) - SETTINGS non_replicated_deduplication_window=0; -SYSTEM STOP MERGES table_when_b_even_wo_dedup; - -CREATE MATERIALIZED VIEW mv_b_even_wo_dedup - TO table_when_b_even_wo_dedup - AS - SELECT a_src, a_join, b - FROM table_a_b - FULL OUTER JOIN table_for_join_with - ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 - ORDER BY a_src, a_join, b; - - -SET max_insert_threads=1; -SET update_insert_deduplication_token_in_dependent_materialized_views=1; -SET deduplicate_blocks_in_dependent_materialized_views=1; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - - -SELECT 'first insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT 'source_' || toString(number), number -FROM numbers(5) -SETTINGS send_logs_level='trace'; - - -SELECT 'second insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT 'source_' || toString(number), number -FROM numbers(5) -SETTINGS send_logs_level='trace'; - - -SELECT 'table_a_b'; -SELECT 'count', count() FROM table_a_b; -SELECT _part, count() FROM table_a_b GROUP BY _part; - -SELECT 'table_when_b_even_dedup, here the result if join is deduplicated inside one request, it is not correct'; -SELECT 'count', count() FROM table_when_b_even_dedup; -SELECT _part, count() FROM table_when_b_even_dedup GROUP BY _part; - -SELECT 'table_when_b_even_wo_dedup'; -SELECT 'count', count() FROM table_when_b_even_wo_dedup; -SELECT _part, count() FROM table_when_b_even_wo_dedup GROUP BY _part ORDER BY _part; - - -DROP TABLE mv_b_even_dedup; -DROP TABLE table_when_b_even_dedup; -DROP TABLE mv_b_even_wo_dedup; -DROP TABLE table_when_b_even_wo_dedup; -DROP TABLE table_a_b; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql deleted file mode 100644 index 02546af69dc..00000000000 --- a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql +++ /dev/null @@ -1,119 +0,0 @@ -DROP TABLE IF EXISTS test; - -CREATE TABLE table_source - ( - a String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_source; - -CREATE TABLE table_dst_dedup - ( - a String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_dst_dedup; - -CREATE MATERIALIZED VIEW mv_b_even_dedup - TO table_dst_dedup - AS - SELECT a, b - FROM table_source - WHERE b % 2 = 0; - -CREATE MATERIALIZED VIEW mv_b_even_even_dedup - TO table_dst_dedup - AS - SELECT a, b - FROM table_source - WHERE b % 4 = 0; - -CREATE TABLE table_dst_wo_dedup - ( - a String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=0; -SYSTEM STOP MERGES table_dst_wo_dedup; - -CREATE MATERIALIZED VIEW mv_b_even_wo_dedup - TO table_dst_wo_dedup - AS - SELECT a, b - FROM table_source - WHERE b % 2 = 0; - -CREATE MATERIALIZED VIEW mv_b_even_wo_even_dedup - TO table_dst_wo_dedup - AS - SELECT a, b - FROM table_source - WHERE b % 4 = 0; - - -SET max_insert_threads=1; -SET update_insert_deduplication_token_in_dependent_materialized_views=1; -SET deduplicate_blocks_in_dependent_materialized_views=1; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - - -SELECT 'first insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_source -SELECT 'source_' || toString(number), number -FROM numbers(8) -SETTINGS send_logs_level='trace'; - -SELECT 'table_source'; -SELECT 'count', count() FROM table_source; -SELECT _part, count() FROM table_source GROUP BY _part ORDER BY _part; - -SELECT 'table_dst_dedup'; -SELECT 'count', count() FROM table_dst_dedup; -SELECT _part, count() FROM table_dst_dedup GROUP BY _part ORDER BY _part; - -SELECT 'table_dst_wo_dedup'; -SELECT 'count', count() FROM table_dst_wo_dedup; -SELECT _part, count() FROM table_dst_wo_dedup GROUP BY _part ORDER BY _part; - - -SELECT 'second insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_source -SELECT 'source_' || toString(number), number -FROM numbers(8) -SETTINGS send_logs_level='trace'; - -SELECT 'table_source'; -SELECT 'count', count() FROM table_source; -SELECT _part, count() FROM table_source GROUP BY _part ORDER BY _part; - -SELECT 'table_dst_dedup, block from different mv is deduplicated, it is wrong'; -SELECT 'count', count() FROM table_dst_dedup; -SELECT _part, count() FROM table_dst_dedup GROUP BY _part ORDER BY _part; - -SELECT 'table_dst_wo_dedup'; -SELECT 'count', count() FROM table_dst_wo_dedup; -SELECT _part, count() FROM table_dst_wo_dedup GROUP BY _part ORDER BY _part; - - -DROP TABLE mv_b_even_dedup; -DROP TABLE mv_b_even_even_dedup; -DROP TABLE mv_b_even_wo_dedup; -DROP TABLE mv_b_even_even_wo_dedup; -DROP TABLE table_dst_dedup; -DROP TABLE table_dst_wo_dedup; -DROP TABLE table_source; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql deleted file mode 100644 index 213b449dd73..00000000000 --- a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql +++ /dev/null @@ -1,76 +0,0 @@ -DROP TABLE IF EXISTS test; - -CREATE TABLE table_a_b - ( - a String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_a_b; - -CREATE TABLE table_when_b_even - ( - a String CODEC(NONE), - b UInt64 CODEC(NONE) - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_when_b_even; - -CREATE MATERIALIZED VIEW mv_b_even - TO table_when_b_even - AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - - -SET max_insert_threads=1; -SET update_insert_deduplication_token_in_dependent_materialized_views=1; -SET deduplicate_blocks_in_dependent_materialized_views=1; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - - -SELECT 'first insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT 'source_' || toString(1), 1 -FROM numbers(5) -SETTINGS send_logs_level='trace'; - -SELECT 'table_a_b, it deduplicates rows within one insert, it is wrong'; -SELECT 'count', count() FROM table_a_b; -SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part; - -SELECT 'table_when_b_even'; -SELECT 'count', count() FROM table_when_b_even; -SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part; - - -SELECT 'second insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT 'source_' || toString(1), 1 -FROM numbers(5) -SETTINGS send_logs_level='trace'; - -SELECT 'table_a_b'; -SELECT 'count', count() FROM table_a_b; -SELECT _part, count() FROM table_a_b GROUP BY _part; - -SELECT 'table_when_b_even'; -SELECT 'count', count() FROM table_when_b_even; -SELECT _part, count() FROM table_when_b_even GROUP BY _part; - - -DROP TABLE mv_b_even; -DROP TABLE table_when_b_even; -DROP TABLE table_a_b; From 02c9a07778cdc6295d2ebf972c52de389e7edabb Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 12 Apr 2024 15:04:52 +0200 Subject: [PATCH 017/141] work in progress --- src/Common/CollectionOfDerived.h | 153 +++ src/Interpreters/AsynchronousInsertQueue.cpp | 8 +- src/Interpreters/InterpreterCheckQuery.cpp | 18 +- src/Interpreters/InterpreterCreateQuery.cpp | 11 +- src/Interpreters/InterpreterExplainQuery.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 615 +++++----- src/Interpreters/InterpreterInsertQuery.h | 17 +- src/Interpreters/SystemLog.cpp | 2 +- src/Processors/Chunk.cpp | 20 +- src/Processors/Chunk.h | 62 +- .../PullingAsyncPipelineExecutor.cpp | 9 +- .../Executors/PullingPipelineExecutor.cpp | 9 +- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 4 +- src/Processors/IAccumulatingTransform.cpp | 5 +- .../FinishAggregatingInOrderAlgorithm.cpp | 17 +- .../Algorithms/MergeTreePartLevelInfo.h | 12 +- .../Algorithms/ReplacingSortedAlgorithm.cpp | 2 +- .../Algorithms/ReplacingSortedAlgorithm.h | 8 +- src/Processors/Merges/IMergingTransform.cpp | 2 +- src/Processors/Merges/IMergingTransform.h | 2 +- src/Processors/Sinks/RemoteSink.h | 2 +- src/Processors/Sinks/SinkToStorage.cpp | 6 +- src/Processors/Sinks/SinkToStorage.h | 37 +- src/Processors/Sources/BlocksSource.h | 5 +- src/Processors/Sources/RemoteSource.cpp | 2 +- .../Sources/SourceFromSingleChunk.cpp | 2 +- .../AggregatingInOrderTransform.cpp | 11 +- .../Transforms/AggregatingInOrderTransform.h | 5 +- .../Transforms/AggregatingTransform.cpp | 16 +- .../Transforms/AggregatingTransform.h | 4 +- src/Processors/Transforms/FilterTransform.cpp | 3 +- .../Transforms/JoiningTransform.cpp | 9 +- src/Processors/Transforms/JoiningTransform.h | 5 +- .../Transforms/MemoryBoundMerging.h | 6 +- ...gingAggregatedMemoryEfficientTransform.cpp | 36 +- ...ergingAggregatedMemoryEfficientTransform.h | 5 +- .../Transforms/MergingAggregatedTransform.cpp | 10 +- .../Transforms/NumberBlocksTransform.cpp | 1 + .../Transforms/NumberBlocksTransform.h | 224 ++++ .../Transforms/SelectByIndicesTransform.h | 3 +- .../Transforms/SquashingChunksTransform.cpp | 10 + .../Transforms/SquashingChunksTransform.h | 2 + .../Transforms/TotalsHavingTransform.cpp | 6 +- .../Transforms/buildPushingToViewsChain.cpp | 91 +- src/QueryPipeline/QueryPipelineBuilder.h | 2 +- src/QueryPipeline/QueryPlanResourceHolder.cpp | 8 +- src/QueryPipeline/QueryPlanResourceHolder.h | 3 + src/Storages/Distributed/DistributedSink.cpp | 8 +- src/Storages/Distributed/DistributedSink.h | 2 +- src/Storages/FileLog/StorageFileLog.cpp | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 4 +- src/Storages/Kafka/StorageKafka.cpp | 2 +- src/Storages/LiveView/LiveViewSink.h | 4 +- src/Storages/MaterializedView/RefreshTask.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 19 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + .../MergeTree/MergeTreeSelectProcessor.cpp | 6 +- .../MergeTree/MergeTreeSequentialSource.cpp | 5 +- src/Storages/MergeTree/MergeTreeSink.cpp | 50 +- src/Storages/MergeTree/MergeTreeSink.h | 4 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 54 +- .../MergeTree/ReplicatedMergeTreeSink.h | 3 +- src/Storages/MessageQueueSink.cpp | 2 +- src/Storages/MessageQueueSink.h | 2 +- src/Storages/NATS/StorageNATS.cpp | 2 +- src/Storages/PartitionedSink.cpp | 4 +- src/Storages/PartitionedSink.h | 2 +- .../MaterializedPostgreSQLConsumer.cpp | 2 +- .../PostgreSQLReplicationHandler.cpp | 2 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 2 +- src/Storages/RocksDB/EmbeddedRocksDBSink.cpp | 2 +- src/Storages/RocksDB/EmbeddedRocksDBSink.h | 2 +- .../RocksDB/StorageEmbeddedRocksDB.cpp | 3 +- src/Storages/S3Queue/StorageS3Queue.cpp | 2 +- src/Storages/StorageAzureBlob.cpp | 4 +- src/Storages/StorageBuffer.cpp | 4 +- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageFile.cpp | 4 +- src/Storages/StorageKeeperMap.cpp | 9 +- src/Storages/StorageLog.cpp | 6 +- src/Storages/StorageMemory.cpp | 2 +- src/Storages/StorageMongoDB.cpp | 5 +- src/Storages/StorageMySQL.cpp | 4 +- src/Storages/StoragePostgreSQL.cpp | 4 +- src/Storages/StorageRedis.cpp | 9 +- src/Storages/StorageS3.cpp | 4 +- src/Storages/StorageSQLite.cpp | 2 +- src/Storages/StorageSet.cpp | 6 +- src/Storages/StorageStripeLog.cpp | 4 +- src/Storages/StorageURL.cpp | 4 +- src/Storages/StorageURL.h | 2 +- .../System/StorageSystemZooKeeper.cpp | 2 +- src/Storages/WindowView/StorageWindowView.cpp | 4 +- .../0_stateless/03008_deduplication.python | 140 ++- ...uplication_insert_several_blocks.reference | 1088 ++++++++++++++++- ...008_deduplication_insert_several_blocks.sh | 101 +- ...tion_mv_generates_several_blocks.reference | 1032 +++++++++++++++- ...duplication_mv_generates_several_blocks.sh | 99 +- ...cation_several_mv_into_one_table.reference | 784 +++++++++++- ...deduplication_several_mv_into_one_table.sh | 101 +- 100 files changed, 4107 insertions(+), 1004 deletions(-) create mode 100644 src/Common/CollectionOfDerived.h create mode 100644 src/Processors/Transforms/NumberBlocksTransform.cpp create mode 100644 src/Processors/Transforms/NumberBlocksTransform.h diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h new file mode 100644 index 00000000000..8579c4dd50c --- /dev/null +++ b/src/Common/CollectionOfDerived.h @@ -0,0 +1,153 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +template +class CollectionOfDerivedItems +{ +public: + using Self = CollectionOfDerivedItems; + using ItemPtr = std::shared_ptr; + +private: + struct Rec + { + std::type_index type_idx; + ItemPtr ptr; + + bool operator<(const Rec & other) const + { + return type_idx < other.type_idx; + } + + bool operator<(const std::type_index & value) const + { + return type_idx < value; + } + + bool operator==(const Rec & other) const + { + return type_idx == other.type_idx; + } + }; + using Records = std::vector; + +public: + void swap(Self & other) + { + records.swap(other.records); + } + + void clear() + { + records.clear(); + } + + bool empty() const + { + return records.empty(); + } + + size_t size() const + { + return records.size(); + } + + Self clone() const + { + Self result; + result.records.reserve(records.size()); + for (const auto & rec: records) + result.records.emplace_back(rec.type_idx, rec.ptr->clone()); + return result; + } + + void append(Self && other) + { + std::move(other.records.begin(), other.records.end(), std::back_inserter(records)); + std::sort(records.begin(), records.end()); + chassert(isUniqTypes()); + } + + template + void add(std::shared_ptr info) + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + return addImpl(std::type_index(typeid(T)), std::move(info)); + } + + template + std::shared_ptr get() const + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + auto it = getImpl(std::type_index(typeid(T))); + if (it == records.cend()) + return nullptr; + auto cast = std::dynamic_pointer_cast(it->ptr); + chassert(cast); + return cast; + } + + template + std::shared_ptr extract() + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + auto it = getImpl(std::type_index(typeid(T))); + if (it == records.cend()) + return nullptr; + auto cast = std::dynamic_pointer_cast(it->ptr); + chassert(cast); + + records.erase(it); + return cast; + } + +private: + bool isUniqTypes() const + { + auto uniq_it = std::adjacent_find(records.begin(), records.end()); + + return uniq_it == records.end(); + } + + void addImpl(std::type_index type_idx, ItemPtr item) + { + auto it = std::lower_bound(records.begin(), records.end(), type_idx); + + if (it == records.end()) + { + records.emplace_back(type_idx, item); + return; + } + + chassert(it->type_idx != type_idx); + + records.emplace(it, type_idx, item); + + chassert(isUniqTypes()); + } + + Records::const_iterator getImpl(std::type_index type_idx) const + { + auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx); + + if (it == records.cend()) + return records.cend(); + + if (it->type_idx != type_idx) + return records.cend(); + + return it; + } + + Records records; +}; + +} diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index ab29c64184d..65035790729 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -301,7 +301,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const auto & insert_query = query->as(); insert_query.async_insert_flush = true; - InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); + InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns, false, false, false); auto table = interpreter.getTable(insert_query); auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); @@ -780,7 +780,7 @@ try try { interpreter = std::make_unique( - key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); + key.query, insert_context, key.settings.insert_allow_materialized_columns, true, false, false); pipeline = interpreter->execute().pipeline; chassert(pipeline.pushing()); @@ -999,7 +999,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( } Chunk chunk(executor.getResultColumns(), total_rows); - chunk.setChunkInfo(std::move(chunk_info)); + chunk.getChunkInfos().add(std::move(chunk_info)); return chunk; } @@ -1051,7 +1051,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( } Chunk chunk(std::move(result_columns), total_rows); - chunk.setChunkInfo(std::move(chunk_info)); + chunk.getChunkInfos().add(std::move(chunk_info)); return chunk; } diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 4a84a7bf570..e070d8694a7 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -2,6 +2,7 @@ #include #include +#include #include @@ -11,6 +12,7 @@ #include #include #include +#include "Processors/Chunk.h" #include #include @@ -91,7 +93,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con return Chunk(std::move(columns), 1); } -class TableCheckTask : public ChunkInfo +class TableCheckTask : public ChunkInfoCloneable { public: TableCheckTask(StorageID table_id, const std::variant & partition_or_part, ContextPtr context) @@ -110,6 +112,12 @@ public: context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID()); } + TableCheckTask(const TableCheckTask & other) + : table(other.table) + , check_data_tasks(other.check_data_tasks) + , is_finished(other.is_finished.load()) + {} + std::optional checkNext() const { if (isFinished()) @@ -121,8 +129,8 @@ public: std::this_thread::sleep_for(sleep_time); }); - IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks; - auto result = table->checkDataNext(check_data_tasks_); + IStorage::DataValidationTasksPtr tmp = check_data_tasks; + auto result = table->checkDataNext(tmp); is_finished = !result.has_value(); return result; } @@ -180,7 +188,7 @@ protected: /// source should return at least one row to start pipeline result.addColumn(ColumnUInt8::create(1, 1)); /// actual data stored in chunk info - result.setChunkInfo(std::move(current_check_task)); + result.getChunkInfos().add(std::move(current_check_task)); return result; } @@ -280,7 +288,7 @@ public: protected: void transform(Chunk & chunk) override { - auto table_check_task = std::dynamic_pointer_cast(chunk.getChunkInfo()); + auto table_check_task = chunk.getChunkInfos().get(); auto check_result = table_check_task->checkNext(); if (!check_result) { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 519cbde588f..a143ca867e1 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1690,8 +1690,15 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) else insert->select = create.select->clone(); - return InterpreterInsertQuery(insert, getContext(), - getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); + return InterpreterInsertQuery( + insert, + getContext(), + getContext()->getSettingsRef().insert_allow_materialized_columns, + false, + false, + false + ) + .execute(); } return {}; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 458be843b59..08d6ac7df9e 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -524,7 +524,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() } else if (dynamic_cast(ast.getExplainedQuery().get())) { - InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext()); + InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext(), false, false, false, false); auto io = insert.execute(); printPipeline(io.pipeline.getProcessors(), buf); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index a5396be9b76..40d5a84031d 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,7 @@ #include #include #include +#include "Interpreters/Context_fwd.h" namespace ProfileEvents @@ -394,28 +396,323 @@ Chain InterpreterInsertQuery::buildPreSinkChain( return out; } +std::pair, std::vector> InterpreterInsertQuery::buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) +{ + ThreadGroupPtr running_group; + if (current_thread) + running_group = current_thread->getThreadGroup(); + if (!running_group) + running_group = std::make_shared(getContext()); + + std::vector sink_chains; + std::vector presink_chains; + + for (size_t i = 0; i < sink_streams; ++i) + { + LOG_DEBUG(getLogger("InsertQuery"), + "call buildSink table name {}.{}, stream {}/{}", + table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); + + auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, + running_group, /* elapsed_counter_ms= */ nullptr); + + sink_chains.emplace_back(std::move(out)); + } + + for (size_t i = 0; i < presink_streams; ++i) + { + auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); + presink_chains.emplace_back(std::move(out)); + } + + return {std::move(presink_chains), std::move(sink_chains)}; +} + + +QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() +{ + const Settings & settings = getContext()->getSettingsRef(); + auto & query = query_ptr->as(); + + StoragePtr table = getTable(query); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); + + bool is_trivial_insert_select = false; + + if (settings.optimize_trivial_insert_select) + { + const auto & select_query = query.select->as(); + const auto & selects = select_query.list_of_selects->children; + const auto & union_modes = select_query.list_of_modes; + + /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries + const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; + + is_trivial_insert_select = + std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) + && std::all_of(selects.begin(), selects.end(), isTrivialSelect); + } + + ContextPtr select_context = getContext(); + + if (is_trivial_insert_select) + { + /** When doing trivial INSERT INTO ... SELECT ... FROM table, + * don't need to process SELECT with more than max_insert_threads + * and it's reasonable to set block size for SELECT to the desired block size for INSERT + * to avoid unnecessary squashing. + */ + + LOG_DEBUG(getLogger("InsertQuery"), + "execute() is_trivial_insert_select=true prefersLargeBlocks={}", table->prefersLargeBlocks()); + + Settings new_settings = select_context->getSettings(); + + new_settings.max_threads = std::max(1, settings.max_insert_threads); + + if (table->prefersLargeBlocks()) + { + new_settings.max_block_size = std::max(settings.min_insert_block_size_rows, settings.max_block_size); + new_settings.preferred_block_size_bytes = std::max(settings.min_insert_block_size_bytes, settings.preferred_block_size_bytes); + } + + auto context_for_trivial_select = Context::createCopy(context); + context_for_trivial_select->setSettings(new_settings); + context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); + + select_context = context_for_trivial_select; + } + + QueryPipelineBuilder pipeline; + + { + auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + + pipeline.dropTotalsAndExtremes(); + + /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. + if (getContext()->getSettingsRef().insert_null_as_default) + { + const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); + const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); + const auto & output_columns = metadata_snapshot->getColumns(); + + if (input_columns.size() == query_columns.size()) + { + for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) + { + /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with + /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) + { + query_sample_block.setColumn( + col_idx, + ColumnWithTypeAndName( + makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), + makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), + query_columns[col_idx].name)); + } + } + } + } + + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + query_sample_block.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header, actions); + }); + + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it or may have different settings for applying Sparse serialization. + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + auto context_ptr = getContext(); + auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + + return counting; + }); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + }); + } + + /// Number of streams works like this: + /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever + /// InterpreterSelectQuery ends up with. + /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. + /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. + /// * If the table supports parallel inserts, use the same streams for writing to IStorage. + /// Otherwise ResizeProcessor them down to 1 stream. + + size_t presink_streams_size = std::max(1, std::max(settings.max_insert_threads, pipeline.getNumStreams())); + size_t sink_streams_size = table->supportsParallelInsert() ? presink_streams_size : 1; + + auto [presink_chains, sink_chains] = buildPreAndSyncChains( + presink_streams_size, sink_streams_size, + table, metadata_snapshot, query_sample_block); + + if (!settings.insert_deduplication_token.value.empty()) + { + pipeline.resize(1); + + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr { + return std::make_shared(settings.insert_deduplication_token.value, in_header); + }); + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr { + return std::make_shared(in_header); + }); + } + + pipeline.resize(presink_chains.size()); + for (auto & chain : presink_chains) + pipeline.addResources(chain.detachResources()); + pipeline.addChains(std::move(presink_chains)); + + pipeline.resize(sink_streams_size); + for (auto & chain : sink_chains) + pipeline.addResources(chain.detachResources()); + pipeline.addChains(std::move(sink_chains)); + + if (!settings.parallel_view_processing) + { + size_t num_select_threads = pipeline.getNumThreads(); + /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. + if (pipeline.getNumThreads() > num_select_threads) + pipeline.setMaxThreads(num_select_threads); + } + else if (pipeline.getNumThreads() < settings.max_threads) + { + /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, + /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. + /// + /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. + pipeline.setMaxThreads(settings.max_threads); + } + + pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header); + }); + + return QueryPipelineBuilder::getPipeline(std::move(pipeline)); +} + + +QueryPipeline InterpreterInsertQuery::buildInsertPipeline() +{ + const Settings & settings = getContext()->getSettingsRef(); + auto & query = query_ptr->as(); + + StoragePtr table = getTable(query); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); + + Chain chain; + + { + auto [presink_chains, sink_chains] = buildPreAndSyncChains( + 1, 1, + table, metadata_snapshot, query_sample_block); + + chain = std::move(presink_chains.front()); + chain.appendChain(std::move(sink_chains.front())); + } + + if (!settings.insert_deduplication_token.value.empty()) + { + chain.addSource(std::make_shared(chain.getInputHeader())); + chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); + } + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(squashing)); + } + + auto context_ptr = getContext(); + auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + chain.addSource(std::move(counting)); + + QueryPipeline pipeline = QueryPipeline(std::move(chain)); + pipeline.setNumThreads(std::min(pipeline.getNumThreads(), settings.max_threads)); + pipeline.setConcurrencyControl(settings.use_concurrency_control); + + if (query.hasInlinedData() && !async_insert) + { + /// can execute without additional data + auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); + for (auto && buffer : owned_buffers) + format->addBuffer(std::move(buffer)); + + auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); + pipeline.complete(std::move(pipe)); + } + + return pipeline; +} + + BlockIO InterpreterInsertQuery::execute() { const Settings & settings = getContext()->getSettingsRef(); auto & query = query_ptr->as(); - QueryPipelineBuilder pipeline; - std::optional distributed_pipeline; - QueryPlanResourceHolder resources; StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); - StoragePtr inner_table; - if (const auto * mv = dynamic_cast(table.get())) - inner_table = mv->getTargetTable(); - if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); - auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); /// For table functions we check access while executing @@ -423,303 +720,37 @@ BlockIO InterpreterInsertQuery::execute() if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); - if (query.select && settings.parallel_distributed_insert_select) - // Distributed INSERT SELECT - distributed_pipeline = table->distributedWrite(query, getContext()); - - std::vector presink_chains; - std::vector sink_chains; - if (!distributed_pipeline) + if (!allow_materialized) { - /// Number of streams works like this: - /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever - /// InterpreterSelectQuery ends up with. - /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. - /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. - /// * If the table supports parallel inserts, use the same streams for writing to IStorage. - /// Otherwise ResizeProcessor them down to 1 stream. - /// * If it's not an INSERT SELECT, forget all that and use one stream. - size_t pre_streams_size = 1; - size_t sink_streams_size = 1; - - if (query.select) - { - bool is_trivial_insert_select = false; - - if (settings.optimize_trivial_insert_select) - { - const auto & select_query = query.select->as(); - const auto & selects = select_query.list_of_selects->children; - const auto & union_modes = select_query.list_of_modes; - - /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries - const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; - - is_trivial_insert_select = - std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) - && std::all_of(selects.begin(), selects.end(), isTrivialSelect); - } - - if (is_trivial_insert_select) - { - /** When doing trivial INSERT INTO ... SELECT ... FROM table, - * don't need to process SELECT with more than max_insert_threads - * and it's reasonable to set block size for SELECT to the desired block size for INSERT - * to avoid unnecessary squashing. - */ - - LOG_DEBUG(getLogger("InsertQuery"), - "execute() is_trivial_insert_select=true prefersLargeBlocks={}", table->prefersLargeBlocks()); - - Settings new_settings = getContext()->getSettings(); - - new_settings.max_threads = std::max(1, settings.max_insert_threads); - - if (table->prefersLargeBlocks()) - { - new_settings.max_block_size = std::max(settings.min_insert_block_size_rows, settings.max_block_size); - new_settings.preferred_block_size_bytes = std::max(settings.min_insert_block_size_bytes, settings.preferred_block_size_bytes); - } - - auto new_context = Context::createCopy(context); - new_context->setSettings(new_settings); - new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); - - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - else - { - /// Passing 1 as subquery_depth will disable limiting size of intermediate result. - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - - pipeline.dropTotalsAndExtremes(); - - if (settings.max_insert_threads > 1) - { - pre_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); - - -// /// Deduplication when passing insert_deduplication_token breaks if using more than one thread -// if (!settings.insert_deduplication_token.toString().empty()) -// { -// /// TODO! -// LOG_DEBUG( -// getLogger("InsertQuery"), -// "Insert-select query using insert_deduplication_token, setting streams from {} to 1 to avoid deduplication issues, pipeline.getNumStreams() {}", -// pre_streams_size, pipeline.getNumStreams()); -// pre_streams_size = 1; -// } - - if (table->supportsParallelInsert()) - sink_streams_size = pre_streams_size; - } - - LOG_DEBUG( - getLogger("InsertQuery"), - "pre_streams_size {}, pipeline.getNumStreams() {}", - pre_streams_size, pipeline.getNumStreams()); - - pipeline.resize(pre_streams_size); - - /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. - if (getContext()->getSettingsRef().insert_null_as_default) - { - const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); - const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); - const auto & output_columns = metadata_snapshot->getColumns(); - - if (input_columns.size() == query_columns.size()) - { - for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) - { - /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with - /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. - if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && !isVariant(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name)) - query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); - } - } - } - } - - ThreadGroupPtr running_group; - if (current_thread) - running_group = current_thread->getThreadGroup(); - if (!running_group) - running_group = std::make_shared(getContext()); - for (size_t i = 0; i < sink_streams_size; ++i) - { - LOG_DEBUG(getLogger("InsertQuery"), - "call buildSink table name {}.{}, stream {}/{}", - table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams_size); - - auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, - running_group, /* elapsed_counter_ms= */ nullptr); - - sink_chains.emplace_back(std::move(out)); - } - for (size_t i = 0; i < pre_streams_size; ++i) - { - auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); - presink_chains.emplace_back(std::move(out)); - } + for (const auto & column : metadata_snapshot->getColumns()) + if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); } BlockIO res; - /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? - if (distributed_pipeline) + if (query.select) { - res.pipeline = std::move(*distributed_pipeline); - } - else if (query.select) - { - const auto & header = presink_chains.at(0).getInputHeader(); - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - header.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + if (settings.parallel_distributed_insert_select) { - return std::make_shared(in_header, actions); - }); - - /// We need to convert Sparse columns to full, because it's destination storage - /// may not support it or may have different settings for applying Sparse serialization. - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - auto context_ptr = getContext(); - auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - - return counting; - }); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - }); + res.pipeline = *table->distributedWrite(query, getContext()); } - - size_t num_select_threads = pipeline.getNumThreads(); - - for (auto & chain : presink_chains) - resources = chain.detachResources(); - for (auto & chain : sink_chains) - resources = chain.detachResources(); - - pipeline.addChains(std::move(presink_chains)); - pipeline.resize(sink_chains.size()); - pipeline.addChains(std::move(sink_chains)); - - if (!settings.parallel_view_processing) + else { - /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. - if (pipeline.getNumThreads() > num_select_threads) - pipeline.setMaxThreads(num_select_threads); + res.pipeline = buildInsertSelectPipeline(); } - else if (pipeline.getNumThreads() < settings.max_threads) - { - /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, - /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. - /// - /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. - pipeline.setMaxThreads(settings.max_threads); - } - - pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header); - }); - - if (!allow_materialized) - { - for (const auto & column : metadata_snapshot->getColumns()) - if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); - } - - res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); } else { - auto & chain = presink_chains.at(0); - chain.appendChain(std::move(sink_chains.at(0))); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(squashing)); - } - - auto context_ptr = getContext(); - auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - chain.addSource(std::move(counting)); - - res.pipeline = QueryPipeline(std::move(presink_chains[0])); - res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); - res.pipeline.setConcurrencyControl(settings.use_concurrency_control); - - if (query.hasInlinedData() && !async_insert) - { - /// can execute without additional data - auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); - for (auto && buffer : owned_buffers) - format->addBuffer(std::move(buffer)); - - auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); - res.pipeline.complete(std::move(pipe)); - } + res.pipeline = buildInsertPipeline(); } - res.pipeline.addResources(std::move(resources)); - res.pipeline.addStorageHolder(table); + + StoragePtr inner_table; + if (const auto * mv = dynamic_cast(table.get())) + inner_table = mv->getTargetTable(); + if (inner_table) res.pipeline.addStorageHolder(inner_table); @@ -742,17 +773,21 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont } } + void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const { extendQueryLogElemImpl(elem, context_); } + void registerInterpreterInsertQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) { - return std::make_unique(args.query, args.context, args.allow_materialized); + return std::make_unique(args.query, args.context, args.allow_materialized, false, false, false); }; factory.registerInterpreter("InterpreterInsertQuery", create_fn); } + + } diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index bf73fb2a319..3f3b7a6f106 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -23,10 +23,10 @@ public: InterpreterInsertQuery( const ASTPtr & query_ptr_, ContextPtr context_, - bool allow_materialized_ = false, - bool no_squash_ = false, - bool no_destination_ = false, - bool async_insert_ = false); + bool allow_materialized_, + bool no_squash_, + bool no_destination, + bool async_insert_); /** Prepare a request for execution. Return block streams * - the stream into which you can write data to execute the query, if INSERT; @@ -73,12 +73,17 @@ private: ASTPtr query_ptr; const bool allow_materialized; - const bool no_squash; - const bool no_destination; + bool no_squash = false; + bool no_destination = false; const bool async_insert; std::vector> owned_buffers; + std::pair, std::vector> buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); + + QueryPipeline buildInsertSelectPipeline(); + QueryPipeline buildInsertPipeline(); + Chain buildSink( const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 3af8761ff8e..2d5109a612c 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -522,7 +522,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, /// We always want to deliver the data to the original table regardless of the MVs insert_context->setSetting("materialized_views_ignore_errors", true); - InterpreterInsertQuery interpreter(query_ptr, insert_context); + InterpreterInsertQuery interpreter(query_ptr, insert_context, false, false, false, false); BlockIO io = interpreter.execute(); PushingPipelineExecutor executor(io.pipeline); diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 2631f665f9c..13df2e64421 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -19,14 +19,6 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns checkNumRowsIsConsistent(); } -Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(std::move(columns_)) - , num_rows(num_rows_) - , chunk_info(std::move(chunk_info_)) -{ - checkNumRowsIsConsistent(); -} - static Columns unmuteColumns(MutableColumns && mutable_columns) { Columns columns; @@ -43,17 +35,11 @@ Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_) checkNumRowsIsConsistent(); } -Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(unmuteColumns(std::move(columns_))) - , num_rows(num_rows_) - , chunk_info(std::move(chunk_info_)) -{ - checkNumRowsIsConsistent(); -} - Chunk Chunk::clone() const { - return Chunk(getColumns(), getNumRows(), chunk_info); + auto tmp = Chunk(getColumns(), getNumRows()); + tmp.setChunkInfos(chunk_infos.clone()); + return tmp; } void Chunk::setColumns(Columns columns_, UInt64 num_rows_) diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 4f753798eaa..b4345d18a08 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -1,7 +1,15 @@ #pragma once +#include "base/defines.h" + +#include #include + +#include +#include +#include #include +#include namespace DB { @@ -9,11 +17,29 @@ namespace DB class ChunkInfo { public: - virtual ~ChunkInfo() = default; + using Ptr = std::shared_ptr; + ChunkInfo() = default; + ChunkInfo(const ChunkInfo&) = default; + ChunkInfo(ChunkInfo&&) = default; + + virtual Ptr clone() const = 0; + virtual ~ChunkInfo() = default; }; -using ChunkInfoPtr = std::shared_ptr; + +template +class ChunkInfoCloneable : public ChunkInfo +{ +public: + ChunkInfoCloneable() = default; + ChunkInfoCloneable(const ChunkInfoCloneable & other) = default; + + Ptr clone() const override + { + return std::static_pointer_cast(std::make_shared(*static_cast(this))); + } +}; /** * Chunk is a list of columns with the same length. @@ -32,26 +58,26 @@ using ChunkInfoPtr = std::shared_ptr; class Chunk { public: + using ChunkInfoCollection = CollectionOfDerivedItems; + Chunk() = default; Chunk(const Chunk & other) = delete; Chunk(Chunk && other) noexcept : columns(std::move(other.columns)) , num_rows(other.num_rows) - , chunk_info(std::move(other.chunk_info)) + , chunk_infos(std::move(other.chunk_infos)) { other.num_rows = 0; } Chunk(Columns columns_, UInt64 num_rows_); - Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk(MutableColumns columns_, UInt64 num_rows_); - Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk & operator=(const Chunk & other) = delete; Chunk & operator=(Chunk && other) noexcept { columns = std::move(other.columns); - chunk_info = std::move(other.chunk_info); + chunk_infos = std::move(other.chunk_infos); num_rows = other.num_rows; other.num_rows = 0; return *this; @@ -62,15 +88,15 @@ public: void swap(Chunk & other) noexcept { columns.swap(other.columns); - chunk_info.swap(other.chunk_info); std::swap(num_rows, other.num_rows); + chunk_infos.swap(other.chunk_infos); } void clear() { num_rows = 0; columns.clear(); - chunk_info.reset(); + chunk_infos.clear(); } const Columns & getColumns() const { return columns; } @@ -81,9 +107,9 @@ public: /** Get empty columns with the same types as in block. */ MutableColumns cloneEmptyColumns() const; - const ChunkInfoPtr & getChunkInfo() const { return chunk_info; } - bool hasChunkInfo() const { return chunk_info != nullptr; } - void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); } + ChunkInfoCollection & getChunkInfos() { return chunk_infos; } + const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; } + void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); } UInt64 getNumRows() const { return num_rows; } UInt64 getNumColumns() const { return columns.size(); } @@ -107,7 +133,7 @@ public: private: Columns columns; UInt64 num_rows = 0; - ChunkInfoPtr chunk_info; + ChunkInfoCollection chunk_infos; void checkNumRowsIsConsistent(); }; @@ -117,11 +143,15 @@ using Chunks = std::vector; /// AsyncInsert needs two kinds of information: /// - offsets of different sub-chunks /// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`. -class AsyncInsertInfo : public ChunkInfo +class AsyncInsertInfo : public ChunkInfoCloneable { public: AsyncInsertInfo() = default; - explicit AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) : offsets(offsets_), tokens(tokens_) {} + AsyncInsertInfo(const AsyncInsertInfo & other) = default; + AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) + : offsets(offsets_) + , tokens(tokens_) + {} std::vector offsets; std::vector tokens; @@ -130,9 +160,11 @@ public: using AsyncInsertInfoPtr = std::shared_ptr; /// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults. -class ChunkMissingValues : public ChunkInfo +class ChunkMissingValues : public ChunkInfoCloneable { public: + ChunkMissingValues(const ChunkMissingValues & other) = default; + using RowsBitMask = std::vector; /// a bit per row for a column const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index d27002197d2..d9fab88fe1f 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -147,13 +147,10 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto chunk_info = chunk.getChunkInfo()) + if (auto agg_info = chunk.getChunkInfos().get()) { - if (const auto * agg_info = typeid_cast(chunk_info.get())) - { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; - } + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; } return true; diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index cbf73c5cb07..25c15d40c9a 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -73,13 +73,10 @@ bool PullingPipelineExecutor::pull(Block & block) } block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto chunk_info = chunk.getChunkInfo()) + if (auto agg_info = chunk.getChunkInfos().get()) { - if (const auto * agg_info = typeid_cast(chunk_info.get())) - { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; - } + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; } return true; diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 9c85dab70c4..6067e2f3db3 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -180,7 +180,9 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); Chunks piece; - piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); + piece.emplace_back(std::move(columns), count); + piece.back().setChunkInfos(concatenated.getChunkInfos()); + writeRowGroup(std::move(piece)); } } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 4136fc5a5f2..46be6e74693 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -8,8 +8,9 @@ namespace ErrorCodes } IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header) - : IProcessor({std::move(input_header)}, {std::move(output_header)}), - input(inputs.front()), output(outputs.front()) + : IProcessor({std::move(input_header)}, {std::move(output_header)}) + , input(inputs.front()) + , output(outputs.front()) { } diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index a5befca7233..f33cc267c44 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -51,16 +51,11 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num if (!input.chunk.hasRows()) return; - const auto & info = input.chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm"); + const auto & arenas_info = input.chunk.getChunkInfos().get(); + if (!arenas_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ChunkInfoWithAllocatedBytes was not set for chunk in FinishAggregatingInOrderAlgorithm"); - Int64 allocated_bytes = 0; - /// Will be set by AggregatingInOrderTransform during local aggregation; will be nullptr during merging on initiator. - if (const auto * arenas_info = typeid_cast(info.get())) - allocated_bytes = arenas_info->allocated_bytes; - - states[source_num] = State{input.chunk, description, allocated_bytes}; + states[source_num] = State{input.chunk, description, arenas_info->allocated_bytes}; } IMergingAlgorithm::Status FinishAggregatingInOrderAlgorithm::merge() @@ -134,7 +129,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge() info->chunk_num = chunk_num++; Chunk chunk; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); return chunk; } @@ -161,7 +156,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation() chunks.emplace_back(std::move(new_columns), current_rows); } - chunks.back().setChunkInfo(std::make_shared()); + chunks.back().getChunkInfos().add(std::make_shared()); states[i].current_row = states[i].to_row; /// We assume that sizes in bytes of rows are almost the same. diff --git a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h index bcf4e759024..e4f22deec8d 100644 --- a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h +++ b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h @@ -6,18 +6,22 @@ namespace DB { /// To carry part level if chunk is produced by a merge tree source -class MergeTreePartLevelInfo : public ChunkInfo +class MergeTreePartLevelInfo : public ChunkInfoCloneable { public: MergeTreePartLevelInfo() = delete; - explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { } + explicit MergeTreePartLevelInfo(ssize_t part_level) + : origin_merge_tree_part_level(part_level) + { } + MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default; + size_t origin_merge_tree_part_level = 0; }; inline size_t getPartLevelFromChunk(const Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - if (const auto * part_level_info = typeid_cast(info.get())) + const auto part_level_info = chunk.getChunkInfos().get(); + if (part_level_info) return part_level_info->origin_merge_tree_part_level; return 0; } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index 9e5c1249c4e..d0b0291511d 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false) { - chunk->setChunkInfo(std::make_shared(std::move(chunk->replace_final_selection))); + chunk->getChunkInfos().add(std::make_shared(std::move(chunk->replace_final_selection))); return IMergingAlgorithm::Status(std::move(*chunk), finished); } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 2fbd73c9072..770510232cc 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -1,8 +1,10 @@ #pragma once +#include #include #include #include #include +#include "Processors/Chunk.h" namespace Poco { @@ -14,11 +16,13 @@ namespace DB /** Use in skipping final to keep list of indices of selected row after merging final */ -struct ChunkSelectFinalIndices : public ChunkInfo +struct ChunkSelectFinalIndices : public ChunkInfoCloneable { + explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); + ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default; + const ColumnPtr column_holder; const ColumnUInt64 * select_final_indices = nullptr; - explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); }; /** Merges several sorted inputs into one. diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index fbb47969b2f..b1b0182a113 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare() bool is_port_full = !output.canPush(); /// Push if has data. - if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full) + if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full) output.push(std::move(state.output_chunk)); if (!is_initialized) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index c218f622870..be629271736 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -129,7 +129,7 @@ public: IMergingAlgorithm::Status status = algorithm.merge(); - if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo()) + if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty()) { // std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl; state.output_chunk = std::move(status.chunk); diff --git a/src/Processors/Sinks/RemoteSink.h b/src/Processors/Sinks/RemoteSink.h index 30cf958c072..c05cc1defcb 100644 --- a/src/Processors/Sinks/RemoteSink.h +++ b/src/Processors/Sinks/RemoteSink.h @@ -20,7 +20,7 @@ public: } String getName() const override { return "RemoteSink"; } - void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } + void consume (Chunk & chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); } void onFinish() override { RemoteInserter::onFinish(); } }; diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 5f9f9f9b1a1..146bd4505a4 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -15,8 +15,10 @@ void SinkToStorage::onConsume(Chunk chunk) */ Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); - consume(chunk.clone()); - if (!lastBlockIsDuplicate()) + setDeduplicationTokenForChildren(chunk); + fillDeduplicationTokenForChildren(chunk); + consume(chunk); + if (!lastBlockIsDuplicate()) // TODO: remove that cur_chunk = std::move(chunk); } diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 023bbd8b094..07a944b0943 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -1,6 +1,10 @@ #pragma once +#include #include +#include #include +#include +#include "Processors/Transforms/NumberBlocksTransform.h" namespace DB { @@ -18,9 +22,38 @@ public: void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } protected: - virtual void consume(Chunk chunk) = 0; + virtual void consume(Chunk & chunk) = 0; virtual bool lastBlockIsDuplicate() const { return false; } + virtual std::shared_ptr setDeduplicationTokenForChildren(Chunk & chunk) const + { + auto token_info = chunk.getChunkInfos().get(); + if (token_info) + return token_info; + + auto block_dedup_token_for_children = std::make_shared(""); + chunk.getChunkInfos().add(block_dedup_token_for_children); + return block_dedup_token_for_children; + } + + virtual std::shared_ptr getDeduplicationTokenForChildren(Chunk & chunk) const + { + return chunk.getChunkInfos().get(); + } + + virtual void fillDeduplicationTokenForChildren(Chunk & chunk) const + { + SipHash hash; + for (const auto & colunm: chunk.getColumns()) + { + colunm->updateHashFast(hash); + } + const auto hash_value = hash.get128(); + + chunk.getChunkInfos().get()->addTokenPart( + fmt::format(":hash-{}", toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]))); + } + private: std::vector table_locks; @@ -38,7 +71,7 @@ class NullSinkToStorage : public SinkToStorage public: using SinkToStorage::SinkToStorage; std::string getName() const override { return "NullSinkToStorage"; } - void consume(Chunk) override {} + void consume(Chunk &) override {} }; using SinkPtr = std::shared_ptr; diff --git a/src/Processors/Sources/BlocksSource.h b/src/Processors/Sources/BlocksSource.h index ec0dc9609f1..7ac460c14e2 100644 --- a/src/Processors/Sources/BlocksSource.h +++ b/src/Processors/Sources/BlocksSource.h @@ -43,7 +43,10 @@ protected: info->bucket_num = res.info.bucket_num; info->is_overflows = res.info.is_overflows; - return Chunk(res.getColumns(), res.rows(), std::move(info)); + auto chunk = Chunk(res.getColumns(), res.rows()); + chunk.getChunkInfos().add(std::move(info)); + + return chunk; } private: diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 3d7dd3f76b8..1578bd389c9 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -176,7 +176,7 @@ std::optional RemoteSource::tryGenerate() auto info = std::make_shared(); info->bucket_num = block.info.bucket_num; info->is_overflows = block.info.is_overflows; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); } return chunk; diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index 00f40a34361..fb888c104c4 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -20,7 +20,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp auto info = std::make_shared(); info->bucket_num = data.info.bucket_num; info->is_overflows = data.info.is_overflows; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 9ffe15d0f85..45b0960ec8f 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate() variants.aggregates_pool = variants.aggregates_pools.at(0).get(); /// Pass info about used memory by aggregate functions further. - to_push_chunk.setChunkInfo(std::make_shared(cur_block_bytes)); + to_push_chunk.getChunkInfos().add(std::make_shared(cur_block_bytes)); cur_block_bytes = 0; cur_block_size = 0; @@ -351,11 +351,12 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati void FinalizeAggregatedTransform::transform(Chunk & chunk) { if (params->final) - finalizeChunk(chunk, aggregates_mask); - else if (!chunk.getChunkInfo()) { - auto info = std::make_shared(); - chunk.setChunkInfo(std::move(info)); + finalizeChunk(chunk, aggregates_mask); + } + else if (!chunk.getChunkInfos().get()) + { + chunk.getChunkInfos().add(std::make_shared()); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 5d50e97f552..6433f862dfd 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -5,6 +5,7 @@ #include #include #include +#include "Processors/Chunk.h" namespace DB { @@ -12,10 +13,12 @@ namespace DB struct InputOrderInfo; using InputOrderInfoPtr = std::shared_ptr; -struct ChunkInfoWithAllocatedBytes : public ChunkInfo +struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable { + ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default; explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) : allocated_bytes(allocated_bytes_) {} + Int64 allocated_bytes; }; diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index b48d435720a..d6595ef9e9a 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block) UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); return chunk; } @@ -44,15 +44,11 @@ namespace { const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk."); - - const auto * agg_info = typeid_cast(info.get()); + auto agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo."); - return agg_info; + return agg_info.get(); } /// Reads chunks from file in native format. Provide chunks with aggregation info. @@ -210,11 +206,7 @@ private: void process(Chunk && chunk) { - if (!chunk.hasChunkInfo()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName()); - - const auto & info = chunk.getChunkInfo(); - const auto * chunks_to_merge = typeid_cast(info.get()); + auto chunks_to_merge = chunk.getChunkInfos().get(); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName()); diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index e167acde067..430a9a6e50a 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -8,6 +9,7 @@ #include #include #include +#include "Processors/Chunk.h" namespace CurrentMetrics { @@ -19,7 +21,7 @@ namespace CurrentMetrics namespace DB { -class AggregatedChunkInfo : public ChunkInfo +class AggregatedChunkInfo : public ChunkInfoCloneable { public: bool is_overflows = false; diff --git a/src/Processors/Transforms/FilterTransform.cpp b/src/Processors/Transforms/FilterTransform.cpp index 0793bb3db5b..36aea045b18 100644 --- a/src/Processors/Transforms/FilterTransform.cpp +++ b/src/Processors/Transforms/FilterTransform.cpp @@ -38,10 +38,9 @@ static void replaceFilterToConstant(Block & block, const String & filter_column_ static std::shared_ptr getSelectByFinalIndices(Chunk & chunk) { - if (auto select_final_indices_info = std::dynamic_pointer_cast(chunk.getChunkInfo())) + if (auto select_final_indices_info = chunk.getChunkInfos().extract()) { const auto & index_column = select_final_indices_info->select_final_indices; - chunk.setChunkInfo(nullptr); if (index_column && index_column->size() != chunk.getNumRows()) return select_final_indices_info; } diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 3e2a9462e54..ca204bcb482 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -365,10 +365,9 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() return Status::Finished; } - if (!data.chunk.hasChunkInfo()) + task = data.chunk.getChunkInfos().get(); + if (!task) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info"); - - task = std::dynamic_pointer_cast(data.chunk.getChunkInfo()); } else { @@ -479,7 +478,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (output.isFinished()) continue; Chunk chunk; - chunk.setChunkInfo(std::make_shared()); + chunk.getChunkInfos().add(std::make_shared()); output.push(std::move(chunk)); output.finish(); } @@ -496,7 +495,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() { Chunk chunk; auto task = std::make_shared(delayed_blocks, left_delayed_stream_finished_counter); - chunk.setChunkInfo(task); + chunk.getChunkInfos().add(std::move(task)); output.push(std::move(chunk)); } delayed_blocks = nullptr; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index a308af03662..5fdea2524e2 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -1,5 +1,7 @@ #pragma once +#include #include +#include "Processors/Chunk.h" namespace DB @@ -111,11 +113,12 @@ private: }; -class DelayedBlocksTask : public ChunkInfo +class DelayedBlocksTask : public ChunkInfoCloneable { public: DelayedBlocksTask() = default; + DelayedBlocksTask(const DelayedBlocksTask & other) = default; explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_) : delayed_blocks(std::move(delayed_blocks_)) , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_) diff --git a/src/Processors/Transforms/MemoryBoundMerging.h b/src/Processors/Transforms/MemoryBoundMerging.h index 607087fb39c..d7bc320173b 100644 --- a/src/Processors/Transforms/MemoryBoundMerging.h +++ b/src/Processors/Transforms/MemoryBoundMerging.h @@ -150,11 +150,7 @@ private: if (!chunk.hasRows()) return; - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform."); - - const auto * agg_info = typeid_cast(info.get()); + const auto & agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception( ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedForMemoryBoundMergingTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index fc40c6894bb..ea9ebb0f96e 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -30,10 +30,10 @@ void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_ auto info = std::make_shared(); info->bucket_num = bucket; info->is_overflows = is_overflows; - info->chunks = std::make_unique(std::move(chunks)); + info->chunks = std::make_shared(std::move(chunks)); Chunk chunk; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); output.push(std::move(chunk)); } @@ -255,11 +255,10 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) if (!chunk.hasRows()) return; - const auto & info = chunk.getChunkInfo(); - if (!info) + if (chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform."); - if (const auto * agg_info = typeid_cast(info.get())) + if (auto agg_info = chunk.getChunkInfos().get()) { Int32 bucket = agg_info->bucket_num; bool is_overflows = agg_info->is_overflows; @@ -275,7 +274,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) last_bucket_number[input] = bucket; } } - else if (typeid_cast(info.get())) + else if (chunk.getChunkInfos().get()) { single_level_chunks.emplace_back(std::move(chunk)); } @@ -304,7 +303,11 @@ void GroupingAggregatedTransform::work() Int32 bucket = cur_block.info.bucket_num; auto chunk_info = std::make_shared(); chunk_info->bucket_num = bucket; - chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info))); + + auto chunk = Chunk(cur_block.getColumns(), cur_block.rows()); + chunk.getChunkInfos().add(std::move(chunk_info)); + + chunks_map[bucket].emplace_back(std::move(chunk)); } } } @@ -319,9 +322,7 @@ MergingAggregatedBucketTransform::MergingAggregatedBucketTransform( void MergingAggregatedBucketTransform::transform(Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - const auto * chunks_to_merge = typeid_cast(info.get()); - + auto chunks_to_merge = chunk.getChunkInfos().get(); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge."); @@ -330,11 +331,10 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) BlocksList blocks_list; for (auto & cur_chunk : *chunks_to_merge->chunks) { - const auto & cur_info = cur_chunk.getChunkInfo(); - if (!cur_info) + if (cur_chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform."); - if (const auto * agg_info = typeid_cast(cur_info.get())) + if (auto agg_info = cur_chunk.getChunkInfos().get()) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = agg_info->is_overflows; @@ -342,7 +342,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) blocks_list.emplace_back(std::move(block)); } - else if (typeid_cast(cur_info.get())) + else if (cur_chunk.getChunkInfos().get()) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = false; @@ -361,7 +361,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) res_info->is_overflows = chunks_to_merge->is_overflows; res_info->bucket_num = chunks_to_merge->bucket_num; res_info->chunk_num = chunks_to_merge->chunk_num; - chunk.setChunkInfo(std::move(res_info)); + chunk.getChunkInfos().add(std::move(res_info)); auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled); @@ -405,11 +405,7 @@ bool SortingAggregatedTransform::tryPushChunk() void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform."); - - const auto * agg_info = typeid_cast(info.get()); + auto agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h index 77ee3034ffc..958b43b11ed 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h @@ -2,6 +2,7 @@ #include #include +#include "Processors/Chunk.h" #include #include #include @@ -142,9 +143,9 @@ private: void addChunk(Chunk chunk, size_t from_input); }; -struct ChunksToMerge : public ChunkInfo +struct ChunksToMerge : public ChunkInfoCloneable { - std::unique_ptr chunks; + std::shared_ptr chunks; Int32 bucket_num = -1; bool is_overflows = false; UInt64 chunk_num = 0; // chunk number in order of generation, used during memory bound merging to restore chunks order diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index ad723da7527..446e60a0b81 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -32,11 +32,10 @@ void MergingAggregatedTransform::consume(Chunk chunk) total_input_rows += input_rows; ++total_input_blocks; - const auto & info = chunk.getChunkInfo(); - if (!info) + if (chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform."); - if (const auto * agg_info = typeid_cast(info.get())) + if (auto agg_info = chunk.getChunkInfos().get()) { /** If the remote servers used a two-level aggregation method, * then blocks will contain information about the number of the bucket. @@ -49,7 +48,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block)); } - else if (typeid_cast(info.get())) + else if (chunk.getChunkInfos().get()) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); block.info.is_overflows = false; @@ -89,7 +88,8 @@ Chunk MergingAggregatedTransform::generate() UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.setChunkInfo(std::move(info)); + + chunk.getChunkInfos().add(std::move(info)); return chunk; } diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp new file mode 100644 index 00000000000..61ff3f6bfd5 --- /dev/null +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -0,0 +1 @@ +#include diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h new file mode 100644 index 00000000000..ca990a925c1 --- /dev/null +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -0,0 +1,224 @@ +#pragma once + +#include +#include +#include +#include + +#include + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace DB +{ + struct SerialBlockNumberInfo : public ChunkInfoCloneable + { + SerialBlockNumberInfo(const SerialBlockNumberInfo & other) = default; + explicit SerialBlockNumberInfo(size_t block_number_) + : block_number(block_number_) + { + } + + size_t block_number = 0; + }; + + + class NumberBlocksTransform : public ISimpleTransform + { + public: + explicit NumberBlocksTransform(const Block & header) + : ISimpleTransform(header, header, true) + { + } + + String getName() const override { return "NumberBlocksTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().add(std::make_shared(block_number++)); + } + + private: + size_t block_number = 0; + }; + + + class DedupTokenInfo : public ChunkInfoCloneable + { + public: + DedupTokenInfo(const DedupTokenInfo & other) = default; + explicit DedupTokenInfo(String first_part) + { + addTokenPart(std::move(first_part)); + } + + String getToken() const + { + String result; + result.reserve(getTotalSize()); + + for (const auto & part : token_parts) + { + result.append(part); + } + + return result; + } + + void addTokenPart(String part) + { + token_parts.push_back(std::move(part)); + } + + private: + size_t getTotalSize() const + { + size_t size = 0; + for (const auto & part : token_parts) + size += part.size(); + return size; + } + + std::vector token_parts; + }; + + class AddUserDeduplicationTokenTransform : public ISimpleTransform + { + public: + AddUserDeduplicationTokenTransform(String token_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , token(token_) + { + } + + String getName() const override { return "AddUserDeduplicationTokenTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().add(std::make_shared(token)); + } + + private: + String token; + }; + + + class CheckInsertDeduplicationTokenTransform : public ISimpleTransform + { + public: + CheckInsertDeduplicationTokenTransform(String debug_, bool must_be_present_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , debug(debug_) + , must_be_present(must_be_present_) + { + } + + String getName() const override { return "CheckInsertDeduplicationTokenTransform"; } + + void transform(Chunk & chunk) override + { + if (!must_be_present) + return; + + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); + + LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), + "{}, token: {}", + debug, token_info->getToken()); + } + + private: + String debug; + bool must_be_present = false; + }; + + + class ExtendDeduplicationWithBlockNumberFromInfoTokenTransform : public ISimpleTransform + { + public: + explicit ExtendDeduplicationWithBlockNumberFromInfoTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "ExtendDeduplicationWithBlockNumberFromInfoTokenTransform"; } + + void transform(Chunk & chunk) override + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, recs {}", chunk.getChunkInfos().size()); + + auto block_number_info = chunk.getChunkInfos().get(); + if (!block_number_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have SerialBlockNumberInfo as ChunkInfo"); + + token_info->addTokenPart(fmt::format(":block-{}", block_number_info->block_number)); + + LOG_DEBUG(getLogger("ExtendDeduplicationWithBlockNumberFromInfoTokenTransform"), + "updated with {}, result: {}", + fmt::format(":block-{}", block_number_info->block_number), token_info->getToken()); + } + }; + + class ExtendDeduplicationWithBlockNumberTokenTransform : public ISimpleTransform + { + public: + explicit ExtendDeduplicationWithBlockNumberTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "ExtendDeduplicationWithBlockNumberTokenTransform"; } + + void transform(Chunk & chunk) override + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo"); + + auto x = block_number++; + token_info->addTokenPart(fmt::format(":block-{}", x)); + + LOG_DEBUG(getLogger("ExtendDeduplicationWithBlockNumberTokenTransform"), + "updated with {}, result: {}", + fmt::format(":block-{}", x), token_info->getToken()); + } + private: + size_t block_number = 0; + }; + + class ExtendDeduplicationWithTokenPartTransform : public ISimpleTransform + { + public: + ExtendDeduplicationWithTokenPartTransform(String token_part_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , token_part(token_part_) + { + } + + String getName() const override { return "ExtendDeduplicationWithBlockNumberTokenTransform"; } + + void transform(Chunk & chunk) override + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, try to add token part {}", token_part); + + token_info->addTokenPart(fmt::format("{}", token_part)); + + LOG_DEBUG(getLogger("ExtendDeduplicationWithTokenPartTransform"), + "updated with {}, result: {}", + token_part, token_info->getToken()); + } + + private: + String token_part; + }; + +} diff --git a/src/Processors/Transforms/SelectByIndicesTransform.h b/src/Processors/Transforms/SelectByIndicesTransform.h index 480ab1a0f61..b44f5a3203e 100644 --- a/src/Processors/Transforms/SelectByIndicesTransform.h +++ b/src/Processors/Transforms/SelectByIndicesTransform.h @@ -26,7 +26,7 @@ public: void transform(Chunk & chunk) override { size_t num_rows = chunk.getNumRows(); - const auto * select_final_indices_info = typeid_cast(chunk.getChunkInfo().get()); + auto select_final_indices_info = chunk.getChunkInfos().extract(); if (!select_final_indices_info || !select_final_indices_info->select_final_indices) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk passed to SelectByIndicesTransform without indices column"); @@ -41,7 +41,6 @@ public: chunk.setColumns(std::move(columns), index_column->size()); } - chunk.setChunkInfo(nullptr); } }; diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 0d69b6e0a8d..4d693e5e809 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -17,9 +17,14 @@ SquashingChunksTransform::SquashingChunksTransform( void SquashingChunksTransform::onConsume(Chunk chunk) { + if (cur_chunkinfos.empty()) + cur_chunkinfos = chunk.getChunkInfos(); + if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) { cur_chunk.setColumns(block.getColumns(), block.rows()); + cur_chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = {}; } } @@ -35,6 +40,8 @@ void SquashingChunksTransform::onFinish() { auto block = squashing.add({}); finish_chunk.setColumns(block.getColumns(), block.rows()); + finish_chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = {}; } void SquashingChunksTransform::work() @@ -65,7 +72,10 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) if (!finished) { if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + { chunk.setColumns(block.getColumns(), block.rows()); + chunk.setChunkInfos(chunk.getChunkInfos()); + } } else { diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index f82e9e46a61..6de96d4100d 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -3,6 +3,7 @@ #include #include #include +#include "Processors/Chunk.h" namespace DB { @@ -25,6 +26,7 @@ protected: private: SquashingTransform squashing; Chunk cur_chunk; + Chunk::ChunkInfoCollection cur_chunkinfos; Chunk finish_chunk; }; diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index aa86879e62c..59fceccb538 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -150,11 +150,7 @@ void TotalsHavingTransform::transform(Chunk & chunk) /// Block with values not included in `max_rows_to_group_by`. We'll postpone it. if (overflow_row) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform."); - - const auto * agg_info = typeid_cast(info.get()); + const auto & agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform."); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 70f30faa5b1..056f8d07627 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -23,9 +24,12 @@ #include #include #include +#include "Processors/Chunk.h" +#include "Processors/Transforms/NumberBlocksTransform.h" #include #include +#include namespace ProfileEvents @@ -120,6 +124,7 @@ private: { QueryPipeline pipeline; PullingPipelineExecutor executor; + Chunk::ChunkInfoCollection chunk_infos; explicit State(QueryPipeline pipeline_) : pipeline(std::move(pipeline_)) @@ -137,7 +142,7 @@ class PushingToLiveViewSink final : public SinkToStorage public: PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToLiveViewSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; private: StorageLiveView & live_view; @@ -151,7 +156,7 @@ class PushingToWindowViewSink final : public SinkToStorage public: PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToWindowViewSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; private: StorageWindowView & window_view; @@ -215,50 +220,6 @@ std::optional generateViewChain( const auto & insert_settings = insert_context->getSettingsRef(); - // Do not deduplicate insertions into MV if the main insertion is Ok - if (disable_deduplication_for_children) - { - insert_context->setSetting("insert_deduplicate", Field{false}); - } - else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && - !insert_settings.insert_deduplication_token.value.empty()) - { - - /// TODO! - /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle - * deduplication in complex INSERT flows. - * - * Example: - * - * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 - * | | - * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ - * - * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will - * be inserted into `ds_2_1`. - * - * We are forced to use view id instead of table id because there are some possible INSERT flows where no tables - * are involved. - * - * Example: - * - * landing -┬--> mv_1_1 --┬-> ds_1_1 - * | | - * └--> mv_1_2 --┘ - * - */ - auto insert_deduplication_token = insert_settings.insert_deduplication_token.value; - - if (view_id.hasUUID()) - insert_deduplication_token += "_" + toString(view_id.uuid); - else - insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); - - LOG_DEBUG(getLogger("PushingToViews"), "insert_deduplication_token {}", insert_deduplication_token); - - insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); - } - // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) select_context->setSetting("parallelize_output_from_storages", Field{false}); @@ -364,12 +325,22 @@ std::optional generateViewChain( insert_columns.emplace_back(column.name); } - InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); + InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false, false); /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); + out.addSource(std::make_shared("Before inner chain", !disable_deduplication_for_children, out.getInputHeader())); + + if (!disable_deduplication_for_children) + { + String addition_part = view_id.hasUUID() ? toString(view_id.uuid) : view_id.getFullNameNotQuoted(); + out.addSource(std::make_shared(fmt::format(":mv-{}", addition_part), out.getInputHeader())); + } + + out.addSource(std::make_shared("Before extend token", !disable_deduplication_for_children, out.getInputHeader())); + if (interpreter.shouldAddSquashingFroStorage(inner_table)) { bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); @@ -381,6 +352,8 @@ std::optional generateViewChain( table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); } + out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); + auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); counting->setProgressCallback(insert_context->getProgressCallback()); @@ -422,11 +395,20 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { + out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); + + if (!disable_deduplication_for_children) + { + out.addSource(std::make_shared(out.getInputHeader())); + } + auto executing_inner_query = std::make_shared( storage_header, views_data->views.back(), views_data); executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); out.addSource(std::move(executing_inner_query)); + + out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); } return out; @@ -641,6 +623,9 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); + //pipeline.addTransform(std::make_shared(pipeline.getHeader())); + return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -743,6 +728,7 @@ void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); state.emplace(process(block, view, *views_data)); + state->chunk_infos = chunk.getChunkInfos(); } @@ -760,6 +746,9 @@ ExecutingInnerQueryFromViewTransform::GenerateResult ExecutingInnerQueryFromView break; } + // here are we copy chunk_infos to the all chunks generated from the one consumed chunk + res.chunk.getChunkInfos().append(state->chunk_infos.clone()); + if (res.is_done) state.reset(); @@ -774,10 +763,10 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi { } -void PushingToLiveViewSink::consume(Chunk chunk) +void PushingToLiveViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(getHeader().cloneWithColumns(chunk.detachColumns()), context); + live_view.writeBlock(getHeader().cloneWithColumns(chunk.getColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -797,11 +786,11 @@ PushingToWindowViewSink::PushingToWindowViewSink( { } -void PushingToWindowViewSink::consume(Chunk chunk) +void PushingToWindowViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); + window_view, getHeader().cloneWithColumns(chunk.getColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index f0b2ead687e..a9e5b1535c0 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -193,7 +193,7 @@ public: return concurrency_control; } - void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } + void addResources(QueryPlanResourceHolder resources_) { resources.append(std::move(resources_)); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); } diff --git a/src/QueryPipeline/QueryPlanResourceHolder.cpp b/src/QueryPipeline/QueryPlanResourceHolder.cpp index 2cd4dc42a83..bb2be2c8ffb 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.cpp +++ b/src/QueryPipeline/QueryPlanResourceHolder.cpp @@ -5,7 +5,7 @@ namespace DB { -QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept +QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolder && rhs) noexcept { table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); @@ -16,6 +16,12 @@ QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHo return *this; } +QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept +{ + append(std::move(rhs)); + return *this; +} + QueryPlanResourceHolder::QueryPlanResourceHolder() = default; QueryPlanResourceHolder::QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept = default; QueryPlanResourceHolder::~QueryPlanResourceHolder() = default; diff --git a/src/QueryPipeline/QueryPlanResourceHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h index ed9eb68b7ba..e40fa04f72c 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.h +++ b/src/QueryPipeline/QueryPlanResourceHolder.h @@ -19,9 +19,12 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(); QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); + + QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete; /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept; + QueryPlanResourceHolder & append(QueryPlanResourceHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. /// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index e556bda2561..2e3096683d0 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -134,7 +134,7 @@ DistributedSink::DistributedSink( } -void DistributedSink::consume(Chunk chunk) +void DistributedSink::consume(Chunk & chunk) { if (is_first_chunk) { @@ -142,7 +142,7 @@ void DistributedSink::consume(Chunk chunk) is_first_chunk = false; } - auto ordinary_block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto ordinary_block = getHeader().cloneWithColumns(chunk.getColumns()); if (insert_sync) writeSync(ordinary_block); @@ -420,7 +420,7 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// to resolve tables (in InterpreterInsertQuery::getTable()) auto copy_query_ast = query_ast->clone(); - InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized); + InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized, false, false, false); auto block_io = interp.execute(); job.pipeline = std::move(block_io.pipeline); @@ -715,7 +715,7 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const try { - InterpreterInsertQuery interp(query_ast, context, allow_materialized); + InterpreterInsertQuery interp(query_ast, context, allow_materialized, false, false, false); auto block_io = interp.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index a4c95633595..5b7396f2c6f 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -49,7 +49,7 @@ public: const Names & columns_to_send_); String getName() const override { return "DistributedSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index abd4b4ce23b..6ca4ec6e079 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -740,7 +740,7 @@ bool StorageFileLog::streamToViews() auto new_context = Context::createCopy(getContext()); - InterpreterInsertQuery interpreter(insert, new_context, false, true, true); + InterpreterInsertQuery interpreter(insert, new_context, false, true, true, false); auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 0f3b03f0955..1ca7c1f71d0 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -803,12 +803,12 @@ public: String getName() const override { return "HDFSSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 03a30d47d91..7b19dacb4c9 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1098,7 +1098,7 @@ bool StorageKafka::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true); + InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true, false); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index 792133ced64..9803fa0a160 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -71,9 +71,9 @@ public: new_hash.reset(); } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); block.updateHash(*new_hash); new_blocks->push_back(std::move(block)); } diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index bc8cb0ce69a..57d75b969c3 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -377,7 +377,7 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptr(task->getInfo().data_part->info.level)); + return ChunkAndProgress{ - .chunk = Chunk(ordered_columns, res.row_count, add_part_level ? std::make_shared(task->getInfo().data_part->info.level) : nullptr), + .chunk = std::move(chunk), .num_read_rows = res.num_read_rows, .num_read_bytes = res.num_read_bytes, .is_finished = false}; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index fbb48b37482..8841f490e38 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -275,7 +275,10 @@ try ++it; } - return Chunk(std::move(res_columns), rows_read, add_part_level ? std::make_shared(data_part->info.level) : nullptr); + auto result = Chunk(std::move(res_columns), rows_read); + if (add_part_level) + result.getChunkInfos().add(std::make_shared(data_part->info.level)); + return result; } } else diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index f0eb56aea13..2e455cd2bd5 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,8 +1,11 @@ #include #include +#include #include #include +#include "Common/Exception.h" #include +#include "Interpreters/StorageID.h" namespace ProfileEvents { @@ -56,7 +59,7 @@ void MergeTreeSink::onFinish() finishDelayedChunk(); } -void MergeTreeSink::consume(Chunk chunk) +void MergeTreeSink::consume(Chunk & chunk) { LOG_INFO(storage.log, "consume() called num_blocks_processed {}, chunks: rows {} columns {} bytes {}", num_blocks_processed, @@ -65,7 +68,7 @@ void MergeTreeSink::consume(Chunk chunk) if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); @@ -80,6 +83,30 @@ void MergeTreeSink::consume(Chunk chunk) size_t streams = 0; bool support_parallel_write = false; + String block_dedup_token; + if (storage.getDeduplicationLog()) + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info && !context->getSettingsRef().insert_deduplication_token.value.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DedupTokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); + + if (token_info) + { + block_dedup_token = token_info->getToken(); + + LOG_DEBUG(storage.log, + "dedup token from insert deduplication token in chunk: {}", + block_dedup_token); + } + else + { + LOG_DEBUG(storage.log, + "dedup token from hash is caclulated"); + } + } + for (auto & current_block : part_blocks) { ProfileEvents::Counters part_counters; @@ -99,6 +126,11 @@ void MergeTreeSink::consume(Chunk chunk) current_block.block.clear(); current_block.partition.clear(); + if (auto children_dedup_token = getDeduplicationTokenForChildren(chunk)) + { + children_dedup_token->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + } + /// If optimize_on_insert setting is true, current_block could become empty after merge /// and we didn't create part. if (!temp_part.part) @@ -107,19 +139,6 @@ void MergeTreeSink::consume(Chunk chunk) if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; - String block_dedup_token; - if (storage.getDeduplicationLog()) - { - const String & dedup_token = settings.insert_deduplication_token; - if (!dedup_token.empty()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block - block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); - ++chunk_dedup_seqnum; - } - } - size_t max_insert_delayed_streams_for_parallel_write; if (settings.max_insert_delayed_streams_for_parallel_write.changed) @@ -151,6 +170,7 @@ void MergeTreeSink::consume(Chunk chunk) partitions = DelayedPartitions{}; } + /// TODO block_dedup_token partitions.emplace_back(MergeTreeSink::DelayedChunk::Partition { .temp_part = std::move(temp_part), diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 07ab3850df2..4e1ca5c1f60 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -25,7 +25,7 @@ public: ~MergeTreeSink() override; String getName() const override { return "MergeTreeSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onStart() override; void onFinish() override; @@ -35,13 +35,13 @@ private: size_t max_parts_per_block; ContextPtr context; StorageSnapshotPtr storage_snapshot; - UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token UInt64 num_blocks_processed = 0; /// We can delay processing for previous chunk and start writing a new one. struct DelayedChunk; std::unique_ptr delayed_chunk; + void fillDeduplicationTokenForChildren(Chunk &) const override { /* For MergeTree we get the tokens from part checksums */ } void finishDelayedChunk(); }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 2bb9aad1e53..ce140c93cbe 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -253,12 +254,12 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const } template -void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) +void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); const auto & settings = context->getSettingsRef(); @@ -284,13 +285,40 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if constexpr (async_insert) { - const auto & chunk_info = chunk.getChunkInfo(); - if (const auto * async_insert_info_ptr = typeid_cast(chunk_info.get())) + const auto async_insert_info_ptr = chunk.getChunkInfos().get(); + if (async_insert_info_ptr) async_insert_info = std::make_shared(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens); else throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } + String block_dedup_token; + if constexpr (!async_insert) + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info && !context->getSettingsRef().insert_deduplication_token.value.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DedupTokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); + + + if (token_info) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = token_info->getToken(); + + LOG_DEBUG(storage.log, + "dedup token from insert deduplication token in chunk: {}", + block_dedup_token); + } + else + { + LOG_DEBUG(storage.log, + "dedup token from hash is caclulated"); + } + } + auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; @@ -342,23 +370,10 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) } else { - if (deduplicate) { - String block_dedup_token; - /// We add the hash from the data and partition identifier to deduplication ID. /// That is, do not insert the same data to the same partition twice. - - const String & dedup_token = settings.insert_deduplication_token; - if (!dedup_token.empty()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block - block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); - ++chunk_dedup_seqnum; - } - block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token); LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num)); } @@ -366,6 +381,11 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) { LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } + + if (auto children_dedup_token = getDeduplicationTokenForChildren(chunk)) + { + children_dedup_token->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + } } profile_events_scope.reset(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 39623c20584..b1eff67d845 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -51,7 +51,7 @@ public: ~ReplicatedMergeTreeSinkImpl() override; void onStart() override; - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; String getName() const override { return "ReplicatedMergeTreeSink"; } @@ -139,6 +139,7 @@ private: /// We can delay processing for previous chunk and start writing a new one. std::unique_ptr delayed_chunk; + void fillDeduplicationTokenForChildren(Chunk &) const override { /* For MergeTree we get the tokens from part checksums */ } void finishDelayedChunk(const ZooKeeperWithFaultInjectionPtr & zookeeper); }; diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 4fb81d69070..36899011e33 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -40,7 +40,7 @@ void MessageQueueSink::onFinish() producer->finish(); } -void MessageQueueSink::consume(Chunk chunk) +void MessageQueueSink::consume(Chunk & chunk) { const auto & columns = chunk.getColumns(); if (columns.empty()) diff --git a/src/Storages/MessageQueueSink.h b/src/Storages/MessageQueueSink.h index b3c1e61734f..4a9248c6c4d 100644 --- a/src/Storages/MessageQueueSink.h +++ b/src/Storages/MessageQueueSink.h @@ -35,7 +35,7 @@ public: String getName() const override { return storage_name + "Sink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onStart() override; void onFinish() override; diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 0b88a9e8929..9c6d70f2c5b 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -644,7 +644,7 @@ bool StorageNATS::streamToViews() insert->table_id = table_id; // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, nats_context, false, true, true); + InterpreterInsertQuery interpreter(insert, nats_context, false, true, true, false); auto block_io = interpreter.execute(); auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index 09b009b26d8..ee2570756ed 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -51,7 +51,7 @@ SinkPtr PartitionedSink::getSinkForPartitionKey(StringRef partition_key) return it->second; } -void PartitionedSink::consume(Chunk chunk) +void PartitionedSink::consume(Chunk & chunk) { const auto & columns = chunk.getColumns(); @@ -104,7 +104,7 @@ void PartitionedSink::consume(Chunk chunk) for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index) { auto sink = getSinkForPartitionKey(partition_key); - sink->consume(std::move(partition_index_to_chunk[partition_index])); + sink->consume(partition_index_to_chunk[partition_index]); } } diff --git a/src/Storages/PartitionedSink.h b/src/Storages/PartitionedSink.h index 68edeb6fd73..fcd67556dc9 100644 --- a/src/Storages/PartitionedSink.h +++ b/src/Storages/PartitionedSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "PartitionedSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onException(std::exception_ptr exception) override; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index ba3cc6f58d0..57c8d24ccc2 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -697,7 +697,7 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->table_id = storage->getStorageID(); insert->columns = std::make_shared(buffer->columns_ast); - InterpreterInsertQuery interpreter(insert, insert_context, true); + InterpreterInsertQuery interpreter(insert, insert_context, true, false, false, false); auto io = interpreter.execute(); auto input = std::make_shared( result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 2bb1e2dde0d..4a5a621aa43 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -437,7 +437,7 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection auto insert_context = materialized_storage->getNestedTableContext(); - InterpreterInsertQuery interpreter(insert, insert_context); + InterpreterInsertQuery interpreter(insert, insert_context, false, false, false, false); auto block_io = interpreter.execute(); const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index e4b19992151..5bf5ab9b2f5 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1129,7 +1129,7 @@ bool StorageRabbitMQ::tryStreamToViews() } // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true); + InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true, false); auto block_io = interpreter.execute(); block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index c451cfd1bf5..1f7f6939f40 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -29,7 +29,7 @@ EmbeddedRocksDBSink::EmbeddedRocksDBSink( serializations = getHeader().getSerializations(); } -void EmbeddedRocksDBSink::consume(Chunk chunk) +void EmbeddedRocksDBSink::consume(Chunk & chunk) { auto rows = chunk.getNumRows(); const auto & columns = chunk.getColumns(); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.h b/src/Storages/RocksDB/EmbeddedRocksDBSink.h index 011322df829..2e1e0c7b429 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.h @@ -17,7 +17,7 @@ public: StorageEmbeddedRocksDB & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; String getName() const override { return "EmbeddedRocksDBSink"; } private: diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 01417b8977b..47e1b8feb43 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -310,7 +310,8 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt Block block; while (executor.pull(block)) { - sink->consume(Chunk{block.getColumns(), block.rows()}); + auto chunk = Chunk(block.getColumns(), block.rows()); + sink->consume(chunk); } } diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index c3a772e532c..12abd7a9849 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -498,7 +498,7 @@ bool StorageS3Queue::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true); + InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true, false); auto block_io = interpreter.execute(); auto file_iterator = createFileIterator(s3queue_context, nullptr); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 9c551e82a99..5dc407bf86d 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -600,12 +600,12 @@ public: String getName() const override { return "StorageAzureBlobSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index d9a0b2b4d59..d4defd92196 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -605,7 +605,7 @@ public: String getName() const override { return "BufferSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { size_t rows = chunk.getNumRows(); if (!rows) @@ -1018,7 +1018,7 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl auto insert_context = Context::createCopy(getContext()); insert_context->makeQueryContext(); - InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized}; + InterpreterInsertQuery interpreter(insert, insert_context, allow_materialized, false, false, false); auto block_io = interpreter.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 7b5916c0273..0478936fdfc 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1047,7 +1047,7 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter(new_query, query_context); + InterpreterInsertQuery interpreter(new_query, query_context, false, false, false, false); pipeline.addCompletedPipeline(interpreter.execute().pipeline); } else diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 76d75a368b3..581e0f87f15 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1770,12 +1770,12 @@ public: String getName() const override { return "StorageFileSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { std::lock_guard cancel_lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 20f99070000..c80e799a92b 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -119,10 +119,10 @@ public: std::string getName() const override { return "StorageKeeperMapSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -1248,7 +1248,10 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca Block block; while (executor.pull(block)) - sink->consume(Chunk{block.getColumns(), block.rows()}); + { + auto chunk = Chunk(block.getColumns(), block.rows()); + sink->consume(chunk); + } sink->finalize(strict); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 25c48de94e1..fad31e8ae03 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -337,7 +337,7 @@ public: } } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: @@ -388,9 +388,9 @@ private: }; -void LogSink::consume(Chunk chunk) +void LogSink::consume(Chunk & chunk) { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); metadata_snapshot->check(block, true); for (auto & stream : streams | boost::adaptors::map_values) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index f69c4adb552..b1bd7053c2e 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -63,7 +63,7 @@ public: String getName() const override { return "MemorySink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); storage_snapshot->metadata->check(block, true); diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 62a2a048642..e0818fafae9 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include @@ -107,12 +106,12 @@ public: String getName() const override { return "StorageMongoDBSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { Poco::MongoDB::Database db(db_name); Poco::MongoDB::Document::Vector documents; - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t num_rows = block.rows(); size_t num_cols = block.columns(); diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index da391909dff..2a8a7bd2ee7 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -151,9 +151,9 @@ public: String getName() const override { return "StorageMySQLSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); auto blocks = splitBlocks(block, max_batch_rows); mysqlxx::Transaction trans(entry); try diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 9379cb5a1c6..c99de3e3588 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -163,9 +163,9 @@ public: String getName() const override { return "PostgreSQLSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); if (!inserter) { if (on_conflict.empty()) diff --git a/src/Storages/StorageRedis.cpp b/src/Storages/StorageRedis.cpp index 83bb3c606c9..1a275320f43 100644 --- a/src/Storages/StorageRedis.cpp +++ b/src/Storages/StorageRedis.cpp @@ -147,7 +147,7 @@ class RedisSink : public SinkToStorage public: RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; String getName() const override { return "RedisSink"; } private: @@ -169,10 +169,10 @@ RedisSink::RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadat } } -void RedisSink::consume(Chunk chunk) +void RedisSink::consume(Chunk & chunk) { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -567,7 +567,8 @@ void StorageRedis::mutate(const MutationCommands & commands, ContextPtr context_ Block block; while (executor.pull(block)) { - sink->consume(Chunk{block.getColumns(), block.rows()}); + Chunk chunk(block.getColumns(), block.rows()); + sink->consume(chunk); } } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 9768653f3fe..7975b42ac02 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1100,12 +1100,12 @@ public: String getName() const override { return "StorageS3Sink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 179e4cee199..85417a2f2a4 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -141,7 +141,7 @@ public: String getName() const override { return "SQLiteSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString sqlbuf; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 54218351cf1..4105e8decd3 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -44,7 +44,7 @@ public: const String & backup_file_name_, bool persistent_); String getName() const override { return "SetOrJoinSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: @@ -82,9 +82,9 @@ SetOrJoinSink::SetOrJoinSink( { } -void SetOrJoinSink::consume(Chunk chunk) +void SetOrJoinSink::consume(Chunk & chunk) { - Block block = getHeader().cloneWithColumns(chunk.detachColumns()); + Block block = getHeader().cloneWithColumns(chunk.getColumns()); table.insertBlock(block, getContext()); if (persistent) diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 48389dccf48..7fa5a5670a3 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -217,9 +217,9 @@ public: } } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - block_out->write(getHeader().cloneWithColumns(chunk.detachColumns())); + block_out->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onFinish() override diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 8a71a771367..c21d24ac2e5 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -559,12 +559,12 @@ StorageURLSink::StorageURLSink( } -void StorageURLSink::consume(Chunk chunk) +void StorageURLSink::consume(Chunk & chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void StorageURLSink::onCancel() diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 5aca3df1513..e90585c79ca 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -251,7 +251,7 @@ public: const String & method = Poco::Net::HTTPRequest::HTTP_POST); std::string getName() const override { return "StorageURLSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onCancel() override; void onException(std::exception_ptr exception) override; void onFinish() override; diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 7afa1894a64..d295bebe615 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -119,7 +119,7 @@ public: ZooKeeperSink(const Block & header, ContextPtr context) : SinkToStorage(header), zookeeper(context->getZooKeeper()) { } String getName() const override { return "ZooKeeperSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t rows = block.rows(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index a9ec1f6c694..e0f3b437af7 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -305,7 +305,7 @@ namespace public: explicit AddingAggregatedChunkInfoTransform(Block header) : ISimpleTransform(header, header, false) { } - void transform(Chunk & chunk) override { chunk.setChunkInfo(std::make_shared()); } + void transform(Chunk & chunk) override { chunk.getChunkInfos().add(std::make_shared()); } String getName() const override { return "AddingAggregatedChunkInfoTransform"; } }; @@ -690,7 +690,7 @@ inline void StorageWindowView::fire(UInt32 watermark) StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); - InterpreterInsertQuery interpreter(insert, getContext()); + InterpreterInsertQuery interpreter(insert, getContext(), false, false, false, false); auto block_io = interpreter.execute(); auto pipe = Pipe(std::make_shared(blocks, header)); diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python index 3cd29247910..87c48a73513 100644 --- a/tests/queries/0_stateless/03008_deduplication.python +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -49,19 +49,40 @@ def instance_create_statement(table_name, table_columns, table_keys, table_engin return __format(template, **params) -def instance_insert_statement(table_name, count, insert_unique_blocks, use_insert_token): - template = """ - INSERT INTO {table_name} - SELECT {insert_columns} - FROM numbers({count}) {insert_settings}; - """ - return __format( - template, - table_name=table_name, - count=count, - insert_columns="'src_4', 4" if not insert_unique_blocks else "'src_' || toString(number), number", - insert_settings="" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'", - ) +def instance_insert_statement(table_name, count, insert_method, insert_unique_blocks, use_insert_token): + insert_settings = "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" + + if insert_method == 'InsertSelect': + template = """ + INSERT INTO {table_name} + SELECT {insert_columns} + FROM numbers({count}) {insert_settings}; + """ + return __format( + template, + table_name=table_name, + count=count, + insert_columns="'src_4', 4" if not insert_unique_blocks else "'src_' || toString(number), number", + insert_settings=insert_settings, + ) + + else: + template = """ + INSERT INTO {table_name} + {insert_settings} VALUES {insert_values}; + """ + + values = [] + for i in range(count): + values += [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] + insert_values = ", ".join(values) + + return __format( + template, + table_name=table_name, + insert_settings=insert_settings, + insert_values=insert_values, + ) def get_drop_tables_statements(tables): @@ -109,6 +130,10 @@ class ArgsFactory: def add_opt_uniq_blocks(self): self.__parser.add_argument("--insert-unique-blocks", type=str2bool, nargs='?', const=True, default=True) + def add_opt_insert_method(self): + self.__parser.add_argument( + "--insert-method", choices=["InsertSelect", "InsertValues"], default="InsertSelect") + def add_all(self): self.add_opt_engine() self.add_opt_user_token() @@ -116,6 +141,7 @@ class ArgsFactory: self.add_opt_dedup_src() self.add_opt_dedup_dst() self.add_opt_get_logs() + self.add_opt_insert_method() self.add_opt_uniq_blocks() @@ -151,14 +177,14 @@ def test_insert_several_blocks(parser): drop_tables_statements = get_drop_tables_statements( ["table_a_b", "table_when_b_even", "mv_b_even"] ) insert_statement = instance_insert_statement( - "table_a_b", 10, args.insert_unique_blocks, args.use_insert_token + "table_a_b", 10, args.insert_method, args.insert_unique_blocks, args.use_insert_token ) print_details_statements = f""" SELECT 'table_a_b'; SELECT 'count', count() FROM table_a_b; {"" if not args.get_logs else "SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part;"} - + SELECT 'table_when_b_even'; SELECT 'count', count() FROM table_when_b_even; {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} @@ -209,37 +235,37 @@ def test_insert_several_blocks(parser): script = f""" {get_logs_statement(args)} - + SET max_insert_threads={1 if args.single_thread else 10}; SET update_insert_deduplication_token_in_dependent_materialized_views=1; SET deduplicate_blocks_in_dependent_materialized_views=1; - + SET max_block_size=1; SET min_insert_block_size_rows=0; SET min_insert_block_size_bytes=0; - + {drop_tables_statements} - + {create_table_a_b_statement} - + {create_table_when_b_even_statement} - + {create_mv_statement} - + -- first insert {insert_statement} - + {print_details_statements} - + {assert_first_insert_statements} - + -- second insert, it is retry {insert_statement} - + {print_details_statements} - + {assert_second_insert_statements} - + {drop_tables_statements} """ @@ -279,13 +305,13 @@ def test_mv_generates_several_blocks(parser): ) insert_statement = instance_insert_statement( - "table_a_b", 5, args.insert_unique_blocks, args.use_insert_token + "table_a_b", 5, args.insert_method, args.insert_unique_blocks, args.use_insert_token ) details_print_statements = f""" SELECT 'table_a_b'; SELECT 'count', count() FROM table_a_b; - + SELECT 'table_when_b_even_and_joined'; SELECT 'count', count() FROM table_when_b_even_and_joined; {"" if not args.get_logs else "SELECT _part, a_src, a_join, b FROM table_when_b_even_and_joined ORDER BY _part;"} @@ -295,7 +321,7 @@ def test_mv_generates_several_blocks(parser): assert_first_insert_statements = f""" SELECT throwIf( count() != 5 ) FROM table_a_b; - + SELECT throwIf( count() != 47 ) FROM table_when_b_even_and_joined; """ @@ -311,7 +337,7 @@ def test_mv_generates_several_blocks(parser): assert_first_insert_statements = f""" SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) FROM table_a_b; - + SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 45} ) FROM table_when_b_even_and_joined; """ @@ -326,14 +352,14 @@ def test_mv_generates_several_blocks(parser): assert_first_insert_statements = f""" SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) FROM table_a_b; - + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 45} ) FROM table_when_b_even_and_joined; """ assert_second_insert_statements = f""" SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) FROM table_a_b; - + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 90} ) FROM table_when_b_even_and_joined; """ @@ -344,13 +370,13 @@ def test_mv_generates_several_blocks(parser): SET max_insert_threads={1 if args.single_thread else 10}; SET update_insert_deduplication_token_in_dependent_materialized_views=1; SET deduplicate_blocks_in_dependent_materialized_views=1; - + SET max_block_size=1; SET min_insert_block_size_rows=0; SET min_insert_block_size_bytes=0; - + {drop_tables_statements} - + CREATE TABLE table_for_join_with (a_join String, b UInt64) ENGINE = MergeTree() @@ -359,13 +385,13 @@ def test_mv_generates_several_blocks(parser): SELECT 'joined_' || toString(number), number FROM numbers(9); {details_print_for_table_for_join_with} - + {create_table_a_b_statement} SYSTEM STOP MERGES table_a_b; - + {create_table_when_b_even_and_joined_statement} SYSTEM STOP MERGES table_when_b_even_and_joined; - + CREATE MATERIALIZED VIEW mv_b_even TO table_when_b_even_and_joined AS @@ -377,20 +403,20 @@ def test_mv_generates_several_blocks(parser): -- first insert {insert_statement} - + {details_print_statements} - + -- first assertion {assert_first_insert_statements} - + -- second insert {insert_statement} - + {details_print_statements} - + -- second assertion {assert_second_insert_statements} - + {drop_tables_statements} """ @@ -423,12 +449,12 @@ def test_several_mv_into_one_table(parser): ) insert_statement = instance_insert_statement( - "table_src", 8, args.insert_unique_blocks, args.use_insert_token + "table_src", 8, args.insert_method, args.insert_unique_blocks, args.use_insert_token ) details_print_statements = f""" SELECT 'table_src count', count() FROM table_src; - + SELECT 'table_dst count', count() FROM table_dst; {"" if not args.get_logs else "SELECT _part, count() FROM table_dst GROUP BY _part ORDER BY _part;"} """ @@ -453,7 +479,7 @@ def test_several_mv_into_one_table(parser): assert_first_insert_statements = f""" SELECT throwIf( count() != {8 if args.deduplicate_src_table else 8} ) FROM table_src; - + SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 16} ) FROM table_dst; """ @@ -469,7 +495,7 @@ def test_several_mv_into_one_table(parser): SELECT throwIf( count() != {1 if args.deduplicate_src_table else 8} ) FROM table_src; - SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 16} ) + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 16} ) FROM table_dst; """ assert_second_insert_statements = f""" @@ -486,13 +512,13 @@ def test_several_mv_into_one_table(parser): SET max_insert_threads={1 if args.single_thread else 10}; SET update_insert_deduplication_token_in_dependent_materialized_views=1; SET deduplicate_blocks_in_dependent_materialized_views=1; - + SET max_block_size=1; SET min_insert_block_size_rows=0; SET min_insert_block_size_bytes=0; - + {drop_tables_statements} - + {create_table_src_statement} {create_table_dst_statement} @@ -503,7 +529,7 @@ def test_several_mv_into_one_table(parser): SELECT a, b FROM table_src WHERE b % 2 = 0; - + CREATE MATERIALIZED VIEW mv_b_even_even TO table_dst AS @@ -515,16 +541,16 @@ def test_several_mv_into_one_table(parser): {insert_statement} {details_print_statements} - + {assert_first_insert_statements} -- second insert, retry {insert_statement} - + {details_print_statements} {assert_second_insert_statements} - + {drop_tables_statements} """ diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference index 35b2642a4d2..9b4738ce805 100644 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference @@ -1,5 +1,5 @@ -Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -14,7 +14,7 @@ count 5 0 OK -Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -29,7 +29,7 @@ count 10 0 OK -Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -44,7 +44,7 @@ count 10 0 OK -Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -59,7 +59,7 @@ count 20 0 OK -Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -74,7 +74,7 @@ count 5 0 OK -Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -89,7 +89,7 @@ count 10 0 OK -Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -104,7 +104,7 @@ count 10 0 OK -Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -119,35 +119,35 @@ count 20 0 OK -Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 1 table_when_b_even count 1 EXPECTED_TO_FAIL -Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even count 1 EXPECTED_TO_FAIL -Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 1 table_when_b_even count 5 EXPECTED_TO_FAIL -Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even count 10 EXPECTED_TO_FAIL -Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -155,7 +155,7 @@ count 1 0 EXPECTED_TO_FAIL -Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -163,7 +163,7 @@ count 1 0 EXPECTED_TO_FAIL -Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -178,7 +178,7 @@ count 10 0 OK -Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -193,7 +193,7 @@ count 20 0 OK -Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -208,7 +208,7 @@ count 5 0 OK -Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -223,7 +223,7 @@ count 1 0 OK -Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -238,7 +238,7 @@ count 10 0 OK -Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -253,7 +253,7 @@ count 20 0 OK -Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -268,7 +268,7 @@ count 5 0 OK -Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -283,7 +283,7 @@ count 1 0 OK -Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -298,7 +298,7 @@ count 10 0 OK -Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -313,7 +313,7 @@ count 20 0 OK -Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -328,7 +328,7 @@ count 5 0 OK -Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -343,7 +343,7 @@ count 1 0 OK -Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -358,7 +358,7 @@ count 10 0 OK -Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -373,7 +373,7 @@ count 20 0 OK -Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -388,7 +388,7 @@ count 5 0 OK -Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -403,7 +403,7 @@ count 1 0 OK -Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -418,7 +418,7 @@ count 10 0 OK -Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -433,7 +433,7 @@ count 20 0 OK -Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -448,7 +448,7 @@ count 5 0 OK -Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -463,7 +463,7 @@ count 10 0 OK -Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -478,7 +478,7 @@ count 10 0 OK -Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -493,7 +493,7 @@ count 20 0 OK -Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -508,7 +508,7 @@ count 5 0 OK -Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -523,7 +523,7 @@ count 10 0 OK -Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -538,7 +538,7 @@ count 10 0 OK -Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -553,35 +553,35 @@ count 20 0 OK -Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 1 table_when_b_even count 1 EXPECTED_TO_FAIL -Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even count 1 EXPECTED_TO_FAIL -Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 1 table_when_b_even count 5 EXPECTED_TO_FAIL -Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even count 10 EXPECTED_TO_FAIL -Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -589,7 +589,7 @@ count 1 0 EXPECTED_TO_FAIL -Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -597,7 +597,7 @@ count 1 0 EXPECTED_TO_FAIL -Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -612,7 +612,7 @@ count 10 0 OK -Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -627,7 +627,7 @@ count 20 0 OK -Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -642,7 +642,7 @@ count 5 0 OK -Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -657,7 +657,7 @@ count 1 0 OK -Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -672,7 +672,7 @@ count 10 0 OK -Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -687,7 +687,7 @@ count 20 0 OK -Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -702,7 +702,7 @@ count 5 0 OK -Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -717,7 +717,7 @@ count 1 0 OK -Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -732,7 +732,7 @@ count 10 0 OK -Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -747,7 +747,7 @@ count 20 0 OK -Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -762,7 +762,7 @@ count 5 0 OK -Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -777,7 +777,7 @@ count 1 0 OK -Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -792,7 +792,7 @@ count 10 0 OK -Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -807,7 +807,7 @@ count 20 0 OK -Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -822,7 +822,7 @@ count 5 0 OK -Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -837,7 +837,7 @@ count 1 0 OK -Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -852,7 +852,967 @@ count 10 0 OK -Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +FIXED + +Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +FIXED + +Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +FIXED + +Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +FIXED + +Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +FIXED + +Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +FIXED + +Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh index 5b07f6033ad..ed50110b7eb 100755 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh @@ -15,7 +15,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # fails, it is a error. The same situation as first one, but on dst table. RUN_ONLY="" -#RUN_ONLY="" +#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" KNOWN_ERRORS=(8 9 10 11 12 13) @@ -23,7 +23,7 @@ function is_known_error() { n=$1 for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then return 0 fi done @@ -31,56 +31,61 @@ function is_known_error() } i=0 -for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do +for insert_method in "InsertSelect" "InsertValues"; do + for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do - THIS_RUN="Test case $i:" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done done done done diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference index eccdbd52f37..4411bdecea8 100644 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference @@ -1,5 +1,5 @@ -Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -14,7 +14,7 @@ count 47 0 OK -Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -29,7 +29,7 @@ count 45 0 OK -Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -44,7 +44,7 @@ count 94 0 OK -Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -59,7 +59,7 @@ count 90 0 OK -Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -74,7 +74,7 @@ count 47 0 OK -Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -89,7 +89,7 @@ count 45 0 OK -Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -104,7 +104,7 @@ count 94 0 OK -Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -119,35 +119,35 @@ count 90 0 OK -Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 1 table_when_b_even_and_joined count 10 EXPECTED_TO_FAIL -Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined count 9 EXPECTED_TO_FAIL -Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 1 table_when_b_even_and_joined count 47 EXPECTED_TO_FAIL -Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined count 45 EXPECTED_TO_FAIL -Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -155,7 +155,7 @@ count 10 0 EXPECTED_TO_FAIL -Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -163,7 +163,7 @@ count 9 0 EXPECTED_TO_FAIL -Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -178,7 +178,7 @@ count 94 0 OK -Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -193,7 +193,7 @@ count 90 0 OK -Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -201,7 +201,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -216,7 +216,7 @@ count 9 0 OK -Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -231,7 +231,7 @@ count 94 0 OK -Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -246,7 +246,7 @@ count 90 0 OK -Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -254,7 +254,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -269,7 +269,7 @@ count 9 0 OK -Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -284,7 +284,7 @@ count 94 0 OK -Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -299,7 +299,7 @@ count 90 0 OK -Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -307,7 +307,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -322,7 +322,7 @@ count 9 0 OK -Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -337,7 +337,7 @@ count 94 0 OK -Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -352,7 +352,7 @@ count 90 0 OK -Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -360,7 +360,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -375,7 +375,7 @@ count 9 0 OK -Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -390,7 +390,7 @@ count 94 0 OK -Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -405,7 +405,7 @@ count 90 0 OK -Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -420,7 +420,7 @@ count 47 0 OK -Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -435,7 +435,7 @@ count 45 0 OK -Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -450,7 +450,7 @@ count 94 0 OK -Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -465,7 +465,7 @@ count 90 0 OK -Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -480,7 +480,7 @@ count 47 0 OK -Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -495,7 +495,7 @@ count 45 0 OK -Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -510,7 +510,7 @@ count 94 0 OK -Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -525,35 +525,35 @@ count 90 0 OK -Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 1 table_when_b_even_and_joined count 10 EXPECTED_TO_FAIL -Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined count 9 EXPECTED_TO_FAIL -Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 1 table_when_b_even_and_joined count 47 EXPECTED_TO_FAIL -Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined count 45 EXPECTED_TO_FAIL -Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -561,7 +561,7 @@ count 10 0 EXPECTED_TO_FAIL -Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -569,7 +569,7 @@ count 9 0 EXPECTED_TO_FAIL -Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -584,7 +584,7 @@ count 94 0 OK -Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -599,7 +599,7 @@ count 90 0 OK -Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -607,7 +607,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -622,7 +622,7 @@ count 9 0 OK -Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -637,7 +637,7 @@ count 94 0 OK -Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -652,7 +652,7 @@ count 90 0 OK -Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -660,7 +660,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -675,7 +675,7 @@ count 9 0 OK -Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -690,7 +690,7 @@ count 94 0 OK -Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -705,7 +705,7 @@ count 90 0 OK -Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -713,7 +713,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -728,7 +728,7 @@ count 9 0 OK -Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -743,7 +743,7 @@ count 94 0 OK -Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -758,7 +758,7 @@ count 90 0 OK -Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -766,7 +766,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -781,7 +781,7 @@ count 9 0 OK -Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -796,7 +796,911 @@ count 94 0 OK -Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED + +Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED + +Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +FIXED + +Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +FIXED + +Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED + +Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED + +Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED + +Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED + +Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +FIXED + +Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +FIXED + +Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED + +Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED + +Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh index 1dd648583c6..61996905135 100755 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh @@ -29,7 +29,7 @@ function is_known_error() { n=$1 for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then return 0 fi done @@ -37,56 +37,61 @@ function is_known_error() } i=0 -for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do +for insert_method in "InsertSelect" "InsertValues"; do + for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do - THIS_RUN="Test case $i:" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done done done done diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference index 12eea604e3a..a56f7deb744 100644 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference @@ -1,5 +1,5 @@ -Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -10,7 +10,7 @@ table_dst count 6 0 OK -Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -21,7 +21,7 @@ table_dst count 16 0 OK -Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -32,7 +32,7 @@ table_dst count 12 0 OK -Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -43,7 +43,7 @@ table_dst count 32 0 OK -Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -54,7 +54,7 @@ table_dst count 6 0 OK -Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -65,7 +65,7 @@ table_dst count 16 0 OK -Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -76,7 +76,7 @@ table_dst count 12 0 OK -Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -87,39 +87,39 @@ table_dst count 32 0 OK -Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 1 table_dst count 2 EXPECTED_TO_FAIL -Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 2 EXPECTED_TO_FAIL -Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 1 table_dst count 6 EXPECTED_TO_FAIL -Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 EXPECTED_TO_FAIL -Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 2 0 EXPECTED_TO_FAIL -Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 2 0 EXPECTED_TO_FAIL -Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -130,7 +130,7 @@ table_dst count 12 0 OK -Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -141,13 +141,13 @@ table_dst count 32 0 OK -Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 1 0 @@ -157,7 +157,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -168,7 +168,7 @@ table_dst count 12 0 OK -Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 0 @@ -179,13 +179,13 @@ table_dst count 32 0 OK -Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 1 0 @@ -195,7 +195,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -206,7 +206,7 @@ table_dst count 12 0 OK -Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -217,13 +217,13 @@ table_dst count 32 0 OK -Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 1 0 @@ -233,7 +233,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -244,7 +244,7 @@ table_dst count 12 0 OK -Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 0 @@ -255,13 +255,13 @@ table_dst count 32 0 OK -Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 1 0 @@ -271,7 +271,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -282,7 +282,7 @@ table_dst count 12 0 OK -Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -293,7 +293,7 @@ table_dst count 32 0 OK -Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -304,7 +304,7 @@ table_dst count 6 0 OK -Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -315,7 +315,7 @@ table_dst count 16 0 OK -Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -326,7 +326,7 @@ table_dst count 12 0 OK -Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -337,7 +337,7 @@ table_dst count 32 0 OK -Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -348,7 +348,7 @@ table_dst count 6 0 OK -Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -359,7 +359,7 @@ table_dst count 16 0 OK -Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -370,7 +370,7 @@ table_dst count 12 0 OK -Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -381,39 +381,39 @@ table_dst count 32 0 OK -Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 1 table_dst count 2 EXPECTED_TO_FAIL -Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 2 EXPECTED_TO_FAIL -Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 1 table_dst count 6 EXPECTED_TO_FAIL -Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 EXPECTED_TO_FAIL -Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 2 0 EXPECTED_TO_FAIL -Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 2 0 EXPECTED_TO_FAIL -Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -424,7 +424,7 @@ table_dst count 12 0 OK -Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -435,13 +435,13 @@ table_dst count 32 0 OK -Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 1 0 @@ -451,7 +451,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -462,7 +462,7 @@ table_dst count 12 0 OK -Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 0 @@ -473,13 +473,13 @@ table_dst count 32 0 OK -Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 1 0 @@ -489,7 +489,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -500,7 +500,7 @@ table_dst count 12 0 OK -Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -511,13 +511,13 @@ table_dst count 32 0 OK -Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 1 0 @@ -527,7 +527,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -538,7 +538,7 @@ table_dst count 12 0 OK -Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 0 @@ -549,13 +549,13 @@ table_dst count 32 0 OK -Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 1 0 @@ -565,7 +565,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -576,7 +576,663 @@ table_dst count 12 0 OK -Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED + +Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +FIXED + +Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +FIXED + +Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +FIXED + +Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +FIXED + +Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +FIXED + +Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED + +Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +FIXED + +Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +FIXED + +Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +FIXED + +Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +FIXED + +Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +FIXED + +Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh index 487b3ac5f88..3d2814ed77d 100755 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh @@ -32,7 +32,7 @@ function is_known_error() { n=$1 for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then return 0 fi done @@ -40,59 +40,64 @@ function is_known_error() } RUN_ONLY="" -#RUN_ONLY="Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True" +#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" i=0 -for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do +for insert_method in "InsertSelect" "InsertValues"; do + for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do - THIS_RUN="Test case $i:" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done done done done From ac27860b49c45cb0861d3989171be493140b0eb5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 20 Apr 2024 15:58:25 +0000 Subject: [PATCH 018/141] Automatic style fix --- src/Processors/Sinks/SinkToStorage.cpp | 3 +- src/Processors/Sinks/SinkToStorage.h | 21 +-- .../Transforms/NumberBlocksTransform.h | 9 +- src/Storages/MergeTree/MergeTreeSink.cpp | 9 +- src/Storages/MergeTree/MergeTreeSink.h | 1 - .../MergeTree/ReplicatedMergeTreeSink.cpp | 8 +- .../MergeTree/ReplicatedMergeTreeSink.h | 1 - .../0_stateless/03008_deduplication.python | 132 ++++++++++++++---- 8 files changed, 128 insertions(+), 56 deletions(-) diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 146bd4505a4..fff4a881e3d 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -15,9 +15,8 @@ void SinkToStorage::onConsume(Chunk chunk) */ Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); - setDeduplicationTokenForChildren(chunk); - fillDeduplicationTokenForChildren(chunk); consume(chunk); + fillDeduplicationTokenForChildren(chunk); if (!lastBlockIsDuplicate()) // TODO: remove that cur_chunk = std::move(chunk); } diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 07a944b0943..21e003c4317 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -25,24 +25,12 @@ protected: virtual void consume(Chunk & chunk) = 0; virtual bool lastBlockIsDuplicate() const { return false; } - virtual std::shared_ptr setDeduplicationTokenForChildren(Chunk & chunk) const + void fillDeduplicationTokenForChildren(Chunk & chunk) const { auto token_info = chunk.getChunkInfos().get(); if (token_info) - return token_info; + return; - auto block_dedup_token_for_children = std::make_shared(""); - chunk.getChunkInfos().add(block_dedup_token_for_children); - return block_dedup_token_for_children; - } - - virtual std::shared_ptr getDeduplicationTokenForChildren(Chunk & chunk) const - { - return chunk.getChunkInfos().get(); - } - - virtual void fillDeduplicationTokenForChildren(Chunk & chunk) const - { SipHash hash; for (const auto & colunm: chunk.getColumns()) { @@ -50,8 +38,9 @@ protected: } const auto hash_value = hash.get128(); - chunk.getChunkInfos().get()->addTokenPart( - fmt::format(":hash-{}", toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]))); + chunk.getChunkInfos().add(std::make_shared( + fmt::format(":hash-{}", toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])) + )); } private: diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index ca990a925c1..9bc23a583d3 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -49,6 +49,7 @@ namespace DB class DedupTokenInfo : public ChunkInfoCloneable { public: + DedupTokenInfo() = default; DedupTokenInfo(const DedupTokenInfo & other) = default; explicit DedupTokenInfo(String first_part) { @@ -68,9 +69,15 @@ namespace DB return result; } + bool empty() const + { + return token_parts.empty(); + } + void addTokenPart(String part) { - token_parts.push_back(std::move(part)); + if (!part.empty()) + token_parts.push_back(std::move(part)); } private: diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 2e455cd2bd5..ce7833d25da 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -84,6 +85,7 @@ void MergeTreeSink::consume(Chunk & chunk) bool support_parallel_write = false; String block_dedup_token; + std::shared_ptr dedub_token_info_for_children = nullptr; if (storage.getDeduplicationLog()) { auto token_info = chunk.getChunkInfos().get(); @@ -102,6 +104,9 @@ void MergeTreeSink::consume(Chunk & chunk) } else { + dedub_token_info_for_children = std::make_shared(); + chunk.getChunkInfos().add(dedub_token_info_for_children); + LOG_DEBUG(storage.log, "dedup token from hash is caclulated"); } @@ -126,9 +131,9 @@ void MergeTreeSink::consume(Chunk & chunk) current_block.block.clear(); current_block.partition.clear(); - if (auto children_dedup_token = getDeduplicationTokenForChildren(chunk)) + if (dedub_token_info_for_children) { - children_dedup_token->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); } /// If optimize_on_insert setting is true, current_block could become empty after merge diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 4e1ca5c1f60..8f065773d6a 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -41,7 +41,6 @@ private: struct DelayedChunk; std::unique_ptr delayed_chunk; - void fillDeduplicationTokenForChildren(Chunk &) const override { /* For MergeTree we get the tokens from part checksums */ } void finishDelayedChunk(); }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index ce140c93cbe..1712170dddd 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -293,6 +294,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } String block_dedup_token; + std::shared_ptr dedub_token_info_for_children = nullptr; if constexpr (!async_insert) { auto token_info = chunk.getChunkInfos().get(); @@ -314,6 +316,8 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } else { + dedub_token_info_for_children = std::make_shared(); + chunk.getChunkInfos().add(dedub_token_info_for_children); LOG_DEBUG(storage.log, "dedup token from hash is caclulated"); } @@ -382,9 +386,9 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - if (auto children_dedup_token = getDeduplicationTokenForChildren(chunk)) + if (dedub_token_info_for_children) { - children_dedup_token->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index b1eff67d845..e460804d7f1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -139,7 +139,6 @@ private: /// We can delay processing for previous chunk and start writing a new one. std::unique_ptr delayed_chunk; - void fillDeduplicationTokenForChildren(Chunk &) const override { /* For MergeTree we get the tokens from part checksums */ } void finishDelayedChunk(const ZooKeeperWithFaultInjectionPtr & zookeeper); }; diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python index 87c48a73513..89dbea97667 100644 --- a/tests/queries/0_stateless/03008_deduplication.python +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -22,7 +22,14 @@ def __format(template, **params): return template.format(**kv_args) -def instance_create_statement(table_name, table_columns, table_keys, table_engine, with_deduplication, no_merges=True): +def instance_create_statement( + table_name, + table_columns, + table_keys, + table_engine, + with_deduplication, + no_merges=True, +): template = """ CREATE TABLE {table_name} {table_columns} @@ -37,22 +44,36 @@ def instance_create_statement(table_name, table_columns, table_keys, table_engin params["table_columns"] = table_columns params["table_keys"] = table_keys params["table_no_merges"] = f"SYSTEM STOP MERGES {table_name};" if no_merges else "" - params["table_engine"] = "MergeTree()" if table_engine == "MergeTree" else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" + params["table_engine"] = ( + "MergeTree()" + if table_engine == "MergeTree" + else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" + ) - deduplication_window_setting_name = "non_replicated_deduplication_window" if table_engine == "MergeTree" else "replicated_deduplication_window" + deduplication_window_setting_name = ( + "non_replicated_deduplication_window" + if table_engine == "MergeTree" + else "replicated_deduplication_window" + ) deduplication_window_setting_value = 1000 if with_deduplication else 0 settings = list() - settings += [f"{deduplication_window_setting_name}={deduplication_window_setting_value}"] + settings += [ + f"{deduplication_window_setting_name}={deduplication_window_setting_value}" + ] params["table_settings"] = "SETTINGS " + ",".join(settings) return __format(template, **params) -def instance_insert_statement(table_name, count, insert_method, insert_unique_blocks, use_insert_token): - insert_settings = "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" +def instance_insert_statement( + table_name, count, insert_method, insert_unique_blocks, use_insert_token +): + insert_settings = ( + "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" + ) - if insert_method == 'InsertSelect': + if insert_method == "InsertSelect": template = """ INSERT INTO {table_name} SELECT {insert_columns} @@ -62,7 +83,9 @@ def instance_insert_statement(table_name, count, insert_method, insert_unique_bl template, table_name=table_name, count=count, - insert_columns="'src_4', 4" if not insert_unique_blocks else "'src_' || toString(number), number", + insert_columns="'src_4', 4" + if not insert_unique_blocks + else "'src_' || toString(number), number", insert_settings=insert_settings, ) @@ -74,7 +97,9 @@ def instance_insert_statement(table_name, count, insert_method, insert_unique_bl values = [] for i in range(count): - values += [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] + values += ( + [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] + ) insert_values = ", ".join(values) return __format( @@ -86,7 +111,9 @@ def instance_insert_statement(table_name, count, insert_method, insert_unique_bl def get_drop_tables_statements(tables): - return "".join([f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]]) + return "".join( + [f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]] + ) def get_logs_statement(args): @@ -94,15 +121,17 @@ def get_logs_statement(args): return "SET send_logs_level='test';" return "" + def str2bool(v): if isinstance(v, bool): return v - if v.lower() in ('yes', 'true', 't', 'y', '1'): + if v.lower() in ("yes", "true", "t", "y", "1"): return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): + elif v.lower() in ("no", "false", "f", "n", "0"): return False else: - raise argparse.ArgumentTypeError('Boolean value expected.') + raise argparse.ArgumentTypeError("Boolean value expected.") + class ArgsFactory: def __init__(self, parser): @@ -110,29 +139,55 @@ class ArgsFactory: def add_opt_engine(self): self.__parser.add_argument( - "--table-engine", choices=["ReplicatedMergeTree", "MergeTree"], default="MergeTree") + "--table-engine", + choices=["ReplicatedMergeTree", "MergeTree"], + default="MergeTree", + ) def add_opt_user_token(self): - self.__parser.add_argument("--use-insert-token", type=str2bool, nargs='?', const=True, default=False) + self.__parser.add_argument( + "--use-insert-token", type=str2bool, nargs="?", const=True, default=False + ) def add_opt_single_thread(self): - self.__parser.add_argument("--single-thread", type=str2bool, nargs='?', const=True, default=True) + self.__parser.add_argument( + "--single-thread", type=str2bool, nargs="?", const=True, default=True + ) def add_opt_dedup_src(self): - self.__parser.add_argument("--deduplicate-src-table", type=str2bool, nargs='?', const=True, default=True) + self.__parser.add_argument( + "--deduplicate-src-table", + type=str2bool, + nargs="?", + const=True, + default=True, + ) def add_opt_dedup_dst(self): - self.__parser.add_argument("--deduplicate-dst-table", type=str2bool, nargs='?', const=True, default=True) + self.__parser.add_argument( + "--deduplicate-dst-table", + type=str2bool, + nargs="?", + const=True, + default=True, + ) def add_opt_get_logs(self): - self.__parser.add_argument("--get-logs", type=str2bool, nargs='?', const=True, default=False) + self.__parser.add_argument( + "--get-logs", type=str2bool, nargs="?", const=True, default=False + ) def add_opt_uniq_blocks(self): - self.__parser.add_argument("--insert-unique-blocks", type=str2bool, nargs='?', const=True, default=True) + self.__parser.add_argument( + "--insert-unique-blocks", type=str2bool, nargs="?", const=True, default=True + ) def add_opt_insert_method(self): self.__parser.add_argument( - "--insert-method", choices=["InsertSelect", "InsertValues"], default="InsertSelect") + "--insert-method", + choices=["InsertSelect", "InsertValues"], + default="InsertSelect", + ) def add_all(self): self.add_opt_engine() @@ -174,10 +229,16 @@ def test_insert_several_blocks(parser): WHERE b % 2 = 0; """ - drop_tables_statements = get_drop_tables_statements( ["table_a_b", "table_when_b_even", "mv_b_even"] ) + drop_tables_statements = get_drop_tables_statements( + ["table_a_b", "table_when_b_even", "mv_b_even"] + ) insert_statement = instance_insert_statement( - "table_a_b", 10, args.insert_method, args.insert_unique_blocks, args.use_insert_token + "table_a_b", + 10, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, ) print_details_statements = f""" @@ -190,8 +251,6 @@ def test_insert_several_blocks(parser): {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} """ - - if args.insert_unique_blocks: assert_first_insert_statements = f""" SELECT throwIf( count() != 10 ) @@ -278,7 +337,12 @@ def test_mv_generates_several_blocks(parser): ArgsFactory(parser).add_all() def calle(args): - tables = ["table_for_join_with", "table_a_b", "table_when_b_even_and_joined", "mv_b_even"] + tables = [ + "table_for_join_with", + "table_a_b", + "table_when_b_even_and_joined", + "mv_b_even", + ] drop_tables_statements = get_drop_tables_statements(tables) details_print_for_table_for_join_with = "" @@ -305,7 +369,11 @@ def test_mv_generates_several_blocks(parser): ) insert_statement = instance_insert_statement( - "table_a_b", 5, args.insert_method, args.insert_unique_blocks, args.use_insert_token + "table_a_b", + 5, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, ) details_print_statements = f""" @@ -449,7 +517,11 @@ def test_several_mv_into_one_table(parser): ) insert_statement = instance_insert_statement( - "table_src", 8, args.insert_method, args.insert_unique_blocks, args.use_insert_token + "table_src", + 8, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, ) details_print_statements = f""" @@ -568,9 +640,7 @@ def parse_args(): test_mv_generates_several_blocks( subparsers.add_parser("mv_generates_several_blocks") ) - test_several_mv_into_one_table( - subparsers.add_parser("several_mv_into_one_table") - ) + test_several_mv_into_one_table(subparsers.add_parser("several_mv_into_one_table")) args = parser.parse_args() if args.test is None: parser.print_help() From c7908f62d056c5a96f0bea743de48e48399c0f91 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 1 May 2024 14:06:04 +0200 Subject: [PATCH 019/141] fix sigfault --- src/Storages/MergeTree/MergeTreeSink.cpp | 11 ++++++----- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index ce7833d25da..7ca6ed10a76 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -131,16 +131,17 @@ void MergeTreeSink::consume(Chunk & chunk) current_block.block.clear(); current_block.partition.clear(); - if (dedub_token_info_for_children) - { - dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); - } - /// If optimize_on_insert setting is true, current_block could become empty after merge /// and we didn't create part. if (!temp_part.part) continue; + if (dedub_token_info_for_children) + { + chassert(temp_part.part); + dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + } + if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 1712170dddd..3c1e2bc9219 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -388,6 +388,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (dedub_token_info_for_children) { + chassert(temp_part.part); dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); } } From 687b5940fa37563f2121ab2f915edd513a2c8b6e Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 1 May 2024 14:45:48 +0200 Subject: [PATCH 020/141] fix style --- src/Interpreters/InterpreterCreateQuery.cpp | 6 ++---- src/Interpreters/InterpreterInsertQuery.cpp | 6 ++++-- src/QueryPipeline/QueryPlanResourceHolder.h | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 7 ++++++- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a143ca867e1..35ed6c9ab69 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1691,14 +1691,12 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) insert->select = create.select->clone(); return InterpreterInsertQuery( - insert, + insert, getContext(), getContext()->getSettingsRef().insert_allow_materialized_columns, false, false, - false - ) - .execute(); + false).execute(); } return {}; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 40d5a84031d..62bffcfe6a1 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -592,10 +592,12 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() { pipeline.resize(1); - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr { + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { return std::make_shared(settings.insert_deduplication_token.value, in_header); }); - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr { + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { return std::make_shared(in_header); }); } diff --git a/src/QueryPipeline/QueryPlanResourceHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h index e40fa04f72c..10f7f39ab09 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.h +++ b/src/QueryPipeline/QueryPlanResourceHolder.h @@ -19,7 +19,7 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(); QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); - + QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete; /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 7ca6ed10a76..2d29f87c556 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -13,6 +13,11 @@ namespace ProfileEvents extern const Event DuplicatedInsertedBlocks; } +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace DB { @@ -108,7 +113,7 @@ void MergeTreeSink::consume(Chunk & chunk) chunk.getChunkInfos().add(dedub_token_info_for_children); LOG_DEBUG(storage.log, - "dedup token from hash is caclulated"); + "dedup token from hash is calculated"); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 3c1e2bc9219..e855bb7d969 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -319,7 +319,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) dedub_token_info_for_children = std::make_shared(); chunk.getChunkInfos().add(dedub_token_info_for_children); LOG_DEBUG(storage.log, - "dedup token from hash is caclulated"); + "dedup token from hash is calculated"); } } From f1493a40a4b70251b12bc06bdd43bbd9eeadfe8b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 1 May 2024 15:44:30 +0200 Subject: [PATCH 021/141] fix tests --- src/Interpreters/AsynchronousInsertQueue.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 65035790729..e1595243ae3 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -780,7 +780,12 @@ try try { interpreter = std::make_unique( - key.query, insert_context, key.settings.insert_allow_materialized_columns, true, false, false); + key.query, + insert_context, + key.settings.insert_allow_materialized_columns, + false, + false, + true); pipeline = interpreter->execute().pipeline; chassert(pipeline.pushing()); From fa667b454366c724dcfa59872de4d89c987fa5bd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 1 May 2024 18:21:32 +0200 Subject: [PATCH 022/141] fix tests --- src/Interpreters/InterpreterCheckQuery.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index e070d8694a7..81bb6290acb 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -12,7 +12,6 @@ #include #include #include -#include "Processors/Chunk.h" #include #include @@ -24,6 +23,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 62bffcfe6a1..bbff38a06bf 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,6 @@ #include #include #include -#include "Interpreters/Context_fwd.h" namespace ProfileEvents @@ -682,6 +682,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline() chain.addSource(std::move(counting)); QueryPipeline pipeline = QueryPipeline(std::move(chain)); + pipeline.setNumThreads(std::min(pipeline.getNumThreads(), settings.max_threads)); pipeline.setConcurrencyControl(settings.use_concurrency_control); @@ -735,7 +736,11 @@ BlockIO InterpreterInsertQuery::execute() { if (settings.parallel_distributed_insert_select) { - res.pipeline = *table->distributedWrite(query, getContext()); + auto distributed = table->distributedWrite(query, getContext()); + if (distributed) + res.pipeline = std::move(*distributed); + else + res.pipeline = buildInsertSelectPipeline(); } else { From 2d4216ecada4593ba43af441c6fd44f694e1867d Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 2 May 2024 13:00:57 +0200 Subject: [PATCH 023/141] debugging --- src/Interpreters/InterpreterInsertQuery.cpp | 10 ++++++++++ src/Storages/StorageDistributed.cpp | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index bbff38a06bf..435c616f27c 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -499,6 +499,10 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); pipeline = interpreter_select.buildQueryPipeline(); } + + // auto resources = QueryPlanResourceHolder(); + // resources.interpreter_context.push_back(select_context); + // pipeline.addResources(std::move(resources)); } pipeline.dropTotalsAndExtremes(); @@ -710,6 +714,12 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); + if (auto * dist_storage = dynamic_cast(table.get())) + { + LOG_DEBUG(getLogger("InsertQuery"), + "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); + } + if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 0478936fdfc..1ecb83aa120 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -23,6 +23,7 @@ #include +#include "Common/logger_useful.h" #include #include #include @@ -106,6 +107,7 @@ #include #include +#include #include #include #include @@ -1822,10 +1824,14 @@ void StorageDistributed::renameOnDisk(const String & new_path_to_table_data) void StorageDistributed::delayInsertOrThrowIfNeeded() const { + LOG_WARNING(log, "delayInsertOrThrowIfNeeded"); + if (!distributed_settings.bytes_to_throw_insert && !distributed_settings.bytes_to_delay_insert) return; + LOG_WARNING(log, "delayInsertOrThrowIfNeeded getContext() is null: {}", getContext() == nullptr); + UInt64 total_bytes = *totalBytes(getContext()->getSettingsRef()); if (distributed_settings.bytes_to_throw_insert && total_bytes > distributed_settings.bytes_to_throw_insert) From db6951e48897d297f443dfb85f8da76add6e4f0d Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 2 May 2024 15:43:49 +0200 Subject: [PATCH 024/141] fix distributed inserts --- src/Common/CollectionOfDerived.h | 14 ++++++++++ src/Interpreters/InterpreterInsertQuery.cpp | 26 ++++++++++++------- src/Interpreters/InterpreterInsertQuery.h | 4 +-- .../FinishAggregatingInOrderAlgorithm.cpp | 11 +++++--- src/Storages/StorageDistributed.cpp | 4 --- 5 files changed, 39 insertions(+), 20 deletions(-) diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h index 8579c4dd50c..c98e375b4b1 100644 --- a/src/Common/CollectionOfDerived.h +++ b/src/Common/CollectionOfDerived.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -109,6 +110,19 @@ public: return cast; } + std::string debug() const + { + std::string result; + + for (auto & rec : records) + { + result.append(rec.type_idx.name()); + result.append(" "); + } + + return result; + } + private: bool isUniqTypes() const { diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 435c616f27c..fdf77486c85 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -429,12 +429,10 @@ std::pair, std::vector> InterpreterInsertQuery::buildP } -QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() +QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table) { const Settings & settings = getContext()->getSettingsRef(); - auto & query = query_ptr->as(); - StoragePtr table = getTable(query); auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); @@ -641,12 +639,10 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() } -QueryPipeline InterpreterInsertQuery::buildInsertPipeline() +QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table) { const Settings & settings = getContext()->getSettingsRef(); - auto & query = query_ptr->as(); - StoragePtr table = getTable(query); auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); @@ -714,9 +710,11 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); + bool is_table_dist = false; if (auto * dist_storage = dynamic_cast(table.get())) { - LOG_DEBUG(getLogger("InsertQuery"), + is_table_dist = true; + LOG_DEBUG(getLogger("InsertQuery"), "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); } @@ -748,18 +746,26 @@ BlockIO InterpreterInsertQuery::execute() { auto distributed = table->distributedWrite(query, getContext()); if (distributed) + { + LOG_DEBUG(getLogger("InsertQuery"),"as dist pipeline, is_table_dist {}", is_table_dist); res.pipeline = std::move(*distributed); + } else - res.pipeline = buildInsertSelectPipeline(); + { + LOG_DEBUG(getLogger("InsertQuery"),"as insert select after dist, is_table_dist {}", is_table_dist); + res.pipeline = buildInsertSelectPipeline(query, table); + } } else { - res.pipeline = buildInsertSelectPipeline(); + LOG_DEBUG(getLogger("InsertQuery"),"as insert select, is_table_dist {}", is_table_dist); + res.pipeline = buildInsertSelectPipeline(query, table); } } else { - res.pipeline = buildInsertPipeline(); + LOG_DEBUG(getLogger("InsertQuery"),"as just insert, is_table_dist {}", is_table_dist); + res.pipeline = buildInsertPipeline(query, table); } res.pipeline.addStorageHolder(table); diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 3f3b7a6f106..b06bb9a3db2 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -81,8 +81,8 @@ private: std::pair, std::vector> buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); - QueryPipeline buildInsertSelectPipeline(); - QueryPipeline buildInsertPipeline(); + QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table); + QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table); Chain buildSink( const StoragePtr & table, diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index f33cc267c44..ae47de4a81e 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -51,11 +51,14 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num if (!input.chunk.hasRows()) return; - const auto & arenas_info = input.chunk.getChunkInfos().get(); - if (!arenas_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "ChunkInfoWithAllocatedBytes was not set for chunk in FinishAggregatingInOrderAlgorithm"); + if (input.chunk.getChunkInfos().empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm"); - states[source_num] = State{input.chunk, description, arenas_info->allocated_bytes}; + Int64 allocated_bytes = 0; + if (auto arenas_info = input.chunk.getChunkInfos().get()) + allocated_bytes = arenas_info->allocated_bytes; + + states[source_num] = State{input.chunk, description, allocated_bytes}; } IMergingAlgorithm::Status FinishAggregatingInOrderAlgorithm::merge() diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 1ecb83aa120..747bb2c2080 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1824,14 +1824,10 @@ void StorageDistributed::renameOnDisk(const String & new_path_to_table_data) void StorageDistributed::delayInsertOrThrowIfNeeded() const { - LOG_WARNING(log, "delayInsertOrThrowIfNeeded"); - if (!distributed_settings.bytes_to_throw_insert && !distributed_settings.bytes_to_delay_insert) return; - LOG_WARNING(log, "delayInsertOrThrowIfNeeded getContext() is null: {}", getContext() == nullptr); - UInt64 total_bytes = *totalBytes(getContext()->getSettingsRef()); if (distributed_settings.bytes_to_throw_insert && total_bytes > distributed_settings.bytes_to_throw_insert) From 7fe1fe11b9c59eda01cb20f506f3c3e95398668d Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 3 May 2024 22:43:47 +0200 Subject: [PATCH 025/141] fixing tests --- src/Columns/ColumnObject.cpp | 6 +++ src/Columns/ColumnObject.h | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 10 ++--- src/Interpreters/SquashingTransform.cpp | 20 ++++----- src/Interpreters/SquashingTransform.h | 11 +++-- src/Processors/ISimpleTransform.h | 4 ++ .../Transforms/CountingTransform.cpp | 3 ++ src/Processors/Transforms/CountingTransform.h | 4 ++ .../Transforms/ExpressionTransform.cpp | 7 ++++ .../Transforms/MaterializingTransform.cpp | 6 +++ .../Transforms/NumberBlocksTransform.h | 20 +++++++++ .../Transforms/SquashingChunksTransform.cpp | 42 +++++++++++++++---- .../Transforms/SquashingChunksTransform.h | 1 + .../Transforms/buildPushingToViewsChain.cpp | 25 ++++++----- src/Server/TCPHandler.cpp | 6 +-- src/Storages/MergeTree/MutateTask.cpp | 9 ++-- ...view_and_deduplication_zookeeper.reference | 2 +- ...lized_view_and_deduplication_zookeeper.sql | 2 +- 18 files changed, 129 insertions(+), 51 deletions(-) diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 90ef974010c..ded56b60e64 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -1093,4 +1093,10 @@ void ColumnObject::finalize() checkObjectHasNoAmbiguosPaths(getKeys()); } +void ColumnObject::updateHashFast(SipHash & hash) const +{ + for (const auto & entry : subcolumns) + for (auto & part : entry->data.data) + part->updateHashFast(hash); +} } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index e2936b27994..b1b8827622f 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -242,7 +242,7 @@ public: const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } - void updateHashFast(SipHash &) const override { throwMustBeConcrete(); } + void updateHashFast(SipHash & hash) const override; void expand(const Filter &, bool) override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); } size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index fdf77486c85..2961d643869 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -471,8 +471,10 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & if (table->prefersLargeBlocks()) { - new_settings.max_block_size = std::max(settings.min_insert_block_size_rows, settings.max_block_size); - new_settings.preferred_block_size_bytes = std::max(settings.min_insert_block_size_bytes, settings.preferred_block_size_bytes); + if (settings.min_insert_block_size_rows) + new_settings.max_block_size = settings.min_insert_block_size_rows; + if (settings.min_insert_block_size_bytes) + new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; } auto context_for_trivial_select = Context::createCopy(context); @@ -497,10 +499,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); pipeline = interpreter_select.buildQueryPipeline(); } - - // auto resources = QueryPlanResourceHolder(); - // resources.interpreter_context.push_back(select_context); - // pipeline.addResources(std::move(resources)); } pipeline.dropTotalsAndExtremes(); diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 41f024df7a7..cf4f2060414 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -15,12 +15,12 @@ SquashingTransform::SquashingTransform(size_t min_block_size_rows_, size_t min_b { } -Block SquashingTransform::add(Block && input_block) +SquashingTransform::SquashResult SquashingTransform::add(Block && input_block) { return addImpl(std::move(input_block)); } -Block SquashingTransform::add(const Block & input_block) +SquashingTransform::SquashResult SquashingTransform::add(const Block & input_block) { return addImpl(input_block); } @@ -32,14 +32,14 @@ Block SquashingTransform::add(const Block & input_block) * have to. */ template -Block SquashingTransform::addImpl(ReferenceType input_block) +SquashingTransform::SquashResult SquashingTransform::addImpl(ReferenceType input_block) { /// End of input stream. if (!input_block) { Block to_return; std::swap(to_return, accumulated_block); - return to_return; + return SquashResult{std::move(to_return), false}; } /// Just read block is already enough. @@ -48,13 +48,13 @@ Block SquashingTransform::addImpl(ReferenceType input_block) /// If no accumulated data, return just read block. if (!accumulated_block) { - return std::move(input_block); + return SquashResult{std::move(input_block), false}; } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. Block to_return = std::move(input_block); std::swap(to_return, accumulated_block); - return to_return; + return SquashResult{std::move(to_return), true}; } /// Accumulated block is already enough. @@ -63,7 +63,7 @@ Block SquashingTransform::addImpl(ReferenceType input_block) /// Return accumulated data and place new block to accumulated data. Block to_return = std::move(input_block); std::swap(to_return, accumulated_block); - return to_return; + return SquashResult{std::move(to_return), true}; } append(std::move(input_block)); @@ -71,11 +71,11 @@ Block SquashingTransform::addImpl(ReferenceType input_block) { Block to_return; std::swap(to_return, accumulated_block); - return to_return; + return SquashResult{std::move(to_return), false}; } - /// Squashed block is not ready. - return {}; + /// Squashed block is not ready, input block consumed + return SquashResult{{}, true}; } diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h index b04d012bcd1..f1eba537338 100644 --- a/src/Interpreters/SquashingTransform.h +++ b/src/Interpreters/SquashingTransform.h @@ -25,11 +25,16 @@ public: /// Conditions on rows and bytes are OR-ed. If one of them is zero, then corresponding condition is ignored. SquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_); + struct SquashResult + { + Block block; + bool input_block_delayed = false; + }; /** Add next block and possibly returns squashed block. * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. */ - Block add(Block && block); - Block add(const Block & block); + SquashResult add(Block && block); + SquashResult add(const Block & block); private: size_t min_block_size_rows; @@ -38,7 +43,7 @@ private: Block accumulated_block; template - Block addImpl(ReferenceType block); + SquashResult addImpl(ReferenceType block); template void append(ReferenceType block); diff --git a/src/Processors/ISimpleTransform.h b/src/Processors/ISimpleTransform.h index 629529cdffa..3862ea76dbb 100644 --- a/src/Processors/ISimpleTransform.h +++ b/src/Processors/ISimpleTransform.h @@ -2,6 +2,8 @@ #include +#include + namespace DB { @@ -29,6 +31,8 @@ protected: virtual void transform(Chunk & input_chunk, Chunk & output_chunk) { + LOG_DEBUG(getLogger("ISimpleTransform"), + "transform {}", input_chunk.getNumRows()); transform(input_chunk); output_chunk.swap(input_chunk); } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 3dfb9fe178f..7329a196f8a 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -17,6 +17,9 @@ namespace DB void CountingTransform::onConsume(Chunk chunk) { + LOG_DEBUG(getLogger("CountingTransform"), + "onConsume {}", chunk.getNumRows()); + if (quota) quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes()); diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index 05d8e2aeac8..ab8d083fd05 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -4,6 +4,8 @@ #include #include +#include + namespace DB { @@ -43,6 +45,8 @@ public: void onConsume(Chunk chunk) override; GenerateResult onGenerate() override { + LOG_DEBUG(getLogger("CountingTransform"), + "onGenerate {}", cur_chunk.getNumRows()); GenerateResult res; res.chunk = std::move(cur_chunk); return res; diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 2fbd2c21b8d..db5d2b0c49c 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,5 +1,9 @@ #include #include + +#include + + namespace DB { @@ -17,6 +21,9 @@ ExpressionTransform::ExpressionTransform(const Block & header_, ExpressionAction void ExpressionTransform::transform(Chunk & chunk) { + LOG_DEBUG(getLogger("ExpressionTransform"), + "transform {}", chunk.getNumRows()); + size_t num_rows = chunk.getNumRows(); auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 1eaa5458d37..8366472f876 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,6 +1,9 @@ #include #include +#include + + namespace DB { @@ -9,6 +12,9 @@ MaterializingTransform::MaterializingTransform(const Block & header) void MaterializingTransform::transform(Chunk & chunk) { + LOG_DEBUG(getLogger("MaterializingTransform"), + "transform {}", chunk.getNumRows()); + auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 9bc23a583d3..6586f015d3e 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -228,4 +228,24 @@ namespace DB String token_part; }; + class RestoreChunkInfosTransform : public ISimpleTransform + { + public: + RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , chunk_infos(chunk_infos_) + { + } + + String getName() const override { return "RestoreChunkInfosTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().append(chunk_infos.clone()); + } + + private: + Chunk::ChunkInfoCollection chunk_infos; + }; + } diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 4d693e5e809..7464cb79ba6 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -17,15 +17,24 @@ SquashingChunksTransform::SquashingChunksTransform( void SquashingChunksTransform::onConsume(Chunk chunk) { + LOG_DEBUG(getLogger("SquashingChunksTransform"), + "onConsume {}", chunk.getNumRows()); + if (cur_chunkinfos.empty()) cur_chunkinfos = chunk.getChunkInfos(); - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + if (result.block) { - cur_chunk.setColumns(block.getColumns(), block.rows()); + cur_chunk.setColumns(result.block.getColumns(), result.block.rows()); cur_chunk.setChunkInfos(std::move(cur_chunkinfos)); cur_chunkinfos = {}; } + + if (cur_chunkinfos.empty() && result.input_block_delayed) + { + cur_chunkinfos = chunk.getChunkInfos(); + } } SquashingChunksTransform::GenerateResult SquashingChunksTransform::onGenerate() @@ -38,8 +47,8 @@ SquashingChunksTransform::GenerateResult SquashingChunksTransform::onGenerate() void SquashingChunksTransform::onFinish() { - auto block = squashing.add({}); - finish_chunk.setColumns(block.getColumns(), block.rows()); + auto result = squashing.add({}); + finish_chunk.setColumns(result.block.getColumns(), result.block.rows()); finish_chunk.setChunkInfos(std::move(cur_chunkinfos)); cur_chunkinfos = {}; } @@ -69,12 +78,25 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::transform(Chunk & chunk) { + LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), + "transform {}", chunk.getNumRows()); + if (!finished) { - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + if (cur_chunkinfos.empty()) + cur_chunkinfos = chunk.getChunkInfos(); + + auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + if (result.block) { - chunk.setColumns(block.getColumns(), block.rows()); - chunk.setChunkInfos(chunk.getChunkInfos()); + chunk.setColumns(result.block.getColumns(), result.block.rows()); + chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = {}; + } + + if (cur_chunkinfos.empty() && result.input_block_delayed) + { + cur_chunkinfos = chunk.getChunkInfos(); } } else @@ -82,8 +104,10 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - auto block = squashing.add({}); - chunk.setColumns(block.getColumns(), block.rows()); + auto result = squashing.add({}); + chunk.setColumns(result.block.getColumns(), result.block.rows()); + chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = {}; } } diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index 6de96d4100d..116b9e47460 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -45,6 +45,7 @@ protected: private: SquashingTransform squashing; + Chunk::ChunkInfoCollection cur_chunkinfos; /// When consumption is finished we need to release the final chunk regardless of its size. bool finished = false; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 056f8d07627..ccecfcf3333 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -124,7 +124,6 @@ private: { QueryPipeline pipeline; PullingPipelineExecutor executor; - Chunk::ChunkInfoCollection chunk_infos; explicit State(QueryPipeline pipeline_) : pipeline(std::move(pipeline_)) @@ -397,10 +396,13 @@ std::optional generateViewChain( { out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); - if (!disable_deduplication_for_children) - { - out.addSource(std::make_shared(out.getInputHeader())); - } + // if (!disable_deduplication_for_children) + // { + // // out.addSource(std::make_shared(out.getInputHeader())); + // // out.addSource(std::make_shared(out.getInputHeader())); + + // out.addSource(std::make_shared(out.getInputHeader())); + // } auto executing_inner_query = std::make_shared( storage_header, views_data->views.back(), views_data); @@ -576,7 +578,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection chunk_infos) { const auto & context = views_data.context; @@ -623,8 +625,9 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); - //pipeline.addTransform(std::make_shared(pipeline.getHeader())); + //pipeline.addTransform(std::make_shared(pipeline.getHeader())); + pipeline.addTransform(std::make_shared(std::move(chunk_infos), pipeline.getHeader())); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -727,8 +730,7 @@ ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); - state.emplace(process(block, view, *views_data)); - state->chunk_infos = chunk.getChunkInfos(); + state.emplace(process(block, view, *views_data, chunk.getChunkInfos())); } @@ -746,9 +748,6 @@ ExecutingInnerQueryFromViewTransform::GenerateResult ExecutingInnerQueryFromView break; } - // here are we copy chunk_infos to the all chunks generated from the one consumed chunk - res.chunk.getChunkInfos().append(state->chunk_infos.clone()); - if (res.is_done) state.reset(); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 3db935729b4..c21d230cba7 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -890,18 +890,18 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { auto result = squashing.add(std::move(state.block_for_insert)); - if (result) + if (result.block) { return PushResult { .status = PushResult::TOO_MUCH_DATA, - .insert_block = std::move(result), + .insert_block = std::move(result.block), }; } } auto result = squashing.add({}); - return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); + return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result.block), query_context); } void TCPHandler::processInsertQuery() diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 5934756fb95..ab316947ff8 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1298,7 +1298,8 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() Block projection_block; { ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); - projection_block = projection_squashes[i].add(projection.calculate(cur_block, ctx->context)); + auto result = projection_squashes[i].add(projection.calculate(cur_block, ctx->context)); + projection_block = std::move(result.block); } if (projection_block) @@ -1323,11 +1324,11 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash = projection_squashes[i]; - auto projection_block = projection_squash.add({}); - if (projection_block) + auto squash_result = projection_squash.add({}); + if (squash_result.block) { auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); + *ctx->data, ctx->log, std::move(squash_result.block), projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); temp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(temp_part.part)); diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference index adf6abb7298..741591b0dd4 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference @@ -2,7 +2,7 @@ 3 2 -3 +2 1 1 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index d3c4da86b41..0a41581025a 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -29,7 +29,7 @@ INSERT INTO without_deduplication VALUES (43); SELECT count() FROM with_deduplication; SELECT count() FROM without_deduplication; --- Implicit insert isn't deduplicated +-- Implicit insert is deduplicated even for MV without_deduplication_mv SELECT ''; SELECT countMerge(cnt) FROM with_deduplication_mv; SELECT countMerge(cnt) FROM without_deduplication_mv; From 9242c78fee09962abc5a28f368887e531187b5dd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 6 May 2024 21:53:22 +0200 Subject: [PATCH 026/141] work with test --- src/Interpreters/InterpreterInsertQuery.cpp | 131 +++--- src/Processors/Sinks/SinkToStorage.cpp | 4 +- src/Processors/Sinks/SinkToStorage.h | 20 - .../Transforms/NumberBlocksTransform.cpp | 156 ++++++ .../Transforms/NumberBlocksTransform.h | 391 +++++++-------- .../Transforms/SquashingChunksTransform.cpp | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 61 +-- src/Storages/FileLog/StorageFileLog.cpp | 8 +- src/Storages/MergeTree/MergeTreeSink.cpp | 21 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 28 +- .../MergeTree/ReplicatedMergeTreeSink.h | 11 - ...02912_ingestion_mv_deduplication.reference | 3 +- .../02912_ingestion_mv_deduplication.sql | 1 + ...uplication_insert_several_blocks.reference | 148 ++++-- ...tion_mv_generates_several_blocks.reference | 324 ++++++++++--- ...cation_several_mv_into_one_table.reference | 444 ++++++++++++------ 16 files changed, 1162 insertions(+), 591 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2961d643869..0f3df3752cb 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -39,6 +39,7 @@ #include #include #include +#include "base/defines.h" namespace ProfileEvents @@ -398,6 +399,9 @@ Chain InterpreterInsertQuery::buildPreSinkChain( std::pair, std::vector> InterpreterInsertQuery::buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) { + chassert(presink_streams > 0); + chassert(sink_streams > 0); + ThreadGroupPtr running_group; if (current_thread) running_group = current_thread->getThreadGroup(); @@ -410,8 +414,8 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < sink_streams; ++i) { LOG_DEBUG(getLogger("InsertQuery"), - "call buildSink table name {}.{}, stream {}/{}", - table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); + "call buildSink sink_streams table name {}.{}, stream {}/{}", + table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams); auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, running_group, /* elapsed_counter_ms= */ nullptr); @@ -421,6 +425,10 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < presink_streams; ++i) { + LOG_DEBUG(getLogger("InsertQuery"), + "call buildSink presink_streams table name {}.{}, stream {}/{}", + table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); + auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); presink_chains.emplace_back(std::move(out)); } @@ -454,6 +462,9 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & ContextPtr select_context = getContext(); + LOG_DEBUG(getLogger("InsertQuery"), + "execute() is_trivial_insert_select {} prefersLargeBlocks={} max_insert_threads {}", is_trivial_insert_select, table->prefersLargeBlocks(), settings.max_insert_threads); + if (is_trivial_insert_select) { /** When doing trivial INSERT INTO ... SELECT ... FROM table, @@ -462,9 +473,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & * to avoid unnecessary squashing. */ - LOG_DEBUG(getLogger("InsertQuery"), - "execute() is_trivial_insert_select=true prefersLargeBlocks={}", table->prefersLargeBlocks()); - Settings new_settings = select_context->getSettings(); new_settings.max_threads = std::max(1, settings.max_insert_threads); @@ -503,6 +511,11 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.dropTotalsAndExtremes(); + LOG_DEBUG(getLogger("InsertQuery"), + "adding transforms, pipline size {}, threads {}, max_insert_threads {}", + pipeline.getNumStreams(), pipeline.getNumThreads(), settings.max_insert_threads); + + /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. if (getContext()->getSettingsRef().insert_null_as_default) { @@ -532,6 +545,56 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & } } + pipeline.resize(1); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + }); + } + + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + + if (!settings.insert_deduplication_token.value.empty()) + { + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { + return std::make_shared(settings.insert_deduplication_token.value, in_header); + }); + + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + } + + /// Number of streams works like this: + /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever + /// InterpreterSelectQuery ends up with. + /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. + /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. + /// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage. + /// Otherwise ResizeProcessor them down to 1 stream. + + size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); + size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; + + auto [presink_chains, sink_chains] = buildPreAndSyncChains( + presink_streams_size, sink_streams_size, + table, metadata_snapshot, query_sample_block); + + pipeline.resize(presink_chains.size()); + auto actions_dag = ActionsDAG::makeConvertingActions( pipeline.getHeader().getColumnsWithTypeAndName(), query_sample_block.getColumnsWithTypeAndName(), @@ -560,54 +623,12 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & return counting; }); - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - }); - } - - /// Number of streams works like this: - /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever - /// InterpreterSelectQuery ends up with. - /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. - /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. - /// * If the table supports parallel inserts, use the same streams for writing to IStorage. - /// Otherwise ResizeProcessor them down to 1 stream. - - size_t presink_streams_size = std::max(1, std::max(settings.max_insert_threads, pipeline.getNumStreams())); - size_t sink_streams_size = table->supportsParallelInsert() ? presink_streams_size : 1; - - auto [presink_chains, sink_chains] = buildPreAndSyncChains( - presink_streams_size, sink_streams_size, - table, metadata_snapshot, query_sample_block); - - if (!settings.insert_deduplication_token.value.empty()) - { - pipeline.resize(1); - - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr - { - return std::make_shared(settings.insert_deduplication_token.value, in_header); - }); - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - } - - pipeline.resize(presink_chains.size()); for (auto & chain : presink_chains) pipeline.addResources(chain.detachResources()); pipeline.addChains(std::move(presink_chains)); pipeline.resize(sink_streams_size); + for (auto & chain : sink_chains) pipeline.addResources(chain.detachResources()); pipeline.addChains(std::move(sink_chains)); @@ -655,12 +676,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query chain.appendChain(std::move(sink_chains.front())); } - if (!settings.insert_deduplication_token.value.empty()) - { - chain.addSource(std::make_shared(chain.getInputHeader())); - chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); - } - if (shouldAddSquashingFroStorage(table)) { bool table_prefers_large_blocks = table->prefersLargeBlocks(); @@ -673,6 +688,14 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query chain.addSource(std::move(squashing)); } + if (!settings.insert_deduplication_token.value.empty()) + { + chain.addSource(std::make_shared(chain.getInputHeader())); + chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); + } + + chain.addSource(std::make_shared(chain.getInputHeader())); + auto context_ptr = getContext(); auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); counting->setProcessListElement(context_ptr->getProcessListElement()); diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index fff4a881e3d..36bb70f493f 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -16,9 +16,7 @@ void SinkToStorage::onConsume(Chunk chunk) Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); consume(chunk); - fillDeduplicationTokenForChildren(chunk); - if (!lastBlockIsDuplicate()) // TODO: remove that - cur_chunk = std::move(chunk); + cur_chunk = std::move(chunk); } SinkToStorage::GenerateResult SinkToStorage::onGenerate() diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 21e003c4317..c350b9f79b0 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -4,7 +4,6 @@ #include #include #include -#include "Processors/Transforms/NumberBlocksTransform.h" namespace DB { @@ -23,25 +22,6 @@ public: protected: virtual void consume(Chunk & chunk) = 0; - virtual bool lastBlockIsDuplicate() const { return false; } - - void fillDeduplicationTokenForChildren(Chunk & chunk) const - { - auto token_info = chunk.getChunkInfos().get(); - if (token_info) - return; - - SipHash hash; - for (const auto & colunm: chunk.getColumns()) - { - colunm->updateHashFast(hash); - } - const auto hash_value = hash.get128(); - - chunk.getChunkInfos().add(std::make_shared( - fmt::format(":hash-{}", toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])) - )); - } private: std::vector table_locks; diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp index 61ff3f6bfd5..19ebf94a27a 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -1 +1,157 @@ #include + +#include + +#include +#include +#include + + +#include + + +namespace DB +{ +namespace DeduplicationToken +{ + +String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const +{ + chassert(stage == MATERIALIZE_VIEW_ID || !enable_assert); + + String result; + result.reserve(getTotalSize()); + + for (const auto & part : parts) + result.append(part); + + return result; +} + +void DB::DeduplicationToken::TokenInfo::setInitialToken(String part) +{ + chassert(stage == INITIAL); + addTokenPart(std::move(part)); + stage = MATERIALIZE_VIEW_ID; +} + +void TokenInfo::setUserToken(const String & token) +{ + chassert(stage == INITIAL); + addTokenPart(fmt::format("user-token-{}", token)); + stage = SOURCE_BLOCK_NUMBER; +} + +void TokenInfo::setSourceBlockNumber(size_t sbn) +{ + chassert(stage == SOURCE_BLOCK_NUMBER); + addTokenPart(fmt::format(":source-number-{}", sbn)); + stage = MATERIALIZE_VIEW_ID; +} + +void TokenInfo::setMaterializeViewID(const String & id) +{ + chassert(stage == MATERIALIZE_VIEW_ID); + addTokenPart(fmt::format(":mv-{}", id)); + stage = MATERIALIZE_VIEW_BLOCK_NUMBER; +} + +void TokenInfo::setMaterializeViewBlockNumber(size_t mvbn) +{ + chassert(stage == MATERIALIZE_VIEW_BLOCK_NUMBER); + addTokenPart(fmt::format(":mv-bn-{}", mvbn)); + stage = MATERIALIZE_VIEW_ID; +} + +void TokenInfo::reset() +{ + stage = INITIAL; + parts.clear(); +} + +void TokenInfo::addTokenPart(String part) +{ + if (!part.empty()) + parts.push_back(std::move(part)); +} + +size_t TokenInfo::getTotalSize() const +{ + size_t size = 0; + for (const auto & part : parts) + size += part.size(); + return size; +} + +void CheckTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); + + if (!must_be_present) + { + LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, no token required, token {}", debug, token_info->getToken(false)); + return; + } + + LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, token: {}", debug, token_info->getToken(false)); +} + +void SetInitialTokenTransform::transform(Chunk & chunk) +{ + auto token_builder = chunk.getChunkInfos().get(); + chassert(token_builder); + if (token_builder->tokenInitialized()) + return; + + SipHash hash; + for (const auto & colunm : chunk.getColumns()) + colunm->updateHashFast(hash); + + const auto hash_value = hash.get128(); + token_builder->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); +} + +void SetUserTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + chassert(!token_info->tokenInitialized()); + token_info->setUserToken(user_token); +} + +void SetSourceBlockNumberTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + chassert(!token_info->tokenInitialized()); + token_info->setSourceBlockNumber(block_number++); +} + +void SetMaterializeViewIDTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + chassert(token_info->tokenInitialized()); + token_info->setMaterializeViewID(mv_id); +} + +void SetMaterializeViewBlockNumberTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + chassert(token_info->tokenInitialized()); + token_info->setMaterializeViewBlockNumber(block_number++); +} + +void ResetTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + token_info->reset(); +} + +} +} diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 6586f015d3e..46b62029c21 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -2,10 +2,9 @@ #include #include -#include -#include -#include +#include + namespace ErrorCodes { @@ -14,220 +13,6 @@ namespace ErrorCodes namespace DB { - struct SerialBlockNumberInfo : public ChunkInfoCloneable - { - SerialBlockNumberInfo(const SerialBlockNumberInfo & other) = default; - explicit SerialBlockNumberInfo(size_t block_number_) - : block_number(block_number_) - { - } - - size_t block_number = 0; - }; - - - class NumberBlocksTransform : public ISimpleTransform - { - public: - explicit NumberBlocksTransform(const Block & header) - : ISimpleTransform(header, header, true) - { - } - - String getName() const override { return "NumberBlocksTransform"; } - - void transform(Chunk & chunk) override - { - chunk.getChunkInfos().add(std::make_shared(block_number++)); - } - - private: - size_t block_number = 0; - }; - - - class DedupTokenInfo : public ChunkInfoCloneable - { - public: - DedupTokenInfo() = default; - DedupTokenInfo(const DedupTokenInfo & other) = default; - explicit DedupTokenInfo(String first_part) - { - addTokenPart(std::move(first_part)); - } - - String getToken() const - { - String result; - result.reserve(getTotalSize()); - - for (const auto & part : token_parts) - { - result.append(part); - } - - return result; - } - - bool empty() const - { - return token_parts.empty(); - } - - void addTokenPart(String part) - { - if (!part.empty()) - token_parts.push_back(std::move(part)); - } - - private: - size_t getTotalSize() const - { - size_t size = 0; - for (const auto & part : token_parts) - size += part.size(); - return size; - } - - std::vector token_parts; - }; - - class AddUserDeduplicationTokenTransform : public ISimpleTransform - { - public: - AddUserDeduplicationTokenTransform(String token_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , token(token_) - { - } - - String getName() const override { return "AddUserDeduplicationTokenTransform"; } - - void transform(Chunk & chunk) override - { - chunk.getChunkInfos().add(std::make_shared(token)); - } - - private: - String token; - }; - - - class CheckInsertDeduplicationTokenTransform : public ISimpleTransform - { - public: - CheckInsertDeduplicationTokenTransform(String debug_, bool must_be_present_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , debug(debug_) - , must_be_present(must_be_present_) - { - } - - String getName() const override { return "CheckInsertDeduplicationTokenTransform"; } - - void transform(Chunk & chunk) override - { - if (!must_be_present) - return; - - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); - - LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), - "{}, token: {}", - debug, token_info->getToken()); - } - - private: - String debug; - bool must_be_present = false; - }; - - - class ExtendDeduplicationWithBlockNumberFromInfoTokenTransform : public ISimpleTransform - { - public: - explicit ExtendDeduplicationWithBlockNumberFromInfoTokenTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "ExtendDeduplicationWithBlockNumberFromInfoTokenTransform"; } - - void transform(Chunk & chunk) override - { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, recs {}", chunk.getChunkInfos().size()); - - auto block_number_info = chunk.getChunkInfos().get(); - if (!block_number_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have SerialBlockNumberInfo as ChunkInfo"); - - token_info->addTokenPart(fmt::format(":block-{}", block_number_info->block_number)); - - LOG_DEBUG(getLogger("ExtendDeduplicationWithBlockNumberFromInfoTokenTransform"), - "updated with {}, result: {}", - fmt::format(":block-{}", block_number_info->block_number), token_info->getToken()); - } - }; - - class ExtendDeduplicationWithBlockNumberTokenTransform : public ISimpleTransform - { - public: - explicit ExtendDeduplicationWithBlockNumberTokenTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "ExtendDeduplicationWithBlockNumberTokenTransform"; } - - void transform(Chunk & chunk) override - { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo"); - - auto x = block_number++; - token_info->addTokenPart(fmt::format(":block-{}", x)); - - LOG_DEBUG(getLogger("ExtendDeduplicationWithBlockNumberTokenTransform"), - "updated with {}, result: {}", - fmt::format(":block-{}", x), token_info->getToken()); - } - private: - size_t block_number = 0; - }; - - class ExtendDeduplicationWithTokenPartTransform : public ISimpleTransform - { - public: - ExtendDeduplicationWithTokenPartTransform(String token_part_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , token_part(token_part_) - { - } - - String getName() const override { return "ExtendDeduplicationWithBlockNumberTokenTransform"; } - - void transform(Chunk & chunk) override - { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, try to add token part {}", token_part); - - token_info->addTokenPart(fmt::format("{}", token_part)); - - LOG_DEBUG(getLogger("ExtendDeduplicationWithTokenPartTransform"), - "updated with {}, result: {}", - token_part, token_info->getToken()); - } - - private: - String token_part; - }; - class RestoreChunkInfosTransform : public ISimpleTransform { public: @@ -248,4 +33,176 @@ namespace DB Chunk::ChunkInfoCollection chunk_infos; }; + +namespace DeduplicationToken +{ + class TokenInfo : public ChunkInfoCloneable + { + public: + TokenInfo() = default; + TokenInfo(const TokenInfo & other) = default; + + String getToken(bool enable_assert = true) const; + + bool empty() const { return parts.empty(); } + bool tokenInitialized() const { return stage != INITIAL && stage != SOURCE_BLOCK_NUMBER; } + + void setInitialToken(String part); + void setUserToken(const String & token); + void setSourceBlockNumber(size_t sbn); + void setMaterializeViewID(const String & id); + void setMaterializeViewBlockNumber(size_t mvbn); + void reset(); + + private: + void addTokenPart(String part); + size_t getTotalSize() const; + + enum BuildingStage + { + INITIAL, + SOURCE_BLOCK_NUMBER, + MATERIALIZE_VIEW_ID, + MATERIALIZE_VIEW_BLOCK_NUMBER, + }; + + BuildingStage stage = INITIAL; + std::vector parts; + }; + + + class CheckTokenTransform : public ISimpleTransform + { + public: + CheckTokenTransform(String debug_, bool must_be_present_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , debug(debug_) + , must_be_present(must_be_present_) + { + } + + String getName() const override { return "DeduplicationToken::CheckTokenTransform"; } + + void transform(Chunk & chunk) override; + + private: + String debug; + bool must_be_present = false; + }; + + + class AddTokenInfoTransform : public ISimpleTransform + { + public: + explicit AddTokenInfoTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::AddTokenInfoTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().add(std::make_shared()); + } + }; + + + class SetInitialTokenTransform : public ISimpleTransform + { + public: + explicit SetInitialTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::SetInitialTokenTransform"; } + + void transform(Chunk & chunk) override; + }; + + class ResetTokenTransform : public ISimpleTransform + { + public: + explicit ResetTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::ResetTokenTransform"; } + + void transform(Chunk & chunk) override; + }; + + + class SetUserTokenTransform : public ISimpleTransform + { + public: + SetUserTokenTransform(String user_token_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , user_token(std::move(user_token_)) + { + } + + String getName() const override { return "DeduplicationToken::SetUserTokenTransform"; } + + void transform(Chunk & chunk) override; + + private: + String user_token; + }; + + + class SetSourceBlockNumberTransform : public ISimpleTransform + { + public: + explicit SetSourceBlockNumberTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::SetSourceBlockNumberTransform"; } + + void transform(Chunk & chunk) override; + + private: + size_t block_number; + }; + + + class SetMaterializeViewIDTransform : public ISimpleTransform + { + public: + SetMaterializeViewIDTransform(String mv_id_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , mv_id(std::move(mv_id_)) + { + } + + String getName() const override { return "DeduplicationToken::SetMaterializeViewIDTransform"; } + + void transform(Chunk & chunk) override; + + private: + String mv_id; + }; + + + class SetMaterializeViewBlockNumberTransform : public ISimpleTransform + { + public: + explicit SetMaterializeViewBlockNumberTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::SetMaterializeViewBlockNumberTransform"; } + + void transform(Chunk & chunk) override; + + private: + size_t block_number; + }; + +} } diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 7464cb79ba6..1a29b8d8a2d 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -79,7 +79,7 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::transform(Chunk & chunk) { LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - "transform {}", chunk.getNumRows()); + "transform {}, finished {}", chunk.getNumRows(), finished); if (!finished) { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ccecfcf3333..0c1893e0f37 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -108,7 +108,7 @@ private: class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform { public: - ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_); + ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_, bool disable_deduplication_for_children_); String getName() const override { return "ExecutingInnerQueryFromView"; } @@ -119,6 +119,7 @@ protected: private: ViewsDataPtr views_data; ViewRuntimeData & view; + bool disable_deduplication_for_children; struct State { @@ -219,6 +220,11 @@ std::optional generateViewChain( const auto & insert_settings = insert_context->getSettingsRef(); + if (disable_deduplication_for_children) + { + insert_context->setSetting("insert_deduplicate", Field{false}); + } + // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) select_context->setSetting("parallelize_output_from_storages", Field{false}); @@ -330,16 +336,6 @@ std::optional generateViewChain( bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); - out.addSource(std::make_shared("Before inner chain", !disable_deduplication_for_children, out.getInputHeader())); - - if (!disable_deduplication_for_children) - { - String addition_part = view_id.hasUUID() ? toString(view_id.uuid) : view_id.getFullNameNotQuoted(); - out.addSource(std::make_shared(fmt::format(":mv-{}", addition_part), out.getInputHeader())); - } - - out.addSource(std::make_shared("Before extend token", !disable_deduplication_for_children, out.getInputHeader())); - if (interpreter.shouldAddSquashingFroStorage(inner_table)) { bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); @@ -351,7 +347,7 @@ std::optional generateViewChain( table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); } - out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); @@ -394,23 +390,15 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { - out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); - - // if (!disable_deduplication_for_children) - // { - // // out.addSource(std::make_shared(out.getInputHeader())); - // // out.addSource(std::make_shared(out.getInputHeader())); - - // out.addSource(std::make_shared(out.getInputHeader())); - // } + out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); auto executing_inner_query = std::make_shared( - storage_header, views_data->views.back(), views_data); + storage_header, views_data->views.back(), views_data, disable_deduplication_for_children); executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); out.addSource(std::move(executing_inner_query)); - out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); } return out; @@ -451,8 +439,6 @@ Chain buildPushingToViewsChain( */ result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); - /// If the "root" table deduplicates blocks, there are no need to make deduplication for children - /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks bool disable_deduplication_for_children = false; if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); @@ -563,6 +549,10 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } + else + { + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + } if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); @@ -578,7 +568,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection chunk_infos) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection chunk_infos, bool disable_deduplication_for_children) { const auto & context = views_data.context; @@ -625,9 +615,18 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); - //pipeline.addTransform(std::make_shared(pipeline.getHeader())); pipeline.addTransform(std::make_shared(std::move(chunk_infos), pipeline.getHeader())); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); + + if (!disable_deduplication_for_children) + { + String materialize_view_id = view.table_id.hasUUID() ? toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted(); + pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); + } + else + { + pipeline.addTransform(std::make_shared(pipeline.getHeader())); + } return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -720,17 +719,19 @@ IProcessor::Status CopyingDataToViewsTransform::prepare() ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( const Block & header, ViewRuntimeData & view_, - std::shared_ptr views_data_) + std::shared_ptr views_data_, + bool disable_deduplication_for_children_) : ExceptionKeepingTransform(header, view_.sample_block) , views_data(std::move(views_data_)) , view(view_) + , disable_deduplication_for_children(disable_deduplication_for_children_) { } void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); - state.emplace(process(block, view, *views_data, chunk.getChunkInfos())); + state.emplace(process(block, view, *views_data, chunk.getChunkInfos(), disable_deduplication_for_children)); } diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 6ca4ec6e079..b86845d48e0 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -740,7 +740,13 @@ bool StorageFileLog::streamToViews() auto new_context = Context::createCopy(getContext()); - InterpreterInsertQuery interpreter(insert, new_context, false, true, true, false); + InterpreterInsertQuery interpreter( + insert, + new_context, + false, + true, + true, + false); auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 2d29f87c556..4b0fa94e183 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -90,16 +90,20 @@ void MergeTreeSink::consume(Chunk & chunk) bool support_parallel_write = false; String block_dedup_token; - std::shared_ptr dedub_token_info_for_children = nullptr; + auto token_info = chunk.getChunkInfos().get(); if (storage.getDeduplicationLog()) { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info && !context->getSettingsRef().insert_deduplication_token.value.empty()) + if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + "DedupTokenBuilder is expected for consumed chunk in MergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); - if (token_info) + if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DedupTokenBuilder has to be initialized with user token for table: {}", + storage.getStorageID().getNameForLogs()); + + if (token_info->tokenInitialized()) { block_dedup_token = token_info->getToken(); @@ -109,9 +113,6 @@ void MergeTreeSink::consume(Chunk & chunk) } else { - dedub_token_info_for_children = std::make_shared(); - chunk.getChunkInfos().add(dedub_token_info_for_children); - LOG_DEBUG(storage.log, "dedup token from hash is calculated"); } @@ -141,10 +142,10 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if (dedub_token_info_for_children) + if (!token_info->tokenInitialized()) { chassert(temp_part.part); - dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + token_info->setInitialToken(temp_part.part->getPartBlockIDHash()); } if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index e855bb7d969..b03f3f88611 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -294,17 +294,21 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } String block_dedup_token; - std::shared_ptr dedub_token_info_for_children = nullptr; + auto token_info = chunk.getChunkInfos().get(); if constexpr (!async_insert) { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info && !context->getSettingsRef().insert_deduplication_token.value.empty()) + if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + "DedupTokenBuilder is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); + + if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DedupTokenBuilder has to be initialized with user token for table: {}", storage.getStorageID().getNameForLogs()); - if (token_info) + if (token_info->tokenInitialized()) { /// multiple blocks can be inserted within the same insert query /// an ordinal number is added to dedup token to generate a distinctive block id for each block @@ -316,8 +320,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } else { - dedub_token_info_for_children = std::make_shared(); - chunk.getChunkInfos().add(dedub_token_info_for_children); LOG_DEBUG(storage.log, "dedup token from hash is calculated"); } @@ -386,10 +388,10 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - if (dedub_token_info_for_children) + if (!token_info->tokenInitialized()) { chassert(temp_part.part); - dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + token_info->setInitialToken(temp_part.part->getPartBlockIDHash()); } } @@ -444,8 +446,8 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) /// value for `last_block_is_duplicate`, which is possible only after the part is committed. /// Othervide we can delay commit. /// TODO: we can also delay commit if there is no MVs. - if (!settings.deduplicate_blocks_in_dependent_materialized_views) - finishDelayedChunk(zookeeper); + // if (!settings.deduplicate_blocks_in_dependent_materialized_views) + // finishDelayedChunk(zookeeper); ++num_blocks_processed; } @@ -456,8 +458,6 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF if (!delayed_chunk) return; - last_block_is_duplicate = false; - for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -470,8 +470,6 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF { bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second; - last_block_is_duplicate = last_block_is_duplicate || deduplicated; - /// Set a special error code if the block is duplicate int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index e460804d7f1..7d025361717 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -59,16 +59,6 @@ public: /// For ATTACHing existing data on filesystem. bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); - /// For proper deduplication in MaterializedViews - bool lastBlockIsDuplicate() const override - { - /// If MV is responsible for deduplication, block is not considered duplicating. - if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) - return false; - - return last_block_is_duplicate; - } - struct DelayedChunk; private: std::vector detectConflictsInAsyncBlockIDs(const std::vector & ids); @@ -126,7 +116,6 @@ private: bool allow_attach_while_readonly = false; bool quorum_parallel = false; const bool deduplicate = true; - bool last_block_is_duplicate = false; UInt64 num_blocks_processed = 0; LoggerPtr log; diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference index 335b55f05c8..ae82b9c0463 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference @@ -10,7 +10,8 @@ 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 42 +2022-09-01 12:00:00 84 +2023-09-01 12:00:00 42 -- Original issue with deduplicate_blocks_in_dependent_materialized_views = 1 AND max_insert_delayed_streams_for_parallel_write > 1 -- Landing 2022-09-01 12:23:34 42 diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index f206f0d7775..06fe156500d 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -56,6 +56,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view - 2nd insert gets first block 20220901 deduplicated and second one inserted in landing table - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded + Now it is fixed. */ SET deduplicate_blocks_in_dependent_materialized_views = 0, max_insert_delayed_streams_for_parallel_write = 1000; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference index 9b4738ce805..641735d1bb6 100644 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference @@ -121,47 +121,93 @@ OK Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b -count 1 +count 10 table_when_b_even -count 1 -EXPECTED_TO_FAIL +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +FIXED Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b -count 1 +count 10 table_when_b_even -count 1 -EXPECTED_TO_FAIL +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b -count 1 +count 10 table_when_b_even count 5 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b -count 1 +count 10 table_when_b_even count 10 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +FIXED Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even -count 1 +count 5 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +FIXED Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even -count 1 +count 10 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +FIXED Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b @@ -555,47 +601,93 @@ OK Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b -count 1 +count 10 table_when_b_even -count 1 -EXPECTED_TO_FAIL +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +FIXED Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b -count 1 +count 10 table_when_b_even -count 1 -EXPECTED_TO_FAIL +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b -count 1 +count 10 table_when_b_even count 5 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b -count 1 +count 10 table_when_b_even count 10 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +FIXED Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even -count 1 +count 5 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +FIXED Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even -count 1 +count 10 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +FIXED Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference index 4411bdecea8..06f30793670 100644 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference @@ -121,47 +121,93 @@ OK Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b -count 1 +count 5 table_when_b_even_and_joined -count 10 -EXPECTED_TO_FAIL +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b -count 1 +count 5 table_when_b_even_and_joined -count 9 -EXPECTED_TO_FAIL +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b -count 1 +count 5 table_when_b_even_and_joined count 47 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +FIXED Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b -count 1 +count 5 table_when_b_even_and_joined count 45 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +FIXED Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined -count 10 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined -count 9 +count 45 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b @@ -197,9 +243,16 @@ Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -250,9 +303,16 @@ Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -303,9 +363,16 @@ Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -356,9 +423,16 @@ Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -527,47 +601,93 @@ OK Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b -count 1 +count 5 table_when_b_even_and_joined -count 10 -EXPECTED_TO_FAIL +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b -count 1 +count 5 table_when_b_even_and_joined -count 9 -EXPECTED_TO_FAIL +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b -count 1 +count 5 table_when_b_even_and_joined count 47 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +FIXED Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b -count 1 +count 5 table_when_b_even_and_joined count 45 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +FIXED Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined -count 10 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined -count 9 +count 45 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b @@ -603,9 +723,16 @@ Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -656,9 +783,16 @@ Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -709,9 +843,16 @@ Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -762,9 +903,16 @@ Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1055,9 +1203,16 @@ Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1108,9 +1263,16 @@ Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1161,9 +1323,16 @@ Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1214,9 +1383,16 @@ Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1507,9 +1683,16 @@ Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_ table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1560,9 +1743,16 @@ Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_ table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1613,9 +1803,16 @@ Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_ table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1666,9 +1863,16 @@ Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_ table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference index a56f7deb744..4d517948a25 100644 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference @@ -88,36 +88,70 @@ table_dst count 32 OK Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 1 -table_dst count 2 -EXPECTED_TO_FAIL +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -EXPECTED_TO_FAIL +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +FIXED Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 1 +table_src count 8 table_dst count 6 -EXPECTED_TO_FAIL +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +FIXED Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 +table_src count 8 table_dst count 16 -EXPECTED_TO_FAIL +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +FIXED Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 2 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 16 +table_dst count 6 +0 +0 +FIXED Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 -table_dst count 2 +table_dst count 16 0 -EXPECTED_TO_FAIL +0 +table_src count 16 +table_dst count 16 +0 +0 +FIXED Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -143,19 +177,25 @@ OK Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -181,19 +221,25 @@ OK Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -219,19 +265,25 @@ OK Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -257,19 +309,25 @@ OK Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -382,36 +440,70 @@ table_dst count 32 OK Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 1 -table_dst count 2 -EXPECTED_TO_FAIL +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -EXPECTED_TO_FAIL +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +FIXED Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 1 +table_src count 8 table_dst count 6 -EXPECTED_TO_FAIL +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +FIXED Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 +table_src count 8 table_dst count 16 -EXPECTED_TO_FAIL +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +FIXED Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 2 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 16 +table_dst count 6 +0 +0 +FIXED Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 -table_dst count 2 +table_dst count 16 0 -EXPECTED_TO_FAIL +0 +table_src count 16 +table_dst count 16 +0 +0 +FIXED Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -437,19 +529,25 @@ OK Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -475,19 +573,25 @@ OK Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -513,19 +617,25 @@ OK Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -551,19 +661,25 @@ OK Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -765,19 +881,25 @@ OK Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -803,19 +925,25 @@ OK Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -841,19 +969,25 @@ OK Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -879,19 +1013,25 @@ OK Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -1093,19 +1233,25 @@ OK Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -1131,19 +1277,25 @@ OK Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -1169,19 +1321,25 @@ OK Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -1207,19 +1365,25 @@ OK Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 From 55ff6446b5035588eb6985e6fa2291ca444f0a00 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 7 May 2024 12:20:31 +0200 Subject: [PATCH 027/141] adjust rebase --- src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp | 7 +++++-- src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 7094578a9cc..c55bcd08573 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -191,7 +191,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali return {std::move(serialized_key_column), std::move(serialized_value_column)}; } -void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) +void EmbeddedRocksDBBulkSink::consume(Chunk & chunk_) { std::vector to_written = squash(std::move(chunk_)); @@ -217,7 +217,10 @@ void EmbeddedRocksDBBulkSink::onFinish() { /// If there is any data left, write it. if (!chunks.empty()) - consume({}); + { + Chunk empty; + consume(empty); + } } String EmbeddedRocksDBBulkSink::getTemporarySSTFilePath() diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 19ce1e3b83e..be425208357 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -34,7 +34,7 @@ public: ~EmbeddedRocksDBBulkSink() override; - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; From 7a8f6b120699a9c4baf7a465ed21fa0aa66ddda5 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 17 May 2024 16:26:15 +0200 Subject: [PATCH 028/141] fix window view and other tests --- .../Sources/SourceFromSingleChunk.cpp | 4 +- .../Transforms/NumberBlocksTransform.cpp | 22 ++++++++-- .../Transforms/NumberBlocksTransform.h | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 8 ++-- src/Storages/LiveView/StorageLiveView.cpp | 4 +- src/Storages/LiveView/StorageLiveView.h | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 4 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 16 ++++++-- src/Storages/WindowView/StorageWindowView.cpp | 41 +++++++++++++++---- src/Storages/WindowView/StorageWindowView.h | 2 +- .../03035_max_insert_threads_support.sh | 2 +- 11 files changed, 78 insertions(+), 29 deletions(-) diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index fb888c104c4..9abe0504d10 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -5,7 +5,9 @@ namespace DB { -SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {} +SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) +{ +} SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows()) { diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp index 19ebf94a27a..387d1ceb8e0 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -12,6 +12,12 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace DeduplicationToken { @@ -101,9 +107,17 @@ void CheckTokenTransform::transform(Chunk & chunk) void SetInitialTokenTransform::transform(Chunk & chunk) { - auto token_builder = chunk.getChunkInfos().get(); - chassert(token_builder); - if (token_builder->tokenInitialized()) + auto token_info = chunk.getChunkInfos().get(); + + LOG_DEBUG(getLogger("SetInitialTokenTransform"), "has token_info {}", bool(token_info)); + + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetInitialTokenTransform"); + + chassert(token_info); + if (!token_info || token_info->tokenInitialized()) return; SipHash hash; @@ -111,7 +125,7 @@ void SetInitialTokenTransform::transform(Chunk & chunk) colunm->updateHashFast(hash); const auto hash_value = hash.get128(); - token_builder->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); + token_info->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); } void SetUserTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 46b62029c21..6978fe5e6b6 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -166,7 +166,7 @@ namespace DeduplicationToken void transform(Chunk & chunk) override; private: - size_t block_number; + size_t block_number = 0; }; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 0c1893e0f37..2e6baea7c26 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -24,8 +24,8 @@ #include #include #include -#include "Processors/Chunk.h" -#include "Processors/Transforms/NumberBlocksTransform.h" +#include +#include #include #include @@ -766,7 +766,7 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi void PushingToLiveViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(getHeader().cloneWithColumns(chunk.getColumns()), context); + live_view.writeBlock(getHeader(), chunk, context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -790,7 +790,7 @@ void PushingToWindowViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.getColumns()), context); + window_view, getHeader(), chunk, context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index c3aacfd67d3..f6008347425 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -330,7 +330,7 @@ Pipe StorageLiveView::watch( return reader; } -void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) +void StorageLiveView::writeBlock(const Block & header, Chunk & chunk, ContextPtr local_context) { auto output = std::make_shared(*this); @@ -363,7 +363,7 @@ void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) if (!is_block_processed) { Pipes pipes; - pipes.emplace_back(std::make_shared(block)); + pipes.emplace_back(std::make_shared(header, chunk.clone())); auto creator = [&](const StorageID & blocks_id_global) { diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 91daac32c7b..fce5bad6240 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -118,7 +118,7 @@ public: return 0; } - void writeBlock(const Block & block, ContextPtr context); + void writeBlock(const Block & header, Chunk & chunk, ContextPtr context); void refresh(); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 4b0fa94e183..c252d95a5e9 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -95,12 +95,12 @@ void MergeTreeSink::consume(Chunk & chunk) { if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenBuilder is expected for consumed chunk in MergeTreeSink for table: {}", + "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenBuilder has to be initialized with user token for table: {}", + "TokenInfo has to be initialized with user token for table: {}", storage.getStorageID().getNameForLogs()); if (token_info->tokenInitialized()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index b03f3f88611..41fdb86f3bd 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -299,12 +299,12 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) { if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenBuilder is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", + "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenBuilder has to be initialized with user token for table: {}", + "TokenInfo has to be initialized with user token for table: {}", storage.getStorageID().getNameForLogs()); @@ -1174,8 +1174,16 @@ void ReplicatedMergeTreeSinkImpl::onStart() template void ReplicatedMergeTreeSinkImpl::onFinish() { - auto zookeeper = storage.getZooKeeper(); - finishDelayedChunk(std::make_shared(zookeeper)); + const auto & settings = context->getSettingsRef(); + + ZooKeeperWithFaultInjectionPtr zookeeper = ZooKeeperWithFaultInjection::createInstance( + settings.insert_keeper_fault_injection_probability, + settings.insert_keeper_fault_injection_seed, + storage.getZooKeeper(), + "ReplicatedMergeTreeSink::onFinish", + log); + + finishDelayedChunk(zookeeper); } template diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e0f3b437af7..b81ca34c427 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -1415,22 +1416,25 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } void StorageWindowView::writeIntoWindowView( - StorageWindowView & window_view, const Block & block, ContextPtr local_context) + StorageWindowView & window_view, const Block & header, Chunk & chunk, ContextPtr local_context) { window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) std::this_thread::sleep_for(std::chrono::milliseconds(100)); - if (!window_view.is_proctime && window_view.max_watermark == 0 && block.rows() > 0) + if (!window_view.is_proctime && window_view.max_watermark == 0 && chunk.getNumRows() > 0) { std::lock_guard lock(window_view.fire_signal_mutex); - const auto & window_column = block.getByName(window_view.timestamp_column_name); + const auto & window_column = header.getByName(window_view.timestamp_column_name); const ColumnUInt32::Container & window_end_data = static_cast(*window_column.column).getData(); UInt32 first_record_timestamp = window_end_data[0]; window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); } - Pipe pipe(std::make_shared(block.cloneEmpty(), Chunk(block.getColumns(), block.rows()))); + auto chunk_infos = chunk.getChunkInfos(); + chunk.setChunkInfos({}); + + Pipe pipe(std::make_shared(header.cloneEmpty(), std::move(chunk))); UInt32 lateness_bound = 0; UInt32 t_max_watermark = 0; @@ -1475,10 +1479,10 @@ void StorageWindowView::writeIntoWindowView( auto syntax_result = TreeRewriter(local_context).analyze(query, columns); auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, local_context).getActionsDAG(false); - pipe.addSimpleTransform([&](const Block & header) + pipe.addSimpleTransform([&](const Block & header_) { return std::make_shared( - header, std::make_shared(filter_expression), + header_, std::make_shared(filter_expression), filter_function->getColumnName(), true); }); } @@ -1533,6 +1537,17 @@ void StorageWindowView::writeIntoWindowView( QueryProcessingStage::WithMergeableState); builder = select_block.buildQueryPipeline(); + + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(std::move(chunk_infos), stream_header); + }); + + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: Afrer tmp table before squasing", true, stream_header); + }); + builder.addSimpleTransform([&](const Block & current_header) { return std::make_shared( @@ -1546,7 +1561,7 @@ void StorageWindowView::writeIntoWindowView( UInt32 block_max_timestamp = 0; if (window_view.is_watermark_bounded || window_view.allowed_lateness) { - const auto & timestamp_column = *block.getByName(window_view.timestamp_column_name).column; + const auto & timestamp_column = *header.getByName(window_view.timestamp_column_name).column; const auto & timestamp_data = typeid_cast(timestamp_column).getData(); for (const auto & timestamp : timestamp_data) block_max_timestamp = std::max(timestamp, block_max_timestamp); @@ -1572,6 +1587,11 @@ void StorageWindowView::writeIntoWindowView( lateness_upper_bound); }); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: Afrer WatermarkTransform", true, stream_header); + }); + auto inner_table = window_view.getInnerTable(); auto lock = inner_table->lockForShare( local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); @@ -1588,9 +1608,14 @@ void StorageWindowView::writeIntoWindowView( auto convert_actions = std::make_shared( convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - builder.addSimpleTransform([&](const Block & header) { return std::make_shared(header, convert_actions); }); + builder.addSimpleTransform([&](const Block & header_) { return std::make_shared(header_, convert_actions); }); } + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: Before out", true, stream_header); + }); + builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) { diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index f79867df424..56a21279b86 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -166,7 +166,7 @@ public: BlockIO populate(); - static void writeIntoWindowView(StorageWindowView & window_view, const Block & block, ContextPtr context); + static void writeIntoWindowView(StorageWindowView & window_view, const Block & header, Chunk & chunk, ContextPtr context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.sh b/tests/queries/0_stateless/03035_max_insert_threads_support.sh index 1e6bfb414d8..cedb651a430 100755 --- a/tests/queries/0_stateless/03035_max_insert_threads_support.sh +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.sh @@ -8,7 +8,7 @@ DATA_FILE="data_$CLICKHOUSE_TEST_UNIQUE_NAME.csv" $CLICKHOUSE_CLIENT --max_insert_threads=4 --query=" EXPLAIN PIPELINE INSERT INTO FUNCTION file('$DATA_FILE') SELECT * FROM numbers_mt(1000000) ORDER BY number DESC -" | grep -o MaterializingTransform | wc -l +" | grep -o StorageFileSink | wc -l DATA_FILE_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path from file('$DATA_FILE', 'One')") rm $DATA_FILE_PATH From 4fa59ca49dc7536dbe5b10cbe1f56cd411415aa2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 17 May 2024 17:42:18 +0200 Subject: [PATCH 029/141] adjust style --- src/Processors/Transforms/NumberBlocksTransform.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 6978fe5e6b6..610c219dfa2 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -6,11 +6,6 @@ #include -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace DB { class RestoreChunkInfosTransform : public ISimpleTransform From ae124bf0b36958be0f1ef492272cd85664e50eb7 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 21 May 2024 17:07:31 +0200 Subject: [PATCH 030/141] fix tests for liveview windowview --- .../Transforms/NumberBlocksTransform.cpp | 64 +++++++++++-------- .../Transforms/NumberBlocksTransform.h | 37 ++++++----- .../Transforms/buildPushingToViewsChain.cpp | 8 +-- src/Storages/LiveView/StorageLiveView.cpp | 20 +++++- src/Storages/LiveView/StorageLiveView.h | 2 +- src/Storages/WindowView/StorageWindowView.cpp | 38 ++++++++--- src/Storages/WindowView/StorageWindowView.h | 2 +- ...view_and_deduplication_zookeeper.reference | 4 +- ...lized_view_and_deduplication_zookeeper.sql | 2 +- 9 files changed, 116 insertions(+), 61 deletions(-) diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp index 387d1ceb8e0..11054f652ff 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -23,7 +23,7 @@ namespace DeduplicationToken String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const { - chassert(stage == MATERIALIZE_VIEW_ID || !enable_assert); + chassert(stage == VIEW_ID || !enable_assert); String result; result.reserve(getTotalSize()); @@ -38,7 +38,7 @@ void DB::DeduplicationToken::TokenInfo::setInitialToken(String part) { chassert(stage == INITIAL); addTokenPart(std::move(part)); - stage = MATERIALIZE_VIEW_ID; + stage = VIEW_ID; } void TokenInfo::setUserToken(const String & token) @@ -52,21 +52,21 @@ void TokenInfo::setSourceBlockNumber(size_t sbn) { chassert(stage == SOURCE_BLOCK_NUMBER); addTokenPart(fmt::format(":source-number-{}", sbn)); - stage = MATERIALIZE_VIEW_ID; + stage = VIEW_ID; } -void TokenInfo::setMaterializeViewID(const String & id) +void TokenInfo::setViewID(const String & id) { - chassert(stage == MATERIALIZE_VIEW_ID); - addTokenPart(fmt::format(":mv-{}", id)); - stage = MATERIALIZE_VIEW_BLOCK_NUMBER; + chassert(stage == VIEW_ID); + addTokenPart(fmt::format(":view-id-{}", id)); + stage = VIEW_BLOCK_NUMBER; } -void TokenInfo::setMaterializeViewBlockNumber(size_t mvbn) +void TokenInfo::setViewBlockNumber(size_t mvbn) { - chassert(stage == MATERIALIZE_VIEW_BLOCK_NUMBER); - addTokenPart(fmt::format(":mv-bn-{}", mvbn)); - stage = MATERIALIZE_VIEW_ID; + chassert(stage == VIEW_BLOCK_NUMBER); + addTokenPart(fmt::format(":view-block-{}", mvbn)); + stage = VIEW_ID; } void TokenInfo::reset() @@ -116,8 +116,7 @@ void SetInitialTokenTransform::transform(Chunk & chunk) ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetInitialTokenTransform"); - chassert(token_info); - if (!token_info || token_info->tokenInitialized()) + if (token_info->tokenInitialized()) return; SipHash hash; @@ -131,39 +130,52 @@ void SetInitialTokenTransform::transform(Chunk & chunk) void SetUserTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); - chassert(!token_info->tokenInitialized()); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetUserTokenTransform"); token_info->setUserToken(user_token); } void SetSourceBlockNumberTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); - chassert(!token_info->tokenInitialized()); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); token_info->setSourceBlockNumber(block_number++); } -void SetMaterializeViewIDTransform::transform(Chunk & chunk) +void SetViewIDTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); - chassert(token_info->tokenInitialized()); - token_info->setMaterializeViewID(mv_id); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetViewIDTransform"); + token_info->setViewID(view_id); } -void SetMaterializeViewBlockNumberTransform::transform(Chunk & chunk) +void SetViewBlockNumberTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); - chassert(token_info->tokenInitialized()); - token_info->setMaterializeViewBlockNumber(block_number++); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); + token_info->setViewBlockNumber(block_number++); } void ResetTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in ResetTokenTransform"); + + LOG_DEBUG(getLogger("ResetTokenTransform"), "token_info was {}", token_info->getToken(false)); token_info->reset(); } diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 610c219dfa2..b4f61eb887c 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -13,14 +13,21 @@ namespace DB public: RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) : ISimpleTransform(header_, header_, true) - , chunk_infos(chunk_infos_) + , chunk_infos(std::move(chunk_infos_)) { + LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "create RestoreChunkInfosTransform to append {}:{}", + chunk_infos.size(), chunk_infos.debug()); } String getName() const override { return "RestoreChunkInfosTransform"; } void transform(Chunk & chunk) override { + LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "chunk infos before: {}:{}, append: {}:{}, chunk has rows {}", + chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), + chunk_infos.size(), chunk_infos.debug(), + chunk.getNumRows()); + chunk.getChunkInfos().append(chunk_infos.clone()); } @@ -45,8 +52,8 @@ namespace DeduplicationToken void setInitialToken(String part); void setUserToken(const String & token); void setSourceBlockNumber(size_t sbn); - void setMaterializeViewID(const String & id); - void setMaterializeViewBlockNumber(size_t mvbn); + void setViewID(const String & id); + void setViewBlockNumber(size_t mvbn); void reset(); private: @@ -57,8 +64,8 @@ namespace DeduplicationToken { INITIAL, SOURCE_BLOCK_NUMBER, - MATERIALIZE_VIEW_ID, - MATERIALIZE_VIEW_BLOCK_NUMBER, + VIEW_ID, + VIEW_BLOCK_NUMBER, }; BuildingStage stage = INITIAL; @@ -71,7 +78,7 @@ namespace DeduplicationToken public: CheckTokenTransform(String debug_, bool must_be_present_, const Block & header_) : ISimpleTransform(header_, header_, true) - , debug(debug_) + , debug(std::move(debug_)) , must_be_present(must_be_present_) { } @@ -165,38 +172,38 @@ namespace DeduplicationToken }; - class SetMaterializeViewIDTransform : public ISimpleTransform + class SetViewIDTransform : public ISimpleTransform { public: - SetMaterializeViewIDTransform(String mv_id_, const Block & header_) + SetViewIDTransform(String view_id_, const Block & header_) : ISimpleTransform(header_, header_, true) - , mv_id(std::move(mv_id_)) + , view_id(std::move(view_id_)) { } - String getName() const override { return "DeduplicationToken::SetMaterializeViewIDTransform"; } + String getName() const override { return "DeduplicationToken::SetViewIDTransform"; } void transform(Chunk & chunk) override; private: - String mv_id; + String view_id; }; - class SetMaterializeViewBlockNumberTransform : public ISimpleTransform + class SetViewBlockNumberTransform : public ISimpleTransform { public: - explicit SetMaterializeViewBlockNumberTransform(const Block & header_) + explicit SetViewBlockNumberTransform(const Block & header_) : ISimpleTransform(header_, header_, true) { } - String getName() const override { return "DeduplicationToken::SetMaterializeViewBlockNumberTransform"; } + String getName() const override { return "DeduplicationToken::SetViewBlockNumberTransform"; } void transform(Chunk & chunk) override; private: - size_t block_number; + size_t block_number = 0; }; } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 2e6baea7c26..47ac1f3baed 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -620,8 +620,8 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat if (!disable_deduplication_for_children) { String materialize_view_id = view.table_id.hasUUID() ? toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted(); - pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); + pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); } else { @@ -766,7 +766,7 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi void PushingToLiveViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(getHeader(), chunk, context); + live_view.writeBlock(live_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -790,7 +790,7 @@ void PushingToWindowViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader(), chunk, context); + window_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index f6008347425..b9d29a90f56 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -27,6 +27,7 @@ limitations under the License. */ #include #include #include +#include "Processors/Transforms/NumberBlocksTransform.h" #include #include @@ -330,7 +331,7 @@ Pipe StorageLiveView::watch( return reader; } -void StorageLiveView::writeBlock(const Block & header, Chunk & chunk, ContextPtr local_context) +void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) { auto output = std::make_shared(*this); @@ -363,7 +364,7 @@ void StorageLiveView::writeBlock(const Block & header, Chunk & chunk, ContextPtr if (!is_block_processed) { Pipes pipes; - pipes.emplace_back(std::make_shared(header, chunk.clone())); + pipes.emplace_back(std::make_shared(block)); auto creator = [&](const StorageID & blocks_id_global) { @@ -407,6 +408,21 @@ void StorageLiveView::writeBlock(const Block & header, Chunk & chunk, ContextPtr builder = interpreter.buildQueryPipeline(); } + builder.addSimpleTransform([&](const Block & cur_header) + { + return std::make_shared(chunk_infos.clone(), cur_header); + }); + + String live_view_id = live_view.getStorageID().hasUUID() ? toString(live_view.getStorageID().uuid) : live_view.getStorageID().getFullNameNotQuoted(); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(live_view_id, stream_header); + }); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header); + }); + builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index fce5bad6240..12d8e898347 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -118,7 +118,7 @@ public: return 0; } - void writeBlock(const Block & header, Chunk & chunk, ContextPtr context); + void writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); void refresh(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index b81ca34c427..738de4b07ed 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1416,25 +1416,27 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } void StorageWindowView::writeIntoWindowView( - StorageWindowView & window_view, const Block & header, Chunk & chunk, ContextPtr local_context) + StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) { + LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: rows {}, infos {} with {}, window column {}", + block.rows(), + chunk_infos.size(), chunk_infos.debug(), + window_view.timestamp_column_name); + window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) std::this_thread::sleep_for(std::chrono::milliseconds(100)); - if (!window_view.is_proctime && window_view.max_watermark == 0 && chunk.getNumRows() > 0) + if (!window_view.is_proctime && window_view.max_watermark == 0 && block.rows() > 0) { std::lock_guard lock(window_view.fire_signal_mutex); - const auto & window_column = header.getByName(window_view.timestamp_column_name); + const auto & window_column = block.getByName(window_view.timestamp_column_name); const ColumnUInt32::Container & window_end_data = static_cast(*window_column.column).getData(); UInt32 first_record_timestamp = window_end_data[0]; window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); } - auto chunk_infos = chunk.getChunkInfos(); - chunk.setChunkInfos({}); - - Pipe pipe(std::make_shared(header.cloneEmpty(), std::move(chunk))); + Pipe pipe(std::make_shared(block)); UInt32 lateness_bound = 0; UInt32 t_max_watermark = 0; @@ -1465,6 +1467,9 @@ void StorageWindowView::writeIntoWindowView( lateness_bound = t_max_fired_watermark; } + LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: lateness_bound {}, window_view.is_proctime {}", + lateness_bound, window_view.is_proctime); + if (lateness_bound > 0) /// Add filter, which leaves rows with timestamp >= lateness_bound { auto filter_function = makeASTFunction( @@ -1540,7 +1545,18 @@ void StorageWindowView::writeIntoWindowView( builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared(std::move(chunk_infos), stream_header); + // Can't move chunk_infos here, that function could be called several times + return std::make_shared(chunk_infos.clone(), stream_header); + }); + + String window_view_id = window_view.getStorageID().hasUUID() ? toString(window_view.getStorageID().uuid) : window_view.getStorageID().getFullNameNotQuoted(); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(window_view_id, stream_header); + }); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header); }); builder.addSimpleTransform([&](const Block & stream_header) @@ -1548,6 +1564,7 @@ void StorageWindowView::writeIntoWindowView( return std::make_shared("StorageWindowView: Afrer tmp table before squasing", true, stream_header); }); + builder.addSimpleTransform([&](const Block & current_header) { return std::make_shared( @@ -1561,7 +1578,7 @@ void StorageWindowView::writeIntoWindowView( UInt32 block_max_timestamp = 0; if (window_view.is_watermark_bounded || window_view.allowed_lateness) { - const auto & timestamp_column = *header.getByName(window_view.timestamp_column_name).column; + const auto & timestamp_column = *block.getByName(window_view.timestamp_column_name).column; const auto & timestamp_data = typeid_cast(timestamp_column).getData(); for (const auto & timestamp : timestamp_data) block_max_timestamp = std::max(timestamp, block_max_timestamp); @@ -1569,6 +1586,9 @@ void StorageWindowView::writeIntoWindowView( if (block_max_timestamp) window_view.updateMaxTimestamp(block_max_timestamp); + + LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: block_max_timestamp {}", + block_max_timestamp); } UInt32 lateness_upper_bound = 0; diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 56a21279b86..14ac65091d3 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -166,7 +166,7 @@ public: BlockIO populate(); - static void writeIntoWindowView(StorageWindowView & window_view, const Block & header, Chunk & chunk, ContextPtr context); + static void writeIntoWindowView(StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference index 741591b0dd4..9c9281dc7e4 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference @@ -1,8 +1,8 @@ 2 3 -2 -2 +3 +3 1 1 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index 0a41581025a..51e6a513608 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -29,7 +29,7 @@ INSERT INTO without_deduplication VALUES (43); SELECT count() FROM with_deduplication; SELECT count() FROM without_deduplication; --- Implicit insert is deduplicated even for MV without_deduplication_mv +-- Implicit insert isn't deduplicated, because deduplicate_blocks_in_dependent_materialized_views = 0 by default SELECT ''; SELECT countMerge(cnt) FROM with_deduplication_mv; SELECT countMerge(cnt) FROM without_deduplication_mv; From 7fe4e675707c1e27edf2a06f3779768a483e6c21 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 21 May 2024 19:02:50 +0200 Subject: [PATCH 031/141] accept test 02457_insert_select_progress_http --- src/Interpreters/InterpreterInsertQuery.cpp | 56 +++++++++---------- src/Interpreters/SquashingTransform.cpp | 37 ++++++------ src/Interpreters/SquashingTransform.h | 7 +-- .../Transforms/SquashingChunksTransform.cpp | 13 +++-- .../MergeTree/ReplicatedMergeTreeSink.cpp | 7 --- 5 files changed, 54 insertions(+), 66 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 0f3df3752cb..339f68258dc 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -545,6 +545,34 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & } } + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + query_sample_block.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header, actions); + }); + + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it or may have different settings for applying Sparse serialization. + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + auto context_ptr = getContext(); + auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + + return counting; + }); + pipeline.resize(1); if (shouldAddSquashingFroStorage(table)) @@ -595,34 +623,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.resize(presink_chains.size()); - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - query_sample_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header, actions); - }); - - /// We need to convert Sparse columns to full, because it's destination storage - /// may not support it or may have different settings for applying Sparse serialization. - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - auto context_ptr = getContext(); - auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - - return counting; - }); - for (auto & chain : presink_chains) pipeline.addResources(chain.detachResources()); pipeline.addChains(std::move(presink_chains)); diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index cf4f2060414..8a902add9a5 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -1,5 +1,6 @@ #include +#include namespace DB { @@ -16,23 +17,6 @@ SquashingTransform::SquashingTransform(size_t min_block_size_rows_, size_t min_b } SquashingTransform::SquashResult SquashingTransform::add(Block && input_block) -{ - return addImpl(std::move(input_block)); -} - -SquashingTransform::SquashResult SquashingTransform::add(const Block & input_block) -{ - return addImpl(input_block); -} - -/* - * To minimize copying, accept two types of argument: const reference for output - * stream, and rvalue reference for input stream, and decide whether to copy - * inside this function. This allows us not to copy Block unless we absolutely - * have to. - */ -template -SquashingTransform::SquashResult SquashingTransform::addImpl(ReferenceType input_block) { /// End of input stream. if (!input_block) @@ -66,7 +50,7 @@ SquashingTransform::SquashResult SquashingTransform::addImpl(ReferenceType input return SquashResult{std::move(to_return), true}; } - append(std::move(input_block)); + append(std::move(input_block)); if (isEnoughSize(accumulated_block)) { Block to_return; @@ -79,8 +63,7 @@ SquashingTransform::SquashResult SquashingTransform::addImpl(ReferenceType input } -template -void SquashingTransform::append(ReferenceType input_block) +void SquashingTransform::append(Block && input_block) { if (!accumulated_block) { @@ -88,6 +71,11 @@ void SquashingTransform::append(ReferenceType input_block) return; } + LOG_DEBUG(getLogger("SquashingTransform"), + "input_block rows {}, size {}, columns {}, accumulated_block rows {}, size {}, columns {}, ", + input_block.rows(), input_block.bytes(), input_block.columns(), + accumulated_block.rows(), accumulated_block.bytes(), accumulated_block.columns()); + assert(blocksHaveEqualStructure(input_block, accumulated_block)); try @@ -96,6 +84,15 @@ void SquashingTransform::append(ReferenceType input_block) { const auto source_column = input_block.getByPosition(i).column; + const auto acc_column = accumulated_block.getByPosition(i).column; + + LOG_DEBUG(getLogger("SquashingTransform"), + "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", + i, source_column->getName(), + acc_column->size(), acc_column->byteSize(), acc_column->allocatedBytes(), + source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); + + auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); accumulated_block.getByPosition(i).column = std::move(mutable_column); diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h index f1eba537338..fff55a760db 100644 --- a/src/Interpreters/SquashingTransform.h +++ b/src/Interpreters/SquashingTransform.h @@ -34,7 +34,6 @@ public: * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. */ SquashResult add(Block && block); - SquashResult add(const Block & block); private: size_t min_block_size_rows; @@ -42,11 +41,7 @@ private: Block accumulated_block; - template - SquashResult addImpl(ReferenceType block); - - template - void append(ReferenceType block); + void append(Block && block); bool isEnoughSize(const Block & block); bool isEnoughSize(size_t rows, size_t bytes) const; diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 1a29b8d8a2d..ea0d63a2ed7 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -21,7 +21,7 @@ void SquashingChunksTransform::onConsume(Chunk chunk) "onConsume {}", chunk.getNumRows()); if (cur_chunkinfos.empty()) - cur_chunkinfos = chunk.getChunkInfos(); + cur_chunkinfos = chunk.getChunkInfos().clone(); auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); if (result.block) @@ -33,7 +33,7 @@ void SquashingChunksTransform::onConsume(Chunk chunk) if (cur_chunkinfos.empty() && result.input_block_delayed) { - cur_chunkinfos = chunk.getChunkInfos(); + cur_chunkinfos = chunk.getChunkInfos().clone(); } } @@ -79,12 +79,15 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::transform(Chunk & chunk) { LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - "transform {}, finished {}", chunk.getNumRows(), finished); + "transform rows {}, size {}, columns {}, infos: {}/{}, finished {}", + chunk.getNumRows(), chunk.bytes(), chunk.getNumColumns(), + chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), + finished); if (!finished) { if (cur_chunkinfos.empty()) - cur_chunkinfos = chunk.getChunkInfos(); + cur_chunkinfos = chunk.getChunkInfos().clone(); auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); if (result.block) @@ -96,7 +99,7 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) if (cur_chunkinfos.empty() && result.input_block_delayed) { - cur_chunkinfos = chunk.getChunkInfos(); + cur_chunkinfos = chunk.getChunkInfos().clone(); } } else diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 41fdb86f3bd..11c64c97cb7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -442,13 +442,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); - /// If deduplicated data should not be inserted into MV, we need to set proper - /// value for `last_block_is_duplicate`, which is possible only after the part is committed. - /// Othervide we can delay commit. - /// TODO: we can also delay commit if there is no MVs. - // if (!settings.deduplicate_blocks_in_dependent_materialized_views) - // finishDelayedChunk(zookeeper); - ++num_blocks_processed; } From ee3385fbc00151427a209398a881f882aef6512b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 28 May 2024 18:56:41 +0200 Subject: [PATCH 032/141] adjust after merge with master --- src/Storages/ObjectStorage/StorageObjectStorageSink.cpp | 4 ++-- src/Storages/ObjectStorage/StorageObjectStorageSink.h | 2 +- src/Storages/StorageAzureBlob.cpp | 2 +- tests/integration/helpers/s3_mocks/broken_s3.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 0a3cf19a590..9718b329414 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -39,12 +39,12 @@ StorageObjectStorageSink::StorageObjectStorageSink( configuration->format, *write_buf, sample_block, context, format_settings_); } -void StorageObjectStorageSink::consume(Chunk chunk) +void StorageObjectStorageSink::consume(Chunk & chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void StorageObjectStorageSink::onCancel() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 45cf83d606f..1ec52889f0a 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "StorageObjectStorageSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onCancel() override; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 5dc407bf86d..a1ce991b5c9 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1,4 +1,4 @@ -#include +#include #if USE_AZURE_BLOB_STORAGE #include diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 7d0127bc1c4..a8d407e8d79 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -231,7 +231,7 @@ class _ServerRuntime: class BrokenPipeAction: def inject_error(self, request_handler): # partial read - self.rfile.read(50) + request_handler.rfile.read(50) time.sleep(1) request_handler.connection.setsockopt( From 1b7db4195c1f0e5be62ab7a3784deebc2481f666 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 29 May 2024 02:18:50 +0200 Subject: [PATCH 033/141] work with tests --- src/Interpreters/InterpreterInsertQuery.cpp | 13 +- src/Interpreters/SquashingTransform.cpp | 8 +- src/Processors/ISimpleTransform.h | 2 - .../Transforms/CountingTransform.cpp | 3 +- .../Transforms/ExpressionTransform.cpp | 3 - .../Transforms/MaterializingTransform.cpp | 3 - .../Transforms/SquashingChunksTransform.cpp | 52 +- .../Transforms/buildPushingToViewsChain.cpp | 6 +- src/Storages/MergeTree/MergeTreeSink.cpp | 5 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 6 +- src/Storages/StorageAzureBlob.cpp | 1638 ------------ src/Storages/StorageS3.cpp | 2310 ----------------- .../0_stateless/01275_parallel_mv.reference | 4 +- ...01927_query_views_log_current_database.sql | 1 + ...ication_token_materialized_views.reference | 14 +- ...deduplication_token_materialized_views.sql | 8 +- .../0_stateless/02125_query_views_log.sql | 2 +- 17 files changed, 82 insertions(+), 3996 deletions(-) delete mode 100644 src/Storages/StorageAzureBlob.cpp delete mode 100644 src/Storages/StorageS3.cpp diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index cd68cbc41c0..249c69b51b9 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -574,6 +574,8 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & return counting; }); + size_t num_select_threads = pipeline.getNumThreads(); + pipeline.resize(1); if (shouldAddSquashingFroStorage(table)) @@ -616,8 +618,18 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & /// Otherwise ResizeProcessor them down to 1 stream. size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); + size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; + if (!settings.parallel_view_processing) + { + auto table_id = table->getStorageID(); + auto views = DatabaseCatalog::instance().getDependentViews(table_id); + + if (table->isView() || !views.empty()) + sink_streams_size = 1; + } + auto [presink_chains, sink_chains] = buildPreAndSyncChains( presink_streams_size, sink_streams_size, table, metadata_snapshot, query_sample_block); @@ -636,7 +648,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & if (!settings.parallel_view_processing) { - size_t num_select_threads = pipeline.getNumThreads(); /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. if (pipeline.getNumThreads() > num_select_threads) pipeline.setMaxThreads(num_select_threads); diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 8a902add9a5..30c801aaaff 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -82,9 +83,8 @@ void SquashingTransform::append(Block && input_block) { for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) { - const auto source_column = input_block.getByPosition(i).column; - - const auto acc_column = accumulated_block.getByPosition(i).column; + const auto source_column = std::move(input_block.getByPosition(i).column); + auto acc_column = std::move(accumulated_block.getByPosition(i).column); LOG_DEBUG(getLogger("SquashingTransform"), "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", @@ -93,7 +93,7 @@ void SquashingTransform::append(Block && input_block) source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); - auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); + auto mutable_column = IColumn::mutate(std::move(acc_column)); mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); accumulated_block.getByPosition(i).column = std::move(mutable_column); } diff --git a/src/Processors/ISimpleTransform.h b/src/Processors/ISimpleTransform.h index 3862ea76dbb..a47e0e49121 100644 --- a/src/Processors/ISimpleTransform.h +++ b/src/Processors/ISimpleTransform.h @@ -31,8 +31,6 @@ protected: virtual void transform(Chunk & input_chunk, Chunk & output_chunk) { - LOG_DEBUG(getLogger("ISimpleTransform"), - "transform {}", input_chunk.getNumRows()); transform(input_chunk); output_chunk.swap(input_chunk); } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 7329a196f8a..c138eed69de 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -3,6 +3,7 @@ #include #include #include +#include "IO/Progress.h" namespace ProfileEvents @@ -18,7 +19,7 @@ namespace DB void CountingTransform::onConsume(Chunk chunk) { LOG_DEBUG(getLogger("CountingTransform"), - "onConsume {}", chunk.getNumRows()); + "onConsume rows {} bytes {}, progress rows {} bytes {}", chunk.getNumRows(), chunk.bytes(), progress.written_rows, progress.written_bytes); if (quota) quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes()); diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index db5d2b0c49c..73d41828bc0 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -21,9 +21,6 @@ ExpressionTransform::ExpressionTransform(const Block & header_, ExpressionAction void ExpressionTransform::transform(Chunk & chunk) { - LOG_DEBUG(getLogger("ExpressionTransform"), - "transform {}", chunk.getNumRows()); - size_t num_rows = chunk.getNumRows(); auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 8366472f876..4a7f5187c75 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -12,9 +12,6 @@ MaterializingTransform::MaterializingTransform(const Block & header) void MaterializingTransform::transform(Chunk & chunk) { - LOG_DEBUG(getLogger("MaterializingTransform"), - "transform {}", chunk.getNumRows()); - auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 22171d97b6e..2ee13c05b95 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -20,20 +20,32 @@ void SquashingChunksTransform::onConsume(Chunk chunk) LOG_DEBUG(getLogger("SquashingChunksTransform"), "onConsume {}", chunk.getNumRows()); - if (cur_chunkinfos.empty()) - cur_chunkinfos = chunk.getChunkInfos().clone(); - auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + cur_chunk = Chunk(result.block.getColumns(), result.block.rows()); + if (result.block) { cur_chunk.setColumns(result.block.getColumns(), result.block.rows()); - cur_chunk.setChunkInfos(std::move(cur_chunkinfos)); - cur_chunkinfos = {}; - } + if (result.input_block_delayed) + { + cur_chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = std::move(chunk.getChunkInfos()); + } + else + { + cur_chunk.setChunkInfos(chunk.getChunkInfos()); + cur_chunkinfos = {}; + } - if (cur_chunkinfos.empty() && result.input_block_delayed) + LOG_DEBUG(getLogger("SquashingChunksTransform"), + "got result rows {}, size {}, columns {}, infos: {}/{}", + cur_chunk.getNumRows(), cur_chunk.bytes(), cur_chunk.getNumColumns(), + cur_chunk.getChunkInfos().size(), cur_chunk.getChunkInfos().debug()); + } + else { - cur_chunkinfos = chunk.getChunkInfos().clone(); + assert(!result.input_block_delayed); + cur_chunkinfos = std::move(chunk.getChunkInfos()); } } @@ -85,15 +97,29 @@ void SimpleSquashingChunksTransform::consume(Chunk chunk) auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); - squashed_chunk.setColumns(result.block.getColumns(), result.block.rows()); - if (result.input_block_delayed) + if (result.block) { - squashed_chunk.setChunkInfos(std::move(squashed_info)); - squashed_info = std::move(chunk.getChunkInfos()); + squashed_chunk.setColumns(result.block.getColumns(), result.block.rows()); + if (result.input_block_delayed) + { + squashed_chunk.setChunkInfos(std::move(squashed_info)); + squashed_info = std::move(chunk.getChunkInfos()); + } + else + { + squashed_chunk.setChunkInfos(chunk.getChunkInfos()); + squashed_info = {}; + } + + LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), + "got result rows {}, size {}, columns {}, infos: {}/{}", + squashed_chunk.getNumRows(), squashed_chunk.bytes(), squashed_chunk.getNumColumns(), + squashed_chunk.getChunkInfos().size(), squashed_chunk.getChunkInfos().debug()); } else { - squashed_chunk.setChunkInfos(std::move(chunk.getChunkInfos())); + assert(!result.input_block_delayed); + squashed_info = std::move(chunk.getChunkInfos()); } } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 72897a06c44..d44796610ed 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -24,6 +24,7 @@ #include #include #include +#include "Core/Field.h" #include #include @@ -223,6 +224,7 @@ std::optional generateViewChain( if (disable_deduplication_for_children) { insert_context->setSetting("insert_deduplicate", Field{false}); + insert_context->setSetting("insert_deduplication_token", Field{""}); } // Processing of blocks for MVs is done block by block, and there will @@ -731,8 +733,8 @@ ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { - auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); - state.emplace(process(block, view, *views_data, chunk.getChunkInfos(), disable_deduplication_for_children)); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); + state.emplace(process(std::move(block), view, *views_data, std::move(chunk.getChunkInfos()), disable_deduplication_for_children)); } diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index c252d95a5e9..0953cdc5d72 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -100,8 +100,9 @@ void MergeTreeSink::consume(Chunk & chunk) if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo has to be initialized with user token for table: {}", - storage.getStorageID().getNameForLogs()); + "TokenInfo has to be initialized with user token for table: {}, user dedup token {}", + storage.getStorageID().getNameForLogs(), + context->getSettingsRef().insert_deduplication_token.value); if (token_info->tokenInitialized()) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 11c64c97cb7..62d30764ca8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -304,9 +304,9 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo has to be initialized with user token for table: {}", - storage.getStorageID().getNameForLogs()); - + "TokenInfo has to be initialized with user token for table: {} user dedup token {}", + storage.getStorageID().getNameForLogs(), + context->getSettingsRef().insert_deduplication_token.value); if (token_info->tokenInitialized()) { diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp deleted file mode 100644 index a1ce991b5c9..00000000000 --- a/src/Storages/StorageAzureBlob.cpp +++ /dev/null @@ -1,1638 +0,0 @@ -#include - -#if USE_AZURE_BLOB_STORAGE -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -using namespace Azure::Storage::Blobs; - -namespace CurrentMetrics -{ - extern const Metric ObjectStorageAzureThreads; - extern const Metric ObjectStorageAzureThreadsActive; - extern const Metric ObjectStorageAzureThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int CANNOT_DETECT_FORMAT; - extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; -} - -namespace -{ - -const std::unordered_set required_configuration_keys = { - "blob_path", - "container", -}; - -const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "structure", - "compression_method", - "account_name", - "account_key", - "connection_string", - "storage_account_url", -}; - -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - -void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); - - if (collection.has("connection_string")) - { - configuration.connection_url = collection.get("connection_string"); - configuration.is_connection_string = true; - } - - if (collection.has("storage_account_url")) - { - configuration.connection_url = collection.get("storage_account_url"); - configuration.is_connection_string = false; - } - - configuration.container = collection.get("container"); - configuration.blob_path = collection.get("blob_path"); - - if (collection.has("account_name")) - configuration.account_name = collection.get("account_name"); - - if (collection.has("account_key")) - configuration.account_key = collection.get("account_key"); - - configuration.structure = collection.getOrDefault("structure", "auto"); - configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); -} - - -StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine_args, const ContextPtr & local_context) -{ - StorageAzureBlob::Configuration configuration; - - /// Supported signatures: - /// - /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression]) - /// - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - processNamedCollectionResult(configuration, *named_collection); - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - - return configuration; - } - - if (engine_args.size() < 3 || engine_args.size() > 7) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage AzureBlobStorage requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg = [] (const std::string & s) -> bool - { - return s == "auto" || FormatFactory::instance().exists(s); - }; - - if (engine_args.size() == 4) - { - //'c1 UInt64, c2 UInt64 - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format or account name specified without account key"); - } - } - else if (engine_args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - } - } - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - - return configuration; -} - - -AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(const ContextPtr & local_context) -{ - const auto & context_settings = local_context->getSettingsRef(); - auto settings_ptr = std::make_unique(); - settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); - - return settings_ptr; -} - -void registerStorageAzureBlob(StorageFactory & factory) -{ - factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageAzureBlob::getConfiguration(engine_args, args.getLocalContext()); - auto client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. - user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - auto settings = StorageAzureBlob::createSettings(args.getContext()); - - return std::make_shared( - std::move(configuration), - std::make_unique("AzureBlobStorage", std::move(client), std::move(settings),configuration.container), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing */ false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::AZURE, - }); -} - -static bool containerExists(std::unique_ptr &blob_service_client, std::string container_name) -{ - Azure::Storage::Blobs::ListBlobContainersOptions options; - options.Prefix = container_name; - options.PageSizeHint = 1; - - auto containers_list_response = blob_service_client->ListBlobContainers(options); - auto containers_list = containers_list_response.BlobContainers; - - for (const auto & container : containers_list) - { - if (container_name == container.Name) - return true; - } - return false; -} - -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container) -{ - AzureClientPtr result; - - if (configuration.is_connection_string) - { - std::shared_ptr managed_identity_credential = std::make_shared(); - std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); - result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - - if (attempt_to_create_container) - { - bool container_exists = containerExists(blob_service_client,configuration.container); - if (!container_exists) - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - - try - { - result->CreateIfNotExists(); - } - catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) - { - throw; - } - } - } - } - } - else - { - std::shared_ptr storage_shared_key_credential; - if (configuration.account_name.has_value() && configuration.account_key.has_value()) - { - storage_shared_key_credential - = std::make_shared(*configuration.account_name, *configuration.account_key); - } - - std::unique_ptr blob_service_client; - size_t pos = configuration.connection_url.find('?'); - std::shared_ptr managed_identity_credential; - if (storage_shared_key_credential) - { - blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); - } - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, workload_identity_credential); - } - else - { - managed_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, managed_identity_credential); - } - } - - std::string final_url; - if (pos != std::string::npos) - { - auto url_without_sas = configuration.connection_url.substr(0, pos); - final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + configuration.container - + configuration.connection_url.substr(pos); - } - else - final_url - = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; - - if (!attempt_to_create_container) - { - if (storage_shared_key_credential) - return std::make_unique(final_url, storage_shared_key_credential); - else - return std::make_unique(final_url, managed_identity_credential); - } - - bool container_exists = containerExists(blob_service_client,configuration.container); - if (container_exists) - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - try - { - result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.") - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - throw; - } - } - } - } - - return result; -} - -Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const -{ - if (!is_connection_string) - return Poco::URI(connection_url); - - auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); - return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); -} - -bool StorageAzureBlob::Configuration::withGlobsIgnorePartitionWildcard() const -{ - if (!withPartitionWildcard()) - return withGlobs(); - - return PartitionedSink::replaceWildcards(getPath(), "").find_first_of("*?{") != std::string::npos; -} - -StorageAzureBlob::StorageAzureBlob( - const Configuration & configuration_, - std::unique_ptr && object_storage_, - const ContextPtr & context, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , name("AzureBlobStorage") - , configuration(configuration_) - , object_storage(std::move(object_storage_)) - , distributed_processing(distributed_processing_) - , format_settings(format_settings_) - , partition_by(partition_by_) -{ - if (configuration.format != "auto") - FormatFactory::instance().checkFormatName(configuration.format); - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.getConnectionURL()); - - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - ColumnsDescription columns; - if (configuration.format == "auto") - std::tie(columns, configuration.format) = getTableStructureAndFormatFromData(object_storage.get(), configuration, format_settings, context); - else - columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context); - storage_metadata.setColumns(columns); - } - else - { - if (configuration.format == "auto") - configuration.format = getTableStructureAndFormatFromData(object_storage.get(), configuration, format_settings, context).second; - - /// We don't allow special columns in File storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine AzureBlobStorage doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); -} - -void StorageAzureBlob::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) -{ - if (configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", - configuration.blob_path); - } - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); - - object_storage->removeObjectsIfExist(objects); -} - -namespace -{ - -class StorageAzureBlobSink : public SinkToStorage -{ -public: - StorageAzureBlobSink( - const String & format, - const Block & sample_block_, - const ContextPtr & context, - std::optional format_settings_, - const CompressionMethod compression_method, - AzureObjectStorage * object_storage, - const String & blob_path) - : SinkToStorage(sample_block_) - , sample_block(sample_block_) - , format_settings(format_settings_) - { - StoredObject object(blob_path); - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - object_storage->writeObject(object, WriteMode::Rewrite), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); - } - - String getName() const override { return "StorageAzureBlobSink"; } - - void consume(Chunk & chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. - release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - Block sample_block; - std::optional format_settings; - std::unique_ptr write_buf; - OutputFormatPtr writer; - bool cancelled = false; - std::mutex cancel_mutex; -}; - -namespace -{ - std::optional checkAndGetNewFileOnInsertIfNeeded(const ContextPtr & context, AzureObjectStorage * object_storage, const String & path, size_t sequence_number) - { - if (context->getSettingsRef().azure_truncate_on_insert || !object_storage->exists(StoredObject(path))) - return std::nullopt; - - if (context->getSettingsRef().azure_create_new_file_on_insert) - { - auto pos = path.find_first_of('.'); - String new_path; - do - { - new_path = path.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : path.substr(pos)); - ++sequence_number; - } - while (object_storage->exists(StoredObject(new_path))); - - return new_path; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object with key {} already exists. " - "If you want to overwrite it, enable setting azure_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting azure_create_new_file_on_insert", - path); - } -} - -class PartitionedStorageAzureBlobSink : public PartitionedSink, WithContext -{ -public: - PartitionedStorageAzureBlobSink( - const ASTPtr & partition_by, - const String & format_, - const Block & sample_block_, - const ContextPtr & context_, - std::optional format_settings_, - const CompressionMethod compression_method_, - AzureObjectStorage * object_storage_, - const String & blob_) - : PartitionedSink(partition_by, context_, sample_block_), WithContext(context_) - , format(format_) - , sample_block(sample_block_) - , compression_method(compression_method_) - , object_storage(object_storage_) - , blob(blob_) - , format_settings(format_settings_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto partition_key = replaceWildcards(blob, partition_id); - validateKey(partition_key); - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(getContext(), object_storage, partition_key, 1)) - partition_key = *new_path; - - return std::make_shared( - format, - sample_block, - getContext(), - format_settings, - compression_method, - object_storage, - partition_key - ); - } - -private: - const String format; - const Block sample_block; - const CompressionMethod compression_method; - AzureObjectStorage * object_storage; - const String blob; - const std::optional format_settings; - - ExpressionActionsPtr partition_by_expr; - - static void validateKey(const String & str) - { - validatePartitionKey(str, true); - } -}; - -} - -class ReadFromAzureBlob : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromAzureBlob"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromAzureBlob( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::shared_ptr storage_, - ReadFromFormatInfo info_, - const bool need_only_count_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) - , storage(std::move(storage_)) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - std::shared_ptr storage; - ReadFromFormatInfo info; - const bool need_only_count; - - size_t max_block_size; - const size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromAzureBlob::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageAzureBlob::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - if (partition_by && configuration.withPartitionWildcard()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned Azure storage is not implemented yet"); - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - local_context, - read_from_format_info.source_header, - std::move(this_ptr), - std::move(read_from_format_info), - need_only_count, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - const auto & configuration = storage->configuration; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared(context, - context->getReadTaskCallback()); - } - else if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blob_path, - predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); - } - else - { - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blobs_paths, - predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); - } -} - -void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - const auto & configuration = storage->configuration; - Pipes pipes; - - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - configuration.format, - getName(), - context, - storage->format_settings, - max_block_size, - configuration.compression_method, - storage->object_storage.get(), - configuration.container, - configuration.connection_url, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - if (configuration.withGlobsIgnorePartitionWildcard()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", configuration.blob_path); - - auto path = configuration.blobs_paths.front(); - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(path, configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && configuration.withPartitionWildcard(); - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - object_storage.get(), - path); - } - else - { - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(local_context, object_storage.get(), path, configuration.blobs_paths.size())) - { - configuration.blobs_paths.push_back(*new_path); - path = *new_path; - } - - return std::make_shared( - configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - object_storage.get(), - path); - } -} - -bool StorageAzureBlob::supportsPartitionBy() const -{ - return true; -} - -bool StorageAzureBlob::supportsSubsetOfColumns(const ContextPtr & context) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); -} - -bool StorageAzureBlob::prefersLargeBlocks() const -{ - return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration.format); -} - -bool StorageAzureBlob::parallelizeOutputAfterReading(ContextPtr context) const -{ - return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context); -} - -StorageAzureBlobSource::GlobIterator::GlobIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - String blob_path_with_globs_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context_, - RelativePathsWithMetadata * outer_blobs_, - std::function file_progress_callback_) - : IIterator(context_) - , object_storage(object_storage_) - , container(container_) - , blob_path_with_globs(blob_path_with_globs_) - , virtual_columns(virtual_columns_) - , outer_blobs(outer_blobs_) - , file_progress_callback(file_progress_callback_) -{ - - const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. - if (key_prefix.size() == blob_path_with_globs.size()) - { - auto object_metadata = object_storage->getObjectMetadata(blob_path_with_globs); - blobs_with_metadata.emplace_back( - blob_path_with_globs, - object_metadata); - if (outer_blobs) - outer_blobs->emplace_back(blobs_with_metadata.back()); - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - is_finished = true; - return; - } - - object_storage_iterator = object_storage->iterate(key_prefix); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(blob_path_with_globs)); - - if (!matcher->ok()) - throw Exception( - ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", blob_path_with_globs, matcher->error()); - - recursive = blob_path_with_globs == "/**" ? true : false; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); -} - -RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() -{ - std::lock_guard lock(next_mutex); - - if (is_finished && index >= blobs_with_metadata.size()) - { - return {}; - } - - bool need_new_batch = blobs_with_metadata.empty() || index >= blobs_with_metadata.size(); - - if (need_new_batch) - { - RelativePathsWithMetadata new_batch; - while (new_batch.empty()) - { - auto result = object_storage_iterator->getCurrrentBatchAndScheduleNext(); - if (result.has_value()) - { - new_batch = result.value(); - } - else - { - is_finished = true; - return {}; - } - - for (auto it = new_batch.begin(); it != new_batch.end();) - { - if (!recursive && !re2::RE2::FullMatch(it->relative_path, *matcher)) - it = new_batch.erase(it); - else - ++it; - } - } - - index = 0; - - if (filter_dag) - { - std::vector paths; - paths.reserve(new_batch.size()); - for (auto & path_with_metadata : new_batch) - paths.push_back(fs::path(container) / path_with_metadata.relative_path); - - VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); - } - - if (outer_blobs) - outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end()); - - blobs_with_metadata = std::move(new_batch); - if (file_progress_callback) - { - for (const auto & [relative_path, info] : blobs_with_metadata) - { - file_progress_callback(FileProgress(0, info.size_bytes)); - } - } - } - - size_t current_index = index++; - if (current_index >= blobs_with_metadata.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); - return blobs_with_metadata[current_index]; -} - -StorageAzureBlobSource::KeysIterator::KeysIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - const Strings & keys_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context_, - RelativePathsWithMetadata * outer_blobs, - std::function file_progress_callback) - : IIterator(context_) - , object_storage(object_storage_) - , container(container_) - , virtual_columns(virtual_columns_) -{ - Strings all_keys = keys_; - - ASTPtr filter_ast; - if (!all_keys.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - Strings paths; - paths.reserve(all_keys.size()); - for (const auto & key : all_keys) - paths.push_back(fs::path(container) / key); - - VirtualColumnUtils::filterByPathOrFile(all_keys, paths, filter_dag, virtual_columns, getContext()); - } - - for (auto && key : all_keys) - { - ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - keys.emplace_back(key, object_metadata); - } - - if (outer_blobs) - *outer_blobs = keys; -} - -RelativePathWithMetadata StorageAzureBlobSource::KeysIterator::next() -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - - return keys[current_index]; -} - -Chunk StorageAzureBlobSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, - requested_virtual_columns, - fs::path(container) / reader.getRelativePath(), - reader.getRelativePathWithMetadata().metadata.size_bytes); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -void StorageAzureBlobSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - String source = fs::path(connection_url) / container / path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional StorageAzureBlobSource::tryGetNumRowsFromCache(const DB::RelativePathWithMetadata & path_with_metadata) -{ - String source = fs::path(connection_url) / container / path_with_metadata.relative_path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - auto last_mod = path_with_metadata.metadata.last_modified; - if (last_mod) - return last_mod->epochTime(); - return std::nullopt; - }; - - return StorageAzureBlob::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -StorageAzureBlobSource::StorageAzureBlobSource( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - String compression_hint_, - AzureObjectStorage * object_storage_, - const String & container_, - const String & connection_url_, - std::shared_ptr file_iterator_, - bool need_only_count_) - :ISource(info.source_header, false) - , WithContext(context_) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , format(format_) - , name(std::move(name_)) - , sample_block(info.format_header) - , format_settings(format_settings_) - , columns_desc(info.columns_description) - , max_block_size(max_block_size_) - , compression_hint(compression_hint_) - , object_storage(std::move(object_storage_)) - , container(container_) - , connection_url(connection_url_) - , file_iterator(file_iterator_) - , need_only_count(need_only_count_) - , create_reader_pool(CurrentMetrics::ObjectStorageAzureThreads, CurrentMetrics::ObjectStorageAzureThreadsActive, CurrentMetrics::ObjectStorageAzureThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "AzureReader")) -{ - reader = createReader(); - if (reader) - reader_future = createReaderAsync(); -} - - -StorageAzureBlobSource::~StorageAzureBlobSource() -{ - create_reader_pool.wait(); -} - -String StorageAzureBlobSource::getName() const -{ - return name; -} - -StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() -{ - auto path_with_metadata = file_iterator->next(); - if (path_with_metadata.relative_path.empty()) - return {}; - - if (path_with_metadata.metadata.size_bytes == 0) - path_with_metadata.metadata = object_storage->getObjectMetadata(path_with_metadata.relative_path); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(path_with_metadata) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - auto compression_method = chooseCompressionMethod(path_with_metadata.relative_path, compression_hint); - read_buf = createAzureReadBuffer(path_with_metadata.relative_path, path_with_metadata.metadata.size_bytes); - auto input_format = FormatFactory::instance().getInput( - format, *read_buf, sample_block, getContext(), max_block_size, - format_settings, max_parsing_threads, std::nullopt, - /* is_remote_fs */ true, compression_method); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{path_with_metadata, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageAzureBlobSource::createReaderAsync() -{ - return create_reader_scheduler([this] { return createReader(); }, Priority{}); -} - -std::unique_ptr StorageAzureBlobSource::createAzureReadBuffer(const String & key, size_t object_size) -{ - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. - if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from Azure with initial prefetch", object_size); - return createAsyncAzureReadBuffer(key, read_settings, object_size); - } - - return object_storage->readObject(StoredObject(key), read_settings, {}, object_size); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::shared_ptr & file_iterator_, - AzureObjectStorage * object_storage_, - std::optional format_, - const StorageAzureBlob::Configuration & configuration_, - const std::optional & format_settings_, - const RelativePathsWithMetadata & read_keys_, - const ContextPtr & context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , object_storage(object_storage_) - , configuration(configuration_) - , format(std::move(format_)) - , format_settings(format_settings_) - , read_keys(read_keys_) - , prev_read_keys_size(read_keys_.size()) - { - } - - Data next() override - { - /// For default mode check cached columns for currently read keys on first iteration. - if (first) - { - /// If format is unknown we iterate through all currently read keys on first iteration and - /// try to determine format by file name. - if (!format) - { - for (const auto & key : read_keys) - { - if (auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(key.relative_path)) - { - format = format_from_path; - break; - } - } - } - - /// For default mode check cached columns for currently read keys on first iteration. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns, format}; - } - } - - current_path_with_metadata = file_iterator->next(); - - if (current_path_with_metadata.relative_path.empty()) - { - if (first) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files with provided path " - "in AzureBlobStorage. You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in AzureBlobStorage. You can specify table structure manually"); - } - - return {nullptr, std::nullopt, format}; - } - - first = false; - - /// AzureBlobStorage file iterator could get new keys after new iteration. - if (read_keys.size() > prev_read_keys_size) - { - /// If format is unknown we can try to determine it by new file names. - if (!format) - { - for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it).relative_path)) - { - format = format_from_file_name; - break; - } - } - } - /// Check new files in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - if (columns_from_cache) - return {nullptr, columns_from_cache, format}; - } - - prev_read_keys_size = read_keys.size(); - } - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - RelativePathsWithMetadata paths = {current_path_with_metadata}; - if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) - return {nullptr, columns_from_cache, format}; - } - - first = false; - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return {wrapReadBufferWithCompressionMethod( - object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), - chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), - zstd_window_log_max), std::nullopt, format}; - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - Strings sources; - sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override { return current_path_with_metadata.relative_path; } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return wrapReadBufferWithCompressionMethod( - object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), - chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), - zstd_window_log_max); - } - - private: - std::optional tryGetColumnsFromCache(const RelativePathsWithMetadata::const_iterator & begin, const RelativePathsWithMetadata::const_iterator & end) - { - auto context = getContext(); - if (!context->getSettingsRef().schema_inference_use_cache_for_azure) - return std::nullopt; - - auto & schema_cache = StorageAzureBlob::getSchemaCache(context); - for (auto it = begin; it < end; ++it) - { - auto get_last_mod_time = [&] -> std::optional - { - if (it->metadata.last_modified) - return it->metadata.last_modified->epochTime(); - return std::nullopt; - }; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - String source = host_and_bucket + '/' + it->relative_path; - if (format) - { - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry for some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. - format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - AzureObjectStorage * object_storage; - const StorageAzureBlob::Configuration & configuration; - std::optional format; - const std::optional & format_settings; - const RelativePathsWithMetadata & read_keys; - size_t prev_read_keys_size; - RelativePathWithMetadata current_path_with_metadata; - bool first = true; - }; -} - -std::pair StorageAzureBlob::getTableStructureAndFormatFromDataImpl( - std::optional format, - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) -{ - RelativePathsWithMetadata read_keys; - std::shared_ptr file_iterator; - if (configuration.withGlobs()) - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - else - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - - ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, format, configuration, format_settings, read_keys, ctx); - if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); -} - -std::pair StorageAzureBlob::getTableStructureAndFormatFromData( - DB::AzureObjectStorage * object_storage, - const DB::StorageAzureBlob::Configuration & configuration, - const std::optional & format_settings, - const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, object_storage, configuration, format_settings, ctx); -} - -ColumnsDescription StorageAzureBlob::getTableStructureFromData( - DB::AzureObjectStorage * object_storage, - const DB::StorageAzureBlob::Configuration & configuration, - const std::optional & format_settings, - const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(configuration.format, object_storage, configuration, format_settings, ctx).first; -} - -SchemaCache & StorageAzureBlob::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_azure", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - - -std::unique_ptr StorageAzureBlobSource::createAsyncAzureReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size) -{ - auto modified_settings{read_settings}; - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - auto async_reader = object_storage->readObjects(StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, modified_settings); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -} - -#endif diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp deleted file mode 100644 index 7975b42ac02..00000000000 --- a/src/Storages/StorageS3.cpp +++ /dev/null @@ -1,2310 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "Common/logger_useful.h" -#include "IO/CompressionMethod.h" -#include "IO/ReadBuffer.h" -#include "Interpreters/Context_fwd.h" -#include "Storages/MergeTree/ReplicatedMergeTreePartHeader.h" - -#if USE_AWS_S3 - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#include -#pragma clang diagnostic pop - -namespace fs = std::filesystem; - - -namespace CurrentMetrics -{ - extern const Metric StorageS3Threads; - extern const Metric StorageS3ThreadsActive; - extern const Metric StorageS3ThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event S3DeleteObjects; - extern const Event S3ListObjects; - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -static const std::unordered_set required_configuration_keys = { - "url", -}; -static const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "compression_method", - "structure", - "access_key_id", - "secret_access_key", - "session_token", - "filename", - "use_environment_credentials", - "max_single_read_retries", - "min_upload_part_size", - "upload_part_size_multiply_factor", - "upload_part_size_multiply_parts_count_threshold", - "max_single_part_upload_size", - "max_connections", - "expiration_window_seconds", - "no_sign_request" -}; - -namespace ErrorCodes -{ - extern const int CANNOT_PARSE_TEXT; - extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int S3_ERROR; - extern const int UNEXPECTED_EXPRESSION; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int CANNOT_DETECT_FORMAT; - extern const int NOT_IMPLEMENTED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int FILE_DOESNT_EXIST; - extern const int NO_ELEMENTS_IN_CONFIG; -} - - -class ReadFromStorageS3Step : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromStorageS3Step"; } - - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromStorageS3Step( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - StorageS3 & storage_, - ReadFromFormatInfo read_from_format_info_, - bool need_only_count_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) - , column_names(column_names_) - , storage(storage_) - , read_from_format_info(std::move(read_from_format_info_)) - , need_only_count(need_only_count_) - , query_configuration(storage.getConfigurationCopy()) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - query_configuration.update(context); - virtual_columns = storage.getVirtualsList(); - } - -private: - Names column_names; - StorageS3 & storage; - ReadFromFormatInfo read_from_format_info; - bool need_only_count; - StorageS3::Configuration query_configuration; - NamesAndTypesList virtual_columns; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - - -class IOutputFormat; -using OutputFormatPtr = std::shared_ptr; - -class StorageS3Source::DisclosedGlobIterator::Impl : WithContext -{ -public: - Impl( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate_, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : WithContext(context_) - , client(client_.clone()) - , globbed_uri(globbed_uri_) - , predicate(predicate_) - , virtual_columns(virtual_columns_) - , read_keys(read_keys_) - , request_settings(request_settings_) - , list_objects_pool( - CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , list_objects_scheduler(threadPoolCallbackRunnerUnsafe(list_objects_pool, "ListObjects")) - , file_progress_callback(file_progress_callback_) - { - if (globbed_uri.bucket.find_first_of("*?{") != std::string::npos) - throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); - - expanded_keys = expandSelectionGlob(globbed_uri.key); - expanded_keys_iter = expanded_keys.begin(); - - fillBufferForKey(*expanded_keys_iter); - expanded_keys_iter++; - } - - KeyWithInfoPtr next(size_t) - { - std::lock_guard lock(mutex); - return nextAssumeLocked(); - } - - size_t objectsCount() - { - return buffer.size(); - } - - bool hasMore() - { - if (buffer.empty()) - return !(expanded_keys_iter == expanded_keys.end() && is_finished_for_key); - else - return true; - } - - ~Impl() - { - list_objects_pool.wait(); - } - -private: - using ListObjectsOutcome = Aws::S3::Model::ListObjectsV2Outcome; - - void fillBufferForKey(const std::string & uri_key) - { - is_finished_for_key = false; - const String key_prefix = uri_key.substr(0, uri_key.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. - if (key_prefix.size() == uri_key.size()) - { - buffer.clear(); - buffer.emplace_back(std::make_shared(uri_key, std::nullopt)); - buffer_iter = buffer.begin(); - if (read_keys) - read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); - is_finished_for_key = true; - return; - } - - request = {}; - request.SetBucket(globbed_uri.bucket); - request.SetPrefix(key_prefix); - request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); - - outcome_future = listObjectsAsync(); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(uri_key)); - if (!matcher->ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", uri_key, matcher->error()); - - recursive = globbed_uri.key == "/**"; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - fillInternalBufferAssumeLocked(); - } - - KeyWithInfoPtr nextAssumeLocked() - { - do - { - if (buffer_iter != buffer.end()) - { - auto answer = *buffer_iter; - ++buffer_iter; - - /// If url doesn't contain globs, we didn't list s3 bucket and didn't get object info for the key. - /// So we get object info lazily here on 'next()' request. - if (!answer->info) - { - try - { - answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings); - } - catch (...) - { - /// if no such file AND there was no `{}` glob -- this is an exception - /// otherwise ignore it, this is acceptable - if (expanded_keys.size() == 1) - throw; - continue; - } - if (file_progress_callback) - file_progress_callback(FileProgress(0, answer->info->size)); - } - - return answer; - } - - if (is_finished_for_key) - { - if (expanded_keys_iter != expanded_keys.end()) - { - fillBufferForKey(*expanded_keys_iter); - expanded_keys_iter++; - continue; - } - else - return {}; - } - - try - { - fillInternalBufferAssumeLocked(); - } - catch (...) - { - /// In case of exception thrown while listing new batch of files - /// iterator may be partially initialized and its further using may lead to UB. - /// Iterator is used by several processors from several threads and - /// it may take some time for threads to stop processors and they - /// may still use this iterator after exception is thrown. - /// To avoid this UB, reset the buffer and return defaults for further calls. - is_finished_for_key = true; - buffer.clear(); - buffer_iter = buffer.begin(); - throw; - } - } while (true); - } - - void fillInternalBufferAssumeLocked() - { - buffer.clear(); - assert(outcome_future.valid()); - auto outcome = outcome_future.get(); - - if (!outcome.IsSuccess()) - { - throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", - quoteString(request.GetBucket()), quoteString(request.GetPrefix()), - backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); - } - - const auto & result_batch = outcome.GetResult().GetContents(); - - /// It returns false when all objects were returned - is_finished_for_key = !outcome.GetResult().GetIsTruncated(); - - if (!is_finished_for_key) - { - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - list_objects_pool.wait(); - outcome_future = listObjectsAsync(); - } - - if (request_settings.throw_on_zero_files_match && result_batch.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files using prefix {}", request.GetPrefix()); - - KeysWithInfo temp_buffer; - temp_buffer.reserve(result_batch.size()); - - for (const auto & row : result_batch) - { - String key = row.GetKey(); - if (recursive || re2::RE2::FullMatch(key, *matcher)) - { - S3::ObjectInfo info = - { - .size = size_t(row.GetSize()), - .last_modification_time = row.GetLastModified().Millis() / 1000, - }; - - temp_buffer.emplace_back(std::make_shared(std::move(key), std::move(info))); - } - } - - if (temp_buffer.empty()) - { - buffer_iter = buffer.begin(); - return; - } - - if (filter_dag) - { - std::vector paths; - paths.reserve(temp_buffer.size()); - for (const auto & key_with_info : temp_buffer) - paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key); - - VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, filter_dag, virtual_columns, getContext()); - } - - buffer = std::move(temp_buffer); - - if (file_progress_callback) - { - for (const auto & key_with_info : buffer) - file_progress_callback(FileProgress(0, key_with_info->info->size)); - } - - /// Set iterator only after the whole batch is processed - buffer_iter = buffer.begin(); - - if (read_keys) - read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); - } - - std::future listObjectsAsync() - { - return list_objects_scheduler([this] - { - ProfileEvents::increment(ProfileEvents::S3ListObjects); - auto outcome = client->ListObjectsV2(request); - - /// Outcome failure will be handled on the caller side. - if (outcome.IsSuccess()) - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - - return outcome; - }, Priority{}); - } - - std::mutex mutex; - - KeysWithInfo buffer; - KeysWithInfo::iterator buffer_iter; - - std::vector expanded_keys; - std::vector::iterator expanded_keys_iter; - - std::unique_ptr client; - S3::URI globbed_uri; - const ActionsDAG::Node * predicate; - ASTPtr query; - NamesAndTypesList virtual_columns; - ActionsDAGPtr filter_dag; - std::unique_ptr matcher; - bool recursive{false}; - bool is_finished_for_key{false}; - KeysWithInfo * read_keys; - - S3::ListObjectsV2Request request; - S3Settings::RequestSettings request_settings; - - ThreadPool list_objects_pool; - ThreadPoolCallbackRunnerUnsafe list_objects_scheduler; - std::future outcome_future; - std::function file_progress_callback; -}; - -StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : pimpl(std::make_shared( - client_, globbed_uri_, predicate, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::DisclosedGlobIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t StorageS3Source::DisclosedGlobIterator::estimatedKeysCount() -{ - if (pimpl->hasMore()) - { - /// 1000 files were listed, and we cannot make any estimation of _how many more_ there are (because we list bucket lazily); - /// If there are more objects in the bucket, limiting the number of streams is the last thing we may want to do - /// as it would lead to serious slow down of the execution, since objects are going - /// to be fetched sequentially rather than in-parallel with up to times. - return std::numeric_limits::max(); - } - else - return pimpl->objectsCount(); -} - -class StorageS3Source::KeysIterator::Impl -{ -public: - explicit Impl( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys_, - std::function file_progress_callback_) - : keys(keys_) - , client(client_.clone()) - , version_id(version_id_) - , bucket(bucket_) - , request_settings(request_settings_) - , file_progress_callback(file_progress_callback_) - { - if (read_keys_) - { - for (const auto & key : keys) - read_keys_->push_back(std::make_shared(key)); - } - } - - KeyWithInfoPtr next(size_t) - { - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - auto key = keys[current_index]; - std::optional info; - if (file_progress_callback) - { - info = S3::getObjectInfo(*client, bucket, key, version_id, request_settings); - file_progress_callback(FileProgress(0, info->size)); - } - - return std::make_shared(key, info); - } - - size_t objectsCount() - { - return keys.size(); - } - -private: - Strings keys; - std::atomic_size_t index = 0; - std::unique_ptr client; - String version_id; - String bucket; - S3Settings::RequestSettings request_settings; - std::function file_progress_callback; -}; - -StorageS3Source::KeysIterator::KeysIterator( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys, - std::function file_progress_callback_) - : pimpl(std::make_shared( - client_, version_id_, keys_, bucket_, request_settings_, read_keys, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::KeysIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t StorageS3Source::KeysIterator::estimatedKeysCount() -{ - return pimpl->objectsCount(); -} - -StorageS3Source::ReadTaskIterator::ReadTaskIterator( - const DB::ReadTaskCallback & callback_, - size_t max_threads_count) - : callback(callback_) -{ - ThreadPool pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, max_threads_count); - auto pool_scheduler = threadPoolCallbackRunnerUnsafe(pool, "S3ReadTaskItr"); - - std::vector> keys; - keys.reserve(max_threads_count); - for (size_t i = 0; i < max_threads_count; ++i) - keys.push_back(pool_scheduler([this] { return callback(); }, Priority{})); - - pool.wait(); - buffer.reserve(max_threads_count); - for (auto & key_future : keys) - buffer.emplace_back(std::make_shared(key_future.get())); -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::ReadTaskIterator::next(size_t) /// NOLINT -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= buffer.size()) - return std::make_shared(callback()); - - while (current_index < buffer.size()) - { - if (const auto & key_info = buffer[current_index]; key_info && !key_info->key.empty()) - return buffer[current_index]; - - current_index = index.fetch_add(1, std::memory_order_relaxed); - } - - return nullptr; -} - -size_t StorageS3Source::ReadTaskIterator::estimatedKeysCount() -{ - return buffer.size(); -} - - -StorageS3Source::ArchiveIterator::ArchiveIterator( - std::unique_ptr basic_iterator_, - const std::string & archive_pattern_, - std::shared_ptr client_, - const String & bucket_, - const String & version_id_, - const S3Settings::RequestSettings & request_settings_, - ContextPtr context_, - KeysWithInfo * read_keys_) - : WithContext(context_) - , basic_iterator(std::move(basic_iterator_)) - , basic_key_with_info_ptr(nullptr) - , client(client_) - , bucket(bucket_) - , version_id(version_id_) - , request_settings(request_settings_) - , read_keys(read_keys_) -{ - if (archive_pattern_.find_first_of("*?{") != std::string::npos) - { - auto matcher = std::make_shared(makeRegexpPatternFromGlobs(archive_pattern_)); - if (!matcher->ok()) - throw Exception( - ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", archive_pattern_, matcher->error()); - filter = IArchiveReader::NameFilter{[matcher](const std::string & p) mutable { return re2::RE2::FullMatch(p, *matcher); }}; - } - else - { - path_in_archive = archive_pattern_; - } -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::ArchiveIterator::next(size_t) -{ - if (!path_in_archive.empty()) - { - std::unique_lock lock{take_next_mutex}; - while (true) - { - basic_key_with_info_ptr = basic_iterator->next(); - if (!basic_key_with_info_ptr) - return {}; - refreshArchiveReader(); - bool file_exists = archive_reader->fileExists(path_in_archive); - if (file_exists) - { - KeyWithInfoPtr archive_key_with_info - = std::make_shared(basic_key_with_info_ptr->key, std::nullopt, path_in_archive, archive_reader); - if (read_keys != nullptr) - read_keys->push_back(archive_key_with_info); - return archive_key_with_info; - } - } - } - else - { - std::unique_lock lock{take_next_mutex}; - while (true) - { - if (!file_enumerator) - { - basic_key_with_info_ptr = basic_iterator->next(); - if (!basic_key_with_info_ptr) - return {}; - refreshArchiveReader(); - file_enumerator = archive_reader->firstFile(); - if (!file_enumerator) - { - file_enumerator.reset(); - continue; - } - } - else if (!file_enumerator->nextFile()) - { - file_enumerator.reset(); - continue; - } - - String current_filename = file_enumerator->getFileName(); - bool satisfies = filter(current_filename); - if (satisfies) - { - KeyWithInfoPtr archive_key_with_info - = std::make_shared(basic_key_with_info_ptr->key, std::nullopt, current_filename, archive_reader); - if (read_keys != nullptr) - read_keys->push_back(archive_key_with_info); - return archive_key_with_info; - } - } - } -} - -size_t StorageS3Source::ArchiveIterator::estimatedKeysCount() -{ - return basic_iterator->estimatedKeysCount(); -} - -void StorageS3Source::ArchiveIterator::refreshArchiveReader() -{ - if (basic_key_with_info_ptr) - { - if (!basic_key_with_info_ptr->info) - { - basic_key_with_info_ptr->info = S3::getObjectInfo(*client, bucket, basic_key_with_info_ptr->key, version_id, request_settings); - } - archive_reader = createArchiveReader( - basic_key_with_info_ptr->key, - [key = basic_key_with_info_ptr->key, archive_size = basic_key_with_info_ptr->info.value().size, context = getContext(), this]() - { return createS3ReadBuffer(key, archive_size, context, client, bucket, version_id, request_settings); }, - basic_key_with_info_ptr->info.value().size); - } - else - { - archive_reader = nullptr; - } -} - -StorageS3Source::StorageS3Source( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - const S3Settings::RequestSettings & request_settings_, - String compression_hint_, - const std::shared_ptr & client_, - const String & bucket_, - const String & version_id_, - const String & url_host_and_port_, - std::shared_ptr file_iterator_, - const size_t max_parsing_threads_, - bool need_only_count_) - : SourceWithKeyCondition(info.source_header, false) - , WithContext(context_) - , name(std::move(name_)) - , bucket(bucket_) - , version_id(version_id_) - , url_host_and_port(url_host_and_port_) - , format(format_) - , columns_desc(info.columns_description) - , requested_columns(info.requested_columns) - , max_block_size(max_block_size_) - , request_settings(request_settings_) - , compression_hint(std::move(compression_hint_)) - , client(client_) - , sample_block(info.format_header) - , format_settings(format_settings_) - , requested_virtual_columns(info.requested_virtual_columns) - , file_iterator(file_iterator_) - , max_parsing_threads(max_parsing_threads_) - , need_only_count(need_only_count_) - , create_reader_pool( - CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "CreateS3Reader")) -{ -} - -void StorageS3Source::lazyInitialize(size_t idx) -{ - if (initialized) - return; - - reader = createReader(idx); - if (reader) - reader_future = createReaderAsync(idx); - initialized = true; -} - -StorageS3Source::ReaderHolder StorageS3Source::createReader(size_t idx) -{ - KeyWithInfoPtr key_with_info; - do - { - key_with_info = file_iterator->next(idx); - if (!key_with_info || key_with_info->key.empty()) - return {}; - - if (!key_with_info->info) - key_with_info->info = S3::getObjectInfo(*client, bucket, key_with_info->key, version_id, request_settings); - } - while (getContext()->getSettingsRef().s3_skip_empty_files && key_with_info->info->size == 0); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(*key_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - auto compression_method = CompressionMethod::None; - if (!key_with_info->path_in_archive.has_value()) - { - compression_method = chooseCompressionMethod(key_with_info->key, compression_hint); - read_buf = createS3ReadBuffer( - key_with_info->key, key_with_info->info->size, getContext(), client, bucket, version_id, request_settings); - } - else - { - compression_method = chooseCompressionMethod(key_with_info->path_in_archive.value(), compression_hint); - read_buf = key_with_info->archive_reader->readFile(key_with_info->path_in_archive.value(), /*throw_on_not_found=*/true); - } - auto input_format = FormatFactory::instance().getInput( - format, - *read_buf, - sample_block, - getContext(), - max_block_size, - format_settings, - max_parsing_threads, - /* max_download_threads= */ std::nullopt, - /* is_remote_fs */ true, - compression_method, - need_only_count); - - if (key_condition) - input_format->setKeyCondition(key_condition); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{key_with_info, bucket, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageS3Source::createReaderAsync(size_t idx) -{ - return create_reader_scheduler([=, this] { return createReader(idx); }, Priority{}); -} - -std::unique_ptr createS3ReadBuffer( - const String & key, - size_t object_size, - std::shared_ptr context, - std::shared_ptr client_ptr, - const String & bucket, - const String & version_id, - const S3Settings::RequestSettings & request_settings) -{ - auto read_settings = context->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = context->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - static LoggerPtr log = getLogger("StorageS3Source"); - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. - if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from S3 with initial prefetch", object_size); - return createAsyncS3ReadBuffer(key, read_settings, object_size, context, client_ptr, bucket, version_id, request_settings); - } - - - return std::make_unique( - client_ptr, - bucket, - key, - version_id, - request_settings, - read_settings, - /*use_external_buffer*/ false, - /*offset_*/ 0, - /*read_until_position_*/ 0, - /*restricted_seek_*/ false, - object_size); -} - -std::unique_ptr createAsyncS3ReadBuffer( - const String & key, - const ReadSettings & read_settings, - size_t object_size, - std::shared_ptr context, - std::shared_ptr client_ptr, - const String & bucket, - const String & version_id, - const S3Settings::RequestSettings & request_settings) -{ - auto read_buffer_creator = [=](bool restricted_seek, const StoredObject & object) -> std::unique_ptr - { - return std::make_unique( - client_ptr, - bucket, - object.remote_path, - version_id, - request_settings, - read_settings, - /* use_external_buffer */ true, - /* offset */ 0, - /* read_until_position */ 0, - restricted_seek, - object_size); - }; - - auto modified_settings{read_settings}; - /// User's S3 object may change, don't cache it. - modified_settings.use_page_cache_for_disks_without_file_cache = false; - - /// FIXME: Changing this setting to default value breaks something around parquet reading - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - - auto s3_impl = std::make_unique( - std::move(read_buffer_creator), - StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, - "", - read_settings, - /* cache_log */ nullptr, - /* use_external_buffer */ true); - - auto & pool_reader = context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - auto async_reader = std::make_unique( - std::move(s3_impl), pool_reader, modified_settings, context->getAsyncReadCounters(), context->getFilesystemReadPrefetchesLog()); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -StorageS3Source::~StorageS3Source() -{ - create_reader_pool.wait(); -} - -String StorageS3Source::getName() const -{ - return name; -} - -Chunk StorageS3Source::generate() -{ - lazyInitialize(); - - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - String file_name = reader.getFile(); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, requested_virtual_columns, reader.getPath(), reader.getFileSize(), reader.isArchive() ? (&file_name) : nullptr); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getPath(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -void StorageS3Source::addNumRowsToCache(const String & bucket_with_key, size_t num_rows) -{ - String source = fs::path(url_host_and_port) / bucket_with_key; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional StorageS3Source::tryGetNumRowsFromCache(const KeyWithInfo & key_with_info) -{ - String source = fs::path(url_host_and_port) / bucket / key_with_info.key; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional { return key_with_info.info->last_modification_time; }; - - return StorageS3::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class StorageS3Sink : public SinkToStorage -{ -public: - StorageS3Sink( - const String & format, - const Block & sample_block_, - const ContextPtr & context, - std::optional format_settings_, - const CompressionMethod compression_method, - const StorageS3::Configuration & configuration_, - const String & bucket, - const String & key) - : SinkToStorage(sample_block_), sample_block(sample_block_), format_settings(format_settings_) - { - BlobStorageLogWriterPtr blob_log = nullptr; - if (auto blob_storage_log = context->getBlobStorageLog()) - { - blob_log = std::make_shared(std::move(blob_storage_log)); - blob_log->query_id = context->getCurrentQueryId(); - } - - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - configuration_.client, - bucket, - key, - DBMS_DEFAULT_BUFFER_SIZE, - configuration_.request_settings, - std::move(blob_log), - std::nullopt, - threadPoolCallbackRunnerUnsafe(getIOThreadPool().get(), "S3ParallelWrite"), - context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer - = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); - } - - String getName() const override { return "StorageS3Sink"; } - - void consume(Chunk & chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. - release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf.reset(); - } - - Block sample_block; - std::optional format_settings; - std::unique_ptr write_buf; - OutputFormatPtr writer; - bool cancelled = false; - std::mutex cancel_mutex; -}; - -namespace -{ - -std::optional checkAndGetNewFileOnInsertIfNeeded( - const ContextPtr & context, const StorageS3::Configuration & configuration, const String & key, size_t sequence_number) -{ - if (context->getSettingsRef().s3_truncate_on_insert - || !S3::objectExists( - *configuration.client, configuration.url.bucket, key, configuration.url.version_id, configuration.request_settings)) - return std::nullopt; - - if (context->getSettingsRef().s3_create_new_file_on_insert) - { - auto pos = key.find_first_of('.'); - String new_key; - do - { - new_key = key.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : key.substr(pos)); - ++sequence_number; - } while (S3::objectExists( - *configuration.client, configuration.url.bucket, new_key, configuration.url.version_id, configuration.request_settings)); - - return new_key; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object in bucket {} with key {} already exists. " - "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", - configuration.url.bucket, key); -} -} - - -class PartitionedStorageS3Sink : public PartitionedSink, WithContext -{ -public: - PartitionedStorageS3Sink( - const ASTPtr & partition_by, - const String & format_, - const Block & sample_block_, - const ContextPtr & context_, - std::optional format_settings_, - const CompressionMethod compression_method_, - const StorageS3::Configuration & configuration_, - const String & bucket_, - const String & key_) - : PartitionedSink(partition_by, context_, sample_block_) - , WithContext(context_) - , format(format_) - , sample_block(sample_block_) - , compression_method(compression_method_) - , configuration(configuration_) - , bucket(bucket_) - , key(key_) - , format_settings(format_settings_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto partition_bucket = replaceWildcards(bucket, partition_id); - validateBucket(partition_bucket); - - auto partition_key = replaceWildcards(key, partition_id); - validateKey(partition_key); - - if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(getContext(), configuration, partition_key, /* sequence_number */ 1)) - partition_key = *new_key; - - return std::make_shared( - format, sample_block, getContext(), format_settings, compression_method, configuration, partition_bucket, partition_key); - } - -private: - const String format; - const Block sample_block; - const CompressionMethod compression_method; - const StorageS3::Configuration configuration; - const String bucket; - const String key; - const std::optional format_settings; - - static void validateBucket(const String & str) - { - S3::URI::validateBucket(str, {}); - - if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket name"); - - validatePartitionKey(str, false); - } - - static void validateKey(const String & str) - { - /// See: - /// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html - /// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject - - if (str.empty() || str.size() > 1024) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect key length (not empty, max 1023 characters), got: {}", str.size()); - - if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key"); - - validatePartitionKey(str, true); - } -}; - - -StorageS3::StorageS3( - const Configuration & configuration_, - const ContextPtr & context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , configuration(configuration_) - , name(configuration.url.storage_name) - , distributed_processing(distributed_processing_) - , format_settings(format_settings_) - , partition_by(partition_by_) -{ - updateConfiguration(context_); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) - - if (configuration.format != "auto") - FormatFactory::instance().checkFormatName(configuration.format); - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri); - context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration.headers_from_ast); - - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - ColumnsDescription columns; - if (configuration.format == "auto") - std::tie(columns, configuration.format) = getTableStructureAndFormatFromData(configuration, format_settings, context_); - else - columns = getTableStructureFromData(configuration, format_settings, context_); - - storage_metadata.setColumns(columns); - } - else - { - if (configuration.format == "auto") - configuration.format = getTableStructureAndFormatFromData(configuration, format_settings, context_).second; - - /// We don't allow special columns in S3 storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -static std::shared_ptr createFileIterator( - StorageS3::Configuration configuration, - bool distributed_processing, - ContextPtr local_context, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns, - StorageS3Source::KeysWithInfo * read_keys = nullptr, - std::function file_progress_callback = {}) -{ - if (distributed_processing) - { - return std::make_shared( - local_context->getReadTaskCallback(), local_context->getSettingsRef().max_threads); - } - else - { - auto basic_iterator = [&]() -> std::unique_ptr - { - StorageS3Source::KeysWithInfo * local_read_keys = configuration.url.archive_pattern.has_value() ? nullptr : read_keys; - if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - return std::make_unique( - *configuration.client, - configuration.url, - predicate, - virtual_columns, - local_context, - local_read_keys, - configuration.request_settings, - file_progress_callback); - } - else - { - Strings keys = configuration.keys; - auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - if (filter_dag) - { - std::vector paths; - paths.reserve(keys.size()); - for (const auto & key : keys) - paths.push_back(fs::path(configuration.url.bucket) / key); - VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); - } - return std::make_unique( - *configuration.client, - configuration.url.version_id, - keys, - configuration.url.bucket, - configuration.request_settings, - local_read_keys, - file_progress_callback); - } - }(); - if (configuration.url.archive_pattern.has_value()) - { - return std::make_shared( - std::move(basic_iterator), - configuration.url.archive_pattern.value(), - configuration.client, - configuration.url.bucket, - configuration.url.version_id, - configuration.request_settings, - local_context, - read_keys); - } - else - { - return basic_iterator; - } - } -} - -bool StorageS3::supportsSubsetOfColumns(const ContextPtr & context) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getFormatCopy(), context, format_settings); -} - -bool StorageS3::prefersLargeBlocks() const -{ - return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(getFormatCopy()); -} - -bool StorageS3::parallelizeOutputAfterReading(ContextPtr context) const -{ - return FormatFactory::instance().checkParallelizeOutputAfterReading(getFormatCopy(), context); -} - -void StorageS3::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - updateConfiguration(local_context); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); - - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - local_context, - read_from_format_info.source_header, - *this, - std::move(read_from_format_info), - need_only_count, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromStorageS3Step::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - createIterator(predicate); -} - -void ReadFromStorageS3Step::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - iterator_wrapper = createFileIterator( - storage.getConfigurationCopy(), - storage.distributed_processing, - context, - predicate, - virtual_columns, - nullptr, - context->getFileProgressCallback()); -} - -void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - if (storage.partition_by && query_configuration.withPartitionWildcard()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned S3 storage is not implemented yet"); - - createIterator(nullptr); - size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); - if (estimated_keys_count > 1) - num_streams = std::min(num_streams, estimated_keys_count); - else - { - /// The amount of keys (zero) was probably underestimated. We will keep one stream for this particular case. - num_streams = 1; - } - - const size_t max_threads = context->getSettingsRef().max_threads; - const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul)); - - Pipes pipes; - pipes.reserve(num_streams); - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared( - read_from_format_info, - query_configuration.format, - storage.getName(), - context, - storage.format_settings, - max_block_size, - query_configuration.request_settings, - query_configuration.compression_method, - query_configuration.client, - query_configuration.url.bucket, - query_configuration.url.version_id, - query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()), - iterator_wrapper, - max_parsing_threads, - need_only_count); - - source->setKeyCondition(filter_actions_dag, context); - pipes.emplace_back(std::move(source)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(read_from_format_info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageS3::write( - const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - auto key = query_configuration.keys.front(); - - if (query_configuration.withGlobsIgnorePartitionWildcard()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", query_configuration.url.key); - - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(query_configuration.keys.back(), query_configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && query_configuration.withPartitionWildcard(); - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - key); - } - else - { - if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(local_context, query_configuration, query_configuration.keys.front(), query_configuration.keys.size())) - { - std::lock_guard lock{configuration_update_mutex}; - query_configuration.keys.push_back(*new_key); - configuration.keys.push_back(*new_key); - key = *new_key; - } - - return std::make_shared( - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - key); - } -} - -void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - - if (query_configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", - query_configuration.url.key); - } - - Aws::S3::Model::Delete delkeys; - - for (const auto & key : query_configuration.keys) - { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(key); - delkeys.AddObjects(std::move(obj)); - } - - ProfileEvents::increment(ProfileEvents::S3DeleteObjects); - S3::DeleteObjectsRequest request; - request.SetBucket(query_configuration.url.bucket); - request.SetDelete(delkeys); - - auto response = query_configuration.client->DeleteObjects(request); - - const auto * response_error = response.IsSuccess() ? nullptr : &response.GetError(); - auto time_now = std::chrono::system_clock::now(); - if (auto blob_storage_log = BlobStorageLogWriter::create()) - for (const auto & key : query_configuration.keys) - blob_storage_log->addEvent( - BlobStorageLogElement::EventType::Delete, query_configuration.url.bucket, key, {}, 0, response_error, time_now); - - if (!response.IsSuccess()) - { - const auto & err = response.GetError(); - throw S3Exception(err.GetMessage(), err.GetErrorType()); - } - - for (const auto & error : response.GetResult().GetErrors()) - LOG_WARNING(getLogger("StorageS3"), "Failed to delete {}, error: {}", error.GetKey(), error.GetMessage()); -} - -StorageS3::Configuration StorageS3::updateConfigurationAndGetCopy(const ContextPtr & local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); - return configuration; -} - -void StorageS3::updateConfiguration(const ContextPtr & local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); -} - -void StorageS3::useConfiguration(const StorageS3::Configuration & new_configuration) -{ - std::lock_guard lock(configuration_update_mutex); - configuration = new_configuration; -} - -StorageS3::Configuration StorageS3::getConfigurationCopy() const -{ - std::lock_guard lock(configuration_update_mutex); - return configuration; -} - -String StorageS3::getFormatCopy() const -{ - std::lock_guard lock(configuration_update_mutex); - return configuration.format; -} - -bool StorageS3::Configuration::update(const ContextPtr & context) -{ - auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName()); - request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context->getSettings()); - - if (client && (static_configuration || !auth_settings.hasUpdates(s3_settings.auth_settings))) - return false; - - auth_settings.updateFrom(s3_settings.auth_settings); - keys[0] = url.key; - connect(context); - return true; -} - -void StorageS3::Configuration::connect(const ContextPtr & context) -{ - const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); - const Settings & local_settings = context->getSettingsRef(); - - if (S3::isS3ExpressEndpoint(url.endpoint) && auth_settings.region.empty()) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets"); - - S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( - auth_settings.region, - context->getRemoteHostFilter(), - static_cast(global_settings.s3_max_redirects), - static_cast(global_settings.s3_retry_attempts), - global_settings.enable_s3_requests_logging, - /* for_disk_s3 = */ false, - request_settings.get_request_throttler, - request_settings.put_request_throttler, - url.uri.getScheme()); - - client_configuration.endpointOverride = url.endpoint; - /// seems as we don't use it - client_configuration.maxConnections = static_cast(request_settings.max_connections); - client_configuration.connectTimeoutMs = local_settings.s3_connect_timeout_ms; - client_configuration.http_keep_alive_timeout = S3::DEFAULT_KEEP_ALIVE_TIMEOUT; - client_configuration.http_keep_alive_max_requests = S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS; - - auto headers = auth_settings.headers; - if (!headers_from_ast.empty()) - headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end()); - - client_configuration.requestTimeoutMs = request_settings.request_timeout_ms; - - S3::ClientSettings client_settings{ - .use_virtual_addressing = url.is_virtual_hosted_style, - .disable_checksum = local_settings.s3_disable_checksum, - .gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false), - .is_s3express_bucket = S3::isS3ExpressEndpoint(url.endpoint), - }; - - auto credentials - = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token); - client = S3::ClientFactory::instance().create( - client_configuration, - client_settings, - credentials.GetAWSAccessKeyId(), - credentials.GetAWSSecretKey(), - auth_settings.server_side_encryption_customer_key_base64, - auth_settings.server_side_encryption_kms_config, - std::move(headers), - S3::CredentialsConfiguration{ - auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), - auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), - auth_settings.expiration_window_seconds.value_or( - context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), - auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), - }, - credentials.GetSessionToken()); -} - -bool StorageS3::Configuration::withGlobsIgnorePartitionWildcard() const -{ - if (!withPartitionWildcard()) - return withGlobs(); - - return PartitionedSink::replaceWildcards(getPath(), "").find_first_of("*?{") != std::string::npos; -} - -void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); - - auto filename = collection.getOrDefault("filename", ""); - if (!filename.empty()) - configuration.url = S3::URI(std::filesystem::path(collection.get("url")) / filename); - else - configuration.url = S3::URI(collection.get("url")); - - configuration.auth_settings.access_key_id = collection.getOrDefault("access_key_id", ""); - configuration.auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", ""); - configuration.auth_settings.use_environment_credentials = collection.getOrDefault("use_environment_credentials", 1); - configuration.auth_settings.no_sign_request = collection.getOrDefault("no_sign_request", false); - configuration.auth_settings.expiration_window_seconds - = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS); - - configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method - = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); - configuration.structure = collection.getOrDefault("structure", "auto"); - - configuration.request_settings = S3Settings::RequestSettings(collection); -} - -StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file) -{ - StorageS3::Configuration configuration; - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - processNamedCollectionResult(configuration, *named_collection); - } - else - { - /// Supported signatures: - /// - /// S3('url') - /// S3('url', 'format') - /// S3('url', 'format', 'compression') - /// S3('url', NOSIGN) - /// S3('url', NOSIGN, 'format') - /// S3('url', NOSIGN, 'format', 'compression') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression') - /// with optional headers() function - - size_t count = StorageURL::evalArgsAndCollectHeaders(engine_args, configuration.headers_from_ast, local_context); - - if (count == 0 || count > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage S3 requires 1 to 6 positional arguments: " - "url, [NOSIGN | access_key_id, secret_access_key], [session_token], [name of used format], [compression_method], [headers], [extra_credentials]"); - - std::unordered_map engine_args_to_idx; - bool no_sign_request = false; - - /// For 2 arguments we support 2 possible variants: - /// - s3(source, format) - /// - s3(source, NOSIGN) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - no_sign_request = true; - else - engine_args_to_idx = {{"format", 1}}; - } - /// For 3 arguments we support 2 possible variants: - /// - s3(source, format, compression_method) - /// - s3(source, access_key_id, secret_access_key) - /// - s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or format name. - else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - engine_args_to_idx = {{"format", 2}}; - } - else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) - engine_args_to_idx = {{"format", 1}, {"compression_method", 2}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; - } - /// For 4 arguments we support 3 possible variants: - /// - s3(source, access_key_id, secret_access_key, session_token) - /// - s3(source, access_key_id, secret_access_key, format) - /// - s3(source, NOSIGN, format, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. - else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - engine_args_to_idx = {{"format", 2}, {"compression_method", 3}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; - } - } - /// For 5 arguments we support 2 possible variants: - /// - s3(source, access_key_id, secret_access_key, session_token, format) - /// - s3(source, access_key_id, secret_access_key, format, compression) - else if (count == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; - } - else if (count == 6) - { - engine_args_to_idx - = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; - } - - /// This argument is always the first - configuration.url = S3::URI(checkAndGetLiteralArgument(engine_args[0], "url")); - - if (engine_args_to_idx.contains("format")) - configuration.format = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["format"]], "format"); - - if (engine_args_to_idx.contains("compression_method")) - configuration.compression_method - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["compression_method"]], "compression_method"); - - if (engine_args_to_idx.contains("access_key_id")) - configuration.auth_settings.access_key_id - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["access_key_id"]], "access_key_id"); - - if (engine_args_to_idx.contains("secret_access_key")) - configuration.auth_settings.secret_access_key - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); - - if (engine_args_to_idx.contains("session_token")) - configuration.auth_settings.session_token - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["session_token"]], "session_token"); - - if (no_sign_request) - configuration.auth_settings.no_sign_request = no_sign_request; - } - - configuration.static_configuration - = !configuration.auth_settings.access_key_id.empty() || configuration.auth_settings.no_sign_request.has_value(); - - configuration.keys = {configuration.url.key}; - - if (configuration.format == "auto" && get_format_from_file) - { - if (configuration.url.archive_pattern.has_value()) - { - configuration.format = FormatFactory::instance() - .tryGetFormatFromFileName(Poco::URI(configuration.url.archive_pattern.value()).getPath()) - .value_or("auto"); - } - else - { - configuration.format - = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(configuration.url.uri_str).getPath()).value_or("auto"); - } - } - - return configuration; -} - -ColumnsDescription StorageS3::getTableStructureFromData( - const StorageS3::Configuration & configuration_, const std::optional & format_settings_, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(configuration_.format, configuration_, format_settings_, ctx).first; -} - -std::pair StorageS3::getTableStructureAndFormatFromData( - const StorageS3::Configuration & configuration, const std::optional & format_settings, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, configuration, format_settings, ctx); -} - -class ReadBufferIterator : public IReadBufferIterator, WithContext -{ -public: - ReadBufferIterator( - std::shared_ptr file_iterator_, - const StorageS3Source::KeysWithInfo & read_keys_, - const StorageS3::Configuration & configuration_, - std::optional format_, - const std::optional & format_settings_, - ContextPtr context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , read_keys(read_keys_) - , configuration(configuration_) - , format(std::move(format_)) - , format_settings(format_settings_) - , prev_read_keys_size(read_keys_.size()) - { - } - - Data next() override - { - if (first) - { - /// If format is unknown we iterate through all currently read keys on first iteration and - /// try to determine format by file name. - if (!format) - { - for (const auto & key_with_info : read_keys) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(key_with_info->getFileName())) - { - format = format_from_file_name; - break; - } - } - } - - /// For default mode check cached columns for currently read keys on first iteration. - if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns, format}; - } - } - - while (true) - { - current_key_with_info = (*file_iterator)(); - - if (!current_key_with_info || current_key_with_info->key.empty()) - { - if (first) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files with provided path " - "in S3 or all files are empty. You can specify table structure manually", - *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in S3 or all files are empty. You can specify the format manually"); - } - - return {nullptr, std::nullopt, format}; - } - - if (read_keys.size() > prev_read_keys_size) - { - /// If format is unknown we can try to determine it by new file names. - if (!format) - { - for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName())) - { - format = format_from_file_name; - break; - } - } - } - - /// Check new files in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - if (columns_from_cache) - return {nullptr, columns_from_cache, format}; - } - - prev_read_keys_size = read_keys.size(); - } - - if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info->info && current_key_with_info->info->size == 0) - continue; - - /// In union mode, check cached columns only for current key. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - StorageS3Source::KeysWithInfo keys = {current_key_with_info}; - if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end())) - { - first = false; - return {nullptr, columns_from_cache, format}; - } - } - - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - std::unique_ptr impl; - - if (!current_key_with_info->path_in_archive.has_value()) - { - impl = std::make_unique( - configuration.client, - configuration.url.bucket, - current_key_with_info->key, - configuration.url.version_id, - configuration.request_settings, - getContext()->getReadSettings()); - } - else - { - assert(current_key_with_info->archive_reader); - impl = current_key_with_info->archive_reader->readFile( - current_key_with_info->path_in_archive.value(), /*throw_on_not_found=*/true); - } - if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof()) - { - first = false; - return { - wrapReadBufferWithCompressionMethod( - std::move(impl), - current_key_with_info->path_in_archive.has_value() - ? chooseCompressionMethod(current_key_with_info->path_in_archive.value(), configuration.compression_method) - : chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), - zstd_window_log_max), - std::nullopt, - format}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) - / configuration.url.bucket / current_key_with_info->getPath(); - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) - / configuration.url.bucket / current_key_with_info->getPath(); - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addColumns(cache_key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket; - Strings sources; - sources.reserve(read_keys.size()); - std::transform( - read_keys.begin(), - read_keys.end(), - std::back_inserter(sources), - [&](const auto & elem) { return host_and_bucket / elem->getPath(); }); - auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override - { - if (current_key_with_info) - return current_key_with_info->getPath(); - return ""; - } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - chassert(current_key_with_info); - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); - return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max); - } - -private: - std::optional tryGetColumnsFromCache( - const StorageS3Source::KeysWithInfo::const_iterator & begin, const StorageS3Source::KeysWithInfo::const_iterator & end) - { - auto context = getContext(); - if (!context->getSettingsRef().schema_inference_use_cache_for_s3) - return std::nullopt; - - auto & schema_cache = StorageS3::getSchemaCache(context); - for (auto it = begin; it < end; ++it) - { - auto get_last_mod_time = [&] - { - time_t last_modification_time = 0; - if ((*it)->info) - { - last_modification_time = (*it)->info->last_modification_time; - } - else - { - /// Note that in case of exception in getObjectInfo returned info will be empty, - /// but schema cache will handle this case and won't return columns from cache - /// because we can't say that it's valid without last modification time. - last_modification_time = S3::getObjectInfo( - *configuration.client, - configuration.url.bucket, - (*it)->key, - configuration.url.version_id, - configuration.request_settings, - /*with_metadata=*/ false, - /*throw_on_error= */ false).last_modification_time; - } - - return last_modification_time ? std::make_optional(last_modification_time) : std::nullopt; - }; - String path = fs::path(configuration.url.bucket) / (*it)->getPath(); - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path; - - if (format) - { - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry fcreateor some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. - format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - const StorageS3Source::KeysWithInfo & read_keys; - const StorageS3::Configuration & configuration; - std::optional format; - const std::optional & format_settings; - StorageS3Source::KeyWithInfoPtr current_key_with_info; - size_t prev_read_keys_size; - bool first = true; -}; - -std::pair StorageS3::getTableStructureAndFormatFromDataImpl( - std::optional format, - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) -{ - KeysWithInfo read_keys; - - auto file_iterator = createFileIterator(configuration, false, ctx, {}, {}, &read_keys); - - ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format, format_settings, ctx); - if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); -} - -void registerStorageS3Impl(const String & name, StorageFactory & factory) -{ - factory.registerStorage(name, [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageS3::getConfiguration(engine_args, args.getLocalContext()); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. - user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - std::move(configuration), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing_ */false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::S3, - }); -} - -void registerStorageS3(StorageFactory & factory) -{ - registerStorageS3Impl("S3", factory); - registerStorageS3Impl("COSN", factory); - registerStorageS3Impl("OSS", factory); -} - -bool StorageS3::supportsPartitionBy() const -{ - return true; -} - -SchemaCache & StorageS3::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_s3", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} -} - -#endif diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index a9801e3b910..dadf2f35e6e 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -137,7 +137,7 @@ select arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '0' and Settings['max_insert_threads'] = '16'; -5 +18 select count() from testX; 60 select count() from testXA; @@ -185,7 +185,7 @@ select arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '1' and Settings['max_insert_threads'] = '16'; -5 +18 select count() from testX; 80 select count() from testXA; diff --git a/tests/queries/0_stateless/01927_query_views_log_current_database.sql b/tests/queries/0_stateless/01927_query_views_log_current_database.sql index ba42795333c..6287156daaf 100644 --- a/tests/queries/0_stateless/01927_query_views_log_current_database.sql +++ b/tests/queries/0_stateless/01927_query_views_log_current_database.sql @@ -16,6 +16,7 @@ CREATE MATERIALIZED VIEW matview_b_to_c TO table_c AS SELECT SUM(a + sleepEachRo CREATE MATERIALIZED VIEW matview_join_d_e TO table_f AS SELECT table_d.a as a, table_e.count + sleepEachRow(0.000003) as count FROM table_d LEFT JOIN table_e ON table_d.a = table_e.a; -- ENABLE LOGS +SET parallel_view_processing=0; SET log_query_views=1; SET log_queries_min_type='QUERY_FINISH'; SET log_queries=1; diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference index e0cc8f0ce63..2d9f236ada9 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference @@ -1,8 +1,8 @@ -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent -18 18 9 18 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent -18 9 9 9 -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent -18 18 9 18 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 +18 36 27 36 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data +18 18 18 18 +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 +18 36 27 36 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data 18 18 18 18 diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql index 88d3165d060..6a155bcda46 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql @@ -1,6 +1,6 @@ -- Tags: long -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; drop table if exists test sync; drop table if exists test_mv_a sync; @@ -35,7 +35,7 @@ select (select sum(c) from test_mv_c where test='case1'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data'; set deduplicate_blocks_in_dependent_materialized_views=1; @@ -53,7 +53,7 @@ select (select sum(c) from test_mv_c where test='case2'); -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; set deduplicate_blocks_in_dependent_materialized_views=0; @@ -70,7 +70,7 @@ select (select sum(c) from test_mv_b where test='case3'), (select sum(c) from test_mv_c where test='case3'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data'; set deduplicate_blocks_in_dependent_materialized_views=1; diff --git a/tests/queries/0_stateless/02125_query_views_log.sql b/tests/queries/0_stateless/02125_query_views_log.sql index d2d19b76a1f..ba50902ebea 100644 --- a/tests/queries/0_stateless/02125_query_views_log.sql +++ b/tests/queries/0_stateless/02125_query_views_log.sql @@ -8,7 +8,7 @@ create table dst (key Int) engine=Null(); create materialized view mv1 to dst as select * from src; create materialized view mv2 to dst as select * from src; -insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=1; +insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=0; system flush logs; -- { echo } From 5f63abfd43e6946ff4f21d261f77e4eeb8b7d7c5 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 29 May 2024 14:31:11 +0200 Subject: [PATCH 034/141] work with tests --- .../Transforms/CountingTransform.cpp | 3 -- src/Processors/Transforms/CountingTransform.h | 2 -- .../Transforms/SquashingChunksTransform.cpp | 32 +++++++++---------- .../Transforms/buildPushingToViewsChain.cpp | 6 ++-- src/Storages/MergeTree/MergeTreeDataWriter.h | 1 - src/Storages/StorageLog.cpp | 1 + 6 files changed, 19 insertions(+), 26 deletions(-) diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index c138eed69de..d39c6575292 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -18,9 +18,6 @@ namespace DB void CountingTransform::onConsume(Chunk chunk) { - LOG_DEBUG(getLogger("CountingTransform"), - "onConsume rows {} bytes {}, progress rows {} bytes {}", chunk.getNumRows(), chunk.bytes(), progress.written_rows, progress.written_bytes); - if (quota) quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes()); diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index ab8d083fd05..4efcf147ac7 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -45,8 +45,6 @@ public: void onConsume(Chunk chunk) override; GenerateResult onGenerate() override { - LOG_DEBUG(getLogger("CountingTransform"), - "onGenerate {}", cur_chunk.getNumRows()); GenerateResult res; res.chunk = std::move(cur_chunk); return res; diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 2ee13c05b95..531d264a25a 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -17,8 +17,8 @@ SquashingChunksTransform::SquashingChunksTransform( void SquashingChunksTransform::onConsume(Chunk chunk) { - LOG_DEBUG(getLogger("SquashingChunksTransform"), - "onConsume {}", chunk.getNumRows()); + // LOG_DEBUG(getLogger("SquashingChunksTransform"), + // "onConsume {}", chunk.getNumRows()); auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); cur_chunk = Chunk(result.block.getColumns(), result.block.rows()); @@ -37,14 +37,14 @@ void SquashingChunksTransform::onConsume(Chunk chunk) cur_chunkinfos = {}; } - LOG_DEBUG(getLogger("SquashingChunksTransform"), - "got result rows {}, size {}, columns {}, infos: {}/{}", - cur_chunk.getNumRows(), cur_chunk.bytes(), cur_chunk.getNumColumns(), - cur_chunk.getChunkInfos().size(), cur_chunk.getChunkInfos().debug()); + // LOG_DEBUG(getLogger("SquashingChunksTransform"), + // "got result rows {}, size {}, columns {}, infos: {}/{}", + // cur_chunk.getNumRows(), cur_chunk.bytes(), cur_chunk.getNumColumns(), + // cur_chunk.getChunkInfos().size(), cur_chunk.getChunkInfos().debug()); } else { - assert(!result.input_block_delayed); + assert(result.input_block_delayed); cur_chunkinfos = std::move(chunk.getChunkInfos()); } } @@ -90,10 +90,10 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::consume(Chunk chunk) { - LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - "transform rows {}, size {}, columns {}, infos: {}/{}", - chunk.getNumRows(), chunk.bytes(), chunk.getNumColumns(), - chunk.getChunkInfos().size(), chunk.getChunkInfos().debug()); + // LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), + // "transform rows {}, size {}, columns {}, infos: {}/{}", + // chunk.getNumRows(), chunk.bytes(), chunk.getNumColumns(), + // chunk.getChunkInfos().size(), chunk.getChunkInfos().debug()); auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); @@ -111,14 +111,14 @@ void SimpleSquashingChunksTransform::consume(Chunk chunk) squashed_info = {}; } - LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - "got result rows {}, size {}, columns {}, infos: {}/{}", - squashed_chunk.getNumRows(), squashed_chunk.bytes(), squashed_chunk.getNumColumns(), - squashed_chunk.getChunkInfos().size(), squashed_chunk.getChunkInfos().debug()); + // LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), + // "got result rows {}, size {}, columns {}, infos: {}/{}", + // squashed_chunk.getNumRows(), squashed_chunk.bytes(), squashed_chunk.getNumColumns(), + // squashed_chunk.getChunkInfos().size(), squashed_chunk.getChunkInfos().debug()); } else { - assert(!result.input_block_delayed); + chassert(result.input_block_delayed); squashed_info = std::move(chunk.getChunkInfos()); } } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index d44796610ed..996fe3efdc5 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -552,10 +552,8 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } - else - { - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); - } + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index a9a44813545..863c951d957 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -47,7 +47,6 @@ public: : data(data_) , log(getLogger(data.getLogName() + " (Writer)")) { - LOG_DEBUG(log, "MergeTreeDataWriter() called from:\n{}", StackTrace().toString()); } /** Split the block to blocks, each of them must be written as separate part. diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 6ef16189335..8b1bf4637b4 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -1,6 +1,7 @@ #include #include +#include "Common/logger_useful.h" #include #include #include From 62c764c2169cd5bed627bd670a5d93fa854c1fa2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 29 May 2024 17:36:43 +0200 Subject: [PATCH 035/141] work with tests --- tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index 06fe156500d..450d92476a9 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -54,7 +54,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view - 1st insert works for landing and mv tables - 2nd insert gets first block 20220901 deduplicated and second one inserted in landing table - - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded + - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded, now that block is inserted because deduplicate_blocks_in_dependent_materialized_views=0 Now it is fixed. */ From c25e9ecde35fad2b72f919fbdf54381fa184538f Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 30 May 2024 13:04:55 +0200 Subject: [PATCH 036/141] work with tests --- src/Processors/Sinks/SinkToStorage.cpp | 5 +++++ src/Processors/Transforms/NumberBlocksTransform.cpp | 8 +++++++- src/Processors/Transforms/NumberBlocksTransform.h | 2 ++ src/Processors/Transforms/buildPushingToViewsChain.cpp | 5 ++++- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 36bb70f493f..c166ec81af7 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB @@ -16,6 +17,10 @@ void SinkToStorage::onConsume(Chunk chunk) Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); consume(chunk); + + // Add comment here + DeduplicationToken::SetInitialTokenTransform::setInitialToken(chunk); + cur_chunk = std::move(chunk); } diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp index 11054f652ff..d51fe67c868 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -105,7 +105,7 @@ void CheckTokenTransform::transform(Chunk & chunk) LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, token: {}", debug, token_info->getToken(false)); } -void SetInitialTokenTransform::transform(Chunk & chunk) +void SetInitialTokenTransform::setInitialToken(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); @@ -127,6 +127,12 @@ void SetInitialTokenTransform::transform(Chunk & chunk) token_info->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); } + +void SetInitialTokenTransform::transform(Chunk & chunk) +{ + setInitialToken(chunk); +} + void SetUserTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index b4f61eb887c..a2e48d9b548 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -121,6 +121,8 @@ namespace DeduplicationToken String getName() const override { return "DeduplicationToken::SetInitialTokenTransform"; } void transform(Chunk & chunk) override; + + static void setInitialToken(Chunk & chunk); }; class ResetTokenTransform : public ISimpleTransform diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 996fe3efdc5..46ca109fe0f 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -552,8 +552,11 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } + else + { + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + } - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); From 59a97713b06f1bb2ffd24087114dbff5a0eecee8 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 30 May 2024 16:37:59 +0200 Subject: [PATCH 037/141] work with tests --- src/Processors/Sinks/SinkToStorage.cpp | 5 ----- src/Processors/Transforms/buildPushingToViewsChain.cpp | 7 ++----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index c166ec81af7..36bb70f493f 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -1,5 +1,4 @@ #include -#include #include namespace DB @@ -17,10 +16,6 @@ void SinkToStorage::onConsume(Chunk chunk) Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); consume(chunk); - - // Add comment here - DeduplicationToken::SetInitialTokenTransform::setInitialToken(chunk); - cur_chunk = std::move(chunk); } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 9dc9531b7a1..7a32b6ff038 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -530,6 +530,8 @@ Chain buildPushingToViewsChain( result_chain = Chain(std::move(processors)); result_chain.setNumThreads(std::min(views_data->max_threads, max_parallel_streams)); result_chain.setConcurrencyControl(settings.use_concurrency_control); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } if (auto * live_view = dynamic_cast(storage.get())) @@ -552,11 +554,6 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } - else - { - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); - } - if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); From 5fe2249300d2d5951329a39a49322bbd99cce614 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 31 May 2024 14:43:35 +0200 Subject: [PATCH 038/141] adjust tesy test_force_deduplication --- .../test_force_deduplication/test.py | 73 ++++++++++++++----- 1 file changed, 54 insertions(+), 19 deletions(-) diff --git a/tests/integration/test_force_deduplication/test.py b/tests/integration/test_force_deduplication/test.py index 87b2c45bbc5..14c11bc8500 100644 --- a/tests/integration/test_force_deduplication/test.py +++ b/tests/integration/test_force_deduplication/test.py @@ -29,6 +29,8 @@ def get_counts(): def test_basic(start_cluster): + old_src, old_a, old_b, old_c = 0, 0, 0, 0 + node.query( """ CREATE TABLE test (A Int64) ENGINE = ReplicatedMergeTree ('/clickhouse/test/tables/test','1') ORDER BY tuple(); @@ -39,6 +41,15 @@ def test_basic(start_cluster): INSERT INTO test values(999); """ ) + + src, a, b, c = get_counts() + assert src == old_src + 1 + assert a == old_a + 2 + assert b == old_b + 2 + assert c == old_c + 2 + old_src, old_a, old_b, old_c = src, a, b, c + + # that issert fails on test_mv_b due to partitions by A with pytest.raises(QueryRuntimeException): node.query( """ @@ -46,22 +57,23 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(10); """ ) + src, a, b, c = get_counts() + assert src == old_src + 10 + assert a == old_a + 10 + assert b == old_b + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c - old_src, old_a, old_b, old_c = get_counts() - # number of rows in test_mv_a and test_mv_c depends on order of inserts into views - assert old_src == 11 - assert old_a in (1, 11) - assert old_b == 1 - assert old_c in (1, 11) - + # deduplication only for src table node.query("INSERT INTO test SELECT number FROM numbers(10)") src, a, b, c = get_counts() - # no changes because of deduplication in source table assert src == old_src - assert a == old_a - assert b == old_b - assert c == old_c + assert a == old_a + 10 + assert b == old_b + 10 + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for MV tables does not work, because previous inserts have not written their deduplications tokens to the log due to `deduplicate_blocks_in_dependent_materialized_views = 0`. node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; @@ -69,11 +81,27 @@ def test_basic(start_cluster): """ ) src, a, b, c = get_counts() - assert src == 11 - assert a == old_a + 10 # first insert could be succesfull with disabled dedup - assert b == 11 + assert src == old_src + assert a == old_a + 10 + assert b == old_b + 10 assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for all the tables + node.query( + """ + SET deduplicate_blocks_in_dependent_materialized_views = 1; + INSERT INTO test SELECT number FROM numbers(10); + """ + ) + src, a, b, c = get_counts() + assert src == old_src + assert a == old_a + assert b == old_b + assert c == old_c + old_src, old_a, old_b, old_c = src, a, b, c + + # that issert fails on test_mv_b due to partitions by A, it is an uniq data which is not deduplicated with pytest.raises(QueryRuntimeException): node.query( """ @@ -82,16 +110,23 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(100,10); """ ) + src, a, b, c = get_counts() + assert src == old_src + 10 + assert a == old_a + 10 + assert b == old_b + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for all tables, except test_mv_b. For test_mv_b it is an uniq data which is not deduplicated due to exception at previous insert node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; INSERT INTO test SELECT number FROM numbers(100,10); """ ) - src, a, b, c = get_counts() - assert src == 21 - assert a == old_a + 20 - assert b == 21 - assert c == old_c + 20 + assert src == old_src + assert a == old_a + assert b == old_b + 10 + assert c == old_c + old_src, old_a, old_b, old_c = src, a, b, c From c3f72f0cf9180397359136941c7247a812576c61 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 31 May 2024 14:44:55 +0200 Subject: [PATCH 039/141] revert changes at helpers/s3_mocks/broken_s3.py --- tests/integration/helpers/s3_mocks/broken_s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 566d4739eb0..686abc76bdf 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -246,7 +246,7 @@ class _ServerRuntime: class BrokenPipeAction: def inject_error(self, request_handler): # partial read - request_handler.rfile.read(50) + self.rfile.read(50) time.sleep(1) request_handler.connection.setsockopt( From 2b3e1920ebfe32e99c2833acce357076a7480e40 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 31 May 2024 15:22:09 +0200 Subject: [PATCH 040/141] break tests to meet the timeout --- ...uplication_insert_several_blocks.reference | 1922 ----------------- ...008_deduplication_insert_several_blocks.sh | 97 - ...ert_several_blocks_nonreplicated.reference | 962 +++++++++ ...ion_insert_several_blocks_nonreplicated.sh | 58 + ...insert_several_blocks_replicated.reference | 962 +++++++++ ...cation_insert_several_blocks_replicated.sh | 58 + ...tion_mv_generates_several_blocks.reference | 1922 ----------------- ...duplication_mv_generates_several_blocks.sh | 103 - ...tes_several_blocks_nonreplicated.reference | 962 +++++++++ ..._generates_several_blocks_nonreplicated.sh | 58 + ...erates_several_blocks_replicated.reference | 962 +++++++++ ..._mv_generates_several_blocks_replicated.sh | 58 + ...cation_several_mv_into_one_table.reference | 1410 ------------ ...deduplication_several_mv_into_one_table.sh | 111 - ..._mv_into_one_table_nonreplicated.reference | 706 ++++++ ...several_mv_into_one_table_nonreplicated.sh | 58 + ...ral_mv_into_one_table_replicated.reference | 706 ++++++ ...on_several_mv_into_one_table_replicated.sh | 58 + 18 files changed, 5608 insertions(+), 5565 deletions(-) delete mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference deleted file mode 100644 index 641735d1bb6..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference +++ /dev/null @@ -1,1922 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -FIXED - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -FIXED - -Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -FIXED - -Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -FIXED - -Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh deleted file mode 100755 index ed50110b7eb..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -# fails, it is a error. Several blocks in scr table with the same user token are processed in parallel and deduplicated - -# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" -# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False" -# fails, it is a error. The same situation as first one, but on dst table. - -RUN_ONLY="" -#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -KNOWN_ERRORS=(8 9 10 11 12 13) - -function is_known_error() -{ - n=$1 - for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then - return 0 - fi - done - return 1 -} - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi - done - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference new file mode 100644 index 00000000000..bf900aa84d2 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh new file mode 100755 index 00000000000..c758e2fb3de --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference new file mode 100644 index 00000000000..c815324b455 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh new file mode 100755 index 00000000000..45b222b1fc4 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference deleted file mode 100644 index 06f30793670..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference +++ /dev/null @@ -1,1922 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -FIXED - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -FIXED - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -FIXED - -Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -FIXED - -Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -FIXED - -Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -FIXED - -Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -FIXED - -Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -FIXED - -Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh deleted file mode 100755 index 61996905135..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -# failed due to race in multi thread insertion, blocks are deduplicated in different threads - -# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -# the same as first but for dst table - -# Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# dst table deduplicates all incoming blocks from one insert because not uniq hash - -RUN_ONLY="" -#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -KNOWN_ERRORS=(8 9 10 11 12 13 16 20 24 28) - -function is_known_error() -{ - n=$1 - for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then - return 0 - fi - done - return 1 -} - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi - done - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference new file mode 100644 index 00000000000..76ef4cf6b2c --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh new file mode 100755 index 00000000000..50cf2a3bb75 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference new file mode 100644 index 00000000000..a84539df16b --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh new file mode 100755 index 00000000000..2b094e0309e --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 20: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference deleted file mode 100644 index 4d517948a25..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference +++ /dev/null @@ -1,1410 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -FIXED - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -FIXED - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -FIXED - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -FIXED - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -FIXED - -Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -FIXED - -Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -FIXED - -Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -FIXED - -Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -FIXED - -Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -FIXED - -Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -FIXED - -Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -FIXED - -Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -FIXED - -Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -FIXED - -Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -FIXED - -Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -FIXED - -Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh deleted file mode 100755 index 3d2814ed77d..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -# race condition on insert into src table - -# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -# race condition on insert into dst table - -# Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# dst deduplicates blocks from one inserts from different materialized view - -# Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -# Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -# dst deduplicates blocks from different inserts by hash - -KNOWN_ERRORS=(8 9 10 11 12 13 16 20 24 28 17 21 25 29) - -function is_known_error() -{ - n=$1 - for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then - return 0 - fi - done - return 1 -} - -RUN_ONLY="" -#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi - done - done - done - done - done - done -done - -echo -echo "All cases executed" - - diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference new file mode 100644 index 00000000000..b6a3e0175a7 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference @@ -0,0 +1,706 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh new file mode 100755 index 00000000000..33da54b90f1 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference new file mode 100644 index 00000000000..1921103f49e --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference @@ -0,0 +1,706 @@ + +Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh new file mode 100755 index 00000000000..290d1f794b2 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" From ddde0f5fed1a8d3f57e743f54b2d14dcdaf98908 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 31 May 2024 16:25:03 +0200 Subject: [PATCH 041/141] fix headers --- src/Common/CollectionOfDerived.h | 4 +- src/Interpreters/InterpreterInsertQuery.cpp | 50 +++++++++---------- src/Interpreters/SquashingTransform.cpp | 18 +++---- src/Processors/Chunk.h | 6 --- src/Processors/ISimpleTransform.h | 2 - .../Algorithms/ReplacingSortedAlgorithm.h | 3 +- src/Processors/Sinks/SinkToStorage.h | 3 -- .../Transforms/AggregatingInOrderTransform.h | 2 +- .../Transforms/AggregatingTransform.h | 3 +- .../Transforms/CountingTransform.cpp | 6 +-- src/Processors/Transforms/CountingTransform.h | 2 - ...m.cpp => DeduplicationTokenTransforms.cpp} | 12 ++++- ...sform.h => DeduplicationTokenTransforms.h} | 15 +----- src/Processors/Transforms/JoiningTransform.h | 5 +- .../Transforms/MaterializingTransform.cpp | 2 - ...ergingAggregatedMemoryEfficientTransform.h | 2 +- .../Transforms/SquashingChunksTransform.h | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 7 ++- src/Storages/LiveView/StorageLiveView.cpp | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 15 +++--- .../MergeTree/ReplicatedMergeTreeSink.cpp | 26 +++++----- src/Storages/StorageDistributed.cpp | 2 - src/Storages/StorageLog.cpp | 3 +- src/Storages/WindowView/StorageWindowView.cpp | 2 +- 24 files changed, 88 insertions(+), 106 deletions(-) rename src/Processors/Transforms/{NumberBlocksTransform.cpp => DeduplicationTokenTransforms.cpp} (91%) rename src/Processors/Transforms/{NumberBlocksTransform.h => DeduplicationTokenTransforms.h} (89%) diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h index c98e375b4b1..60a91e593f9 100644 --- a/src/Common/CollectionOfDerived.h +++ b/src/Common/CollectionOfDerived.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include @@ -41,7 +43,7 @@ private: using Records = std::vector; public: - void swap(Self & other) + void swap(Self & other) noexcept { records.swap(other.records); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 249c69b51b9..758ac4ab954 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include @@ -309,8 +309,8 @@ Chain InterpreterInsertQuery::buildSink( ThreadGroupPtr running_group, std::atomic_uint64_t * elapsed_counter_ms) { - LOG_DEBUG(getLogger("InsertQuery"), - "called InterpreterInsertQuery::buildSink() engine {} table name {}.{}", table->getName(), table->getStorageID().database_name, table->getStorageID().table_name); + // LOG_DEBUG(getLogger("InsertQuery"), + // "called InterpreterInsertQuery::buildSink() engine {} table name {}.{}", table->getName(), table->getStorageID().database_name, table->getStorageID().table_name); ThreadStatus * thread_status = current_thread; @@ -413,9 +413,9 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < sink_streams; ++i) { - LOG_DEBUG(getLogger("InsertQuery"), - "call buildSink sink_streams table name {}.{}, stream {}/{}", - table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams); + // LOG_DEBUG(getLogger("InsertQuery"), + // "call buildSink sink_streams table name {}.{}, stream {}/{}", + // table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams); auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, running_group, /* elapsed_counter_ms= */ nullptr); @@ -425,9 +425,9 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < presink_streams; ++i) { - LOG_DEBUG(getLogger("InsertQuery"), - "call buildSink presink_streams table name {}.{}, stream {}/{}", - table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); + // LOG_DEBUG(getLogger("InsertQuery"), + // "call buildSink presink_streams table name {}.{}, stream {}/{}", + // table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); presink_chains.emplace_back(std::move(out)); @@ -462,8 +462,8 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & ContextPtr select_context = getContext(); - LOG_DEBUG(getLogger("InsertQuery"), - "execute() is_trivial_insert_select {} prefersLargeBlocks={} max_insert_threads {}", is_trivial_insert_select, table->prefersLargeBlocks(), settings.max_insert_threads); + // LOG_DEBUG(getLogger("InsertQuery"), + // "execute() is_trivial_insert_select {} prefersLargeBlocks={} max_insert_threads {}", is_trivial_insert_select, table->prefersLargeBlocks(), settings.max_insert_threads); if (is_trivial_insert_select) { @@ -511,9 +511,9 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.dropTotalsAndExtremes(); - LOG_DEBUG(getLogger("InsertQuery"), - "adding transforms, pipline size {}, threads {}, max_insert_threads {}", - pipeline.getNumStreams(), pipeline.getNumThreads(), settings.max_insert_threads); + // LOG_DEBUG(getLogger("InsertQuery"), + // "adding transforms, pipline size {}, threads {}, max_insert_threads {}", + // pipeline.getNumStreams(), pipeline.getNumThreads(), settings.max_insert_threads); /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. @@ -743,13 +743,13 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); - bool is_table_dist = false; - if (auto * dist_storage = dynamic_cast(table.get())) - { - is_table_dist = true; - LOG_DEBUG(getLogger("InsertQuery"), - "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); - } + // bool is_table_dist = false; + // if (auto * dist_storage = dynamic_cast(table.get())) + // { + // is_table_dist = true; + // // LOG_DEBUG(getLogger("InsertQuery"), + // // "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); + // } if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); @@ -780,24 +780,24 @@ BlockIO InterpreterInsertQuery::execute() auto distributed = table->distributedWrite(query, getContext()); if (distributed) { - LOG_DEBUG(getLogger("InsertQuery"),"as dist pipeline, is_table_dist {}", is_table_dist); + // LOG_DEBUG(getLogger("InsertQuery"),"as dist pipeline, is_table_dist {}", is_table_dist); res.pipeline = std::move(*distributed); } else { - LOG_DEBUG(getLogger("InsertQuery"),"as insert select after dist, is_table_dist {}", is_table_dist); + // LOG_DEBUG(getLogger("InsertQuery"),"as insert select after dist, is_table_dist {}", is_table_dist); res.pipeline = buildInsertSelectPipeline(query, table); } } else { - LOG_DEBUG(getLogger("InsertQuery"),"as insert select, is_table_dist {}", is_table_dist); + // LOG_DEBUG(getLogger("InsertQuery"),"as insert select, is_table_dist {}", is_table_dist); res.pipeline = buildInsertSelectPipeline(query, table); } } else { - LOG_DEBUG(getLogger("InsertQuery"),"as just insert, is_table_dist {}", is_table_dist); + // LOG_DEBUG(getLogger("InsertQuery"),"as just insert, is_table_dist {}", is_table_dist); res.pipeline = buildInsertPipeline(query, table); } diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 30c801aaaff..a539870d50c 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -72,10 +72,10 @@ void SquashingTransform::append(Block && input_block) return; } - LOG_DEBUG(getLogger("SquashingTransform"), - "input_block rows {}, size {}, columns {}, accumulated_block rows {}, size {}, columns {}, ", - input_block.rows(), input_block.bytes(), input_block.columns(), - accumulated_block.rows(), accumulated_block.bytes(), accumulated_block.columns()); + // LOG_DEBUG(getLogger("SquashingTransform"), + // "input_block rows {}, size {}, columns {}, accumulated_block rows {}, size {}, columns {}, ", + // input_block.rows(), input_block.bytes(), input_block.columns(), + // accumulated_block.rows(), accumulated_block.bytes(), accumulated_block.columns()); assert(blocksHaveEqualStructure(input_block, accumulated_block)); @@ -86,11 +86,11 @@ void SquashingTransform::append(Block && input_block) const auto source_column = std::move(input_block.getByPosition(i).column); auto acc_column = std::move(accumulated_block.getByPosition(i).column); - LOG_DEBUG(getLogger("SquashingTransform"), - "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", - i, source_column->getName(), - acc_column->size(), acc_column->byteSize(), acc_column->allocatedBytes(), - source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); + // LOG_DEBUG(getLogger("SquashingTransform"), + // "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", + // i, source_column->getName(), + // acc_column->size(), acc_column->byteSize(), acc_column->allocatedBytes(), + // source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); auto mutable_column = IColumn::mutate(std::move(acc_column)); diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index b4345d18a08..1348966c0d3 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -1,15 +1,9 @@ #pragma once -#include "base/defines.h" - #include #include -#include -#include #include -#include -#include namespace DB { diff --git a/src/Processors/ISimpleTransform.h b/src/Processors/ISimpleTransform.h index a47e0e49121..629529cdffa 100644 --- a/src/Processors/ISimpleTransform.h +++ b/src/Processors/ISimpleTransform.h @@ -2,8 +2,6 @@ #include -#include - namespace DB { diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index f36e07b8a96..2f23f2a5c4d 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -1,10 +1,9 @@ #pragma once -#include #include #include #include #include -#include "Processors/Chunk.h" +#include namespace Poco { diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index c350b9f79b0..c728fa87b1e 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -1,9 +1,6 @@ #pragma once -#include #include -#include #include -#include namespace DB { diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 6433f862dfd..41a0d7fc7f1 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -5,7 +5,7 @@ #include #include #include -#include "Processors/Chunk.h" +#include namespace DB { diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 430a9a6e50a..95983c39d1e 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -1,15 +1,14 @@ #pragma once -#include #include #include #include +#include #include #include #include #include #include #include -#include "Processors/Chunk.h" namespace CurrentMetrics { diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index d39c6575292..2c6b3bd8638 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,9 +1,9 @@ - -#include #include + +#include +#include #include #include -#include "IO/Progress.h" namespace ProfileEvents diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index 4efcf147ac7..05d8e2aeac8 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -4,8 +4,6 @@ #include #include -#include - namespace DB { diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp similarity index 91% rename from src/Processors/Transforms/NumberBlocksTransform.cpp rename to src/Processors/Transforms/DeduplicationTokenTransforms.cpp index d51fe67c868..ea4537bb5ad 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -1,4 +1,4 @@ -#include +#include #include @@ -18,6 +18,16 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +void RestoreChunkInfosTransform::transform(Chunk & chunk) +{ + LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "chunk infos before: {}:{}, append: {}:{}, chunk has rows {}", + chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), + chunk_infos.size(), chunk_infos.debug(), + chunk.getNumRows()); + + chunk.getChunkInfos().append(chunk_infos.clone()); +} + namespace DeduplicationToken { diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h similarity index 89% rename from src/Processors/Transforms/NumberBlocksTransform.h rename to src/Processors/Transforms/DeduplicationTokenTransforms.h index a2e48d9b548..f0bcc3052f7 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -14,22 +14,11 @@ namespace DB RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) : ISimpleTransform(header_, header_, true) , chunk_infos(std::move(chunk_infos_)) - { - LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "create RestoreChunkInfosTransform to append {}:{}", - chunk_infos.size(), chunk_infos.debug()); - } + {} String getName() const override { return "RestoreChunkInfosTransform"; } - void transform(Chunk & chunk) override - { - LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "chunk infos before: {}:{}, append: {}:{}, chunk has rows {}", - chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), - chunk_infos.size(), chunk_infos.debug(), - chunk.getNumRows()); - - chunk.getChunkInfos().append(chunk_infos.clone()); - } + void transform(Chunk & chunk) override; private: Chunk::ChunkInfoCollection chunk_infos; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index 5fdea2524e2..5f6d9d6fff2 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -1,8 +1,7 @@ #pragma once -#include #include -#include "Processors/Chunk.h" - +#include +#include namespace DB { diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 4a7f5187c75..9ae80e21a68 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,8 +1,6 @@ #include #include -#include - namespace DB { diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h index 958b43b11ed..3a3c1bd9c1e 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h @@ -2,8 +2,8 @@ #include #include -#include "Processors/Chunk.h" #include +#include #include #include #include diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index f0334549d4c..860e84f2cd3 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -4,7 +4,7 @@ #include #include #include -#include "Processors/Chunk.h" +#include namespace DB { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 7a32b6ff038..bef00fa3f1d 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -5,7 +5,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -15,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -24,9 +25,7 @@ #include #include #include -#include "Core/Field.h" -#include -#include +#include #include #include diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index b9d29a90f56..dd20bea4dd6 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -22,12 +22,12 @@ limitations under the License. */ #include #include #include +#include #include #include #include #include #include -#include "Processors/Transforms/NumberBlocksTransform.h" #include #include diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 0953cdc5d72..ba81bb7a56d 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,12 +1,13 @@ -#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include "Common/Exception.h" -#include -#include "Interpreters/StorageID.h" + +#include namespace ProfileEvents { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 62d30764ca8..16bb9827c6e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,23 +1,25 @@ -#include -#include -#include -#include -#include -#include #include "Common/Exception.h" #include #include #include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include +#include +#include +#include + #include +#include + namespace ProfileEvents { diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 257c8c312e5..5e03840fa36 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -23,7 +23,6 @@ #include -#include "Common/logger_useful.h" #include #include #include @@ -107,7 +106,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 8b1bf4637b4..1a84f578cf8 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -1,7 +1,7 @@ #include #include +#include -#include "Common/logger_useful.h" #include #include #include @@ -22,7 +22,6 @@ #include #include -#include "StorageLogSettings.h" #include #include #include diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 4ae91d64023..17ecba2b4a5 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -32,12 +32,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include From 6dfd226daa8421055f3a1103fa72323c68c71959 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 4 Jun 2024 17:27:13 +0200 Subject: [PATCH 042/141] fix populate --- .../DeduplicationTokenTransforms.cpp | 29 ++++++++++--------- .../Transforms/DeduplicationTokenTransforms.h | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 15 ++++++++-- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index ea4537bb5ad..4f822e4aebb 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -67,6 +67,9 @@ void TokenInfo::setSourceBlockNumber(size_t sbn) void TokenInfo::setViewID(const String & id) { + LOG_DEBUG(getLogger("TokenInfo"), + "token: {}, stage: {}, view id: {}", + getToken(false), stage, id); chassert(stage == VIEW_ID); addTokenPart(fmt::format(":view-id-{}", id)); stage = VIEW_BLOCK_NUMBER; @@ -115,7 +118,18 @@ void CheckTokenTransform::transform(Chunk & chunk) LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, token: {}", debug, token_info->getToken(false)); } -void SetInitialTokenTransform::setInitialToken(Chunk & chunk) +String SetInitialTokenTransform::getInitialToken(const Chunk & chunk) +{ + SipHash hash; + for (const auto & colunm : chunk.getColumns()) + colunm->updateHashFast(hash); + + const auto hash_value = hash.get128(); + return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); +} + + +void SetInitialTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); @@ -129,18 +143,7 @@ void SetInitialTokenTransform::setInitialToken(Chunk & chunk) if (token_info->tokenInitialized()) return; - SipHash hash; - for (const auto & colunm : chunk.getColumns()) - colunm->updateHashFast(hash); - - const auto hash_value = hash.get128(); - token_info->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); -} - - -void SetInitialTokenTransform::transform(Chunk & chunk) -{ - setInitialToken(chunk); + token_info->setInitialToken(getInitialToken(chunk)); } void SetUserTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index f0bcc3052f7..46d355eb487 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -111,7 +111,7 @@ namespace DeduplicationToken void transform(Chunk & chunk) override; - static void setInitialToken(Chunk & chunk); + static String getInitialToken(const Chunk & chunk); }; class ResetTokenTransform : public ISimpleTransform diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index bef00fa3f1d..b259e803f80 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -529,8 +529,6 @@ Chain buildPushingToViewsChain( result_chain = Chain(std::move(processors)); result_chain.setNumThreads(std::min(views_data->max_threads, max_parallel_streams)); result_chain.setConcurrencyControl(settings.use_concurrency_control); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } if (auto * live_view = dynamic_cast(storage.get())) @@ -538,12 +536,25 @@ Chain buildPushingToViewsChain( auto sink = std::make_shared(live_view_header, *live_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { auto sink = std::make_shared(window_view->getInputHeader(), *window_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + } + else if (dynamic_cast(storage.get())) + { + auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); + metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); + sink->setRuntimeData(thread_status, elapsed_counter_ms); + result_chain.addSource(std::move(sink)); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) From d72fac13ec7d02d35e49be8f799c82c4762b242b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 4 Jun 2024 18:58:05 +0200 Subject: [PATCH 043/141] mark insert block with a set of block ids from all partitions --- .../DeduplicationTokenTransforms.cpp | 26 ++++++-- .../Transforms/DeduplicationTokenTransforms.h | 3 +- src/Storages/MergeTree/MergeTreeSink.cpp | 7 ++- .../MergeTree/ReplicatedMergeTreeSink.cpp | 7 ++- ...on_insert_into_partitioned_table.reference | 35 +++++++++++ ...lication_insert_into_partitioned_table.sql | 63 +++++++++++++++++++ 6 files changed, 132 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 4f822e4aebb..dba6fc40b11 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -39,15 +39,24 @@ String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const result.reserve(getTotalSize()); for (const auto & part : parts) + { + if (!result.empty()) + result.append(":"); result.append(part); + } return result; } -void DB::DeduplicationToken::TokenInfo::setInitialToken(String part) +void DB::DeduplicationToken::TokenInfo::addPieceToInitialToken(String part) { chassert(stage == INITIAL); addTokenPart(std::move(part)); +} + +void DB::DeduplicationToken::TokenInfo::closeInitialToken() +{ + chassert(stage == INITIAL); stage = VIEW_ID; } @@ -61,7 +70,7 @@ void TokenInfo::setUserToken(const String & token) void TokenInfo::setSourceBlockNumber(size_t sbn) { chassert(stage == SOURCE_BLOCK_NUMBER); - addTokenPart(fmt::format(":source-number-{}", sbn)); + addTokenPart(fmt::format("source-number-{}", sbn)); stage = VIEW_ID; } @@ -71,14 +80,14 @@ void TokenInfo::setViewID(const String & id) "token: {}, stage: {}, view id: {}", getToken(false), stage, id); chassert(stage == VIEW_ID); - addTokenPart(fmt::format(":view-id-{}", id)); + addTokenPart(fmt::format("view-id-{}", id)); stage = VIEW_BLOCK_NUMBER; } void TokenInfo::setViewBlockNumber(size_t mvbn) { chassert(stage == VIEW_BLOCK_NUMBER); - addTokenPart(fmt::format(":view-block-{}", mvbn)); + addTokenPart(fmt::format("view-block-{}", mvbn)); stage = VIEW_ID; } @@ -96,10 +105,14 @@ void TokenInfo::addTokenPart(String part) size_t TokenInfo::getTotalSize() const { + if (parts.empty()) + return 0; + size_t size = 0; for (const auto & part : parts) size += part.size(); - return size; + + return size + parts.size() - 1; } void CheckTokenTransform::transform(Chunk & chunk) @@ -143,7 +156,8 @@ void SetInitialTokenTransform::transform(Chunk & chunk) if (token_info->tokenInitialized()) return; - token_info->setInitialToken(getInitialToken(chunk)); + token_info->addPieceToInitialToken(getInitialToken(chunk)); + token_info->closeInitialToken(); } void SetUserTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 46d355eb487..27bb21dfad1 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -38,7 +38,8 @@ namespace DeduplicationToken bool empty() const { return parts.empty(); } bool tokenInitialized() const { return stage != INITIAL && stage != SOURCE_BLOCK_NUMBER; } - void setInitialToken(String part); + void addPieceToInitialToken(String part); + void closeInitialToken(); void setUserToken(const String & token); void setSourceBlockNumber(size_t sbn); void setViewID(const String & id); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index ba81bb7a56d..b31e7e6a562 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -147,7 +147,7 @@ void MergeTreeSink::consume(Chunk & chunk) if (!token_info->tokenInitialized()) { chassert(temp_part.part); - token_info->setInitialToken(temp_part.part->getPartBlockIDHash()); + token_info->addPieceToInitialToken(temp_part.part->getPartBlockIDHash()); } if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) @@ -194,6 +194,11 @@ void MergeTreeSink::consume(Chunk & chunk) }); } + if (!token_info->tokenInitialized()) + { + token_info->closeInitialToken(); + } + finishDelayedChunk(); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 16bb9827c6e..8cb4095f1e6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -393,7 +393,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (!token_info->tokenInitialized()) { chassert(temp_part.part); - token_info->setInitialToken(temp_part.part->getPartBlockIDHash()); + token_info->addPieceToInitialToken(temp_part.part->getPartBlockIDHash()); } } @@ -440,6 +440,11 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } + if (!token_info->tokenInitialized()) + { + token_info->closeInitialToken(); + } + finishDelayedChunk(zookeeper); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference new file mode 100644 index 00000000000..e69cf2be182 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference @@ -0,0 +1,35 @@ +no user deduplication token +partitioned_table: +1 A +1 D +2 B +2 C +mv_table: +1 A +1 A +1 D +2 B +2 B +2 C +with user deduplication token +partitioned_table: +1 A +1 A +1 D +2 B +2 B +2 C +mv_table: +1 A +1 A +1 D +2 B +2 B +2 C +with incorrect ussage of user deduplication token +partitioned_table: +1 A +2 B +mv_table: +1 A +2 B diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql new file mode 100644 index 00000000000..918b7f2553d --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql @@ -0,0 +1,63 @@ +DROP TABLE IF EXISTS partitioned_table; +DROP TABLE IF EXISTS mv_table; + +CREATE TABLE partitioned_table + (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') + partition by key % 10 + order by tuple(); + +CREATE MATERIALIZED VIEW mv_table (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') + ORDER BY tuple() + AS SELECT key, value FROM partitioned_table; + +SET deduplicate_blocks_in_dependent_materialized_views = 1; + + +SELECT 'no user deduplication token'; + +INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'B'); +INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'C'); +INSERT INTO partitioned_table VALUES (1, 'D'), (2, 'B'); + +SELECT 'partitioned_table is deduplicated bacause deduplication works in scope of one partiotion:'; +SELECT * FROM partitioned_table ORDER BY ALL; +SELECT 'mv_table is not deduplicated because the inserted blocks was different:'; +SELECT * FROM mv_table ORDER BY ALL; + +TRUNCATE TABLE partitioned_table; +TRUNCATE TABLE mv_table; + + +SELECT 'with user deduplication token'; + +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_1' VALUES (1, 'A'), (2, 'B'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_2' VALUES (1, 'A'), (2, 'C'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_3' VALUES (1, 'D'), (2, 'B'); + +SELECT 'partitioned_table is not deduplicated because different tokens:'; +SELECT * FROM partitioned_table ORDER BY ALL; +SELECT 'mv_table is not deduplicated because different tokens:'; +SELECT * FROM mv_table ORDER BY ALL; + +TRUNCATE TABLE partitioned_table; +TRUNCATE TABLE mv_table; + + +SELECT 'with incorrect ussage of user deduplication token'; + +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'B'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'C'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'D'), (2, 'B'); + +SELECT 'partitioned_table is deduplicated because equal tokens:'; +SELECT * FROM partitioned_table ORDER BY ALL; +SELECT 'mv_table is deduplicated because equal tokens:'; +SELECT * FROM mv_table ORDER BY ALL; + +TRUNCATE TABLE partitioned_table; +TRUNCATE TABLE mv_table; + +DROP TABLE partitioned_table; +DROP TABLE mv_table; From dbc07ec573d3310b4f5019b8887fd34288bf23cd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 4 Jun 2024 20:28:38 +0200 Subject: [PATCH 044/141] adjust tests --- ...02912_ingestion_mv_deduplication.reference | 2 +- .../02912_ingestion_mv_deduplication.sql | 2 +- ...on_insert_into_partitioned_table.reference | 12 ++--- ...lication_insert_into_partitioned_table.sql | 44 ++++++++++++++----- 4 files changed, 40 insertions(+), 20 deletions(-) diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference index ae82b9c0463..07deb7c2565 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference @@ -17,7 +17,7 @@ 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 42 +2022-09-01 12:00:00 84 2023-09-01 12:00:00 42 -- Regression introduced in https://github.com/ClickHouse/ClickHouse/pull/54184 -- Landing (Agg/Replacing)MergeTree diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index 450d92476a9..a2378fd8f67 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -98,7 +98,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view This is what happens now: - 1st insert works for landing and mv tables - - 2nd insert gets first block 20220901 deduplicated and second one inserted for landing and mv tables + - 2nd insert gets first block 20220901 deduplicated for landing and both rows are inserted for mv tables */ SET deduplicate_blocks_in_dependent_materialized_views = 1, max_insert_delayed_streams_for_parallel_write = 1000; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference index e69cf2be182..c82a6eaa213 100644 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference @@ -1,10 +1,10 @@ no user deduplication token -partitioned_table: +partitioned_table is deduplicated bacause deduplication works in scope of one partiotion: 1 A 1 D 2 B 2 C -mv_table: +mv_table is not deduplicated because the inserted blocks was different: 1 A 1 A 1 D @@ -12,14 +12,14 @@ mv_table: 2 B 2 C with user deduplication token -partitioned_table: +partitioned_table is not deduplicated because different tokens: 1 A 1 A 1 D 2 B 2 B 2 C -mv_table: +mv_table is not deduplicated because different tokens: 1 A 1 A 1 D @@ -27,9 +27,9 @@ mv_table: 2 B 2 C with incorrect ussage of user deduplication token -partitioned_table: +partitioned_table is deduplicated because equal tokens: 1 A 2 B -mv_table: +mv_table is deduplicated because equal tokens: 1 A 2 B diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql index 918b7f2553d..2eb931f7f73 100644 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql @@ -1,6 +1,12 @@ DROP TABLE IF EXISTS partitioned_table; DROP TABLE IF EXISTS mv_table; + +SET deduplicate_blocks_in_dependent_materialized_views = 1; + + +SELECT 'no user deduplication token'; + CREATE TABLE partitioned_table (key Int64, value String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') @@ -12,11 +18,6 @@ CREATE MATERIALIZED VIEW mv_table (key Int64, value String) ORDER BY tuple() AS SELECT key, value FROM partitioned_table; -SET deduplicate_blocks_in_dependent_materialized_views = 1; - - -SELECT 'no user deduplication token'; - INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'B'); INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'C'); INSERT INTO partitioned_table VALUES (1, 'D'), (2, 'B'); @@ -26,12 +27,23 @@ SELECT * FROM partitioned_table ORDER BY ALL; SELECT 'mv_table is not deduplicated because the inserted blocks was different:'; SELECT * FROM mv_table ORDER BY ALL; -TRUNCATE TABLE partitioned_table; -TRUNCATE TABLE mv_table; +DROP TABLE partitioned_table; +DROP TABLE mv_table; SELECT 'with user deduplication token'; +CREATE TABLE partitioned_table + (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') + partition by key % 10 + order by tuple(); + +CREATE MATERIALIZED VIEW mv_table (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') + ORDER BY tuple() + AS SELECT key, value FROM partitioned_table; + INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_1' VALUES (1, 'A'), (2, 'B'); INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_2' VALUES (1, 'A'), (2, 'C'); INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_3' VALUES (1, 'D'), (2, 'B'); @@ -41,12 +53,23 @@ SELECT * FROM partitioned_table ORDER BY ALL; SELECT 'mv_table is not deduplicated because different tokens:'; SELECT * FROM mv_table ORDER BY ALL; -TRUNCATE TABLE partitioned_table; -TRUNCATE TABLE mv_table; +DROP TABLE partitioned_table; +DROP TABLE mv_table; SELECT 'with incorrect ussage of user deduplication token'; +CREATE TABLE partitioned_table + (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') + partition by key % 10 + order by tuple(); + +CREATE MATERIALIZED VIEW mv_table (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') + ORDER BY tuple() + AS SELECT key, value FROM partitioned_table; + INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'B'); INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'C'); INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'D'), (2, 'B'); @@ -56,8 +79,5 @@ SELECT * FROM partitioned_table ORDER BY ALL; SELECT 'mv_table is deduplicated because equal tokens:'; SELECT * FROM mv_table ORDER BY ALL; -TRUNCATE TABLE partitioned_table; -TRUNCATE TABLE mv_table; - DROP TABLE partitioned_table; DROP TABLE mv_table; From 273571c6f519b99c556b3b443b391e5dc592a682 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 7 Jun 2024 19:05:19 +0200 Subject: [PATCH 045/141] fix tests --- src/Processors/Transforms/buildPushingToViewsChain.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index b259e803f80..8ba172bf32b 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -562,6 +562,9 @@ Chain buildPushingToViewsChain( auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); + + result_chain.addSource(std::make_shared(sink->getHeader())); + result_chain.addSource(std::move(sink)); } From 3db3b365ea46ee1fc388a1788ce59b9426b99c71 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 10 Jun 2024 15:42:13 +0200 Subject: [PATCH 046/141] fix tests --- src/Processors/Transforms/buildPushingToViewsChain.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 8ba172bf32b..ed44a20e397 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -567,6 +567,10 @@ Chain buildPushingToViewsChain( result_chain.addSource(std::move(sink)); } + else + { + result_chain.addSource(std::make_shared(storage_header)); + } if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); From bdcf3a0739580c8c1e9689dfa416b2fae07feed7 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 10 Jun 2024 22:16:26 +0200 Subject: [PATCH 047/141] fix tidy build --- src/Storages/MergeTree/MergeTreeSink.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index b31e7e6a562..faf3267a759 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -189,7 +189,7 @@ void MergeTreeSink::consume(Chunk & chunk) { .temp_part = std::move(temp_part), .elapsed_ns = elapsed_ns, - .block_dedup_token = std::move(block_dedup_token), + .block_dedup_token = block_dedup_token, .part_counters = std::move(part_counters), }); } From 24bf946c00bc9e681ec4b26dbdae0a7a786bf355 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 11 Jun 2024 19:44:49 +0200 Subject: [PATCH 048/141] rm debug printing --- src/Core/Settings.h | 1 - src/Interpreters/InterpreterInsertQuery.cpp | 31 ----------------- src/Interpreters/SquashingTransform.cpp | 12 ------- .../DeduplicationTokenTransforms.cpp | 11 ------ .../Transforms/ExpressionTransform.cpp | 2 -- .../Transforms/SquashingChunksTransform.cpp | 18 ---------- src/Storages/MergeTree/MergeTreeSink.cpp | 34 ------------------- .../MergeTree/MergedBlockOutputStream.cpp | 3 -- .../MergeTree/ReplicatedMergeTreeSink.cpp | 13 ------- src/Storages/WindowView/StorageWindowView.cpp | 11 ------ 10 files changed, 136 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4128f24052b..d6779a531ae 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -626,7 +626,6 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ - M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 758ac4ab954..64fccdbe14d 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -309,9 +309,6 @@ Chain InterpreterInsertQuery::buildSink( ThreadGroupPtr running_group, std::atomic_uint64_t * elapsed_counter_ms) { - // LOG_DEBUG(getLogger("InsertQuery"), - // "called InterpreterInsertQuery::buildSink() engine {} table name {}.{}", table->getName(), table->getStorageID().database_name, table->getStorageID().table_name); - ThreadStatus * thread_status = current_thread; if (!thread_status_holder) @@ -413,10 +410,6 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < sink_streams; ++i) { - // LOG_DEBUG(getLogger("InsertQuery"), - // "call buildSink sink_streams table name {}.{}, stream {}/{}", - // table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams); - auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, running_group, /* elapsed_counter_ms= */ nullptr); @@ -425,10 +418,6 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < presink_streams; ++i) { - // LOG_DEBUG(getLogger("InsertQuery"), - // "call buildSink presink_streams table name {}.{}, stream {}/{}", - // table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); - auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); presink_chains.emplace_back(std::move(out)); } @@ -462,9 +451,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & ContextPtr select_context = getContext(); - // LOG_DEBUG(getLogger("InsertQuery"), - // "execute() is_trivial_insert_select {} prefersLargeBlocks={} max_insert_threads {}", is_trivial_insert_select, table->prefersLargeBlocks(), settings.max_insert_threads); - if (is_trivial_insert_select) { /** When doing trivial INSERT INTO ... SELECT ... FROM table, @@ -511,11 +497,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.dropTotalsAndExtremes(); - // LOG_DEBUG(getLogger("InsertQuery"), - // "adding transforms, pipline size {}, threads {}, max_insert_threads {}", - // pipeline.getNumStreams(), pipeline.getNumThreads(), settings.max_insert_threads); - - /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. if (getContext()->getSettingsRef().insert_null_as_default) { @@ -743,14 +724,6 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); - // bool is_table_dist = false; - // if (auto * dist_storage = dynamic_cast(table.get())) - // { - // is_table_dist = true; - // // LOG_DEBUG(getLogger("InsertQuery"), - // // "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); - // } - if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); @@ -780,24 +753,20 @@ BlockIO InterpreterInsertQuery::execute() auto distributed = table->distributedWrite(query, getContext()); if (distributed) { - // LOG_DEBUG(getLogger("InsertQuery"),"as dist pipeline, is_table_dist {}", is_table_dist); res.pipeline = std::move(*distributed); } else { - // LOG_DEBUG(getLogger("InsertQuery"),"as insert select after dist, is_table_dist {}", is_table_dist); res.pipeline = buildInsertSelectPipeline(query, table); } } else { - // LOG_DEBUG(getLogger("InsertQuery"),"as insert select, is_table_dist {}", is_table_dist); res.pipeline = buildInsertSelectPipeline(query, table); } } else { - // LOG_DEBUG(getLogger("InsertQuery"),"as just insert, is_table_dist {}", is_table_dist); res.pipeline = buildInsertPipeline(query, table); } diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index a539870d50c..27437d1b647 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -72,11 +72,6 @@ void SquashingTransform::append(Block && input_block) return; } - // LOG_DEBUG(getLogger("SquashingTransform"), - // "input_block rows {}, size {}, columns {}, accumulated_block rows {}, size {}, columns {}, ", - // input_block.rows(), input_block.bytes(), input_block.columns(), - // accumulated_block.rows(), accumulated_block.bytes(), accumulated_block.columns()); - assert(blocksHaveEqualStructure(input_block, accumulated_block)); try @@ -86,13 +81,6 @@ void SquashingTransform::append(Block && input_block) const auto source_column = std::move(input_block.getByPosition(i).column); auto acc_column = std::move(accumulated_block.getByPosition(i).column); - // LOG_DEBUG(getLogger("SquashingTransform"), - // "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", - // i, source_column->getName(), - // acc_column->size(), acc_column->byteSize(), acc_column->allocatedBytes(), - // source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); - - auto mutable_column = IColumn::mutate(std::move(acc_column)); mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); accumulated_block.getByPosition(i).column = std::move(mutable_column); diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index dba6fc40b11..0701e958877 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -20,11 +20,6 @@ namespace ErrorCodes void RestoreChunkInfosTransform::transform(Chunk & chunk) { - LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "chunk infos before: {}:{}, append: {}:{}, chunk has rows {}", - chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), - chunk_infos.size(), chunk_infos.debug(), - chunk.getNumRows()); - chunk.getChunkInfos().append(chunk_infos.clone()); } @@ -76,9 +71,6 @@ void TokenInfo::setSourceBlockNumber(size_t sbn) void TokenInfo::setViewID(const String & id) { - LOG_DEBUG(getLogger("TokenInfo"), - "token: {}, stage: {}, view id: {}", - getToken(false), stage, id); chassert(stage == VIEW_ID); addTokenPart(fmt::format("view-id-{}", id)); stage = VIEW_BLOCK_NUMBER; @@ -146,8 +138,6 @@ void SetInitialTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - LOG_DEBUG(getLogger("SetInitialTokenTransform"), "has token_info {}", bool(token_info)); - if (!token_info) throw Exception( ErrorCodes::LOGICAL_ERROR, @@ -208,7 +198,6 @@ void ResetTokenTransform::transform(Chunk & chunk) ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in ResetTokenTransform"); - LOG_DEBUG(getLogger("ResetTokenTransform"), "token_info was {}", token_info->getToken(false)); token_info->reset(); } diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 73d41828bc0..04fabc9a3c6 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,8 +1,6 @@ #include #include -#include - namespace DB { diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 531d264a25a..75228eb5c2d 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -17,9 +17,6 @@ SquashingChunksTransform::SquashingChunksTransform( void SquashingChunksTransform::onConsume(Chunk chunk) { - // LOG_DEBUG(getLogger("SquashingChunksTransform"), - // "onConsume {}", chunk.getNumRows()); - auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); cur_chunk = Chunk(result.block.getColumns(), result.block.rows()); @@ -36,11 +33,6 @@ void SquashingChunksTransform::onConsume(Chunk chunk) cur_chunk.setChunkInfos(chunk.getChunkInfos()); cur_chunkinfos = {}; } - - // LOG_DEBUG(getLogger("SquashingChunksTransform"), - // "got result rows {}, size {}, columns {}, infos: {}/{}", - // cur_chunk.getNumRows(), cur_chunk.bytes(), cur_chunk.getNumColumns(), - // cur_chunk.getChunkInfos().size(), cur_chunk.getChunkInfos().debug()); } else { @@ -90,11 +82,6 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::consume(Chunk chunk) { - // LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - // "transform rows {}, size {}, columns {}, infos: {}/{}", - // chunk.getNumRows(), chunk.bytes(), chunk.getNumColumns(), - // chunk.getChunkInfos().size(), chunk.getChunkInfos().debug()); - auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); if (result.block) @@ -110,11 +97,6 @@ void SimpleSquashingChunksTransform::consume(Chunk chunk) squashed_chunk.setChunkInfos(chunk.getChunkInfos()); squashed_info = {}; } - - // LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - // "got result rows {}, size {}, columns {}, infos: {}/{}", - // squashed_chunk.getNumRows(), squashed_chunk.bytes(), squashed_chunk.getNumColumns(), - // squashed_chunk.getChunkInfos().size(), squashed_chunk.getChunkInfos().debug()); } else { diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index faf3267a759..1fdcd4c5b74 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -50,8 +50,6 @@ MergeTreeSink::MergeTreeSink( , context(context_) , storage_snapshot(storage.getStorageSnapshotWithoutData(metadata_snapshot, context_)) { - LOG_INFO(storage.log, "MergeTreeSink() called for {}.{}", - storage_.getStorageID().database_name, storage_.getStorageID().getTableName()); } void MergeTreeSink::onStart() @@ -68,10 +66,6 @@ void MergeTreeSink::onFinish() void MergeTreeSink::consume(Chunk & chunk) { - LOG_INFO(storage.log, "consume() called num_blocks_processed {}, chunks: rows {} columns {} bytes {}", - num_blocks_processed, - chunk.getNumRows(), chunk.getNumColumns(), chunk.bytes()); - if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); @@ -81,8 +75,6 @@ void MergeTreeSink::consume(Chunk & chunk) auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context); - LOG_INFO(storage.log, "consume() called part_blocks.count {}", part_blocks.size()); - using DelayedPartitions = std::vector; DelayedPartitions partitions; @@ -106,18 +98,7 @@ void MergeTreeSink::consume(Chunk & chunk) context->getSettingsRef().insert_deduplication_token.value); if (token_info->tokenInitialized()) - { block_dedup_token = token_info->getToken(); - - LOG_DEBUG(storage.log, - "dedup token from insert deduplication token in chunk: {}", - block_dedup_token); - } - else - { - LOG_DEBUG(storage.log, - "dedup token from hash is calculated"); - } } for (auto & current_block : part_blocks) @@ -162,13 +143,6 @@ void MergeTreeSink::consume(Chunk & chunk) else max_insert_delayed_streams_for_parallel_write = 0; - LOG_INFO(storage.log, "consume() called for {}.{} " - "streams {} + {} -> {}, " - "max {} support_parallel_write {}", - storage.getStorageID().database_name, storage.getStorageID().getTableName(), - streams, temp_part.streams.size(), streams + temp_part.streams.size(), - max_insert_delayed_streams_for_parallel_write, support_parallel_write); - /// In case of too much columns/parts in block, flush explicitly. streams += temp_part.streams.size(); @@ -211,12 +185,8 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; - LOG_INFO(storage.log, "finishDelayedChunk() called partitions count {}", delayed_chunk->partitions.size()); - for (auto & partition : delayed_chunk->partitions) { - LOG_INFO(storage.log, "finishDelayedChunk() part name {} dedup_token {}", partition.temp_part.part->name, partition.block_dedup_token); - ProfileEventsScope scoped_attach(&partition.part_counters); partition.temp_part.finalize(); @@ -234,14 +204,10 @@ void MergeTreeSink::finishDelayedChunk() auto * deduplication_log = storage.getDeduplicationLog(); - LOG_INFO(storage.log, "finishDelayedChunk() has dedup log {}", bool(deduplication_log)); - if (deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); - LOG_INFO(storage.log, "finishDelayedChunk() block_dedup_token={}, block_id={}", partition.block_dedup_token, block_id); - auto res = deduplication_log->addPart(block_id, part->info); if (!res.second) { diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 09cdc6a78bc..c5799fab09f 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -336,9 +336,6 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Permutation * permutation) { - LOG_DEBUG(getLogger("MergedBlockOutputStream()"), "writeImpl block rows {} size {} getPartDirectory {}", - block.rows(), block.bytes(), data_part_storage->getPartDirectory()); - block.checkNumberOfRows(); size_t rows = block.rows(); if (!rows) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 8cb4095f1e6..cf3af59118e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -311,20 +311,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) context->getSettingsRef().insert_deduplication_token.value); if (token_info->tokenInitialized()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block block_dedup_token = token_info->getToken(); - - LOG_DEBUG(storage.log, - "dedup token from insert deduplication token in chunk: {}", - block_dedup_token); - } - else - { - LOG_DEBUG(storage.log, - "dedup token from hash is calculated"); - } } auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 17ecba2b4a5..d4f6621b4fc 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1415,11 +1415,6 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) void StorageWindowView::writeIntoWindowView( StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) { - LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: rows {}, infos {} with {}, window column {}", - block.rows(), - chunk_infos.size(), chunk_infos.debug(), - window_view.timestamp_column_name); - window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) std::this_thread::sleep_for(std::chrono::milliseconds(100)); @@ -1464,9 +1459,6 @@ void StorageWindowView::writeIntoWindowView( lateness_bound = t_max_fired_watermark; } - LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: lateness_bound {}, window_view.is_proctime {}", - lateness_bound, window_view.is_proctime); - if (lateness_bound > 0) /// Add filter, which leaves rows with timestamp >= lateness_bound { auto filter_function = makeASTFunction( @@ -1583,9 +1575,6 @@ void StorageWindowView::writeIntoWindowView( if (block_max_timestamp) window_view.updateMaxTimestamp(block_max_timestamp); - - LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: block_max_timestamp {}", - block_max_timestamp); } UInt32 lateness_upper_bound = 0; From 4998c5888e6723f81627a14799ae0ade7676189b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 11 Jun 2024 20:57:06 +0200 Subject: [PATCH 049/141] depricate update_insert_deduplication_token_in_dependent_materialized_views --- src/Core/Settings.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d6779a531ae..8ab66ba2a3e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -626,6 +626,7 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ + M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Depricated.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ From 3345f27b645838b058243a91bd69c8383f812324 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 11 Jun 2024 22:38:54 +0200 Subject: [PATCH 050/141] fix typo --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8ab66ba2a3e..27201cc6cf0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -626,7 +626,7 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ - M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Depricated.", 0) \ + M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Deprecated.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ From 89234371438b94706fe903b9e55a30651bed0238 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 12 Jun 2024 19:36:23 +0200 Subject: [PATCH 051/141] add tests for cases from docs --- src/Core/Settings.h | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 5 +- src/Storages/MergeTree/MergeTreeSink.cpp | 7 +- ...08_deduplication_cases_from_docs.reference | 41 +++ .../03008_deduplication_cases_from_docs.sql | 331 ++++++++++++++++++ 5 files changed, 381 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference create mode 100644 tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 27201cc6cf0..1bfb5a1e18f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -624,7 +624,7 @@ class IColumn; M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \ M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ - M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Deprecated.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ed44a20e397..b35b6266735 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -441,9 +442,7 @@ Chain buildPushingToViewsChain( */ result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); - bool disable_deduplication_for_children = false; - if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) - disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); + bool disable_deduplication_for_children = !context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views; auto table_id = storage->getStorageID(); auto views = DatabaseCatalog::instance().getDependentViews(table_id); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 1fdcd4c5b74..4e20eade589 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,5 +1,8 @@ +#include +#include #include #include +#include #include #include #include @@ -185,6 +188,8 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; + const Settings & settings = context->getSettingsRef(); + for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -204,7 +209,7 @@ void MergeTreeSink::finishDelayedChunk() auto * deduplication_log = storage.getDeduplicationLog(); - if (deduplication_log) + if (settings.insert_deduplicate && deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference new file mode 100644 index 00000000000..4893274c1cd --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference @@ -0,0 +1,41 @@ +Different materialized view insert into one underlayed table equal data. +first attempt +from dst 1 A all_1_1_0 +from mv_dst 0 A all_1_1_0 +from mv_dst 0 A all_2_2_0 +second attempt +from dst 1 A all_1_1_0 +from mv_dst 0 A all_1_1_0 +from mv_dst 0 A all_2_2_0 +Different insert operations generate the same data after transformation in underlied table of materialized view. +first attempt +from dst 1 A all_1_1_0 +from mv_dst 0 A all_1_1_0 +second attempt +from dst 1 A all_1_1_0 +from dst 2 A all_2_2_0 +from mv_dst 0 A all_1_1_0 +from mv_dst 0 A all_2_2_0 +Indentical blocks in insertion with `insert_deduplication_token` +first attempt +from dst 0 A all_1_1_0 +from dst 0 A all_2_2_0 +second attempt +from dst 0 A all_1_1_0 +from dst 0 A all_2_2_0 +third attempt +from dst 0 A all_1_1_0 +from dst 0 A all_2_2_0 +Indentical blocks in insertion +from dst 0 A all_1_1_0 +Indentical blocks after materialised view`s transformation +first attempt +from dst 1 B all_1_1_0 +from dst 2 B all_2_2_0 +from mv_dst 0 B all_1_1_0 +from mv_dst 0 B all_2_2_0 +second attempt +from dst 1 B all_1_1_0 +from dst 2 B all_2_2_0 +from mv_dst 0 B all_1_1_0 +from mv_dst 0 B all_2_2_0 diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql new file mode 100644 index 00000000000..7927a6b1edf --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql @@ -0,0 +1,331 @@ +-- ######### +select 'Different materialized view insert into one underlayed table equal data.'; + +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS mv_dst; +DROP TABLE IF EXISTS mv_first; +DROP TABLE IF EXISTS mv_second; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE TABLE mv_dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE MATERIALIZED VIEW mv_first +TO mv_dst +AS SELECT + 0 AS key, + value AS value +FROM dst; + +CREATE MATERIALIZED VIEW mv_second +TO mv_dst +AS SELECT + 0 AS key, + value AS value +FROM dst; + +SET deduplicate_blocks_in_dependent_materialized_views=1; + +select 'first attempt'; + +INSERT INTO dst VALUES (1, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst VALUES (1, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +DROP TABLE mv_second; +DROP TABLE mv_first; +DROP TABLE mv_dst; +DROP TABLE dst; + + +-- ######### +select 'Different insert operations generate the same data after transformation in underlied table of materialized view.'; + +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS mv_dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE MATERIALIZED VIEW mv_dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000 +AS SELECT + 0 AS key, + value AS value +FROM dst; + +SET deduplicate_blocks_in_dependent_materialized_views=1; + +select 'first attempt'; + +INSERT INTO dst VALUES (1, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst VALUES (2, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +DROP TABLE mv_dst; +DROP TABLE dst; + + +-- ######### +select 'Indentical blocks in insertion with `insert_deduplication_token`'; + +DROP TABLE IF EXISTS dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + +select 'first attempt'; + +INSERT INTO dst SELECT + 0 AS key, + 'A' AS value +FROM numbers(2) +SETTINGS insert_deduplication_token='some_user_token'; + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst SELECT + 0 AS key, + 'A' AS value +FROM numbers(2) +SETTINGS insert_deduplication_token='some_user_token'; + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +select 'third attempt'; + +INSERT INTO dst SELECT + 1 AS key, + 'b' AS value +FROM numbers(2) +SETTINGS insert_deduplication_token='some_user_token'; + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +DROP TABLE dst; + + +-- ######### +select 'Indentical blocks in insertion'; + +DROP TABLE IF EXISTS dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + +INSERT INTO dst SELECT + 0 AS key, + 'A' AS value +FROM numbers(2); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +DROP TABLE dst; + + +-- ######### +select 'Indentical blocks after materialised view`s transformation'; + +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS mv_dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE MATERIALIZED VIEW mv_dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000 +AS SELECT + 0 AS key, + value AS value +FROM dst; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + +SET deduplicate_blocks_in_dependent_materialized_views=1; + +select 'first attempt'; + +INSERT INTO dst SELECT + number + 1 AS key, + IF(key = 0, 'A', 'B') AS value +FROM numbers(2); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst SELECT + number + 1 AS key, + IF(key = 0, 'A', 'B') AS value +FROM numbers(2); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +DROP TABLE mv_dst; +DROP TABLE dst; From 63852d9b0015b47ec93e2b7755c14bb7b002fcbd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 12 Jun 2024 20:45:29 +0200 Subject: [PATCH 052/141] fix fast test 00633_materialized_view_and_too_many_parts_zookeeper --- .../00633_materialized_view_and_too_many_parts_zookeeper.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh index 1fb219108da..8f7d19028b0 100755 --- a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh +++ b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh @@ -36,8 +36,8 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE c" echo ${CLICKHOUSE_CLIENT} --query "CREATE TABLE root (d UInt64) ENGINE = Null" ${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW d (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/d', '1') ORDER BY d AS SELECT * FROM root" -${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; -${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; ${CLICKHOUSE_CLIENT} --query "SELECT * FROM d"; ${CLICKHOUSE_CLIENT} --query "DROP TABLE root" ${CLICKHOUSE_CLIENT} --query "DROP TABLE d" From a0a7f176126a8f9dc65d7ea8cb488e13feda6ccb Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 17 Jun 2024 16:32:16 +0200 Subject: [PATCH 053/141] add tags to the new tests --- .../03008_deduplication_insert_several_blocks_nonreplicated.sh | 1 + .../03008_deduplication_insert_several_blocks_replicated.sh | 1 + ...08_deduplication_mv_generates_several_blocks_nonreplicated.sh | 1 + ...03008_deduplication_mv_generates_several_blocks_replicated.sh | 1 + ...3008_deduplication_several_mv_into_one_table_nonreplicated.sh | 1 + .../03008_deduplication_several_mv_into_one_table_replicated.sh | 1 + 6 files changed, 6 insertions(+) diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh index c758e2fb3de..49eb52b47fd 100755 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh index 45b222b1fc4..53af06d4a6f 100755 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh index 50cf2a3bb75..7d4f5240cd1 100755 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh index 2b094e0309e..109d1674f3a 100755 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh index 33da54b90f1..fe3d610a758 100755 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh index 290d1f794b2..9adee6d53d4 100755 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 90faa7b1eced87e0f2979e792d6569d1e2e005e8 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 19 Jun 2024 21:44:37 +0100 Subject: [PATCH 054/141] impl --- src/Common/CgroupsMemoryUsageObserver.cpp | 178 +++++++++++----------- src/Common/CgroupsMemoryUsageObserver.h | 14 +- 2 files changed, 102 insertions(+), 90 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 8a4792f0a5a..20db6a64a31 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -1,3 +1,5 @@ +#include +#include #include #if defined(OS_LINUX) @@ -28,8 +30,6 @@ namespace DB namespace ErrorCodes { - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_OPEN_FILE; extern const int FILE_DOESNT_EXIST; extern const int INCORRECT_DATA; } @@ -107,6 +107,75 @@ void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmo namespace { +/// Format is +/// kernel 5 +/// rss 15 +/// [...] +uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key) +{ + while (!buf.eof()) + { + std::string current_key; + readStringUntilWhitespace(current_key, buf); + if (current_key != key) + { + std::string dummy; + readStringUntilNewlineInto(dummy, buf); + buf.ignore(); + continue; + } + + assertChar(' ', buf); + uint64_t mem_usage = 0; + readIntText(mem_usage, buf); + return mem_usage; + } + + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName()); +} + +struct CgroupsV1Reader : ICgroupsReader +{ + CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } + + uint64_t readMemoryUsage() override + { + std::lock_guard lock(mutex); + buf.rewind(); + return readMetricFromStatFile(buf, "rss"); + } + +private: + std::mutex mutex; + ReadBufferFromFile buf TSA_GUARDED_BY(mutex); +}; + +struct CgroupsV2Reader : ICgroupsReader +{ + CgroupsV2Reader(const std::filesystem::path & stat_file_dir) + : current_buf(stat_file_dir / "memory.current"), stat_buf(stat_file_dir / "memory.stat") + { + } + + uint64_t readMemoryUsage() override + { + std::lock_guard lock(mutex); + current_buf.rewind(); + stat_buf.rewind(); + + uint64_t mem_usage = 0; + /// memory.current contains a single number + readIntText(mem_usage, current_buf); + mem_usage -= readMetricFromStatFile(stat_buf, "inactive_file"); + return mem_usage; + } + +private: + std::mutex mutex; + ReadBufferFromFile current_buf TSA_GUARDED_BY(mutex); + ReadBufferFromFile stat_buf TSA_GUARDED_BY(mutex); +}; + /// Caveats: /// - All of the logic in this file assumes that the current process is the only process in the /// containing cgroup (or more precisely: the only process with significant memory consumption). @@ -117,7 +186,7 @@ namespace /// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such /// systems existed only for a short transition period. -std::optional getCgroupsV2FileName() +std::optional getCgroupsV2Path() { if (!cgroupsV2Enabled()) return {}; @@ -132,29 +201,30 @@ std::optional getCgroupsV2FileName() /// level, try again at the parent level as memory settings are inherited. while (current_cgroup != default_cgroups_mount.parent_path()) { - auto path = current_cgroup / "memory.current"; - if (std::filesystem::exists(path)) - return {path}; + const auto current_path = current_cgroup / "memory.current"; + const auto stat_path = current_cgroup / "memory.stat"; + if (std::filesystem::exists(current_path) && std::filesystem::exists(stat_path)) + return {current_cgroup}; current_cgroup = current_cgroup.parent_path(); } return {}; } -std::optional getCgroupsV1FileName() +std::optional getCgroupsV1Path() { auto path = default_cgroups_mount / "memory/memory.stat"; if (!std::filesystem::exists(path)) return {}; - return {path}; + return {default_cgroups_mount / "memory"}; } -std::pair getCgroupsFileName() +std::pair getCgroupsPath() { - auto v2_file_name = getCgroupsV2FileName(); + auto v2_file_name = getCgroupsV2Path(); if (v2_file_name.has_value()) return {*v2_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; - auto v1_file_name = getCgroupsV1FileName(); + auto v1_file_name = getCgroupsV1Path(); if (v1_file_name.has_value()) return {*v1_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; @@ -166,87 +236,25 @@ std::pair getCgroupsFil CgroupsMemoryUsageObserver::MemoryUsageFile::MemoryUsageFile(LoggerPtr log_) : log(log_) { - std::tie(file_name, version) = getCgroupsFileName(); + const auto [cgroup_path, version] = getCgroupsPath(); - LOG_INFO(log, "Will read the current memory usage from '{}' (cgroups version: {})", file_name, (version == CgroupsVersion::V1) ? "v1" : "v2"); + if (version == CgroupsVersion::V2) + cgroup_reader = std::make_unique(cgroup_path); + else + cgroup_reader = std::make_unique(cgroup_path); - fd = ::open(file_name.data(), O_RDONLY); - if (fd == -1) - ErrnoException::throwFromPath( - (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, - file_name, "Cannot open file '{}'", file_name); -} - -CgroupsMemoryUsageObserver::MemoryUsageFile::~MemoryUsageFile() -{ - assert(fd != -1); - if (::close(fd) != 0) - { - try - { - ErrnoException::throwFromPath( - ErrorCodes::CANNOT_CLOSE_FILE, - file_name, "Cannot close file '{}'", file_name); - } - catch (const ErrnoException &) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); - } - } + LOG_INFO( + log, + "Will read the current memory usage from '{}' (cgroups version: {})", + cgroup_path, + (version == CgroupsVersion::V1) ? "v1" : "v2"); } uint64_t CgroupsMemoryUsageObserver::MemoryUsageFile::readMemoryUsage() const { - /// File read is probably not read is thread-safe, just to be sure - std::lock_guard lock(mutex); - - ReadBufferFromFileDescriptor buf(fd); - buf.rewind(); - - uint64_t mem_usage = 0; - - switch (version) - { - case CgroupsVersion::V1: - { - /// Format is - /// kernel 5 - /// rss 15 - /// [...] - std::string key; - bool found_rss = false; - - while (!buf.eof()) - { - readStringUntilWhitespace(key, buf); - if (key != "rss") - { - std::string dummy; - readStringUntilNewlineInto(dummy, buf); - buf.ignore(); - continue; - } - - assertChar(' ', buf); - readIntText(mem_usage, buf); - found_rss = true; - break; - } - - if (!found_rss) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find 'rss' in '{}'", file_name); - - break; - } - case CgroupsVersion::V2: - { - readIntText(mem_usage, buf); - break; - } - } - + chassert(cgroup_reader); + const auto mem_usage = cgroup_reader->readMemoryUsage(); LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(mem_usage)); - return mem_usage; } diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index edc1cee750a..62bbabb9e86 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -3,11 +3,19 @@ #include #include +#include #include namespace DB { +struct ICgroupsReader +{ + virtual ~ICgroupsReader() = default; + + virtual uint64_t readMemoryUsage() = 0; +}; + /// Does two things: /// 1. Periodically reads the memory usage of the process from Linux cgroups. /// You can specify soft or hard memory limits: @@ -66,14 +74,10 @@ private: { public: explicit MemoryUsageFile(LoggerPtr log_); - ~MemoryUsageFile(); uint64_t readMemoryUsage() const; private: LoggerPtr log; - mutable std::mutex mutex; - int fd TSA_GUARDED_BY(mutex) = -1; - CgroupsVersion version; - std::string file_name; + std::unique_ptr cgroup_reader; }; MemoryUsageFile memory_usage_file; From 1fa5212836219f89fe2ea8877d882daf0a928bce Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 20 Jun 2024 17:49:51 +0100 Subject: [PATCH 055/141] remove MemoryUsageFile --- src/Common/CgroupsMemoryUsageObserver.cpp | 153 +++++++++++----------- src/Common/CgroupsMemoryUsageObserver.h | 15 +-- 2 files changed, 75 insertions(+), 93 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 20db6a64a31..23bfec4322b 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -24,84 +24,17 @@ #define STRINGIFY(x) STRINGIFY_HELPER(x) #endif +using namespace DB; namespace DB { namespace ErrorCodes { - extern const int FILE_DOESNT_EXIST; - extern const int INCORRECT_DATA; +extern const int FILE_DOESNT_EXIST; +extern const int INCORRECT_DATA; } -CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) - : log(getLogger("CgroupsMemoryUsageObserver")) - , wait_time(wait_time_) - , memory_usage_file(log) -{ - LOG_INFO(log, "Initialized cgroups memory limit observer, wait time is {} sec", wait_time.count()); -} - -CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() -{ - stopThread(); -} - -void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint64_t soft_limit_) -{ - std::lock_guard limit_lock(limit_mutex); - - if (hard_limit_ == hard_limit && soft_limit_ == soft_limit) - return; - - hard_limit = hard_limit_; - soft_limit = soft_limit_; - - on_hard_limit = [this, hard_limit_](bool up) - { - if (up) - { - LOG_WARNING(log, "Exceeded hard memory limit ({})", ReadableSize(hard_limit_)); - - /// Update current usage in memory tracker. Also reset free_memory_in_allocator_arenas to zero though we don't know if they are - /// really zero. Trying to avoid OOM ... - MemoryTracker::setRSS(hard_limit_, 0); - } - else - { - LOG_INFO(log, "Dropped below hard memory limit ({})", ReadableSize(hard_limit_)); - } - }; - - on_soft_limit = [this, soft_limit_](bool up) - { - if (up) - { - LOG_WARNING(log, "Exceeded soft memory limit ({})", ReadableSize(soft_limit_)); - -#if USE_JEMALLOC - LOG_INFO(log, "Purging jemalloc arenas"); - mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); -#endif - /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. - uint64_t memory_usage = memory_usage_file.readMemoryUsage(); - MemoryTracker::setRSS(memory_usage, 0); - - LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage)); - } - else - { - LOG_INFO(log, "Dropped below soft memory limit ({})", ReadableSize(soft_limit_)); - } - }; - - LOG_INFO(log, "Set new limits, soft limit: {}, hard limit: {}", ReadableSize(soft_limit_), ReadableSize(hard_limit_)); -} - -void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmountAvailableChangedFn on_memory_amount_available_changed_) -{ - std::lock_guard memory_amount_available_changed_lock(memory_amount_available_changed_mutex); - on_memory_amount_available_changed = on_memory_amount_available_changed_; } namespace @@ -233,8 +166,11 @@ std::pair getCgroupsPat } -CgroupsMemoryUsageObserver::MemoryUsageFile::MemoryUsageFile(LoggerPtr log_) - : log(log_) +namespace DB +{ + +CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) + : log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_) { const auto [cgroup_path, version] = getCgroupsPath(); @@ -245,17 +181,73 @@ CgroupsMemoryUsageObserver::MemoryUsageFile::MemoryUsageFile(LoggerPtr log_) LOG_INFO( log, - "Will read the current memory usage from '{}' (cgroups version: {})", + "Will read the current memory usage from '{}' (cgroups version: {}), wait time is {} sec", cgroup_path, - (version == CgroupsVersion::V1) ? "v1" : "v2"); + (version == CgroupsVersion::V1) ? "v1" : "v2", + wait_time.count()); } -uint64_t CgroupsMemoryUsageObserver::MemoryUsageFile::readMemoryUsage() const +CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() { - chassert(cgroup_reader); - const auto mem_usage = cgroup_reader->readMemoryUsage(); - LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(mem_usage)); - return mem_usage; + stopThread(); +} + +void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint64_t soft_limit_) +{ + std::lock_guard limit_lock(limit_mutex); + + if (hard_limit_ == hard_limit && soft_limit_ == soft_limit) + return; + + hard_limit = hard_limit_; + soft_limit = soft_limit_; + + on_hard_limit = [this, hard_limit_](bool up) + { + if (up) + { + LOG_WARNING(log, "Exceeded hard memory limit ({})", ReadableSize(hard_limit_)); + + /// Update current usage in memory tracker. Also reset free_memory_in_allocator_arenas to zero though we don't know if they are + /// really zero. Trying to avoid OOM ... + MemoryTracker::setRSS(hard_limit_, 0); + } + else + { + LOG_INFO(log, "Dropped below hard memory limit ({})", ReadableSize(hard_limit_)); + } + }; + + on_soft_limit = [this, soft_limit_](bool up) + { + if (up) + { + LOG_WARNING(log, "Exceeded soft memory limit ({})", ReadableSize(soft_limit_)); + +# if USE_JEMALLOC + LOG_INFO(log, "Purging jemalloc arenas"); + mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); +# endif + /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. + uint64_t memory_usage = cgroup_reader->readMemoryUsage(); + LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(memory_usage)); + MemoryTracker::setRSS(memory_usage, 0); + + LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage)); + } + else + { + LOG_INFO(log, "Dropped below soft memory limit ({})", ReadableSize(soft_limit_)); + } + }; + + LOG_INFO(log, "Set new limits, soft limit: {}, hard limit: {}", ReadableSize(soft_limit_), ReadableSize(hard_limit_)); +} + +void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmountAvailableChangedFn on_memory_amount_available_changed_) +{ + std::lock_guard memory_amount_available_changed_lock(memory_amount_available_changed_mutex); + on_memory_amount_available_changed = on_memory_amount_available_changed_; } void CgroupsMemoryUsageObserver::startThread() @@ -309,7 +301,8 @@ void CgroupsMemoryUsageObserver::runThread() std::lock_guard limit_lock(limit_mutex); if (soft_limit > 0 && hard_limit > 0) { - uint64_t memory_usage = memory_usage_file.readMemoryUsage(); + uint64_t memory_usage = cgroup_reader->readMemoryUsage(); + LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(memory_usage)); if (memory_usage > hard_limit) { if (last_memory_usage <= hard_limit) diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index 62bbabb9e86..b848a2bff3c 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -69,23 +69,12 @@ private: uint64_t last_memory_usage = 0; /// how much memory does the process use uint64_t last_available_memory_amount; /// how much memory can the process use - /// Represents the cgroup virtual file that shows the memory consumption of the process's cgroup. - struct MemoryUsageFile - { - public: - explicit MemoryUsageFile(LoggerPtr log_); - uint64_t readMemoryUsage() const; - private: - LoggerPtr log; - std::unique_ptr cgroup_reader; - }; - - MemoryUsageFile memory_usage_file; - void stopThread(); void runThread(); + std::unique_ptr cgroup_reader; + std::mutex thread_mutex; std::condition_variable cond; ThreadFromGlobalPool thread; From 0f0e1cee63a8e2047f2092d7db4e81c5a3b53572 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 20 Jun 2024 20:13:59 +0100 Subject: [PATCH 056/141] fix tidy --- src/Common/CgroupsMemoryUsageObserver.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 23bfec4322b..c37e3c74db9 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -1,5 +1,5 @@ +#include #include -#include #include #if defined(OS_LINUX) @@ -69,7 +69,7 @@ uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & ke struct CgroupsV1Reader : ICgroupsReader { - CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } + explicit CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } uint64_t readMemoryUsage() override { @@ -85,7 +85,7 @@ private: struct CgroupsV2Reader : ICgroupsReader { - CgroupsV2Reader(const std::filesystem::path & stat_file_dir) + explicit CgroupsV2Reader(const std::filesystem::path & stat_file_dir) : current_buf(stat_file_dir / "memory.current"), stat_buf(stat_file_dir / "memory.stat") { } From 7e0ed1b02cb55b0ce5788c9f1cf7c69c163ad14b Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 21 Jun 2024 21:29:46 +0100 Subject: [PATCH 057/141] add test --- src/Common/CgroupsMemoryUsageObserver.cpp | 4 +-- .../test_memory_limit_observer/test.py | 25 ++++++++++++++++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index c37e3c74db9..33393a8b9c6 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -230,7 +230,7 @@ void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint # endif /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. uint64_t memory_usage = cgroup_reader->readMemoryUsage(); - LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(memory_usage)); + LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); MemoryTracker::setRSS(memory_usage, 0); LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage)); @@ -302,7 +302,7 @@ void CgroupsMemoryUsageObserver::runThread() if (soft_limit > 0 && hard_limit > 0) { uint64_t memory_usage = cgroup_reader->readMemoryUsage(); - LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(memory_usage)); + LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); if (memory_usage > hard_limit) { if (last_memory_usage <= hard_limit) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index fe3acd9a0cf..369b9241f07 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -35,7 +35,7 @@ def get_latest_mem_limit(): ).strip() ) return mem_limit - except Exception as e: + except Exception: time.sleep(1) raise Exception("Cannot get memory limit") @@ -51,3 +51,26 @@ def test_observe_memory_limit(started_cluster): if new_max_mem > original_max_mem: return raise Exception("the memory limit does not increase as expected") + + +def test_memory_usage_doesnt_include_page_cache_size(started_cluster): + # populate page cache with 10GB of data + node1.exec_in_container( + ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=10K"] + ) + + observer_refresh_period = int( + node1.query( + "select value from system.server_settings where name = 'cgroups_memory_usage_observer_wait_time'" + ).strip() + ) + time.sleep(observer_refresh_period + 1) + + max_mem_usage_from_cgroup = node1.query( + """ + SELECT max(toUInt64(replaceRegexpAll(message, 'Read current memory usage (\\d+) bytes.*', '\\1'))) AS max_mem + FROM system.text_log + WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%' + """ + ).strip() + assert int(max_mem_usage_from_cgroup) < 2 * 2 ** 30 From 750c902671bb64f02c7bf6918779561a06711de6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 21 Jun 2024 20:44:53 +0000 Subject: [PATCH 058/141] Automatic style fix --- tests/integration/test_memory_limit_observer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index 369b9241f07..d8c2a0e8ad7 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -73,4 +73,4 @@ def test_memory_usage_doesnt_include_page_cache_size(started_cluster): WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%' """ ).strip() - assert int(max_mem_usage_from_cgroup) < 2 * 2 ** 30 + assert int(max_mem_usage_from_cgroup) < 2 * 2**30 From 556c7deeff11e404630948adf3bbd171170dd3eb Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 24 Jun 2024 01:19:50 +0000 Subject: [PATCH 059/141] add drop option in lightweight delete on table with projections --- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + src/Interpreters/InterpreterDeleteQuery.cpp | 91 ++++++++++++++----- ...61_lightweight_delete_projection.reference | 2 + .../03161_lightweight_delete_projection.sql | 11 ++- 5 files changed, 81 insertions(+), 25 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b3e83092a77..d85edcdae1f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -608,6 +608,7 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ + M(String, lightweight_mutation_projection_mode, "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 69bc8c5d207..abad02f67c3 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -105,6 +105,7 @@ static const std::maplockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - if (table->supportsDelete()) - { - /// Convert to MutationCommand - MutationCommands mutation_commands; - MutationCommand mut_command; - - mut_command.type = MutationCommand::Type::DELETE; - mut_command.predicate = delete_query.predicate; - - mutation_commands.emplace_back(mut_command); - - table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); - MutationsInterpreter::Settings settings(false); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); - table->mutate(mutation_commands, getContext()); - return {}; - } - else if (table->supportsLightweightDelete()) + auto lightweightDelete = [&]() { if (!getContext()->getSettingsRef().enable_lightweight_delete) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, @@ -105,17 +88,77 @@ BlockIO InterpreterDeleteQuery::execute() context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); InterpreterAlterQuery alter_interpreter(alter_ast, context); return alter_interpreter.execute(); + }; + + if (table->supportsDelete()) + { + /// Convert to MutationCommand + MutationCommands mutation_commands; + MutationCommand mut_command; + + mut_command.type = MutationCommand::Type::DELETE; + mut_command.predicate = delete_query.predicate; + + mutation_commands.emplace_back(mut_command); + + table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); + MutationsInterpreter::Settings settings(false); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); + table->mutate(mutation_commands, getContext()); + return {}; + } + else if (table->supportsLightweightDelete()) + { + return lightweightDelete(); } else { - /// Currently just better exception for the case of a table with projection, - /// can act differently according to the setting. if (table->hasProjection()) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DELETE query is not supported for table {} as it has projections. " - "User should drop all the projections manually before running the query", - table->getStorageID().getFullTableName()); + auto context = Context::createCopy(getContext()); + auto mode = Field(context->getSettingsRef().lightweight_mutation_projection_mode); + if (mode == "throw") + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DELETE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + table->getStorageID().getFullTableName()); + } + else if (mode == "drop") + { + std::vector all_projections = metadata_snapshot->projections.getAllRegisteredNames(); + + context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); + + /// Drop projections first so that lightweight delete can be performed. + for (const auto & projection : all_projections) + { + String alter_query = + "ALTER TABLE " + table->getStorageID().getFullTableName() + + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) + + " DROP PROJECTION IF EXISTS " + projection; + + ParserAlterQuery parser; + ASTPtr alter_ast = parseQuery( + parser, + alter_query.data(), + alter_query.data() + alter_query.size(), + "ALTER query", + 0, + DBMS_DEFAULT_MAX_PARSER_DEPTH, + DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + InterpreterAlterQuery alter_interpreter(alter_ast, context); + alter_interpreter.execute(); + } + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Unrecognized lightweight_mutation_projection_mode, only throw and drop are allowed."); + } + + return lightweightDelete(); } throw Exception(ErrorCodes::BAD_ARGUMENTS, diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index e69de29bb2d..15832d4cdfa 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -0,0 +1,2 @@ +8888 Alice 50 +1231 John 33 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index cd29fae8fd7..786f6a3cc34 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -5,7 +5,8 @@ CREATE TABLE users ( uid Int16, name String, age Int16, - projection p1 (select count(), age group by age) + projection p1 (select count(), age group by age), + projection p2 (select age, name group by age, name) ) ENGINE = MergeTree order by uid; INSERT INTO users VALUES (1231, 'John', 33); @@ -13,3 +14,11 @@ INSERT INTO users VALUES (6666, 'Ksenia', 48); INSERT INTO users VALUES (8888, 'Alice', 50); DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM users WHERE uid = 8888 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM users WHERE uid = 6666 SETTINGS lightweight_mutation_projection_mode = 'drop'; + +SELECT * FROM users; + +DROP TABLE users; From e20136ce25f85d463f4e3a033ac0a2a1d97431e5 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 24 Jun 2024 17:29:41 +0100 Subject: [PATCH 060/141] fix test --- tests/integration/test_memory_limit_observer/test.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index d8c2a0e8ad7..f19e119c019 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -54,10 +54,13 @@ def test_observe_memory_limit(started_cluster): def test_memory_usage_doesnt_include_page_cache_size(started_cluster): - # populate page cache with 10GB of data - node1.exec_in_container( - ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=10K"] - ) + try: + # populate page cache with 10GB of data; it might be killed by OOM killer but it is fine + node1.exec_in_container( + ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=10K"] + ) + except Exception: + pass observer_refresh_period = int( node1.query( From b1f87c578161578a03ee99ab87899cf4deb2c2ef Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 24 Jun 2024 21:04:04 +0000 Subject: [PATCH 061/141] return back settings and fix build --- src/Backups/BackupIO_AzureBlobStorage.cpp | 17 +++-- src/Core/Settings.h | 6 ++ .../AzureBlobStorageCommon.cpp | 68 ++++++++++++------- .../AzureBlobStorage/AzureBlobStorageCommon.h | 9 ++- .../ObjectStorages/ObjectStorageFactory.cpp | 3 +- .../ObjectStorage/Azure/Configuration.cpp | 12 +++- 6 files changed, 74 insertions(+), 41 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 596c308ca8a..0ee0160a969 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -41,18 +41,17 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( , blob_path(blob_path_) { auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); + auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy); object_storage = std::make_unique( "BackupReaderAzureBlobStorage", std::move(client_ptr), - AzureBlobStorage::getRequestSettings(context_->getSettingsRef()), + std::move(settings_ptr), connection_params.getContainer(), connection_params.getConnectionURL()); client = object_storage->getAzureBlobStorageClient(); - auto settings_copy = *object_storage->getSettings(); - settings_copy.use_native_copy = allow_azure_native_copy; - settings = std::make_unique(settings_copy); + settings = object_storage->getSettings(); } BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; @@ -122,8 +121,8 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( const AzureBlobStorage::ConnectionParams & connection_params_, - bool allow_azure_native_copy, const String & blob_path_, + bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, @@ -137,17 +136,17 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( connection_params.endpoint.container_already_exists = true; auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); + auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy); + object_storage = std::make_unique( "BackupWriterAzureBlobStorage", std::move(client_ptr), - AzureBlobStorage::getRequestSettings(context_->getSettingsRef()), + std::move(settings_ptr), connection_params.getContainer(), connection_params.getConnectionURL()); client = object_storage->getAzureBlobStorageClient(); - auto settings_copy = *object_storage->getSettings(); - settings_copy.use_native_copy = allow_azure_native_copy; - settings = std::make_unique(settings_copy); + settings = object_storage->getSettings(); } void BackupWriterAzureBlobStorage::copyFileFromDisk( diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ebdb6860986..9d3fedc3063 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -112,16 +112,22 @@ class IColumn; M(Bool, s3_use_adaptive_timeouts, S3::DEFAULT_USE_ADAPTIVE_TIMEOUTS, "When adaptive timeouts are enabled first two attempts are made with low receive and send timeout", 0) \ M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ + M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ M(Bool, s3_skip_empty_files, false, "Allow to skip empty files in s3 table engine", 0) \ + M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \ M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \ + M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, hdfs_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, azure_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in S3 table engine", 0) \ M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ + M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff beetween retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff beetween retries in azure sdk", 0) \ M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, S3::DEFAULT_DISABLE_CHECKSUM, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, S3::DEFAULT_RETRY_ATTEMPTS, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index c2e4bc0dc89..d9dfedadd48 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -197,7 +197,7 @@ void processURL(const String & url, const String & container_name, Endpoint & en return; } - size_t pos = url.find('?'); + auto pos = url.find('?'); /// If conneciton_url does not have '?', then its not SAS if (pos == std::string::npos) @@ -273,42 +273,60 @@ BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_di std::unique_ptr getRequestSettings(const Settings & query_settings) { - auto settings_ptr = std::make_unique(); + auto settings = std::make_unique(); - settings_ptr->max_single_part_upload_size = query_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = query_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(query_settings.azure_list_object_keys_size); + settings->max_single_part_upload_size = query_settings.azure_max_single_part_upload_size; + settings->max_single_read_retries = query_settings.azure_max_single_read_retries; + settings->max_single_download_retries = query_settings.azure_max_single_read_retries; + settings->list_object_keys_size = query_settings.azure_list_object_keys_size; + settings->min_upload_part_size = query_settings.azure_min_upload_part_size; + settings->max_upload_part_size = query_settings.azure_max_upload_part_size; + settings->max_single_part_copy_size = query_settings.azure_max_single_part_copy_size; + settings->max_blocks_in_multipart_upload = query_settings.azure_max_blocks_in_multipart_upload; + settings->max_unexpected_write_error_retries = query_settings.azure_max_unexpected_write_error_retries; + settings->max_inflight_parts_for_one_file = query_settings.azure_max_inflight_parts_for_one_file; + settings->strict_upload_part_size = query_settings.azure_strict_upload_part_size; + settings->upload_part_size_multiply_factor = query_settings.azure_upload_part_size_multiply_factor; + settings->upload_part_size_multiply_parts_count_threshold = query_settings.azure_upload_part_size_multiply_parts_count_threshold; + settings->sdk_max_retries = query_settings.azure_sdk_max_retries; + settings->sdk_retry_initial_backoff_ms = query_settings.azure_sdk_retry_initial_backoff_ms; + settings->sdk_retry_max_backoff_ms = query_settings.azure_sdk_retry_max_backoff_ms; - settings_ptr->sdk_max_retries = query_settings.azure_sdk_max_retries; - settings_ptr->sdk_retry_initial_backoff_ms = query_settings.azure_sdk_retry_initial_backoff_ms; - settings_ptr->sdk_retry_max_backoff_ms = query_settings.azure_sdk_retry_max_backoff_ms; + return settings; +} - return settings_ptr; +std::unique_ptr getRequestSettingsForBackup(const Settings & query_settings, bool use_native_copy) +{ + auto settings = getRequestSettings(query_settings); + settings->use_native_copy = use_native_copy; + return settings; } std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { auto settings = std::make_unique(); + const auto & settings_ref = context->getSettingsRef(); - settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", context->getSettings().azure_max_single_part_upload_size); settings->min_bytes_for_seek = config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024); - settings->max_single_read_retries = config.getInt(config_prefix + ".max_single_read_retries", 3); - settings->max_single_download_retries = config.getInt(config_prefix + ".max_single_download_retries", 3); - settings->list_object_keys_size = config.getInt(config_prefix + ".list_object_keys_size", 1000); - settings->min_upload_part_size = config.getUInt64(config_prefix + ".min_upload_part_size", context->getSettings().azure_min_upload_part_size); - settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", context->getSettings().azure_max_upload_part_size); - settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", context->getSettings().azure_max_single_part_copy_size); settings->use_native_copy = config.getBool(config_prefix + ".use_native_copy", false); - settings->max_blocks_in_multipart_upload = config.getUInt64(config_prefix + ".max_blocks_in_multipart_upload", 50000); - settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", context->getSettings().azure_max_unexpected_write_error_retries); - settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", context->getSettings().azure_max_inflight_parts_for_one_file); - settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", context->getSettings().azure_strict_upload_part_size); - settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", context->getSettings().azure_upload_part_size_multiply_factor); - settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", context->getSettings().azure_upload_part_size_multiply_parts_count_threshold); - settings->sdk_max_retries = config.getUInt(config_prefix + ".max_tries", 10); - settings->sdk_retry_initial_backoff_ms = config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10); - settings->sdk_retry_max_backoff_ms = config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000); + settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", settings_ref.azure_max_single_part_upload_size); + settings->max_single_read_retries = config.getUInt64(config_prefix + ".max_single_read_retries", settings_ref.azure_max_single_read_retries); + settings->max_single_download_retries = config.getUInt64(config_prefix + ".max_single_download_retries", settings_ref.azure_max_single_read_retries); + settings->list_object_keys_size = config.getUInt64(config_prefix + ".list_object_keys_size", settings_ref.azure_list_object_keys_size); + settings->min_upload_part_size = config.getUInt64(config_prefix + ".min_upload_part_size", settings_ref.azure_min_upload_part_size); + settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", settings_ref.azure_max_upload_part_size); + settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", settings_ref.azure_max_single_part_copy_size); + settings->max_blocks_in_multipart_upload = config.getUInt64(config_prefix + ".max_blocks_in_multipart_upload", settings_ref.azure_max_blocks_in_multipart_upload); + settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", settings_ref.azure_max_unexpected_write_error_retries); + settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", settings_ref.azure_max_inflight_parts_for_one_file); + settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", settings_ref.azure_strict_upload_part_size); + settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", settings_ref.azure_upload_part_size_multiply_factor); + settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", settings_ref.azure_upload_part_size_multiply_parts_count_threshold); + + settings->sdk_max_retries = config.getUInt64(config_prefix + ".max_tries", settings_ref.azure_sdk_max_retries); + settings->sdk_retry_initial_backoff_ms = config.getUInt64(config_prefix + ".retry_initial_backoff_ms", settings_ref.azure_sdk_retry_initial_backoff_ms); + settings->sdk_retry_max_backoff_ms = config.getUInt64(config_prefix + ".retry_max_backoff_ms", settings_ref.azure_sdk_retry_max_backoff_ms); if (config.has(config_prefix + ".curl_ip_resolve")) { diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h index 5f9f280ad4a..19ba48ea225 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ -32,22 +32,23 @@ struct RequestSettings RequestSettings() = default; size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset - uint64_t min_bytes_for_seek = 1024 * 1024; + size_t min_bytes_for_seek = 1024 * 1024; size_t max_single_read_retries = 3; size_t max_single_download_retries = 3; - int list_object_keys_size = 1000; + size_t list_object_keys_size = 1000; size_t min_upload_part_size = 16 * 1024 * 1024; size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; size_t max_single_part_copy_size = 256 * 1024 * 1024; - bool use_native_copy = false; size_t max_unexpected_write_error_retries = 4; size_t max_inflight_parts_for_one_file = 20; + size_t max_blocks_in_multipart_upload = 50000; size_t strict_upload_part_size = 0; size_t upload_part_size_multiply_factor = 2; size_t upload_part_size_multiply_parts_count_threshold = 500; size_t sdk_max_retries = 10; size_t sdk_retry_initial_backoff_ms = 10; size_t sdk_retry_max_backoff_ms = 1000; + bool use_native_copy = false; using CurlOptions = Azure::Core::Http::CurlTransportOptions; CurlOptions::CurlOptIPResolve curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_WHATEVER; @@ -125,7 +126,9 @@ std::unique_ptr getContainerClient(const ConnectionParams & par BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk); AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); + std::unique_ptr getRequestSettings(const Settings & query_settings); +std::unique_ptr getRequestSettingsForBackup(const Settings & query_settings, bool use_native_copy); std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); } diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 3f32b4b410e..092277aca50 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -332,8 +332,9 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) ObjectStorageType::Azure, config, config_prefix, name, AzureBlobStorage::getContainerClient(params, /*readonly=*/ false), std::move(azure_settings), params.endpoint.prefix.empty() ? params.endpoint.container_name : params.endpoint.container_name + "/" + params.endpoint.prefix, - endpoint.getEndpointWithoutContainer()); + params.endpoint.getEndpointWithoutContainer()); }; + factory.registerObjectStorageType("azure_blob_storage", creator); factory.registerObjectStorageType("azure", creator); } diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp index e4b3d61f659..595d4da3609 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -73,10 +73,16 @@ StorageObjectStorage::QuerySettings StorageAzureConfiguration::getQuerySettings( ObjectStoragePtr StorageAzureConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { assertInitialized(); - auto client = createClient(is_readonly, /* attempt_to_create_container */true); - auto settings = createSettings(context); + + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); + auto client = AzureBlobStorage::getContainerClient(connection_params, is_readonly); + return std::make_unique( - "AzureBlobStorage", std::move(client), std::move(settings), container, getConnectionURL().toString()); + "AzureBlobStorage", + connection_params.createForContainer(), + std::move(settings), + connection_params.getContainer(), + connection_params.getConnectionURL()); } static AzureBlobStorage::ConnectionParams getConnectionParams( From 51f300356e0c0df9c7d001ffe0b7967c7d9f438e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 24 Jun 2024 23:57:33 +0000 Subject: [PATCH 062/141] fix style --- src/Core/Settings.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9d3fedc3063..067e46226ea 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -126,8 +126,8 @@ class IColumn; M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \ - M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff beetween retries in azure sdk", 0) \ - M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff beetween retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff between retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff between retries in azure sdk", 0) \ M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, S3::DEFAULT_DISABLE_CHECKSUM, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, S3::DEFAULT_RETRY_ATTEMPTS, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ From 615cd96c6e6893973b14cc0190e510894b747b22 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 25 Jun 2024 12:48:43 +0100 Subject: [PATCH 063/141] fix test --- tests/integration/test_memory_limit_observer/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index f19e119c019..2840c830396 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -55,9 +55,9 @@ def test_observe_memory_limit(started_cluster): def test_memory_usage_doesnt_include_page_cache_size(started_cluster): try: - # populate page cache with 10GB of data; it might be killed by OOM killer but it is fine + # populate page cache with 4GB of data; it might be killed by OOM killer but it is fine node1.exec_in_container( - ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=10K"] + ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=4K"] ) except Exception: pass @@ -76,4 +76,4 @@ def test_memory_usage_doesnt_include_page_cache_size(started_cluster): WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%' """ ).strip() - assert int(max_mem_usage_from_cgroup) < 2 * 2**30 + assert int(max_mem_usage_from_cgroup) < 2 * 2 ** 30 From c30ecee10c8cba5d245916d458ddd60345ea359c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 26 Jun 2024 12:40:55 +0000 Subject: [PATCH 064/141] remove unused code --- src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h | 2 +- src/Storages/ObjectStorage/Azure/Configuration.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index baad3bdf223..2c7ce5e18dc 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -1,5 +1,4 @@ #pragma once -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -9,6 +8,7 @@ #include #include #include +#include namespace Poco { diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index 272d155e337..4e6bfbc0745 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -57,9 +57,6 @@ protected: std::string blob_path; std::vector blobs_paths; AzureBlobStorage::ConnectionParams connection_params; - - // AzureClientPtr createClient(bool is_read_only, bool attempt_to_create_container); - // AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); }; } From b06eac085bddc3f0ed6a2f5d2ac0a4ddcdd8259c Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 26 Jun 2024 20:01:17 +0200 Subject: [PATCH 065/141] work with review --- src/Common/CollectionOfDerived.h | 21 +- src/Core/Settings.h | 2 +- src/Interpreters/AsynchronousInsertQueue.cpp | 8 +- src/Interpreters/InterpreterCreateQuery.cpp | 6 +- src/Interpreters/InterpreterExplainQuery.cpp | 8 +- src/Interpreters/SystemLog.cpp | 8 +- .../DeduplicationTokenTransforms.cpp | 56 +- .../Transforms/DeduplicationTokenTransforms.h | 22 +- .../Transforms/buildPushingToViewsChain.cpp | 22 +- src/Storages/Distributed/DistributedSink.cpp | 16 +- src/Storages/FileLog/StorageFileLog.cpp | 9 +- src/Storages/HDFS/StorageHDFS.cpp | 1207 ----------------- src/Storages/Kafka/StorageKafka.cpp | 8 +- src/Storages/MaterializedView/RefreshTask.cpp | 8 +- src/Storages/MergeTree/MergeTreeSink.cpp | 24 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 6 - src/Storages/NATS/StorageNATS.cpp | 8 +- .../MaterializedPostgreSQLConsumer.cpp | 8 +- .../PostgreSQLReplicationHandler.cpp | 8 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 8 +- src/Storages/S3Queue/StorageS3Queue.cpp | 8 +- src/Storages/StorageBuffer.cpp | 8 +- src/Storages/StorageDistributed.cpp | 8 +- src/Storages/WindowView/StorageWindowView.cpp | 17 +- 24 files changed, 220 insertions(+), 1284 deletions(-) delete mode 100644 src/Storages/HDFS/StorageHDFS.cpp diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h index 60a91e593f9..97c0c3fbc06 100644 --- a/src/Common/CollectionOfDerived.h +++ b/src/Common/CollectionOfDerived.h @@ -2,6 +2,8 @@ #include +#include + #include #include #include @@ -12,6 +14,16 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/* This is a collections of objects derived from ItemBase. +* Collection contains no more than one instance for each derived type. +* The derived type is used to access the instance. +*/ + template class CollectionOfDerivedItems { @@ -67,15 +79,16 @@ public: { Self result; result.records.reserve(records.size()); - for (const auto & rec: records) + for (const auto & rec : records) result.records.emplace_back(rec.type_idx, rec.ptr->clone()); return result; } void append(Self && other) { + auto middle_idx = records.size(); std::move(other.records.begin(), other.records.end(), std::back_inserter(records)); - std::sort(records.begin(), records.end()); + std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end()); chassert(isUniqTypes()); } @@ -143,7 +156,9 @@ private: return; } - chassert(it->type_idx != type_idx); + if (it->type_idx == type_idx) + throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name()); + records.emplace(it, type_idx, item); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a272456470a..c400873a47c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -632,7 +632,6 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ - M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Deprecated.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ @@ -948,6 +947,7 @@ class IColumn; #define OBSOLETE_SETTINGS(M, ALIAS) \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ + MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 1) \ MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \ MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \ MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \ diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 94c024ba786..dd1166a9228 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -301,7 +301,13 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const auto & insert_query = query->as(); insert_query.async_insert_flush = true; - InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns, false, false, false); + InterpreterInsertQuery interpreter( + query, + query_context, + query_context->getSettingsRef().insert_allow_materialized_columns, + /* no_squash */ false, + /* no_destination */ false, + /* async_insert */ false); auto table = interpreter.getTable(insert_query); auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ef222a6842f..dbbdb546260 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1750,9 +1750,9 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) insert, getContext(), getContext()->getSettingsRef().insert_allow_materialized_columns, - false, - false, - false).execute(); + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false).execute(); } return {}; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index b837490dad9..8392f0541f1 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -524,7 +524,13 @@ QueryPipeline InterpreterExplainQuery::executeImpl() } else if (dynamic_cast(ast.getExplainedQuery().get())) { - InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext(), false, false, false, false); + InterpreterInsertQuery insert( + ast.getExplainedQuery(), + getContext(), + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto io = insert.execute(); printPipeline(io.pipeline.getProcessors(), buf); } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 8f97b6ea263..8d4882372ff 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -537,7 +537,13 @@ void SystemLog::flushImpl(const std::vector & to_flush, insert_context->makeQueryContext(); addSettingsForQuery(insert_context, IAST::QueryKind::Insert); - InterpreterInsertQuery interpreter(query_ptr, insert_context, false, false, false, false); + InterpreterInsertQuery interpreter( + query_ptr, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); BlockIO io = interpreter.execute(); PushingPipelineExecutor executor(io.pipeline); diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 0701e958877..23e32415f6a 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -26,10 +26,16 @@ void RestoreChunkInfosTransform::transform(Chunk & chunk) namespace DeduplicationToken { -String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const +String TokenInfo::getToken() const { - chassert(stage == VIEW_ID || !enable_assert); + if (stage != VIEW_ID) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + return getTokenImpl(); +} + +String TokenInfo::getTokenImpl() const +{ String result; result.reserve(getTotalSize()); @@ -43,13 +49,20 @@ String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const return result; } -void DB::DeduplicationToken::TokenInfo::addPieceToInitialToken(String part) +String TokenInfo::debugToken() const { - chassert(stage == INITIAL); + return getTokenImpl(); +} + + +void TokenInfo::addPieceToInitialToken(String part) +{ + if (stage != INITIAL) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(std::move(part)); } -void DB::DeduplicationToken::TokenInfo::closeInitialToken() +void TokenInfo::closeInitialToken() { chassert(stage == INITIAL); stage = VIEW_ID; @@ -57,29 +70,37 @@ void DB::DeduplicationToken::TokenInfo::closeInitialToken() void TokenInfo::setUserToken(const String & token) { - chassert(stage == INITIAL); + if (stage != INITIAL) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + addTokenPart(fmt::format("user-token-{}", token)); stage = SOURCE_BLOCK_NUMBER; } -void TokenInfo::setSourceBlockNumber(size_t sbn) +void TokenInfo::setSourceBlockNumber(size_t block_number) { - chassert(stage == SOURCE_BLOCK_NUMBER); - addTokenPart(fmt::format("source-number-{}", sbn)); + if (stage != SOURCE_BLOCK_NUMBER) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("source-number-{}", block_number)); stage = VIEW_ID; } void TokenInfo::setViewID(const String & id) { - chassert(stage == VIEW_ID); + if (stage != VIEW_ID) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + addTokenPart(fmt::format("view-id-{}", id)); stage = VIEW_BLOCK_NUMBER; } -void TokenInfo::setViewBlockNumber(size_t mvbn) +void TokenInfo::setViewBlockNumber(size_t block_number) { - chassert(stage == VIEW_BLOCK_NUMBER); - addTokenPart(fmt::format("view-block-{}", mvbn)); + if (stage != VIEW_BLOCK_NUMBER) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("view-block-{}", block_number)); stage = VIEW_ID; } @@ -91,8 +112,7 @@ void TokenInfo::reset() void TokenInfo::addTokenPart(String part) { - if (!part.empty()) - parts.push_back(std::move(part)); + parts.push_back(std::move(part)); } size_t TokenInfo::getTotalSize() const @@ -107,6 +127,7 @@ size_t TokenInfo::getTotalSize() const return size + parts.size() - 1; } +#ifdef ABORT_ON_LOGICAL_ERROR void CheckTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); @@ -116,12 +137,13 @@ void CheckTokenTransform::transform(Chunk & chunk) if (!must_be_present) { - LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, no token required, token {}", debug, token_info->getToken(false)); + LOG_DEBUG(log, "{}, no token required, token {}", debug, token_info->debugToken()); return; } - LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, token: {}", debug, token_info->getToken(false)); + LOG_DEBUG(log, "{}, token: {}", debug, token_info->debugToken()); } +#endif String SetInitialTokenTransform::getInitialToken(const Chunk & chunk) { diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 27bb21dfad1..ebbbb0f7590 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -4,6 +4,7 @@ #include #include +#include "Common/Logger.h" namespace DB @@ -33,7 +34,8 @@ namespace DeduplicationToken TokenInfo() = default; TokenInfo(const TokenInfo & other) = default; - String getToken(bool enable_assert = true) const; + String getToken() const; + String debugToken() const; bool empty() const { return parts.empty(); } bool tokenInitialized() const { return stage != INITIAL && stage != SOURCE_BLOCK_NUMBER; } @@ -41,15 +43,25 @@ namespace DeduplicationToken void addPieceToInitialToken(String part); void closeInitialToken(); void setUserToken(const String & token); - void setSourceBlockNumber(size_t sbn); + void setSourceBlockNumber(size_t block_number); void setViewID(const String & id); - void setViewBlockNumber(size_t mvbn); + void setViewBlockNumber(size_t block_number); void reset(); private: + String getTokenImpl() const; + void addTokenPart(String part); size_t getTotalSize() const; + /* Token has to be prepared in a particular order. BuildingStage ensure that token is expanded according the foloving order. + * Firstly token has expand with information about the souce. + * INITIAL -- in that stage token is expanded with several hash sums or with the user defined deduplication token. + * SOURCE_BLOCK_NUMBER -- when token is expand with user defined deduplication token, after token has to be expanded with source block number. + * After that token is considered as prepared for usage, hovewer it could be expanded with following details: + * VIEW_ID -- in that stage token is expanded with view id, token could not be used until nex stage is passed. + * VIEW_BLOCK_NUMBER - in that stage token is expanded with view block number. + */ enum BuildingStage { INITIAL, @@ -63,6 +75,8 @@ namespace DeduplicationToken }; +#ifdef ABORT_ON_LOGICAL_ERROR + /// use that class only with debug builds in CI for introspection class CheckTokenTransform : public ISimpleTransform { public: @@ -79,8 +93,10 @@ namespace DeduplicationToken private: String debug; + LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform"); bool must_be_present = false; }; +#endif class AddTokenInfoTransform : public ISimpleTransform diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index aba28391879..713ab25600f 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -27,6 +27,7 @@ #include #include #include +#include "base/defines.h" #include #include @@ -225,7 +226,6 @@ std::optional generateViewChain( if (disable_deduplication_for_children) { insert_context->setSetting("insert_deduplicate", Field{false}); - insert_context->setSetting("insert_deduplication_token", Field{""}); } // Processing of blocks for MVs is done block by block, and there will @@ -333,7 +333,13 @@ std::optional generateViewChain( insert_columns.emplace_back(column.name); } - InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false, false); + InterpreterInsertQuery interpreter( + nullptr, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; @@ -350,7 +356,9 @@ std::optional generateViewChain( table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); } +#ifdef ABORT_ON_LOGICAL_ERROR out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); +#endif auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); @@ -394,7 +402,9 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { +#ifdef ABORT_ON_LOGICAL_ERROR out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); +#endif auto executing_inner_query = std::make_shared( storage_header, views_data->views.back(), views_data, disable_deduplication_for_children); @@ -402,7 +412,9 @@ std::optional generateViewChain( out.addSource(std::move(executing_inner_query)); +#ifdef ABORT_ON_LOGICAL_ERROR out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); +#endif } return out; @@ -459,8 +471,6 @@ Chain buildPushingToViewsChain( for (const auto & view_id : views) { - LOG_DEBUG(&Poco::Logger::get("PushingToViews"), "dependent view: {}.{}", view_id.database_name, view_id.table_name); - try { auto out = generateViewChain( @@ -569,7 +579,7 @@ Chain buildPushingToViewsChain( } else { - result_chain.addSource(std::make_shared(storage_header)); + result_chain.addSource(std::make_shared(storage_header)); } if (result_chain.empty()) @@ -586,7 +596,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection chunk_infos, bool disable_deduplication_for_children) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection && chunk_infos, bool disable_deduplication_for_children) { const auto & context = view.context; diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 2e3096683d0..8791668cd89 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -420,7 +420,13 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// to resolve tables (in InterpreterInsertQuery::getTable()) auto copy_query_ast = query_ast->clone(); - InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized, false, false, false); + InterpreterInsertQuery interp( + copy_query_ast, + job.local_context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interp.execute(); job.pipeline = std::move(block_io.pipeline); @@ -715,7 +721,13 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const try { - InterpreterInsertQuery interp(query_ast, context, allow_materialized, false, false, false); + InterpreterInsertQuery interp( + query_ast, + context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interp.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index b86845d48e0..0f9bd8b6ff9 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -743,10 +743,11 @@ bool StorageFileLog::streamToViews() InterpreterInsertQuery interpreter( insert, new_context, - false, - true, - true, - false); + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); + auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp deleted file mode 100644 index 1ca7c1f71d0..00000000000 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ /dev/null @@ -1,1207 +0,0 @@ -#include "config.h" - -#if USE_HDFS - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ACCESS_DENIED; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_DETECT_FORMAT; -} -namespace -{ - struct HDFSFileInfoDeleter - { - /// Can have only one entry (see hdfsGetPathInfo()) - void operator()(hdfsFileInfo * info) { hdfsFreeFileInfo(info, 1); } - }; - using HDFSFileInfoPtr = std::unique_ptr; - - /* Recursive directory listing with matched paths as a result. - * Have the same method in StorageFile. - */ - std::vector LSWithRegexpMatching( - const String & path_for_ls, - const HDFSFSPtr & fs, - const String & for_match) - { - std::vector result; - - const size_t first_glob_pos = for_match.find_first_of("*?{"); - - if (first_glob_pos == std::string::npos) - { - const String path = fs::path(path_for_ls + for_match.substr(1)).lexically_normal(); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path.c_str())); - if (hdfs_info) // NOLINT - { - result.push_back(StorageHDFS::PathWithInfo{ - String(path), - StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}}); - } - return result; - } - - const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); - const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' - const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' - - const size_t next_slash_after_glob_pos = suffix_with_globs.find('/', 1); - - const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); - - re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob)); - if (!matcher.ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", for_match, matcher.error()); - - HDFSFileInfo ls; - ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length); - if (ls.file_info == nullptr && errno != ENOENT) // NOLINT - { - // ignore file not found exception, keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno. - throw Exception( - ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", prefix_without_globs, String(hdfsGetLastError())); - } - - if (!ls.file_info && ls.length > 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); - for (int i = 0; i < ls.length; ++i) - { - const String full_path = fs::path(ls.file_info[i].mName).lexically_normal(); - const size_t last_slash = full_path.rfind('/'); - const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; - const bool is_directory = ls.file_info[i].mKind == 'D'; - /// Condition with type of current file_info means what kind of path is it in current iteration of ls - if (!is_directory && !looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - result.push_back(StorageHDFS::PathWithInfo{ - String(full_path), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); - } - else if (is_directory && looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - { - std::vector result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, - suffix_with_globs.substr(next_slash_after_glob_pos)); - /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. - std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); - } - } - } - - return result; - } - - std::pair getPathFromUriAndUriWithoutPath(const String & uri) - { - auto pos = uri.find("//"); - if (pos != std::string::npos && pos + 2 < uri.length()) - { - pos = uri.find('/', pos + 2); - if (pos != std::string::npos) - return {uri.substr(pos), uri.substr(0, pos)}; - } - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage HDFS requires valid URL to be set"); - } - - std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context) - { - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - Strings paths = expandSelectionGlob(path_from_uri); - - std::vector res; - - for (const auto & path : paths) - { - auto part_of_res = LSWithRegexpMatching("/", fs, path); - res.insert(res.end(), part_of_res.begin(), part_of_res.end()); - } - return res; - } -} - -StorageHDFS::StorageHDFS( - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - const ContextPtr & context_, - const String & compression_method_, - const bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , WithContext(context_) - , uris({uri_}) - , format_name(format_name_) - , compression_method(compression_method_) - , distributed_processing(distributed_processing_) - , partition_by(partition_by_) -{ - if (format_name != "auto") - FormatFactory::instance().checkFormatName(format_name); - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); - checkHDFSURL(uri_); - - String path = uri_.substr(uri_.find('/', uri_.find("//") + 2)); - is_path_with_globs = path.find_first_of("*?{") != std::string::npos; - - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - ColumnsDescription columns; - if (format_name == "auto") - std::tie(columns, format_name) = getTableStructureAndFormatFromData(uri_, compression_method_, context_); - else - columns = getTableStructureFromData(format_name, uri_, compression_method, context_); - - storage_metadata.setColumns(columns); - } - else - { - if (format_name == "auto") - format_name = getTableStructureAndFormatFromData(uri_, compression_method_, context_).second; - - /// We don't allow special columns in HDFS storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine HDFS doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::vector & paths_with_info_, - const String & uri_without_path_, - std::optional format_, - const String & compression_method_, - const ContextPtr & context_) - : WithContext(context_) - , paths_with_info(paths_with_info_) - , uri_without_path(uri_without_path_) - , format(std::move(format_)) - , compression_method(compression_method_) - { - } - - Data next() override - { - bool is_first = current_index == 0; - /// For default mode check cached columns for all paths on first iteration. - if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(paths_with_info)) - return {nullptr, cached_columns, format}; - } - - StorageHDFS::PathWithInfo path_with_info; - - while (true) - { - if (current_index == paths_with_info.size()) - { - if (is_first) - { - if (format) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because all files are empty. " - "You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because all files are empty. You can specify table structure manually"); - } - return {nullptr, std::nullopt, format}; - } - - path_with_info = paths_with_info[current_index++]; - if (getContext()->getSettingsRef().hdfs_skip_empty_files && path_with_info.info && path_with_info.info->size == 0) - continue; - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - std::vector paths = {path_with_info}; - if (auto cached_columns = tryGetColumnsFromCache(paths)) - return {nullptr, cached_columns, format}; - } - - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - if (!getContext()->getSettingsRef().hdfs_skip_empty_files || !impl->eof()) - { - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)), std::nullopt, format}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - Strings sources; - sources.reserve(paths_with_info.size()); - std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const StorageHDFS::PathWithInfo & path_with_info){ return uri_without_path + path_with_info.path; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, {}, getContext()); - StorageHDFS::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override - { - if (current_index != 0) - return paths_with_info[current_index - 1].path; - - return ""; - } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - chassert(current_index > 0 && current_index <= paths_with_info.size()); - auto path_with_info = paths_with_info[current_index - 1]; - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); - } - - private: - std::optional tryGetColumnsFromCache(const std::vector & paths_with_info_) - { - auto context = getContext(); - - if (!context->getSettingsRef().schema_inference_use_cache_for_hdfs) - return std::nullopt; - - auto & schema_cache = StorageHDFS::getSchemaCache(context); - for (const auto & path_with_info : paths_with_info_) - { - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - - auto builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_with_info.path.c_str())); - if (hdfs_info) - return hdfs_info->mLastMod; - - return std::nullopt; - }; - - String url = uri_without_path + path_with_info.path; - if (format) - { - auto cache_key = getKeyForSchemaCache(url, *format, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry for some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(url, format_name, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. - format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - const std::vector & paths_with_info; - const String & uri_without_path; - std::optional format; - const String & compression_method; - size_t current_index = 0; - }; -} - -std::pair StorageHDFS::getTableStructureAndFormatFromDataImpl( - std::optional format, - const String & uri, - const String & compression_method, - const ContextPtr & ctx) -{ - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - auto paths_with_info = getPathsList(path_from_uri, uri, ctx); - - if (paths_with_info.empty() && (!format || !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(*format))) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files in HDFS with provided path." - " You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The data format cannot be detected by the contents of the files, because there are no files in HDFS with provided path." - " You can specify the format manually"); - } - - ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx); - if (format) - return {readSchemaFromFormat(*format, std::nullopt, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, ctx); -} - -std::pair StorageHDFS::getTableStructureAndFormatFromData(const String & uri, const String & compression_method, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, uri, compression_method, ctx); -} - -ColumnsDescription StorageHDFS::getTableStructureFromData(const String & format, const String & uri, const String & compression_method, const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(format, uri, compression_method, ctx).first; -} - -class HDFSSource::DisclosedGlobIterator::Impl -{ -public: - Impl(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - uris = getPathsList(path_from_uri, uri_without_path, context); - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & path_with_info : uris) - paths.push_back(path_with_info.path); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context); - } - auto file_progress_callback = context->getFileProgressCallback(); - - for (auto & elem : uris) - { - elem.path = uri_without_path + elem.path; - if (file_progress_callback && elem.info) - file_progress_callback(FileProgress(0, elem.info->size)); - } - uris_iter = uris.begin(); - } - - StorageHDFS::PathWithInfo next() - { - std::lock_guard lock(mutex); - if (uris_iter != uris.end()) - { - auto answer = *uris_iter; - ++uris_iter; - return answer; - } - return {}; - } -private: - std::mutex mutex; - std::vector uris; - std::vector::iterator uris_iter; -}; - -class HDFSSource::URISIterator::Impl : WithContext -{ -public: - explicit Impl(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context_) - : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback()) - { - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & uri : uris) - paths.push_back(getPathFromUriAndUriWithoutPath(uri).first); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, getContext()); - } - - if (!uris.empty()) - { - auto path_and_uri = getPathFromUriAndUriWithoutPath(uris[0]); - builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef()); - fs = createHDFSFS(builder.get()); - } - } - - StorageHDFS::PathWithInfo next() - { - String uri; - HDFSFileInfoPtr hdfs_info; - do - { - size_t current_index = index.fetch_add(1); - if (current_index >= uris.size()) - return {"", {}}; - - uri = uris[current_index]; - auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); - hdfs_info.reset(hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str())); - } - /// Skip non-existed files. - while (!hdfs_info && String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos); - - std::optional info; - if (hdfs_info) - { - info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - if (file_progress_callback) - file_progress_callback(FileProgress(0, hdfs_info->mSize)); - } - - return {uri, info}; - } - -private: - std::atomic_size_t index = 0; - Strings uris; - HDFSBuilderWrapper builder; - HDFSFSPtr fs; - std::function file_progress_callback; -}; - -HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uri, predicate, virtual_columns, context)) {} - -StorageHDFS::PathWithInfo HDFSSource::DisclosedGlobIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::URISIterator::URISIterator(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uris_, predicate, virtual_columns, context)) -{ -} - -StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::HDFSSource( - const ReadFromFormatInfo & info, - StorageHDFSPtr storage_, - const ContextPtr & context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_) - : ISource(info.source_header, false) - , WithContext(context_) - , storage(std::move(storage_)) - , block_for_format(info.format_header) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , max_block_size(max_block_size_) - , file_iterator(file_iterator_) - , columns_description(info.columns_description) - , need_only_count(need_only_count_) -{ - initialize(); -} - -HDFSSource::~HDFSSource() = default; - -bool HDFSSource::initialize() -{ - bool skip_empty_files = getContext()->getSettingsRef().hdfs_skip_empty_files; - StorageHDFS::PathWithInfo path_with_info; - while (true) - { - path_with_info = (*file_iterator)(); - if (path_with_info.path.empty()) - return false; - - if (path_with_info.info && skip_empty_files && path_with_info.info->size == 0) - continue; - - current_path = path_with_info.path; - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path); - - std::optional file_size; - if (!path_with_info.info) - { - auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_from_uri.c_str())); - if (hdfs_info) - path_with_info.info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - } - - if (path_with_info.info) - file_size = path_with_info.info->size; - - auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); - auto impl = std::make_unique( - uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size); - if (!skip_empty_files || !impl->eof()) - { - impl->setProgressCallback(getContext()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); - break; - } - } - - current_path = path_with_info.path; - current_file_size = path_with_info.info ? std::optional(path_with_info.info->size) : std::nullopt; - - QueryPipelineBuilder builder; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(path_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use a special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - auto source = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, std::nullopt, max_parsing_threads); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - if (columns_description.hasDefaults()) - { - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, columns_description, *input_format, getContext()); - }); - } - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from the chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - return true; -} - -String HDFSSource::getName() const -{ - return "HDFSSource"; -} - -Chunk HDFSSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (input_format) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, current_path, current_file_size); - return chunk; - } - - if (input_format && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(current_path, total_rows_in_file); - - total_rows_in_file = 0; - - reader.reset(); - pipeline.reset(); - input_format.reset(); - read_buf.reset(); - - if (!initialize()) - break; - } - return {}; -} - -void HDFSSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - auto cache_key = getKeyForSchemaCache(path, storage->format_name, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional HDFSSource::tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info) -{ - auto cache_key = getKeyForSchemaCache(path_with_info.path, storage->format_name, std::nullopt, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - return std::nullopt; - }; - - return StorageHDFS::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class HDFSSink : public SinkToStorage -{ -public: - HDFSSink(const String & uri, - const String & format, - const Block & sample_block, - const ContextPtr & context, - const CompressionMethod compression_method) - : SinkToStorage(sample_block) - { - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - uri, context->getGlobalContext()->getConfigRef(), context->getSettingsRef().hdfs_replication, context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); - } - - String getName() const override { return "HDFSSink"; } - - void consume(Chunk & chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->sync(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. - release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - std::unique_ptr write_buf; - OutputFormatPtr writer; - std::mutex cancel_mutex; - bool cancelled = false; -}; - -namespace -{ - std::optional checkAndGetNewFileOnInsertIfNeeded(const ContextPtr & context, const String & uri, size_t sequence_number) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - if (context->getSettingsRef().hdfs_truncate_on_insert || hdfsExists(fs.get(), path_from_uri.c_str())) - return std::nullopt; - - if (context->getSettingsRef().hdfs_create_new_file_on_insert) - { - auto pos = uri.find_first_of('.', uri.find_last_of('/')); - String new_uri; - do - { - new_uri = uri.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : uri.substr(pos)); - ++sequence_number; - } - while (!hdfsExists(fs.get(), new_uri.c_str())); - - return new_uri; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "File with path {} already exists. If you want to overwrite it, enable setting hdfs_truncate_on_insert, " - "if you want to create new file on each insert, enable setting hdfs_create_new_file_on_insert", - path_from_uri); - } -} - -class PartitionedHDFSSink : public PartitionedSink -{ -public: - PartitionedHDFSSink( - const ASTPtr & partition_by, - const String & uri_, - const String & format_, - const Block & sample_block_, - ContextPtr context_, - const CompressionMethod compression_method_) - : PartitionedSink(partition_by, context_, sample_block_) - , uri(uri_) - , format(format_) - , sample_block(sample_block_) - , context(context_) - , compression_method(compression_method_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto path = PartitionedSink::replaceWildcards(uri, partition_id); - PartitionedSink::validatePartitionKey(path, true); - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(context, path, 1)) - path = *new_path; - return std::make_shared(path, format, sample_block, context, compression_method); - } - -private: - const String uri; - const String format; - const Block sample_block; - ContextPtr context; - const CompressionMethod compression_method; -}; - - -bool StorageHDFS::supportsSubsetOfColumns(const ContextPtr & context_) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context_); -} - -class ReadFromHDFS : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromHDFS"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromHDFS( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - ReadFromFormatInfo info_, - bool need_only_count_, - std::shared_ptr storage_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter( - DataStream{.header = std::move(sample_block)}, - column_names_, - query_info_, - storage_snapshot_, - context_) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , storage(std::move(storage_)) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - ReadFromFormatInfo info; - const bool need_only_count; - std::shared_ptr storage; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromHDFS::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageHDFS::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context_, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_)); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && context_->getSettingsRef().optimize_count_from_files; - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - context_, - read_from_format_info.source_header, - std::move(read_from_format_info), - need_only_count, - std::move(this_ptr), - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared( - [callback = context->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo { - return StorageHDFS::PathWithInfo{callback(), std::nullopt}; - }); - } - else if (storage->is_path_with_globs) - { - /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->getVirtualsList(), context); - iterator_wrapper = std::make_shared([glob_iterator]() - { - return glob_iterator->next(); - }); - } - else - { - auto uris_iterator = std::make_shared(storage->uris, predicate, storage->getVirtualsList(), context); - iterator_wrapper = std::make_shared([uris_iterator]() - { - return uris_iterator->next(); - }); - } -} - -void ReadFromHDFS::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - Pipes pipes; - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - storage, - context, - max_block_size, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, bool /*async_insert*/) -{ - String current_uri = uris.front(); - - bool has_wildcards = current_uri.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; - const auto * insert_query = dynamic_cast(query.get()); - auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && has_wildcards; - - if (is_partitioned_implementation) - { - String path = current_uri.substr(current_uri.find('/', current_uri.find("//") + 2)); - if (PartitionedSink::replaceWildcards(path, "").find_first_of("*?{") != std::string::npos) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "URI '{}' contains globs, so the table is in readonly mode", uris.back()); - - return std::make_shared( - partition_by_ast, - current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } - else - { - if (is_path_with_globs) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "URI '{}' contains globs, so the table is in readonly mode", uris.back()); - - if (auto new_uri = checkAndGetNewFileOnInsertIfNeeded(context_, uris.front(), uris.size())) - { - uris.push_back(*new_uri); - current_uri = *new_uri; - } - - return std::make_shared(current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } -} - -void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - const size_t begin_of_path = uris[0].find('/', uris[0].find("//") + 2); - const String url = uris[0].substr(0, begin_of_path); - - HDFSBuilderWrapper builder = createHDFSBuilder(url + "/", local_context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - - for (const auto & uri : uris) - { - const String path = uri.substr(begin_of_path); - int ret = hdfsDelete(fs.get(), path.data(), 0); - if (ret) - throw Exception(ErrorCodes::ACCESS_DENIED, "Unable to truncate hdfs table: {}", std::string(hdfsGetLastError())); - } -} - - -void registerStorageHDFS(StorageFactory & factory) -{ - factory.registerStorage("HDFS", [](const StorageFactory::Arguments & args) - { - ASTs & engine_args = args.engine_args; - - if (engine_args.empty() || engine_args.size() > 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage HDFS requires 1, 2 or 3 arguments: " - "url, name of used format (taken from file extension by default) and optional compression method."); - - engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); - - String url = checkAndGetLiteralArgument(engine_args[0], "url"); - - String format_name = "auto"; - if (engine_args.size() > 1) - { - engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); - format_name = checkAndGetLiteralArgument(engine_args[1], "format_name"); - } - - if (format_name == "auto") - format_name = FormatFactory::instance().tryGetFormatFromFileName(url).value_or("auto"); - - String compression_method; - if (engine_args.size() == 3) - { - engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.getLocalContext()); - compression_method = checkAndGetLiteralArgument(engine_args[2], "compression_method"); - } else compression_method = "auto"; - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - url, args.table_id, format_name, args.columns, args.constraints, args.comment, args.getContext(), compression_method, false, partition_by); - }, - { - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::HDFS, - }); -} - -SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -} - -#endif diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index f92c6ae67c9..809401bb279 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1099,7 +1099,13 @@ bool StorageKafka::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true, false); + InterpreterInsertQuery interpreter( + insert, + kafka_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index 57d75b969c3..ff5214a5e51 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -377,7 +377,13 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptr(); - if (storage.getDeduplicationLog()) - { - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); - if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo has to be initialized with user token for table: {}, user dedup token {}", - storage.getStorageID().getNameForLogs(), - context->getSettingsRef().insert_deduplication_token.value); - - if (token_info->tokenInitialized()) - block_dedup_token = token_info->getToken(); - } + String block_dedup_token; + if (token_info->tokenInitialized()) + block_dedup_token = token_info->getToken(); for (auto & current_block : part_blocks) { @@ -161,7 +152,6 @@ void MergeTreeSink::consume(Chunk & chunk) partitions = DelayedPartitions{}; } - /// TODO block_dedup_token partitions.emplace_back(MergeTreeSink::DelayedChunk::Partition { .temp_part = std::move(temp_part), diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index cf3af59118e..b15b80864e5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -304,12 +304,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); - if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo has to be initialized with user token for table: {} user dedup token {}", - storage.getStorageID().getNameForLogs(), - context->getSettingsRef().insert_deduplication_token.value); - if (token_info->tokenInitialized()) block_dedup_token = token_info->getToken(); } diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 9c6d70f2c5b..8f0e2d76473 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -644,7 +644,13 @@ bool StorageNATS::streamToViews() insert->table_id = table_id; // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, nats_context, false, true, true, false); + InterpreterInsertQuery interpreter( + insert, + nats_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 57c8d24ccc2..44479bd01e2 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -697,7 +697,13 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->table_id = storage->getStorageID(); insert->columns = std::make_shared(buffer->columns_ast); - InterpreterInsertQuery interpreter(insert, insert_context, true, false, false, false); + InterpreterInsertQuery interpreter( + insert, + insert_context, + /* allow_materialized */ true, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto io = interpreter.execute(); auto input = std::make_shared( result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 4a5a621aa43..f632e553a0d 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -437,7 +437,13 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection auto insert_context = materialized_storage->getNestedTableContext(); - InterpreterInsertQuery interpreter(insert, insert_context, false, false, false, false); + InterpreterInsertQuery interpreter( + insert, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interpreter.execute(); const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 5bf5ab9b2f5..f3d2aff68c8 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1129,7 +1129,13 @@ bool StorageRabbitMQ::tryStreamToViews() } // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true, false); + InterpreterInsertQuery interpreter( + insert, + rabbitmq_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index b9aa7881bdd..d1607843364 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -477,7 +477,13 @@ bool StorageS3Queue::streamToViews() while (!shutdown_called && !file_iterator->isFinished()) { - InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true, false); + InterpreterInsertQuery interpreter( + insert, + s3queue_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); auto read_from_format_info = prepareReadingFromFormat( block_io.pipeline.getHeader().getNames(), diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 10eecd63e3c..b064fba223a 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -1020,7 +1020,13 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl auto insert_context = Context::createCopy(getContext()); insert_context->makeQueryContext(); - InterpreterInsertQuery interpreter(insert, insert_context, allow_materialized, false, false, false); + InterpreterInsertQuery interpreter( + insert, + insert_context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interpreter.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 1c129e34170..67586985ce8 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1050,7 +1050,13 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter(new_query, query_context, false, false, false, false); + InterpreterInsertQuery interpreter( + new_query, + query_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); pipeline.addCompletedPipeline(interpreter.execute().pipeline); } else diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 65aa06f8506..b1dd5f8a114 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -690,7 +690,13 @@ inline void StorageWindowView::fire(UInt32 watermark) StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); - InterpreterInsertQuery interpreter(insert, getContext(), false, false, false, false); + InterpreterInsertQuery interpreter( + insert, + getContext(), + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interpreter.execute(); auto pipe = Pipe(std::make_shared(blocks, header)); @@ -1548,11 +1554,12 @@ void StorageWindowView::writeIntoWindowView( return std::make_shared(stream_header); }); +#ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared("StorageWindowView: Afrer tmp table before squasing", true, stream_header); + return std::make_shared("StorageWindowView: Afrer tmp table before squashing", true, stream_header); }); - +#endif builder.addSimpleTransform([&](const Block & current_header) { @@ -1593,10 +1600,12 @@ void StorageWindowView::writeIntoWindowView( lateness_upper_bound); }); +#ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { return std::make_shared("StorageWindowView: Afrer WatermarkTransform", true, stream_header); }); +#endif auto inner_table = window_view.getInnerTable(); auto lock = inner_table->lockForShare( @@ -1617,10 +1626,12 @@ void StorageWindowView::writeIntoWindowView( builder.addSimpleTransform([&](const Block & header_) { return std::make_shared(header_, convert_actions); }); } +#ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { return std::make_shared("StorageWindowView: Before out", true, stream_header); }); +#endif builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) From a38f8d6c459ed597ce60de0108ad79dac6044b37 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 26 Jun 2024 21:03:53 +0200 Subject: [PATCH 066/141] rework TokenInfo::BuildingStage --- .../DeduplicationTokenTransforms.cpp | 61 +++++++++++-------- .../Transforms/DeduplicationTokenTransforms.h | 54 ++++++++++------ src/Storages/MergeTree/MergeTreeSink.cpp | 10 +-- .../MergeTree/ReplicatedMergeTreeSink.cpp | 10 +-- 4 files changed, 82 insertions(+), 53 deletions(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 23e32415f6a..10c21249ebc 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -28,8 +28,8 @@ namespace DeduplicationToken String TokenInfo::getToken() const { - if (stage != VIEW_ID) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + if (!isDefined()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is not defined, stage {}, token {}", stage, debugToken()); return getTokenImpl(); } @@ -54,59 +54,70 @@ String TokenInfo::debugToken() const return getTokenImpl(); } - -void TokenInfo::addPieceToInitialToken(String part) +void TokenInfo::addChunkHash(String part) { - if (stage != INITIAL) + if (stage == UNDEFINED) + stage = DEFINE_SOURCE_WITH_HASHES; + + if (stage != DEFINE_SOURCE_WITH_HASHES) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + addTokenPart(std::move(part)); } -void TokenInfo::closeInitialToken() +void TokenInfo::defineSourceWithChunkHashes() { - chassert(stage == INITIAL); - stage = VIEW_ID; + if (stage != DEFINE_SOURCE_WITH_HASHES) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + stage = DEFINED; } void TokenInfo::setUserToken(const String & token) { - if (stage != INITIAL) + if (stage == UNDEFINED) + stage = DEFINE_SOURCE_USER_TOKEN; + + if (stage != DEFINE_SOURCE_USER_TOKEN) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(fmt::format("user-token-{}", token)); - stage = SOURCE_BLOCK_NUMBER; } -void TokenInfo::setSourceBlockNumber(size_t block_number) +void TokenInfo::defineSourceWithUserToken(size_t block_number) { - if (stage != SOURCE_BLOCK_NUMBER) + if (stage != DEFINE_SOURCE_USER_TOKEN) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(fmt::format("source-number-{}", block_number)); - stage = VIEW_ID; + + stage = DEFINED; } void TokenInfo::setViewID(const String & id) { - if (stage != VIEW_ID) + if (stage == DEFINED) + stage = DEFINE_VIEW; + + if (stage != DEFINE_VIEW) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(fmt::format("view-id-{}", id)); - stage = VIEW_BLOCK_NUMBER; } -void TokenInfo::setViewBlockNumber(size_t block_number) +void TokenInfo::defineViewID(size_t block_number) { - if (stage != VIEW_BLOCK_NUMBER) + if (stage != DEFINE_VIEW) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(fmt::format("view-block-{}", block_number)); - stage = VIEW_ID; + + stage = DEFINED; } void TokenInfo::reset() { - stage = INITIAL; + stage = UNDEFINED; parts.clear(); } @@ -145,7 +156,7 @@ void CheckTokenTransform::transform(Chunk & chunk) } #endif -String SetInitialTokenTransform::getInitialToken(const Chunk & chunk) +String SetInitialTokenTransform::getChunkHash(const Chunk & chunk) { SipHash hash; for (const auto & colunm : chunk.getColumns()) @@ -165,11 +176,11 @@ void SetInitialTokenTransform::transform(Chunk & chunk) ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetInitialTokenTransform"); - if (token_info->tokenInitialized()) + if (token_info->isDefined()) return; - token_info->addPieceToInitialToken(getInitialToken(chunk)); - token_info->closeInitialToken(); + token_info->addChunkHash(getChunkHash(chunk)); + token_info->defineSourceWithChunkHashes(); } void SetUserTokenTransform::transform(Chunk & chunk) @@ -189,7 +200,7 @@ void SetSourceBlockNumberTransform::transform(Chunk & chunk) throw Exception( ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); - token_info->setSourceBlockNumber(block_number++); + token_info->defineSourceWithUserToken(block_number++); } void SetViewIDTransform::transform(Chunk & chunk) @@ -209,7 +220,7 @@ void SetViewBlockNumberTransform::transform(Chunk & chunk) throw Exception( ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); - token_info->setViewBlockNumber(block_number++); + token_info->defineViewID(block_number++); } void ResetTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index ebbbb0f7590..416d4bb5f62 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -38,14 +38,18 @@ namespace DeduplicationToken String debugToken() const; bool empty() const { return parts.empty(); } - bool tokenInitialized() const { return stage != INITIAL && stage != SOURCE_BLOCK_NUMBER; } - void addPieceToInitialToken(String part); - void closeInitialToken(); + bool isDefined() const { return stage == DEFINED; } + + void addChunkHash(String part); + void defineSourceWithChunkHashes(); + void setUserToken(const String & token); - void setSourceBlockNumber(size_t block_number); + void defineSourceWithUserToken(size_t block_number); + void setViewID(const String & id); - void setViewBlockNumber(size_t block_number); + void defineViewID(size_t block_number); + void reset(); private: @@ -54,23 +58,37 @@ namespace DeduplicationToken void addTokenPart(String part); size_t getTotalSize() const; - /* Token has to be prepared in a particular order. BuildingStage ensure that token is expanded according the foloving order. - * Firstly token has expand with information about the souce. - * INITIAL -- in that stage token is expanded with several hash sums or with the user defined deduplication token. - * SOURCE_BLOCK_NUMBER -- when token is expand with user defined deduplication token, after token has to be expanded with source block number. - * After that token is considered as prepared for usage, hovewer it could be expanded with following details: - * VIEW_ID -- in that stage token is expanded with view id, token could not be used until nex stage is passed. - * VIEW_BLOCK_NUMBER - in that stage token is expanded with view block number. + /* Token has to be prepared in a particular order. + * BuildingStage ensures that token is expanded according the foloving order. + * Firstly token is expanded with information about the source. + * It could be done with two ways: add several hash sums from the source chunks or provide user defined deduplication token and its sequentional block number. + * + * transition // method + * UNDEFINED -> DEFINE_SOURCE_WITH_HASHES // addChunkHash + * DEFINE_SOURCE_WITH_HASHES -> DEFINE_SOURCE_WITH_HASHES // addChunkHash + * DEFINE_SOURCE_WITH_HASHES -> DEFINED // defineSourceWithChankHashes + * + * transition // method + * UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken + * DEFINE_SOURCE_USER_TOKEN -> DEFINED // defineSourceWithUserToken + * + * After token is define it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. + * + * transition // method + * DEFINED -> DEFINE_VIEW // setViewID + * DEFINE_VIEW -> DEFINED // defineViewID */ + enum BuildingStage { - INITIAL, - SOURCE_BLOCK_NUMBER, - VIEW_ID, - VIEW_BLOCK_NUMBER, + UNDEFINED, + DEFINE_SOURCE_WITH_HASHES, + DEFINE_SOURCE_USER_TOKEN, + DEFINE_VIEW, + DEFINED, }; - BuildingStage stage = INITIAL; + BuildingStage stage = UNDEFINED; std::vector parts; }; @@ -128,7 +146,7 @@ namespace DeduplicationToken void transform(Chunk & chunk) override; - static String getInitialToken(const Chunk & chunk); + static String getChunkHash(const Chunk & chunk); }; class ResetTokenTransform : public ISimpleTransform diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 7cb89fa7239..532fa718efd 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -92,7 +92,7 @@ void MergeTreeSink::consume(Chunk & chunk) storage.getStorageID().getNameForLogs()); String block_dedup_token; - if (token_info->tokenInitialized()) + if (token_info->isDefined()) block_dedup_token = token_info->getToken(); for (auto & current_block : part_blocks) @@ -119,10 +119,10 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if (!token_info->tokenInitialized()) + if (!token_info->isDefined()) { chassert(temp_part.part); - token_info->addPieceToInitialToken(temp_part.part->getPartBlockIDHash()); + token_info->addChunkHash(temp_part.part->getPartBlockIDHash()); } if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) @@ -161,9 +161,9 @@ void MergeTreeSink::consume(Chunk & chunk) }); } - if (!token_info->tokenInitialized()) + if (!token_info->isDefined()) { - token_info->closeInitialToken(); + token_info->defineSourceWithChunkHashes(); } finishDelayedChunk(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index b15b80864e5..228b5c596ab 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -304,7 +304,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); - if (token_info->tokenInitialized()) + if (token_info->isDefined()) block_dedup_token = token_info->getToken(); } @@ -371,10 +371,10 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - if (!token_info->tokenInitialized()) + if (!token_info->isDefined()) { chassert(temp_part.part); - token_info->addPieceToInitialToken(temp_part.part->getPartBlockIDHash()); + token_info->addChunkHash(temp_part.part->getPartBlockIDHash()); } } @@ -421,9 +421,9 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } - if (!token_info->tokenInitialized()) + if (!token_info->isDefined()) { - token_info->closeInitialToken(); + token_info->defineSourceWithChunkHashes(); } finishDelayedChunk(zookeeper); From 8efa045a97517bbcf28b80c178e9df84d92973b2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 27 Jun 2024 00:09:55 +0200 Subject: [PATCH 067/141] fix resolving conflicts with squashing --- src/Interpreters/Squashing.cpp | 86 ++++++++++--------- src/Interpreters/Squashing.h | 29 ++++--- .../Transforms/ApplySquashingTransform.h | 16 +--- .../Transforms/PlanSquashingTransform.cpp | 21 ++--- .../Transforms/PlanSquashingTransform.h | 2 +- .../Transforms/SquashingTransform.cpp | 22 ++--- .../Transforms/SquashingTransform.h | 1 - src/Server/TCPHandler.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- 9 files changed, 85 insertions(+), 96 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index bf363a21400..dbf16452287 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -10,22 +10,24 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -Squashing::Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_) +Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) + , header(header_) { } Chunk Squashing::flush() { - decltype(chunks_to_merge_vec) to_convert; - to_convert.swap(chunks_to_merge_vec); - return convertToChunk(std::move(to_convert)); + if (!accumulated) + return {}; + + return convertToChunk(accumulated.extract()); } Chunk Squashing::squash(Chunk && input_chunk) { - if (input_chunk.getChunkInfos().empty()) + if (!input_chunk) return Chunk(); auto squash_info = input_chunk.getChunkInfos().extract(); @@ -42,48 +44,39 @@ Chunk Squashing::add(Chunk && input_chunk) return {}; /// Just read block is already enough. - if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes())) + if (isEnoughSize(input_chunk)) { /// If no accumulated data, return just read block. - if (chunks_to_merge_vec.empty()) + if (!accumulated) { - chunks_to_merge_vec.push_back(std::move(input_chunk)); - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - return res_chunk; + accumulated.add(std::move(input_chunk)); + return convertToChunk(accumulated.extract()); } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(accumulated.extract()); + accumulated.add(std::move(input_chunk)); return res_chunk; } /// Accumulated block is already enough. - if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + if (isEnoughSize()) { /// Return accumulated data and place new block to accumulated data. - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(accumulated.extract()); + accumulated.add(std::move(input_chunk)); return res_chunk; } /// Pushing data into accumulating vector - expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + accumulated.add(std::move(input_chunk)); /// If accumulated data is big enough, we send it - if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + if (isEnoughSize()) { - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - changeCurrentSize(0, 0); - chunks_to_merge_vec.clear(); - return res_chunk; + return convertToChunk(accumulated.extract()); } + return {}; } @@ -95,7 +88,8 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const auto info = std::make_shared(); info->chunks = std::move(chunks); - auto aggr_chunk = Chunk(); + // It is imortant that chunk is not empty, it has to have colums even if they are emty + auto aggr_chunk = Chunk(header.getColumns(), 0); aggr_chunk.getChunkInfos().add(std::move(info)); return aggr_chunk; @@ -136,22 +130,34 @@ Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl return accumulated_chunk; } -void Squashing::expandCurrentSize(size_t rows, size_t bytes) -{ - accumulated_size.rows += rows; - accumulated_size.bytes += bytes; -} - -void Squashing::changeCurrentSize(size_t rows, size_t bytes) -{ - accumulated_size.rows = rows; - accumulated_size.bytes = bytes; -} - bool Squashing::isEnoughSize(size_t rows, size_t bytes) const { return (!min_block_size_rows && !min_block_size_bytes) || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } + +bool Squashing::isEnoughSize() const +{ + return isEnoughSize(accumulated.getRows(), accumulated.getBytes()); +}; + +bool Squashing::isEnoughSize(const Chunk & chunk) const +{ + return isEnoughSize(chunk.getNumRows(), chunk.bytes()); +} + +void Squashing::CurrentSize::add(Chunk && chunk) +{ + rows += chunk.getNumRows(); + bytes += chunk.bytes(); + chunks.push_back(std::move(chunk)); +} + +std::vector Squashing::CurrentSize::extract() +{ + auto result = std::move(chunks); + *this = {}; + return result; +} } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 08535119241..830d621b43b 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -38,36 +38,39 @@ public: class Squashing { public: - explicit Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_); + explicit Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); Squashing(Squashing && other) = default; Chunk add(Chunk && input_chunk); static Chunk squash(Chunk && input_chunk); Chunk flush(); - bool isDataLeft() - { - return !chunks_to_merge_vec.empty(); - } - private: - struct CurrentSize + class CurrentSize { + std::vector chunks = {}; size_t rows = 0; size_t bytes = 0; + + public: + explicit operator bool () const { return !chunks.empty(); } + size_t getRows() const { return rows; } + size_t getBytes() const { return bytes; } + void add(Chunk && chunk); + std::vector extract(); }; - std::vector chunks_to_merge_vec = {}; - size_t min_block_size_rows; - size_t min_block_size_bytes; + const size_t min_block_size_rows; + const size_t min_block_size_bytes; + const Block header; - CurrentSize accumulated_size; + CurrentSize accumulated; static Chunk squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos); - void expandCurrentSize(size_t rows, size_t bytes); - void changeCurrentSize(size_t rows, size_t bytes); + bool isEnoughSize() const; bool isEnoughSize(size_t rows, size_t bytes) const; + bool isEnoughSize(const Chunk & chunk) const; Chunk convertToChunk(std::vector && chunks) const; }; diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 51bc69f6b9b..94b890198d4 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -11,7 +11,7 @@ class ApplySquashingTransform : public ExceptionKeepingTransform public: explicit ApplySquashingTransform(const Block & header, const size_t min_block_size_rows, const size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } @@ -27,18 +27,12 @@ public: } ExceptionKeepingTransform::work(); - if (finish_chunk) - { - data.chunk = std::move(finish_chunk); - ready_output = true; - } } protected: void onConsume(Chunk chunk) override { - if (auto res_chunk = DB::Squashing::squash(std::move(chunk))) - cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); + cur_chunk = DB::Squashing::squash(std::move(chunk)); } GenerateResult onGenerate() override @@ -48,16 +42,10 @@ protected: res.is_done = true; return res; } - void onFinish() override - { - auto chunk = DB::Squashing::squash({}); - finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); - } private: Squashing squashing; Chunk cur_chunk; - Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index d1d3fcd3205..6a8cd10027e 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -1,4 +1,6 @@ #include +#include "Common/Logger.h" +#include "Common/logger_useful.h" #include namespace DB @@ -10,22 +12,22 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header, header) - , squashing(min_block_size_rows, min_block_size_bytes) + Block header_, size_t min_block_size_rows, size_t min_block_size_bytes) + : IInflatingTransform(header_, header_) + , squashing(header_, min_block_size_rows, min_block_size_bytes) { } void PlanSquashingTransform::consume(Chunk chunk) { - Chunk result = squashing.add(std::move(chunk)); - if (!result.getChunkInfos().empty()) - squashed_chunk = std::move(result); + LOG_DEBUG(getLogger("PlanSquashingTransform"), "consume {}", chunk.getNumRows()); + + squashed_chunk = squashing.add(std::move(chunk)); } Chunk PlanSquashingTransform::generate() { - if (squashed_chunk.getChunkInfos().empty()) + if (!squashed_chunk) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); Chunk result_chunk; @@ -35,12 +37,11 @@ Chunk PlanSquashingTransform::generate() bool PlanSquashingTransform::canGenerate() { - return !squashed_chunk.getChunkInfos().empty(); + return bool(squashed_chunk); } Chunk PlanSquashingTransform::getRemaining() { - Chunk current_chunk = squashing.flush(); - return current_chunk; + return squashing.flush(); } } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index 4ad2ec2d089..1f83e62284d 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -10,7 +10,7 @@ class PlanSquashingTransform : public IInflatingTransform { public: PlanSquashingTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + Block header_, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "PlanSquashingTransform"; } diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 1e3798e89c8..e457a262681 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -12,15 +12,13 @@ extern const int LOGICAL_ERROR; SquashingTransform::SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } void SquashingTransform::onConsume(Chunk chunk) { - Chunk planned_chunk = squashing.add(std::move(chunk)); - if (!planned_chunk.getChunkInfos().empty()) - cur_chunk = DB::Squashing::squash(std::move(planned_chunk)); + cur_chunk = DB::Squashing::squash(squashing.add(std::move(chunk))); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -33,10 +31,7 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - Chunk chunk = squashing.flush(); - if (!chunk.getChunkInfos().empty()) - chunk = DB::Squashing::squash(std::move(chunk)); - finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); + finish_chunk = DB::Squashing::squash(squashing.flush()); } void SquashingTransform::work() @@ -49,6 +44,7 @@ void SquashingTransform::work() } ExceptionKeepingTransform::work(); + if (finish_chunk) { data.chunk = std::move(finish_chunk); @@ -59,7 +55,7 @@ void SquashingTransform::work() SimpleSquashingTransform::SimpleSquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } @@ -67,18 +63,14 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - Chunk planned_chunk = squashing.add(std::move(chunk)); - if (!planned_chunk.getChunkInfos().empty()) - chunk = DB::Squashing::squash(std::move(planned_chunk)); + chunk = DB::Squashing::squash(squashing.add(std::move(chunk))); } else { if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - chunk = squashing.flush(); - if (!chunk.getChunkInfos().empty()) - chunk = DB::Squashing::squash(std::move(chunk)); + chunk = DB::Squashing::squash(squashing.flush()); } } diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h index 9d1591d0bcd..c5b727ac6ec 100644 --- a/src/Processors/Transforms/SquashingTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -26,7 +26,6 @@ protected: private: Squashing squashing; Chunk cur_chunk; - Chunk::ChunkInfoCollection cur_chunkinfos; Chunk finish_chunk; }; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index bc1487acefa..22d2c4eeebc 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -884,7 +884,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - Squashing squashing(0, query_context->getSettingsRef().async_insert_max_data_size); + Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); Block header = state.input_header; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a8334f22272..0beeca0d542 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1287,7 +1287,7 @@ void PartMergerWriter::prepare() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { // We split the materialization into multiple stages similar to the process of INSERT SELECT query. - projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + projection_squashes.emplace_back(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } existing_rows_count = 0; From bc31f851273e719658ef70371f6b684f4e1c0e69 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 27 Jun 2024 00:29:11 +0200 Subject: [PATCH 068/141] fix style --- src/Interpreters/Squashing.cpp | 2 +- src/Interpreters/TreeRewriter.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index dbf16452287..971f0102148 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -88,7 +88,7 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const auto info = std::make_shared(); info->chunks = std::move(chunks); - // It is imortant that chunk is not empty, it has to have colums even if they are emty + // It is imortant that chunk is not empty, it has to have columns even if they are empty auto aggr_chunk = Chunk(header.getColumns(), 0); aggr_chunk.getChunkInfos().add(std::move(info)); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index a3c5a7ed3ed..6ce6f5e454e 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1188,7 +1188,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } - /// Check for dynamic subcolums in unknown required columns. + /// Check for dynamic subcolumns in unknown required columns. if (!unknown_required_source_columns.empty()) { for (const NameAndTypePair & pair : source_columns_ordinary) From d485606e9420eec3e617e5ec49a1c1ac16478a85 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 27 Jun 2024 02:09:45 +0200 Subject: [PATCH 069/141] fix header in async insert and projections --- src/Interpreters/Squashing.cpp | 22 ++++++++++++---------- src/Interpreters/Squashing.h | 5 ++++- src/Server/TCPHandler.cpp | 22 ++++++++-------------- src/Storages/MergeTree/MutateTask.cpp | 18 +++++++----------- 4 files changed, 31 insertions(+), 36 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 971f0102148..2b808e25fbb 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,6 +1,7 @@ #include #include #include +#include "base/defines.h" namespace DB @@ -22,7 +23,9 @@ Chunk Squashing::flush() if (!accumulated) return {}; - return convertToChunk(accumulated.extract()); + auto result = convertToChunk(accumulated.extract()); + chassert(result); + return result; } Chunk Squashing::squash(Chunk && input_chunk) @@ -73,9 +76,7 @@ Chunk Squashing::add(Chunk && input_chunk) /// If accumulated data is big enough, we send it if (isEnoughSize()) - { return convertToChunk(accumulated.extract()); - } return {}; } @@ -91,7 +92,7 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const // It is imortant that chunk is not empty, it has to have columns even if they are empty auto aggr_chunk = Chunk(header.getColumns(), 0); aggr_chunk.getChunkInfos().add(std::move(info)); - + chassert(aggr_chunk); return aggr_chunk; } @@ -118,16 +119,17 @@ Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) { const auto source_column = columns[j]; - mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); } } - Chunk accumulated_chunk; - accumulated_chunk.setColumns(std::move(mutable_columns), rows); - accumulated_chunk.setChunkInfos(infos); - accumulated_chunk.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos())); - return accumulated_chunk; + Chunk result; + result.setColumns(std::move(mutable_columns), rows); + result.setChunkInfos(infos); + result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos())); + + chassert(result); + return result; } bool Squashing::isEnoughSize(size_t rows, size_t bytes) const diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 830d621b43b..64a9768a71f 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -45,6 +45,9 @@ public: static Chunk squash(Chunk && input_chunk); Chunk flush(); + void setHeader(Block header_) { header = std::move(header_); } + const Block & getHeader() const { return header; } + private: class CurrentSize { @@ -62,7 +65,7 @@ private: const size_t min_block_size_rows; const size_t min_block_size_bytes; - const Block header; + Block header; CurrentSize accumulated; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 22d2c4eeebc..fd226db5bb1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -886,16 +886,13 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro startInsertQuery(); Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); - Block header = state.input_header; - while (readDataNext()) { - header = state.block_for_insert.cloneEmpty(); - auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); - if (!planned_chunk.getChunkInfos().empty()) + squashing.setHeader(state.block_for_insert.cloneEmpty()); + auto result_chunk = DB::Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); + if (result_chunk) { - Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = header.cloneWithColumns(result_chunk.detachColumns()); + auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -904,16 +901,13 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - auto planned_chunk = squashing.flush(); - if (planned_chunk.getChunkInfos().empty()) + Chunk result_chunk = DB::Squashing::squash(squashing.flush()); + if (!result_chunk) { - return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(header), query_context); + return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context); } - Chunk result_chunk; - result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = header.cloneWithColumns(result_chunk.detachColumns()); + auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 0beeca0d542..5da36b6ee3b 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1315,14 +1315,12 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); - projection_header = block_to_squash.cloneEmpty(); + projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); - if (!planned_chunk.getChunkInfos().empty()) + Chunk squashed_chunk = DB::Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); + if (squashed_chunk) { - Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = projection_header.cloneWithColumns(projection_chunk.detachColumns()); + auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); @@ -1343,12 +1341,10 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squashes[i]; - auto planned_chunk = projection_squash_plan.flush(); - if (!planned_chunk.getChunkInfos().empty()) + auto squashed_chunk = DB::Squashing::squash(projection_squash_plan.flush()); + if (squashed_chunk) { - Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = projection_header.cloneWithColumns(projection_chunk.detachColumns()); + auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); From 5ddb9b11f487b3f13cb6d7e1d69e22779b6a745f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 27 Jun 2024 14:33:37 +0000 Subject: [PATCH 070/141] remove unwanted changes --- src/Backups/BackupIO_AzureBlobStorage.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 0ee0160a969..cee41861d70 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -36,7 +36,7 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getContainer(), false, false} + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false} , connection_params(connection_params_) , blob_path(blob_path_) { @@ -128,7 +128,7 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( const ContextPtr & context_, bool attempt_to_create_container) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getContainer(), false, false} + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false} , connection_params(connection_params_) , blob_path(blob_path_) { From 0220a3cac74ad0e96244c68a00a674a41dfb47c4 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 27 Jun 2024 17:38:15 +0200 Subject: [PATCH 071/141] fix tests --- src/Processors/Transforms/DeduplicationTokenTransforms.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 10c21249ebc..bcb8ee94f7a 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -67,6 +67,9 @@ void TokenInfo::addChunkHash(String part) void TokenInfo::defineSourceWithChunkHashes() { + if (stage == UNDEFINED && empty()) + stage = DEFINE_SOURCE_WITH_HASHES; + if (stage != DEFINE_SOURCE_WITH_HASHES) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); From 9fa5764c9e330a0c7b21427b5e1972b55951d850 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Thu, 27 Jun 2024 21:57:14 +0200 Subject: [PATCH 072/141] Update src/Processors/Transforms/DeduplicationTokenTransforms.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Processors/Transforms/DeduplicationTokenTransforms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 416d4bb5f62..c3944b8dd1d 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -72,7 +72,7 @@ namespace DeduplicationToken * UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken * DEFINE_SOURCE_USER_TOKEN -> DEFINED // defineSourceWithUserToken * - * After token is define it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. + * After token is defined, it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. * * transition // method * DEFINED -> DEFINE_VIEW // setViewID From 1c12c95b79d24e4fba9362d140910ac6a4d16f35 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Thu, 27 Jun 2024 21:57:24 +0200 Subject: [PATCH 073/141] Update src/Processors/Transforms/DeduplicationTokenTransforms.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Processors/Transforms/DeduplicationTokenTransforms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index c3944b8dd1d..9d087536a38 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -59,7 +59,7 @@ namespace DeduplicationToken size_t getTotalSize() const; /* Token has to be prepared in a particular order. - * BuildingStage ensures that token is expanded according the foloving order. + * BuildingStage ensures that token is expanded according the following order. * Firstly token is expanded with information about the source. * It could be done with two ways: add several hash sums from the source chunks or provide user defined deduplication token and its sequentional block number. * From cb3d0ed2757fc6de98fdb0bad1e74f83facd7c88 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Fri, 28 Jun 2024 02:20:35 +0200 Subject: [PATCH 074/141] Update StorageMaterializedView.cpp --- src/Storages/StorageMaterializedView.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 316f398b476..f9f627863dd 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -161,6 +161,7 @@ StorageMaterializedView::StorageMaterializedView( manual_create_query->setDatabase(getStorageID().database_name); manual_create_query->setTable(generateInnerTableName(getStorageID())); manual_create_query->uuid = query.to_inner_uuid; + manual_create_query->has_uuid = true; auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); From b16451ad8946fdeb93ca259af083467853b6ac22 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Fri, 28 Jun 2024 02:28:07 +0200 Subject: [PATCH 075/141] Update StorageMaterializedView.cpp --- src/Storages/StorageMaterializedView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index f9f627863dd..ec1559b71a4 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -161,7 +161,7 @@ StorageMaterializedView::StorageMaterializedView( manual_create_query->setDatabase(getStorageID().database_name); manual_create_query->setTable(generateInnerTableName(getStorageID())); manual_create_query->uuid = query.to_inner_uuid; - manual_create_query->has_uuid = true; + manual_create_query->has_uuid = query.to_inner_uuid != UUIDHelpers::Nil; auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); From 31c65a40926d3d5209898f5efb5c1cf33b602133 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 28 Jun 2024 14:21:14 +0000 Subject: [PATCH 076/141] add settings to change history --- src/Core/SettingsChangesHistory.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 4ac25a649b7..1c5ad9d0875 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -87,6 +87,9 @@ namespace SettingsChangesHistory static const std::map settings_changes_history = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"}, + {"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"}, + {"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"}, }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, From 05dbb241f54dc2d691fbabe79c6df026ab24b64d Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Sat, 29 Jun 2024 01:35:59 +0200 Subject: [PATCH 077/141] adjust logging --- src/Interpreters/Squashing.cpp | 2 +- src/Processors/Transforms/DeduplicationTokenTransforms.cpp | 2 +- src/Processors/Transforms/PlanSquashingTransform.cpp | 4 ---- src/Processors/Transforms/PlanSquashingTransform.h | 1 - 4 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 2b808e25fbb..25434d1103e 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,7 +1,7 @@ #include #include #include -#include "base/defines.h" +#include namespace DB diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index bcb8ee94f7a..374a6495f79 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -155,7 +155,7 @@ void CheckTokenTransform::transform(Chunk & chunk) return; } - LOG_DEBUG(log, "{}, token: {}", debug, token_info->debugToken()); + LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken()); } #endif diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 6a8cd10027e..ee4dfa6a64e 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -1,6 +1,4 @@ #include -#include "Common/Logger.h" -#include "Common/logger_useful.h" #include namespace DB @@ -20,8 +18,6 @@ PlanSquashingTransform::PlanSquashingTransform( void PlanSquashingTransform::consume(Chunk chunk) { - LOG_DEBUG(getLogger("PlanSquashingTransform"), "consume {}", chunk.getNumRows()); - squashed_chunk = squashing.add(std::move(chunk)); } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index 1f83e62284d..e6db245499e 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -23,7 +23,6 @@ protected: private: Squashing squashing; Chunk squashed_chunk; - Chunk finish_chunk; }; } From 004d913c565cc0646222601d6a98789b77d92938 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sun, 30 Jun 2024 02:14:28 +0000 Subject: [PATCH 078/141] change option to enum and add test --- src/Core/Settings.h | 2 +- src/Core/SettingsEnums.cpp | 4 ++++ src/Core/SettingsEnums.h | 8 ++++++++ src/Interpreters/InterpreterDeleteQuery.cpp | 6 +++--- .../0_stateless/03161_lightweight_delete_projection.sql | 7 +++++++ 5 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 513cdf9f9a2..574017a6953 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -612,7 +612,7 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ - M(String, lightweight_mutation_projection_mode, "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \ + M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 05985316566..9dfff3c56ca 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -173,6 +173,10 @@ IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGU {{"default", ParallelReplicasCustomKeyFilterType::DEFAULT}, {"range", ParallelReplicasCustomKeyFilterType::RANGE}}) +IMPLEMENT_SETTING_ENUM(LightweightMutationProjectionMode, ErrorCodes::BAD_ARGUMENTS, + {{"throw", LightweightMutationProjectionMode::THROW}, + {"drop", LightweightMutationProjectionMode::DROP}}) + IMPLEMENT_SETTING_AUTO_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS) IMPLEMENT_SETTING_ENUM(ParquetVersion, ErrorCodes::BAD_ARGUMENTS, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 575cd8700c8..8456c4b688c 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -339,6 +339,14 @@ enum class ParallelReplicasCustomKeyFilterType : uint8_t DECLARE_SETTING_ENUM(ParallelReplicasCustomKeyFilterType) +enum class LightweightMutationProjectionMode : uint8_t +{ + THROW, + DROP, +}; + +DECLARE_SETTING_ENUM(LightweightMutationProjectionMode) + DECLARE_SETTING_ENUM(LocalFSReadMethod) enum class S3QueueMode : uint8_t diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 23bbd18ff51..39d5d9e9cef 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -116,15 +116,15 @@ BlockIO InterpreterDeleteQuery::execute() if (table->hasProjection()) { auto context = Context::createCopy(getContext()); - auto mode = Field(context->getSettingsRef().lightweight_mutation_projection_mode); - if (mode == "throw") + auto mode = context->getSettingsRef().lightweight_mutation_projection_mode; + if (mode == LightweightMutationProjectionMode::THROW) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DELETE query is not supported for table {} as it has projections. " "User should drop all the projections manually before running the query", table->getStorageID().getFullTableName()); } - else if (mode == "drop") + else if (mode == LightweightMutationProjectionMode::DROP) { std::vector all_projections = metadata_snapshot->projections.getAllRegisteredNames(); diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 786f6a3cc34..70a069df1bc 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -19,6 +19,13 @@ DELETE FROM users WHERE uid = 8888 SETTINGS lightweight_mutation_projection_mode DELETE FROM users WHERE uid = 6666 SETTINGS lightweight_mutation_projection_mode = 'drop'; +-- expecting no projection +SELECT + name, + `table` +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users'); + SELECT * FROM users; DROP TABLE users; From 6b47171f2c2a3f3ebaed692f6d30e644c42380db Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 1 Jul 2024 10:52:08 +0200 Subject: [PATCH 079/141] Keeper binary with different entrypoint --- docker/packager/packager | 5 +- programs/CMakeLists.txt | 20 +- programs/keeper/CMakeLists.txt | 196 +------------- programs/keeper/Keeper.cpp | 10 - programs/keeper/keeper_main.cpp | 443 ++++++++++++++++++++++++++++++++ 5 files changed, 460 insertions(+), 214 deletions(-) create mode 100644 programs/keeper/keeper_main.cpp diff --git a/docker/packager/packager b/docker/packager/packager index 2dcbd8d695e..da4af7fc1be 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -276,10 +276,7 @@ def parse_env_variables( if is_release_build(debug_build, package_type, sanitizer, coverage): cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON") result.append("WITH_PERFORMANCE=1") - if is_cross_arm: - cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") - else: - result.append("BUILD_MUSL_KEEPER=1") + cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") elif package_type == "fuzzers": cmake_flags.append("-DENABLE_FUZZING=1") cmake_flags.append("-DENABLE_PROTOBUF=1") diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 4640882f2be..b06290ae352 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -66,18 +66,18 @@ else() message(STATUS "Library bridge mode: OFF") endif() -if (ENABLE_CLICKHOUSE_KEEPER) - message(STATUS "ClickHouse keeper mode: ON") -else() - message(STATUS "ClickHouse keeper mode: OFF") -endif() - if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) message(STATUS "ClickHouse keeper-converter mode: ON") else() message(STATUS "ClickHouse keeper-converter mode: OFF") endif() +if (ENABLE_CLICKHOUSE_KEEPER) + message(STATUS "ClickHouse keeper mode: ON") +else() + message(STATUS "ClickHouse keeper mode: OFF") +endif() + if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) message(STATUS "ClickHouse keeper-client mode: ON") else() @@ -131,10 +131,6 @@ add_subdirectory (static-files-disk-uploader) add_subdirectory (su) add_subdirectory (disks) -if (ENABLE_CLICKHOUSE_KEEPER) - add_subdirectory (keeper) -endif() - if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) add_subdirectory (keeper-converter) endif() @@ -143,6 +139,10 @@ if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) add_subdirectory (keeper-client) endif() +if (ENABLE_CLICKHOUSE_KEEPER) + add_subdirectory (keeper) +endif() + if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) add_subdirectory (odbc-bridge) endif () diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 079951be55e..9b931c49c24 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -1,4 +1,5 @@ set(CLICKHOUSE_KEEPER_SOURCES + keeper_main.cpp Keeper.cpp ) @@ -8,6 +9,9 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_common_io clickhouse_common_zookeeper daemon + clickhouse-keeper-converter-lib + clickhouse-keeper-client-lib + clickhouse_functions dbms ) @@ -17,199 +21,11 @@ install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-ke if (BUILD_STANDALONE_KEEPER) # Straight list of all required sources - set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperReconfiguration.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/RaftServerConfig.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/CoordinationSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/FourLetterCommand.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/InMemoryLogStore.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConnectionStats.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperDispatcher.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperLogStore.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperFeatureFlags.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManager.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManagerS3.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateMachine.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateManager.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConstants.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperAsynchronousMetrics.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperCommon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SessionExpiryQueue.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SummingStateMachine.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/WriteBufferFromNuraftBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ZooKeeperDataReader.cpp + clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_SOURCES}) - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsFields.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BaseSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/Field.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsEnums.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerUUID.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/UUID.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BackgroundSchedulePool.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/IO/ReadBuffer.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPPathHints.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperTCPHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/TCPServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/NotFoundHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ProtocolServerAdapter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CertificateReloader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperReadinessHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CloudPlacementInfo.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnection.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerRequest.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerResponse.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnectionFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CachedCompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CheckingCompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferBase.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/CurrentThread.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollections.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollectionConfiguration.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/Jemalloc.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/IKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/TestKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperCommon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperConstants.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperImpl.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperIO.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperLock.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperNodeCache.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/registerDisks.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskSelector.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocal.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocalCheckThread.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/LocalDirectorySyncGuard.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/TemporaryFileOnDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/loadLocalDiskConfig.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskType.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/IObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataOperationsHolder.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIterator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/StoredObject.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/DiskS3Utils.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/IOUringReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getIOUringReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferWithFinalizeCallback.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousBoundedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getThreadPoolReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolRemoteFSReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolReader.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp - ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Context.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Settings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/ThreadStatusExt.cpp - - Keeper.cpp - clickhouse-keeper.cpp - ) - - # List of resources for clickhouse-keeper client - if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) - list(APPEND CLICKHOUSE_KEEPER_STANDALONE_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/KeeperClient.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Commands.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Parser.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/LineReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/ReplxxLineReader.cpp - ) - endif() - - clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES}) - - # Remove some redundant dependencies - target_compile_definitions (clickhouse-keeper PRIVATE -DCLICKHOUSE_KEEPER_STANDALONE_BUILD) - target_compile_definitions (clickhouse-keeper PUBLIC -DWITHOUT_TEXT_LOG) - - if (ENABLE_CLICKHOUSE_KEEPER_CLIENT AND TARGET ch_rust::skim) - target_link_libraries(clickhouse-keeper PRIVATE ch_rust::skim) - endif() - - target_link_libraries(clickhouse-keeper - PRIVATE - ch_contrib::abseil_swiss_tables - ch_contrib::nuraft - ch_contrib::lz4 - ch_contrib::zstd - ch_contrib::cityhash - ch_contrib::jemalloc - common ch_contrib::double_conversion - ch_contrib::dragonbox_to_chars - pcg_random - ch_contrib::pdqsort - ch_contrib::miniselect - clickhouse_common_config_no_zookeeper_log - loggers_no_text_log - clickhouse_common_io - clickhouse_parsers # Otherwise compression will not built. FIXME. - ) + target_link_libraries(clickhouse-keeper PUBLIC ${CLICKHOUSE_KEEPER_LINK}) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) - if (SPLIT_DEBUG_SYMBOLS) clickhouse_split_debug_symbols(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-keeper) else() diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index f14ef2e5552..60834dbe582 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -75,16 +75,6 @@ int mainEntryClickHouseKeeper(int argc, char ** argv) } } -#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD - -// Weak symbols don't work correctly on Darwin -// so we have a stub implementation to avoid linker errors -void collectCrashLog( - Int32, UInt64, const String &, const StackTrace &) -{} - -#endif - namespace DB { diff --git a/programs/keeper/keeper_main.cpp b/programs/keeper/keeper_main.cpp new file mode 100644 index 00000000000..a5bc5db7be8 --- /dev/null +++ b/programs/keeper/keeper_main.cpp @@ -0,0 +1,443 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include /// pair + +#include + +#include "config.h" +#include "config_tools.h" + +#include +#include +#include + +#include +#include + + +int mainEntryClickHouseKeeper(int argc, char ** argv); +#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER +int mainEntryClickHouseKeeperConverter(int argc, char ** argv); +#endif +#if ENABLE_CLICKHOUSE_KEEPER_CLIENT +int mainEntryClickHouseKeeperClient(int argc, char ** argv); +#endif + +namespace +{ + +using MainFunc = int (*)(int, char**); + +/// Add an item here to register new application +std::pair clickhouse_applications[] = +{ + // keeper + {"keeper", mainEntryClickHouseKeeper}, +#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER + {"converter", mainEntryClickHouseKeeperConverter}, + {"keeper-converter", mainEntryClickHouseKeeperConverter}, +#endif +#if ENABLE_CLICKHOUSE_KEEPER_CLIENT + {"client", mainEntryClickHouseKeeperClient}, + {"keeper-client", mainEntryClickHouseKeeperClient}, +#endif + +}; + +int printHelp(int, char **) +{ + std::cerr << "Use one of the following commands:" << std::endl; + for (auto & application : clickhouse_applications) + std::cerr << "clickhouse " << application.first << " [args] " << std::endl; + return -1; +} + + +enum class InstructionFail : uint8_t +{ + NONE = 0, + SSE3 = 1, + SSSE3 = 2, + SSE4_1 = 3, + SSE4_2 = 4, + POPCNT = 5, + AVX = 6, + AVX2 = 7, + AVX512 = 8 +}; + +auto instructionFailToString(InstructionFail fail) +{ + switch (fail) + { +#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) + case InstructionFail::NONE: + ret("NONE"); + case InstructionFail::SSE3: + ret("SSE3"); + case InstructionFail::SSSE3: + ret("SSSE3"); + case InstructionFail::SSE4_1: + ret("SSE4.1"); + case InstructionFail::SSE4_2: + ret("SSE4.2"); + case InstructionFail::POPCNT: + ret("POPCNT"); + case InstructionFail::AVX: + ret("AVX"); + case InstructionFail::AVX2: + ret("AVX2"); + case InstructionFail::AVX512: + ret("AVX512"); +#undef ret + } +} + + +sigjmp_buf jmpbuf; + +[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) +{ + siglongjmp(jmpbuf, 1); +} + +/// Check if necessary SSE extensions are available by trying to execute some sse instructions. +/// If instruction is unavailable, SIGILL will be sent by kernel. +void checkRequiredInstructionsImpl(volatile InstructionFail & fail) +{ +#if defined(__SSE3__) + fail = InstructionFail::SSE3; + __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSSE3__) + fail = InstructionFail::SSSE3; + __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); + +#endif + +#if defined(__SSE4_1__) + fail = InstructionFail::SSE4_1; + __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSE4_2__) + fail = InstructionFail::SSE4_2; + __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); +#endif + + /// Defined by -msse4.2 +#if defined(__POPCNT__) + fail = InstructionFail::POPCNT; + { + uint64_t a = 0; + uint64_t b = 0; + __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); + } +#endif + +#if defined(__AVX__) + fail = InstructionFail::AVX; + __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX2__) + fail = InstructionFail::AVX2; + __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX512__) + fail = InstructionFail::AVX512; + __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); +#endif + + fail = InstructionFail::NONE; +} + +/// Macros to avoid using strlen(), since it may fail if SSE is not supported. +#define writeError(data) do \ + { \ + static_assert(__builtin_constant_p(data)); \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ + _Exit(1); \ + } while (false) + +/// Check SSE and others instructions availability. Calls exit on fail. +/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. +void checkRequiredInstructions() +{ + struct sigaction sa{}; + struct sigaction sa_old{}; + sa.sa_sigaction = sigIllCheckHandler; + sa.sa_flags = SA_SIGINFO; + auto signal = SIGILL; + if (sigemptyset(&sa.sa_mask) != 0 + || sigaddset(&sa.sa_mask, signal) != 0 + || sigaction(signal, &sa, &sa_old) != 0) + { + /// You may wonder about strlen. + /// Typical implementation of strlen is using SSE4.2 or AVX2. + /// But this is not the case because it's compiler builtin and is executed at compile time. + + writeError("Can not set signal handler\n"); + _Exit(1); + } + + volatile InstructionFail fail = InstructionFail::NONE; + + if (sigsetjmp(jmpbuf, 1)) + { + writeError("Instruction check fail. The CPU does not support "); + if (!std::apply(writeRetry, instructionFailToString(fail))) + _Exit(1); + writeError(" instruction set.\n"); + _Exit(1); + } + + checkRequiredInstructionsImpl(fail); + + if (sigaction(signal, &sa_old, nullptr)) + { + writeError("Can not set signal handler\n"); + _Exit(1); + } +} + +struct Checker +{ + Checker() + { + checkRequiredInstructions(); + } +} checker +#ifndef OS_DARWIN + __attribute__((init_priority(101))) /// Run before other static initializers. +#endif +; + + +#if !defined(USE_MUSL) +/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. +void checkHarmfulEnvironmentVariables(char ** argv) +{ + std::initializer_list harmful_env_variables = { + /// The list is a selection from "man ld-linux". + "LD_PRELOAD", + "LD_LIBRARY_PATH", + "LD_ORIGIN_PATH", + "LD_AUDIT", + "LD_DYNAMIC_WEAK", + /// The list is a selection from "man dyld" (osx). + "DYLD_LIBRARY_PATH", + "DYLD_FALLBACK_LIBRARY_PATH", + "DYLD_VERSIONED_LIBRARY_PATH", + "DYLD_INSERT_LIBRARIES", + }; + + bool require_reexec = false; + for (const auto * var : harmful_env_variables) + { + if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) + { + /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful + if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently + { + fmt::print(stderr, "Cannot override {} environment variable", var); + _exit(1); + } + require_reexec = true; + } + } + + if (require_reexec) + { + /// Use execvp() over execv() to search in PATH. + /// + /// This should be safe, since: + /// - if argv[0] is relative path - it is OK + /// - if argv[0] has only basename, the it will search in PATH, like shell will do. + /// + /// Also note, that this (search in PATH) because there is no easy and + /// portable way to get absolute path of argv[0]. + /// - on linux there is /proc/self/exec and AT_EXECFN + /// - but on other OSes there is no such thing (especially on OSX). + /// + /// And since static linking will be done someday anyway, + /// let's not pollute the code base with special cases. + int error = execvp(argv[0], argv); + _exit(error); + } +} +#endif + + +#if defined(SANITIZE_COVERAGE) +__attribute__((no_sanitize("coverage"))) +void dumpCoverage() +{ + /// A user can request to dump the coverage information into files at exit. + /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, + /// that cannot introspect it with SQL functions at runtime. + + /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' + /// containing the list of addresses of covered . + + /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. + + if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) + { + auto dump = [](const std::string & name, auto span) + { + /// Write only non-zeros. + std::vector data; + data.reserve(span.size()); + for (auto addr : span) + if (addr) + data.push_back(addr); + + int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); + if (-1 == fd) + { + writeError("Cannot open a file to write the coverage data\n"); + } + else + { + if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) + writeError("Cannot write the coverage data to a file\n"); + if (0 != ::close(fd)) + writeError("Cannot close the file with coverage data\n"); + } + }; + + dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); + } +} +#endif + +} + +bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) +{ + /// Use app if the first arg 'app' is passed (the arg should be quietly removed) + if (argv.size() >= 2) + { + auto first_arg = argv.begin() + 1; + + /// 'clickhouse --client ...' and 'clickhouse client ...' are Ok + if (*first_arg == app_suffix + || (std::string_view(*first_arg).starts_with("--") && std::string_view(*first_arg).substr(2) == app_suffix)) + { + argv.erase(first_arg); + return true; + } + } + + return false; +} + +/// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure. +/// We don't use it. But it can be used by some libraries for implementation of "plugins". +/// We absolutely discourage the ancient technique of loading +/// 3rd-party uncontrolled dangerous libraries into the process address space, +/// because it is insane. + +#if !defined(USE_MUSL) +extern "C" +{ + void * dlopen(const char *, int) + { + return nullptr; + } + + void * dlmopen(long, const char *, int) // NOLINT + { + return nullptr; + } + + int dlclose(void *) + { + return 0; + } + + const char * dlerror() + { + return "ClickHouse does not allow dynamic library loading"; + } +} +#endif + +/// Prevent messages from JeMalloc in the release build. +/// Some of these messages are non-actionable for the users, such as: +/// : Number of CPUs detected is not deterministic. Per-CPU arena disabled. +#if USE_JEMALLOC && defined(NDEBUG) && !defined(SANITIZER) +extern "C" void (*malloc_message)(void *, const char *s); +__attribute__((constructor(0))) void init_je_malloc_message() { malloc_message = [](void *, const char *){}; } +#endif + +/// This allows to implement assert to forbid initialization of a class in static constructors. +/// Usage: +/// +/// extern bool inside_main; +/// class C { C() { assert(inside_main); } }; +bool inside_main = false; + +int main(int argc_, char ** argv_) +{ + inside_main = true; + SCOPE_EXIT({ inside_main = false; }); + + /// PHDR cache is required for query profiler to work reliably + /// It also speed up exception handling, but exceptions from dynamically loaded libraries (dlopen) + /// will work only after additional call of this function. + /// Note: we forbid dlopen in our code. + updatePHDRCache(); + +#if !defined(USE_MUSL) + checkHarmfulEnvironmentVariables(argv_); +#endif + + /// This is used for testing. For example, + /// clickhouse-local should be able to run a simple query without throw/catch. + if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe) + DB::terminate_on_any_exception = true; + + /// Reset new handler to default (that throws std::bad_alloc) + /// It is needed because LLVM library clobbers it. + std::set_new_handler(nullptr); + + std::vector argv(argv_, argv_ + argc_); + + /// Print a basic help if nothing was matched + MainFunc main_func = mainEntryClickHouseKeeper; + + if (isClickhouseApp("help", argv)) + { + main_func = printHelp; + } + else + { + for (auto & application : clickhouse_applications) + { + if (isClickhouseApp(application.first, argv)) + { + main_func = application.second; + break; + } + } + } + + int exit_code = main_func(static_cast(argv.size()), argv.data()); + +#if defined(SANITIZE_COVERAGE) + dumpCoverage(); +#endif + + return exit_code; +} From 4a9daa202d74ba30fc3efd455f6a37a41bb4e4db Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 1 Jul 2024 11:24:45 +0200 Subject: [PATCH 080/141] Remove Keeper standalone build --- docker/packager/binary-builder/build.sh | 1 + programs/self-extracting/CMakeLists.txt | 17 +- src/Compression/CompressionFactory.cpp | 4 - src/Coordination/Standalone/Context.cpp | 486 ------------------ src/Coordination/Standalone/Context.h | 178 ------- src/Coordination/Standalone/Settings.cpp | 24 - .../Standalone/ThreadStatusExt.cpp | 19 - src/Core/SettingsFields.cpp | 45 -- src/Core/SettingsFields.h | 14 +- src/Daemon/BaseDaemon.cpp | 5 - src/Daemon/SentryWriter.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 - src/Disks/ObjectStorages/DiskObjectStorage.h | 2 - .../DiskObjectStorageMetadata.cpp | 4 - .../ObjectStorages/MetadataStorageFactory.cpp | 6 - .../ObjectStorages/ObjectStorageFactory.cpp | 18 +- .../createMetadataStorageMetrics.h | 12 +- src/Disks/registerDisks.cpp | 15 - src/IO/S3/BlobStorageLogWriter.cpp | 2 - src/Interpreters/Context.h | 8 - src/Server/PrometheusRequestHandler.cpp | 3 - src/Server/ProtocolServerAdapter.cpp | 4 +- src/Server/ProtocolServerAdapter.h | 2 +- 23 files changed, 36 insertions(+), 837 deletions(-) delete mode 100644 src/Coordination/Standalone/Context.cpp delete mode 100644 src/Coordination/Standalone/Context.h delete mode 100644 src/Coordination/Standalone/Settings.cpp delete mode 100644 src/Coordination/Standalone/ThreadStatusExt.cpp diff --git a/docker/packager/binary-builder/build.sh b/docker/packager/binary-builder/build.sh index 032aceb0af3..bd5f2fe8466 100755 --- a/docker/packager/binary-builder/build.sh +++ b/docker/packager/binary-builder/build.sh @@ -111,6 +111,7 @@ fi mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output [ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output +[ -x ./programs/self-extracting/clickhouse-keeper ] && mv ./programs/self-extracting/clickhouse-keeper /output mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds mv ./programs/*.dict ./programs/*.options ./programs/*_seed_corpus.zip /output ||: # libFuzzer oss-fuzz compatible infrastructure diff --git a/programs/self-extracting/CMakeLists.txt b/programs/self-extracting/CMakeLists.txt index 4b6dd07f618..32b686d40dd 100644 --- a/programs/self-extracting/CMakeLists.txt +++ b/programs/self-extracting/CMakeLists.txt @@ -10,9 +10,24 @@ else () set (COMPRESSOR "${PROJECT_BINARY_DIR}/utils/self-extracting-executable/compressor") endif () -add_custom_target (self-extracting ALL +add_custom_target (self-extracting-server ALL ${CMAKE_COMMAND} -E remove clickhouse clickhouse-stripped COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse ../clickhouse COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-stripped ../clickhouse-stripped DEPENDS clickhouse clickhouse-stripped compressor ) + +set(self_extracting_deps "self-extracting-server") + +if (BUILD_STANDALONE_KEEPER) + add_custom_target (self-extracting-keeper ALL + ${CMAKE_COMMAND} -E remove clickhouse-keeper + COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-keeper ../clickhouse-keeper + DEPENDS compressor clickhouse-keeper + ) + list(APPEND self_extracting_deps "self-extracting-keeper") +endif() + +add_custom_target (self-extracting ALL + DEPENDS ${self_extracting_deps} +) diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 68e0131c91b..2e7aa0d086f 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -185,7 +185,6 @@ void registerCodecDeflateQpl(CompressionCodecFactory & factory); /// Keeper use only general-purpose codecs, so we don't need these special codecs /// in standalone build -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerCodecDelta(CompressionCodecFactory & factory); void registerCodecT64(CompressionCodecFactory & factory); void registerCodecDoubleDelta(CompressionCodecFactory & factory); @@ -193,7 +192,6 @@ void registerCodecGorilla(CompressionCodecFactory & factory); void registerCodecEncrypted(CompressionCodecFactory & factory); void registerCodecFPC(CompressionCodecFactory & factory); void registerCodecGCD(CompressionCodecFactory & factory); -#endif CompressionCodecFactory::CompressionCodecFactory() { @@ -205,7 +203,6 @@ CompressionCodecFactory::CompressionCodecFactory() #endif registerCodecLZ4HC(*this); registerCodecMultiple(*this); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerCodecDelta(*this); registerCodecT64(*this); registerCodecDoubleDelta(*this); @@ -216,7 +213,6 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecDeflateQpl(*this); #endif registerCodecGCD(*this); -#endif default_codec = get("LZ4", {}); } diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp deleted file mode 100644 index 2017adcc58d..00000000000 --- a/src/Coordination/Standalone/Context.cpp +++ /dev/null @@ -1,486 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include - -namespace ProfileEvents -{ - extern const Event ContextLock; - extern const Event ContextLockWaitMicroseconds; -} - -namespace CurrentMetrics -{ - extern const Metric ContextLockWait; - extern const Metric BackgroundSchedulePoolTask; - extern const Metric BackgroundSchedulePoolSize; - extern const Metric IOWriterThreads; - extern const Metric IOWriterThreadsActive; - extern const Metric IOWriterThreadsScheduled; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int UNSUPPORTED_METHOD; -} - -struct ContextSharedPart : boost::noncopyable -{ - ContextSharedPart() - : macros(std::make_unique()) - {} - - ~ContextSharedPart() - { - if (keeper_dispatcher) - { - try - { - keeper_dispatcher->shutdown(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - /// Wait for thread pool for background reads and writes, - /// since it may use per-user MemoryTracker which will be destroyed here. - if (asynchronous_remote_fs_reader) - { - try - { - asynchronous_remote_fs_reader->wait(); - asynchronous_remote_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (asynchronous_local_fs_reader) - { - try - { - asynchronous_local_fs_reader->wait(); - asynchronous_local_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (synchronous_local_fs_reader) - { - try - { - synchronous_local_fs_reader->wait(); - synchronous_local_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (threadpool_writer) - { - try - { - threadpool_writer->wait(); - threadpool_writer.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - } - - /// For access of most of shared objects. - mutable SharedMutex mutex; - - ServerSettings server_settings; - - String path; /// Path to the data directory, with a slash at the end. - ConfigurationPtr config; /// Global configuration settings. - MultiVersion macros; /// Substitutions extracted from config. - OnceFlag schedule_pool_initialized; - mutable std::unique_ptr schedule_pool; /// A thread pool that can run different jobs in background - RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml - - mutable OnceFlag readers_initialized; - mutable std::unique_ptr asynchronous_remote_fs_reader; - mutable std::unique_ptr asynchronous_local_fs_reader; - mutable std::unique_ptr synchronous_local_fs_reader; - -#if USE_LIBURING - mutable OnceFlag io_uring_reader_initialized; - mutable std::unique_ptr io_uring_reader; -#endif - - mutable OnceFlag threadpool_writer_initialized; - mutable std::unique_ptr threadpool_writer; - - mutable ThrottlerPtr remote_read_throttler; /// A server-wide throttler for remote IO reads - mutable ThrottlerPtr remote_write_throttler; /// A server-wide throttler for remote IO writes - - mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads - mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes - - std::optional storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage - - mutable std::mutex keeper_dispatcher_mutex; - mutable std::shared_ptr keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex); -}; - -ContextData::ContextData() = default; -ContextData::ContextData(const ContextData &) = default; - -Context::Context() = default; -Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this(rhs) {} -Context::~Context() = default; - -SharedContextHolder::SharedContextHolder(SharedContextHolder &&) noexcept = default; -SharedContextHolder & SharedContextHolder::operator=(SharedContextHolder &&) noexcept = default; -SharedContextHolder::SharedContextHolder() = default; -SharedContextHolder::~SharedContextHolder() = default; -SharedContextHolder::SharedContextHolder(std::unique_ptr shared_context) - : shared(std::move(shared_context)) {} - -void SharedContextHolder::reset() { shared.reset(); } - -void Context::makeGlobalContext() -{ - initGlobal(); - global_context = shared_from_this(); -} - -ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part) -{ - auto res = std::shared_ptr(new Context); - res->shared = shared_part; - return res; -} - -void Context::initGlobal() -{ - assert(!global_context_instance); - global_context_instance = shared_from_this(); -} - -SharedContextHolder Context::createShared() -{ - return SharedContextHolder(std::make_unique()); -} - - -ContextMutablePtr Context::getGlobalContext() const -{ - auto ptr = global_context.lock(); - if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no global context or global context has expired"); - return ptr; -} - -std::unique_lock Context::getGlobalLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::unique_lock(shared->mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::shared_lock Context::getGlobalSharedLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::shared_lock(shared->mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::unique_lock Context::getLocalLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::unique_lock(mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::shared_lock Context::getLocalSharedLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::shared_lock(mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -String Context::getPath() const -{ - auto lock = getGlobalSharedLock(); - return shared->path; -} - -void Context::setPath(const String & path) -{ - auto lock = getGlobalLock(); - shared->path = path; -} - -MultiVersion::Version Context::getMacros() const -{ - return shared->macros.get(); -} - -void Context::setMacros(std::unique_ptr && macros) -{ - shared->macros.set(std::move(macros)); -} - -BackgroundSchedulePool & Context::getSchedulePool() const -{ - callOnce(shared->schedule_pool_initialized, [&] { - shared->schedule_pool = std::make_unique( - shared->server_settings.background_schedule_pool_size, - CurrentMetrics::BackgroundSchedulePoolTask, - CurrentMetrics::BackgroundSchedulePoolSize, - "BgSchPool"); - }); - - return *shared->schedule_pool; -} - -void Context::setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config) -{ - shared->remote_host_filter.setValuesFromConfig(config); -} - -const RemoteHostFilter & Context::getRemoteHostFilter() const -{ - return shared->remote_host_filter; -} - -IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const -{ - callOnce(shared->readers_initialized, [&] { - const auto & config = getConfigRef(); - shared->asynchronous_remote_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER, config); - shared->asynchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER, config); - shared->synchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER, config); - }); - - switch (type) - { - case FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER: - return *shared->asynchronous_remote_fs_reader; - case FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER: - return *shared->asynchronous_local_fs_reader; - case FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER: - return *shared->synchronous_local_fs_reader; - } -} - -#if USE_LIBURING -IOUringReader & Context::getIOUringReader() const -{ - callOnce(shared->io_uring_reader_initialized, [&] { - shared->io_uring_reader = createIOUringReader(); - }); - - return *shared->io_uring_reader; -} -#endif - -std::shared_ptr Context::getFilesystemCacheLog() const -{ - return nullptr; -} - -std::shared_ptr Context::getFilesystemReadPrefetchesLog() const -{ - return nullptr; -} - -std::shared_ptr Context::getBlobStorageLog() const -{ - return nullptr; -} - -void Context::setConfig(const ConfigurationPtr & config) -{ - auto lock = getGlobalLock(); - shared->config = config; -} - -const Poco::Util::AbstractConfiguration & Context::getConfigRef() const -{ - auto lock = getGlobalSharedLock(); - return shared->config ? *shared->config : Poco::Util::Application::instance().config(); -} - -std::shared_ptr Context::getAsyncReadCounters() const -{ - auto lock = getLocalLock(); - if (!async_read_counters) - async_read_counters = std::make_shared(); - return async_read_counters; -} - -ThreadPool & Context::getThreadPoolWriter() const -{ - callOnce(shared->threadpool_writer_initialized, [&] { - const auto & config = getConfigRef(); - auto pool_size = config.getUInt(".threadpool_writer_pool_size", 100); - auto queue_size = config.getUInt(".threadpool_writer_queue_size", 1000000); - - shared->threadpool_writer = std::make_unique( - CurrentMetrics::IOWriterThreads, CurrentMetrics::IOWriterThreadsActive, CurrentMetrics::IOWriterThreadsScheduled, pool_size, pool_size, queue_size); - }); - - return *shared->threadpool_writer; -} - -ThrottlerPtr Context::getRemoteReadThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getRemoteWriteThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getLocalReadThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getLocalWriteThrottler() const -{ - return nullptr; -} - -ReadSettings Context::getReadSettings() const -{ - return ReadSettings{}; -} - -ResourceManagerPtr Context::getResourceManager() const -{ - return nullptr; -} - -ClassifierPtr Context::getWorkloadClassifier() const -{ - return nullptr; -} - -void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const -{ - const auto & config_ref = getConfigRef(); - - std::lock_guard lock(shared->keeper_dispatcher_mutex); - - if (shared->keeper_dispatcher) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize Keeper multiple times"); - - if (config_ref.has("keeper_server")) - { - shared->keeper_dispatcher = std::make_shared(); - shared->keeper_dispatcher->initialize(config_ref, true, start_async, getMacros()); - } -} - -std::shared_ptr Context::getKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (!shared->keeper_dispatcher) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Keeper must be initialized before requests"); - - return shared->keeper_dispatcher; -} - -std::shared_ptr Context::tryGetKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - return shared->keeper_dispatcher; -} - -void Context::shutdownKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (shared->keeper_dispatcher) - { - shared->keeper_dispatcher->shutdown(); - shared->keeper_dispatcher.reset(); - } -} - -void Context::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::AbstractConfiguration & config_) -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (!shared->keeper_dispatcher) - return; - - shared->keeper_dispatcher->updateConfiguration(config_, getMacros()); -} - -std::shared_ptr Context::getZooKeeper() const -{ - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper"); -} - -const S3SettingsByEndpoint & Context::getStorageS3Settings() const -{ - std::lock_guard lock(shared->mutex); - - if (!shared->storage_s3_settings) - { - const auto & config = shared->config ? *shared->config : Poco::Util::Application::instance().config(); - shared->storage_s3_settings.emplace().loadFromConfig(config, "s3", getSettingsRef()); - } - - return *shared->storage_s3_settings; -} - -const ServerSettings & Context::getServerSettings() const -{ - return shared->server_settings; -} - -bool Context::hasTraceCollector() const -{ - return false; -} - -bool Context::isBackgroundOperationContext() const -{ - return false; -} - -} diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h deleted file mode 100644 index d3bbfececed..00000000000 --- a/src/Coordination/Standalone/Context.h +++ /dev/null @@ -1,178 +0,0 @@ -#pragma once - -#include - -#include - -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include - -#include - -#include - -#include "config.h" -namespace zkutil -{ - class ZooKeeper; - using ZooKeeperPtr = std::shared_ptr; -} - -namespace DB -{ - -struct ContextSharedPart; -class Macros; -class FilesystemCacheLog; -class FilesystemReadPrefetchesLog; -class BlobStorageLog; -class IOUringReader; -class S3SettingsByEndpoint; - -/// A small class which owns ContextShared. -/// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete. -struct SharedContextHolder -{ - ~SharedContextHolder(); - SharedContextHolder(); - explicit SharedContextHolder(std::unique_ptr shared_context); - SharedContextHolder(SharedContextHolder &&) noexcept; - - SharedContextHolder & operator=(SharedContextHolder &&) noexcept; - - ContextSharedPart * get() const { return shared.get(); } - void reset(); -private: - std::unique_ptr shared; -}; - -class ContextData -{ -protected: - ContextWeakMutablePtr global_context; - inline static ContextPtr global_context_instance; - ContextSharedPart * shared; - - /// Query metrics for reading data asynchronously with IAsynchronousReader. - mutable std::shared_ptr async_read_counters; - - Settings settings; /// Setting for query execution. - -public: - /// Use copy constructor or createGlobal() instead - ContextData(); - ContextData(const ContextData &); -}; - -class Context : public ContextData, public std::enable_shared_from_this -{ -private: - /// ContextData mutex - mutable SharedMutex mutex; - - Context(); - Context(const Context &); - - std::unique_lock getGlobalLock() const; - - std::shared_lock getGlobalSharedLock() const; - - std::unique_lock getLocalLock() const; - - std::shared_lock getLocalSharedLock() const; - -public: - /// Create initial Context with ContextShared and etc. - static ContextMutablePtr createGlobal(ContextSharedPart * shared_part); - static SharedContextHolder createShared(); - - ContextMutablePtr getGlobalContext() const; - static ContextPtr getGlobalContextInstance() { return global_context_instance; } - - void makeGlobalContext(); - void initGlobal(); - - ~Context(); - - using ConfigurationPtr = Poco::AutoPtr; - - /// Global application configuration settings. - void setConfig(const ConfigurationPtr & config); - const Poco::Util::AbstractConfiguration & getConfigRef() const; - - const Settings & getSettingsRef() const { return settings; } - - String getPath() const; - void setPath(const String & path); - - MultiVersion::Version getMacros() const; - void setMacros(std::unique_ptr && macros); - - BackgroundSchedulePool & getSchedulePool() const; - - /// Storage of allowed hosts from config.xml - void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config); - const RemoteHostFilter & getRemoteHostFilter() const; - - std::shared_ptr getFilesystemCacheLog() const; - std::shared_ptr getFilesystemReadPrefetchesLog() const; - std::shared_ptr getBlobStorageLog() const; - - enum class ApplicationType : uint8_t - { - KEEPER, - SERVER, - }; - - void setApplicationType(ApplicationType) {} - ApplicationType getApplicationType() const { return ApplicationType::KEEPER; } - - IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; -#if USE_LIBURING - IOUringReader & getIOUringReader() const; -#endif - std::shared_ptr getAsyncReadCounters() const; - ThreadPool & getThreadPoolWriter() const; - - ThrottlerPtr getRemoteReadThrottler() const; - ThrottlerPtr getRemoteWriteThrottler() const; - - ThrottlerPtr getLocalReadThrottler() const; - ThrottlerPtr getLocalWriteThrottler() const; - - ReadSettings getReadSettings() const; - - /// Resource management related - ResourceManagerPtr getResourceManager() const; - ClassifierPtr getWorkloadClassifier() const; - - std::shared_ptr getKeeperDispatcher() const; - std::shared_ptr tryGetKeeperDispatcher() const; - void initializeKeeperDispatcher(bool start_async) const; - void shutdownKeeperDispatcher() const; - void updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config); - - zkutil::ZooKeeperPtr getZooKeeper() const; - - const S3SettingsByEndpoint & getStorageS3Settings() const; - - const String & getUserName() const { static std::string user; return user; } - - const ServerSettings & getServerSettings() const; - - bool hasTraceCollector() const; - - bool isBackgroundOperationContext() const; -}; - -} diff --git a/src/Coordination/Standalone/Settings.cpp b/src/Coordination/Standalone/Settings.cpp deleted file mode 100644 index 12a7a42ffac..00000000000 --- a/src/Coordination/Standalone/Settings.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include - -namespace DB -{ - -IMPLEMENT_SETTINGS_TRAITS(SettingsTraits, LIST_OF_SETTINGS) - -std::vector Settings::getAllRegisteredNames() const -{ - std::vector all_settings; - for (const auto & setting_field : all()) - { - all_settings.push_back(setting_field.getName()); - } - return all_settings; -} - -void Settings::set(std::string_view name, const Field & value) -{ - BaseSettings::set(name, value); -} - - -} diff --git a/src/Coordination/Standalone/ThreadStatusExt.cpp b/src/Coordination/Standalone/ThreadStatusExt.cpp deleted file mode 100644 index fc78233d9dc..00000000000 --- a/src/Coordination/Standalone/ThreadStatusExt.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include - -namespace DB -{ - -void CurrentThread::detachFromGroupIfNotDetached() -{ -} - -void CurrentThread::attachToGroup(const ThreadGroupPtr &) -{ -} - -void ThreadStatus::initGlobalProfiler(UInt64 /*global_profiler_real_time_period*/, UInt64 /*global_profiler_cpu_time_period*/) -{ -} - -} diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index caa8b3fdffd..7d094e2a107 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -380,15 +380,6 @@ void SettingFieldString::readBinary(ReadBuffer & in) *this = std::move(str); } -/// Unbeautiful workaround for clickhouse-keeper standalone build ("-DBUILD_STANDALONE_KEEPER=1"). -/// In this build, we don't build and link library dbms (to which SettingsField.cpp belongs) but -/// only build SettingsField.cpp. Further dependencies, e.g. DataTypeString and DataTypeMap below, -/// require building of further files for clickhouse-keeper. To keep dependencies slim, we don't do -/// that. The linker does not complain only because clickhouse-keeper does not call any of below -/// functions. A cleaner alternative would be more modular libraries, e.g. one for data types, which -/// could then be linked by the server and the linker. -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD - SettingFieldMap::SettingFieldMap(const Field & f) : value(fieldToMap(f)) {} String SettingFieldMap::toString() const @@ -428,42 +419,6 @@ void SettingFieldMap::readBinary(ReadBuffer & in) *this = map; } -#else - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -SettingFieldMap::SettingFieldMap(const Field &) : value(Map()) {} -String SettingFieldMap::toString() const -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - - -SettingFieldMap & SettingFieldMap::operator =(const Field &) -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -void SettingFieldMap::parseFromString(const String &) -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -void SettingFieldMap::writeBinary(WriteBuffer &) const -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -void SettingFieldMap::readBinary(ReadBuffer &) -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -#endif - namespace { char stringToChar(const String & str) diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 19809348921..266141815e3 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -247,12 +247,6 @@ struct SettingFieldString void readBinary(ReadBuffer & in); }; -#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD -#define NORETURN [[noreturn]] -#else -#define NORETURN -#endif - struct SettingFieldMap { public: @@ -269,11 +263,11 @@ public: operator const Map &() const { return value; } /// NOLINT explicit operator Field() const { return value; } - NORETURN String toString() const; - NORETURN void parseFromString(const String & str); + String toString() const; + void parseFromString(const String & str); - NORETURN void writeBinary(WriteBuffer & out) const; - NORETURN void readBinary(ReadBuffer & in); + void writeBinary(WriteBuffer & out) const; + void readBinary(ReadBuffer & in); }; #undef NORETURN diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index b2c425ceb79..48f76769a09 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -502,9 +502,7 @@ private: if (collectCrashLog) collectCrashLog(sig, thread_num, query_id, stack_trace); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD Context::getGlobalContextInstance()->handleCrash(); -#endif /// Send crash report to developers (if configured) if (sig != SanitizerTrap) @@ -533,8 +531,6 @@ private: } } - /// ClickHouse Keeper does not link to some parts of Settings. -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// List changed settings. if (!query_id.empty()) { @@ -549,7 +545,6 @@ private: LOG_FATAL(log, "Changed settings: {}", changed_settings); } } -#endif /// When everything is done, we will try to send these error messages to the client. if (thread_ptr) diff --git a/src/Daemon/SentryWriter.cpp b/src/Daemon/SentryWriter.cpp index 9479dd65730..c51a1100639 100644 --- a/src/Daemon/SentryWriter.cpp +++ b/src/Daemon/SentryWriter.cpp @@ -19,7 +19,7 @@ #include "config.h" #include -#if USE_SENTRY && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_SENTRY # include # include diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index c77709c27eb..bb9761a3905 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -78,7 +78,6 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c std::unique_ptr buf; -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD if (with_file_cache) { auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); @@ -96,7 +95,6 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c /* read_until_position */std::nullopt, cache_log); } -#endif /// Can't wrap CachedOnDiskReadBufferFromFile in CachedInMemoryReadBufferFromFile because the /// former doesn't support seeks. diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 59cc82d8c81..5c45a258806 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -195,7 +195,6 @@ public: /// DiskObjectStorage(CachedObjectStorage(CachedObjectStorage(S3ObjectStorage))) String getStructure() const { return fmt::format("DiskObjectStorage-{}({})", getName(), object_storage->getName()); } -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// Add a cache layer. /// Example: DiskObjectStorage(S3ObjectStorage) -> DiskObjectStorage(CachedObjectStorage(S3ObjectStorage)) /// There can be any number of cache layers: @@ -204,7 +203,6 @@ public: /// Get names of all cache layers. Name is how cache is defined in configuration file. NameSet getCacheLayersNames() const override; -#endif bool supportsStat() const override { return metadata_storage->supportsStat(); } struct stat stat(const String & path) const override; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 44854633d65..56d5d11ef8a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -222,11 +222,7 @@ ObjectKeyWithMetadata DiskObjectStorageMetadata::popLastObject() bool DiskObjectStorageMetadata::getWriteFullObjectKeySetting() { -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD return Context::getGlobalContextInstance()->getServerSettings().storage_metadata_write_full_object_key; -#else - return false; -#endif } } diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp index ab7c2069b43..a690ecd2757 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp @@ -2,9 +2,7 @@ #include #include #include -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include -#endif #include #include @@ -135,7 +133,6 @@ void registerPlainRewritableMetadataStorage(MetadataStorageFactory & factory) }); } -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerMetadataStorageFromStaticFilesWebServer(MetadataStorageFactory & factory) { factory.registerMetadataStorageType("web", []( @@ -147,7 +144,6 @@ void registerMetadataStorageFromStaticFilesWebServer(MetadataStorageFactory & fa return std::make_shared(assert_cast(*object_storage)); }); } -#endif void registerMetadataStorages() { @@ -155,9 +151,7 @@ void registerMetadataStorages() registerMetadataStorageFromDisk(factory); registerPlainMetadataStorage(factory); registerPlainRewritableMetadataStorage(factory); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerMetadataStorageFromStaticFilesWebServer(factory); -#endif } } diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 1bf8250adff..5698d2ad588 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -7,19 +7,17 @@ #include #include #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS #include #include #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE #include #include #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include #include #include -#endif #include #include #include @@ -284,7 +282,7 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory) #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS void registerHDFSObjectStorage(ObjectStorageFactory & factory) { factory.registerObjectStorageType( @@ -309,7 +307,7 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) } #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE void registerAzureObjectStorage(ObjectStorageFactory & factory) { auto creator = []( @@ -333,7 +331,6 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) } #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerWebObjectStorage(ObjectStorageFactory & factory) { factory.registerObjectStorageType("web", []( @@ -381,7 +378,6 @@ void registerLocalObjectStorage(ObjectStorageFactory & factory) factory.registerObjectStorageType("local_blob_storage", creator); factory.registerObjectStorageType("local", creator); } -#endif void registerObjectStorages() { @@ -393,18 +389,16 @@ void registerObjectStorages() registerS3PlainRewritableObjectStorage(factory); #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS registerHDFSObjectStorage(factory); #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE registerAzureObjectStorage(factory); #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerWebObjectStorage(factory); registerLocalObjectStorage(factory); -#endif } } diff --git a/src/Disks/ObjectStorages/createMetadataStorageMetrics.h b/src/Disks/ObjectStorages/createMetadataStorageMetrics.h index 6dddc227ade..5cf1fbef2ab 100644 --- a/src/Disks/ObjectStorages/createMetadataStorageMetrics.h +++ b/src/Disks/ObjectStorages/createMetadataStorageMetrics.h @@ -1,14 +1,14 @@ #pragma once +#include "config.h" + #if USE_AWS_S3 # include #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE # include #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD -# include -#endif +#include #include namespace ProfileEvents @@ -42,7 +42,7 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create inline MetadataStorageMetrics MetadataStorageMetrics::create() { @@ -53,7 +53,6 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create inline MetadataStorageMetrics MetadataStorageMetrics::create() { @@ -62,6 +61,5 @@ inline MetadataStorageMetrics MetadataStorageMetrics::creategetBlobStorageLog()) { auto log_writer = std::make_shared(std::move(blob_storage_log)); @@ -67,7 +66,6 @@ BlobStorageLogWriterPtr BlobStorageLogWriter::create(const String & disk_name) return log_writer; } -#endif return {}; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index f9b91a45978..d3f152b7a67 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1,7 +1,5 @@ #pragma once -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD - #include #include #include @@ -1451,9 +1449,3 @@ struct HTTPContext : public IHTTPContext }; } - -#else - -#include - -#endif diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index dff960f7031..1f3e038a1f5 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -18,9 +18,6 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe { try { - /// Raw config reference is used here to avoid dependency on Context and ServerSettings. - /// This is painful, because this class is also used in a build with CLICKHOUSE_KEEPER_STANDALONE_BUILD=1 - /// And there ordinary Context is replaced with a tiny clone. const auto & config = server.config(); unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); diff --git a/src/Server/ProtocolServerAdapter.cpp b/src/Server/ProtocolServerAdapter.cpp index 8d14a849894..b41ad2376f1 100644 --- a/src/Server/ProtocolServerAdapter.cpp +++ b/src/Server/ProtocolServerAdapter.cpp @@ -1,7 +1,7 @@ #include #include -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC #include #endif @@ -37,7 +37,7 @@ ProtocolServerAdapter::ProtocolServerAdapter( { } -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC class ProtocolServerAdapter::GRPCServerAdapterImpl : public Impl { public: diff --git a/src/Server/ProtocolServerAdapter.h b/src/Server/ProtocolServerAdapter.h index dd11c1dfc58..76a6776ed9c 100644 --- a/src/Server/ProtocolServerAdapter.h +++ b/src/Server/ProtocolServerAdapter.h @@ -23,7 +23,7 @@ public: ProtocolServerAdapter & operator =(ProtocolServerAdapter && src) = default; ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr tcp_server_); -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr grpc_server_); #endif From b0bbc9c8104ae36750f8216a1e197331f5c7d8ab Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 1 Jul 2024 16:00:32 +0200 Subject: [PATCH 081/141] Fix symlinks --- programs/keeper/clickhouse-keeper.cpp | 30 --------------------------- programs/keeper/keeper_main.cpp | 8 ++++++- 2 files changed, 7 insertions(+), 31 deletions(-) delete mode 100644 programs/keeper/clickhouse-keeper.cpp diff --git a/programs/keeper/clickhouse-keeper.cpp b/programs/keeper/clickhouse-keeper.cpp deleted file mode 100644 index f2f91930ac0..00000000000 --- a/programs/keeper/clickhouse-keeper.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include "config_tools.h" - - -int mainEntryClickHouseKeeper(int argc, char ** argv); - -#if ENABLE_CLICKHOUSE_KEEPER_CLIENT -int mainEntryClickHouseKeeperClient(int argc, char ** argv); -#endif - -int main(int argc_, char ** argv_) -{ -#if ENABLE_CLICKHOUSE_KEEPER_CLIENT - - if (argc_ >= 2) - { - /// 'clickhouse-keeper --client ...' and 'clickhouse-keeper client ...' are OK - if (strcmp(argv_[1], "--client") == 0 || strcmp(argv_[1], "client") == 0) - { - argv_[1] = argv_[0]; - return mainEntryClickHouseKeeperClient(--argc_, argv_ + 1); - } - } - - if (argc_ > 0 && (strcmp(argv_[0], "clickhouse-keeper-client") == 0 || endsWith(argv_[0], "/clickhouse-keeper-client"))) - return mainEntryClickHouseKeeperClient(argc_, argv_); -#endif - - return mainEntryClickHouseKeeper(argc_, argv_); -} diff --git a/programs/keeper/keeper_main.cpp b/programs/keeper/keeper_main.cpp index a5bc5db7be8..ec9b84ce94b 100644 --- a/programs/keeper/keeper_main.cpp +++ b/programs/keeper/keeper_main.cpp @@ -339,7 +339,13 @@ bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) } } - return false; + /// keeper suffix is default which will be used if no other app is detected + if (app_suffix == "keeper") + return false; + + /// Use app if clickhouse binary is run through symbolic link with name clickhouse-app + std::string app_name = "clickhouse-" + std::string(app_suffix); + return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name)); } /// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure. From a2626037bc6ed631758c364edcc096c983805b0c Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 14:15:59 +0200 Subject: [PATCH 082/141] Improve object storage tags in tests --- docker/test/stateful/run.sh | 4 ++++ docker/test/stateless/run.sh | 2 +- docker/test/stress/run.sh | 1 - tests/ci/stress_check.py | 3 +++ tests/clickhouse-test | 18 +++++++++++++++--- ...6_replace_partition_from_table_zookeeper.sh | 2 +- .../00632_get_sample_block_cache.sql | 2 +- ...0731_long_merge_tree_select_opened_files.sh | 2 +- ...3_system_columns_and_system_tables_long.sql | 2 +- .../0_stateless/00763_lock_buffer_long.sh | 2 +- .../01070_mutations_with_dependencies.sql | 2 +- .../01078_merge_tree_read_one_thread.sql | 2 +- .../01200_mutations_memory_consumption.sql | 2 +- .../0_stateless/01221_system_settings.sql | 2 +- .../0_stateless/01275_parallel_mv.sql.j2 | 4 ++-- .../01281_group_by_limit_memory_tracking.sh | 2 +- .../0_stateless/01293_optimize_final_force.sh | 2 +- .../0_stateless/01304_direct_io_long.sh | 2 +- .../01343_min_bytes_to_use_mmap_io.sql | 2 +- .../01344_min_bytes_to_use_mmap_io_index.sql | 2 +- .../0_stateless/01475_read_subcolumns.sql | 2 +- .../01475_read_subcolumns_storages.sh | 2 +- ...ce_condition_rename_clear_zookeeper_long.sh | 2 +- ...2_execute_merges_on_single_replica_long.sql | 2 +- .../0_stateless/01533_multiple_nested.sql | 2 +- .../01551_mergetree_read_in_order_spread.sql | 2 +- ...1605_adaptive_granularity_block_borders.sql | 4 ++-- .../01643_merge_tree_fsync_smoke.sql | 2 +- ...01643_replicated_merge_tree_fsync_smoke.sql | 2 +- ...5_normalize_create_alter_function_names.sql | 2 +- .../01810_max_part_removal_threads_long.sh | 2 +- .../02226_filesystem_cache_profile_events.sh | 2 +- .../02228_merge_tree_insert_memory_usage.sql | 4 ++-- ...33_optimize_aggregation_in_order_prefix.sql | 2 +- ...filesystem_cache_bypass_cache_threshold.sql | 2 +- .../02240_filesystem_query_cache.sql | 2 +- .../02240_system_filesystem_cache_table.sh | 2 +- ...241_filesystem_cache_on_write_operations.sh | 2 +- .../02242_system_filesystem_cache_log_table.sh | 2 +- .../0_stateless/02263_lazy_mark_load.sh | 2 +- .../0_stateless/02286_drop_filesystem_cache.sh | 2 +- .../02313_filesystem_cache_seeks.sh | 2 +- .../0_stateless/02336_sparse_columns_s3.sql | 2 +- .../0_stateless/02343_aggregation_pipeline.sql | 2 +- ..._with_external_aggregation_memory_usage.sql | 2 +- .../0_stateless/02361_fsync_profile_events.sh | 4 ++-- .../02381_client_prints_server_side_time.sh | 2 +- .../02454_create_table_with_custom_disk.sql | 2 +- .../02497_trace_events_stress_long.sh | 2 +- ...3_cache_on_write_with_small_segment_size.sh | 2 +- .../02521_aggregation_by_partitions.sql | 2 +- .../0_stateless/02532_send_logs_level_test.sh | 4 ++-- ...4_fix_grouping_sets_predicate_push_down.sql | 2 +- .../02560_vertical_merge_memory_usage.sql | 2 +- .../02582_async_reading_with_small_limit.sql | 2 +- .../02703_max_local_read_bandwidth.sh | 2 +- .../02703_max_local_write_bandwidth.sh | 2 +- .../0_stateless/02704_max_backup_bandwidth.sh | 2 +- .../0_stateless/02725_memory-for-merges.sql | 2 +- .../02731_zero_objects_in_metadata.sh | 2 +- ...stem_parts_columns_modification_time.sql.j2 | 4 ++-- .../02808_filesystem_cache_drop_query.sh | 2 +- .../02833_multiprewhere_extra_column.sql | 2 +- ...artition_with_duplicated_parts_zookeeper.sh | 2 +- ...933_change_cache_setting_without_restart.sh | 2 +- ...dynamically_change_filesystem_cache_size.sh | 2 +- .../03008_local_plain_rewritable.sh | 2 +- ...32_dynamically_resize_filesystem_cache_2.sh | 2 +- 68 files changed, 92 insertions(+), 74 deletions(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 09a9f51084b..2215ac2b37c 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -213,6 +213,10 @@ function run_tests() ADDITIONAL_OPTIONS+=('--s3-storage') fi + if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--azure-blob-storage') + fi + if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--db-engine=Ordinary') fi diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 3ce489b9e0e..b56394df97a 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -207,7 +207,7 @@ function run_tests() if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then # to disable the same tests - ADDITIONAL_OPTIONS+=('--s3-storage') + ADDITIONAL_OPTIONS+=('--azure-blob-storage') # azurite is slow, but with these two settings it can be super slow ADDITIONAL_OPTIONS+=('--no-random-settings') ADDITIONAL_OPTIONS+=('--no-random-merge-tree-settings') diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 6d121ba4142..96f8ecb2fab 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -211,7 +211,6 @@ clickhouse-client --query "SYSTEM STOP THREAD FUZZER" stop_server # Let's enable S3 storage by default -export USE_S3_STORAGE_FOR_MERGE_TREE=1 export RANDOMIZE_OBJECT_KEY_TYPE=1 export ZOOKEEPER_FAULT_INJECTION=1 export THREAD_POOL_FAULT_INJECTION=1 diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index bf0281cae68..486bfc25e22 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -30,6 +30,9 @@ def get_additional_envs(check_name: str) -> List[str]: if "azure" in check_name: result.append("USE_AZURE_STORAGE_FOR_MERGE_TREE=1") + if "s3" in check_name: + result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1") + return result diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 36870d59c3a..c581d35a289 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -700,7 +700,9 @@ class FailureReason(enum.Enum): NO_LONG = "not running long tests" REPLICATED_DB = "replicated-database" NON_ATOMIC_DB = "database engine not Atomic" + OBJECT_STORAGE = "object-storage" S3_STORAGE = "s3-storage" + AZURE_BLOB_STORAGE = "azure-blob-storage" BUILD = "not running for current build" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" SHARED_MERGE_TREE = "no-shared-merge-tree" @@ -1226,13 +1228,17 @@ class TestCase: elif tags and ("no-s3-storage" in tags) and args.s3_storage: return FailureReason.S3_STORAGE + elif tags and ("no-azure-blob-storage" in tags) and args.azure_blob_storage: + return FailureReason.AZURE_BLOB_STORAGE + elif tags and ("no-object-storage" in tags) and (args.azure_blob_storage or args.s3_storage): + return FailureReason.OBJECT_STORAGE elif ( tags - and "no-s3-storage-with-slow-build" in tags - and args.s3_storage + and "no-object-storage-with-slow-build" in tags + and (args.s3_storage or args.azure_blob_storage) and BuildFlags.RELEASE not in args.build_flags ): - return FailureReason.S3_STORAGE + return FailureReason.OBJECT_STORAGE elif tags: for build_flag in args.build_flags: @@ -3099,6 +3105,12 @@ def parse_args(): default=False, help="Run tests over s3 storage", ) + parser.add_argument( + "--azure-blob-storage", + action="store_true", + default=False, + help="Run tests over azure blob storage", + ) parser.add_argument( "--no-random-settings", action="store_true", diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index ffbf4df4ba7..13146f2eab0 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage +# Tags: zookeeper, no-object-storage # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql index c54ca0b084e..ae9b6bb7b2c 100644 --- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql +++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage, no-asan +-- Tags: long, no-object-storage, no-asan SET joined_subquery_requires_alias = 0; diff --git a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index af746c43da9..5a4fd901f8d 100755 --- a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage, no-tsan +# Tags: long, no-object-storage, no-tsan # no-s3 because read FileOpen metric set -e diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql index 4613576cf4e..009fc0bbb9f 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage, no-random-merge-tree-settings +-- Tags: long, no-object-storage, no-random-merge-tree-settings SET output_format_pretty_row_numbers = 0; DROP TABLE IF EXISTS check_system_tables; diff --git a/tests/queries/0_stateless/00763_lock_buffer_long.sh b/tests/queries/0_stateless/00763_lock_buffer_long.sh index 046e4efaa85..2006d43cdd2 100755 --- a/tests/queries/0_stateless/00763_lock_buffer_long.sh +++ b/tests/queries/0_stateless/00763_lock_buffer_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage, no-msan, no-asan, no-tsan, no-debug +# Tags: long, no-object-storage, no-msan, no-asan, no-tsan, no-debug # Some kind of stress test, it doesn't make sense to test in a non-release build set -e diff --git a/tests/queries/0_stateless/01070_mutations_with_dependencies.sql b/tests/queries/0_stateless/01070_mutations_with_dependencies.sql index 813ebf3f5a7..4d1cd54306c 100644 --- a/tests/queries/0_stateless/01070_mutations_with_dependencies.sql +++ b/tests/queries/0_stateless/01070_mutations_with_dependencies.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-s3-storage +-- Tags: no-parallel, no-object-storage -- With s3 policy TTL TO DISK 'default' doesn't work (because we have no default, only 's3') drop table if exists ttl; diff --git a/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql b/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql index 3a05e4507a2..166f44df2a7 100644 --- a/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql +++ b/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage -- Output slightly different plan drop table if exists t; diff --git a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql index 5019abc38ab..f2d071961ee 100644 --- a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql +++ b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql @@ -1,4 +1,4 @@ --- Tags: no-debug, no-parallel, long, no-s3-storage, no-random-settings, no-random-merge-tree-settings +-- Tags: no-debug, no-parallel, long, no-object-storage, no-random-settings, no-random-merge-tree-settings SET optimize_trivial_insert_select = 1; DROP TABLE IF EXISTS table_with_single_pk; diff --git a/tests/queries/0_stateless/01221_system_settings.sql b/tests/queries/0_stateless/01221_system_settings.sql index fcffd6c45fe..da0204b37bd 100644 --- a/tests/queries/0_stateless/01221_system_settings.sql +++ b/tests/queries/0_stateless/01221_system_settings.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage select * from system.settings where name = 'send_timeout'; select * from system.merge_tree_settings order by length(description) limit 1; diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 index 047b1cc3ee7..9d74474c1a4 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 +++ b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 @@ -1,5 +1,5 @@ --- Tags: no-s3-storage, no-parallel, no-fasttest --- no-s3-storage: s3 has 20 more threads +-- Tags: no-object-storage, no-parallel, no-fasttest +-- no-object-storage: s3 has 20 more threads -- no-parallel: it checks the number of threads, which can be lowered in presence of other queries -- avoid settings randomization by clickhouse-test diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index e83e49dffef..33b8f413fd5 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings, no-s3-storage, no-msan +# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings, no-object-storage, no-msan # Tag no-fasttest: max_memory_usage_for_user can interfere another queries running concurrently # Regression for MemoryTracker that had been incorrectly accounted diff --git a/tests/queries/0_stateless/01293_optimize_final_force.sh b/tests/queries/0_stateless/01293_optimize_final_force.sh index d3d3d3e1ac5..e838af8af9b 100755 --- a/tests/queries/0_stateless/01293_optimize_final_force.sh +++ b/tests/queries/0_stateless/01293_optimize_final_force.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, long, no-debug, no-s3-storage +# Tags: no-fasttest, long, no-debug, no-object-storage # This test is too slow with S3 storage and debug modes. CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index 97148dc268e..2e27c2f7728 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage-with-slow-build +# Tags: long, no-object-storage-with-slow-build CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql index 614629351ef..15c9ec16700 100644 --- a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql +++ b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_01343; CREATE TABLE test_01343 (x String) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; INSERT INTO test_01343 VALUES ('Hello, world'); diff --git a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql index 2e5ec563641..76cb535dcb7 100644 --- a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql +++ b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_01344; CREATE TABLE test_01344 (x String, INDEX idx (x) TYPE set(10) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; INSERT INTO test_01344 VALUES ('Hello, world'); diff --git a/tests/queries/0_stateless/01475_read_subcolumns.sql b/tests/queries/0_stateless/01475_read_subcolumns.sql index 8d4e3cb779b..d6eec2f84a1 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-settings +-- Tags: no-object-storage, no-random-settings SET use_uncompressed_cache = 0; diff --git a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh index 5a30f9e0f08..f74f6755e59 100755 --- a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh +++ b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage +# Tags: no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh index c3c87eeaf8b..6098c826e32 100755 --- a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh +++ b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-s3-storage +# Tags: race, zookeeper, no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index 49ef9d8b79f..e53f4476ec6 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -1,4 +1,4 @@ --- Tags: long, replica, no-replicated-database, no-parallel, no-s3-storage +-- Tags: long, replica, no-replicated-database, no-parallel, no-object-storage -- Tag no-replicated-database: Fails due to additional replicas or shards -- Tag no-parallel: static zk path diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql b/tests/queries/0_stateless/01533_multiple_nested.sql index 1a6f0ec395e..80e9fc7e2fb 100644 --- a/tests/queries/0_stateless/01533_multiple_nested.sql +++ b/tests/queries/0_stateless/01533_multiple_nested.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings -- no-s3 because read FileOpen metric DROP TABLE IF EXISTS nested; diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql index 95b46c69e83..b5ece08196e 100644 --- a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings DROP TABLE IF EXISTS data_01551; diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 187ff5c37e1..9b96ce3e586 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -1,6 +1,6 @@ --- Tags: no-random-merge-tree-settings, no-tsan, no-debug, no-s3-storage +-- Tags: no-random-merge-tree-settings, no-tsan, no-debug, no-object-storage -- no-tsan: too slow --- no-s3-storage: for remote tables we use thread pool even when reading with one stream, so memory consumption is higher +-- no-object-storage: for remote tables we use thread pool even when reading with one stream, so memory consumption is higher SET use_uncompressed_cache = 0; SET allow_prefetched_read_pool_for_remote_filesystem=0; diff --git a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql index dfc761e1764..f7622bcf98f 100644 --- a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql +++ b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage drop table if exists data_01643; diff --git a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql index 54c30fa2b1a..992cc687c88 100644 --- a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql +++ b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-s3-storage +-- Tags: no-parallel, no-object-storage -- no-parallel -- for flaky check and to avoid "Removing leftovers from table" (for other tables) -- Temporarily skip warning 'table was created by another server at the same moment, will retry' diff --git a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql index be0f7e8b710..921d28e6399 100644 --- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql +++ b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql @@ -1,4 +1,4 @@ --- Tags: zookeeper, no-replicated-database, no-parallel, no-s3-storage +-- Tags: zookeeper, no-replicated-database, no-parallel, no-object-storage drop table if exists x; diff --git a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh index 3782a7d3ad6..c38fc505fa8 100755 --- a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh +++ b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage +# Tags: long, no-object-storage # Because parallel parts removal disabled for s3 storage # NOTE: this done as not .sql since we need to Ordinary database diff --git a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh index 9d87542d84d..d0e61541b15 100755 --- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh +++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings, no-replicated-database +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings, no-replicated-database # set -x diff --git a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql index ca1ee2738c7..6d86d995143 100644 --- a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql +++ b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql @@ -1,5 +1,5 @@ --- Tags: long, no-parallel, no-s3-storage --- no-s3-storage: Avoid flakiness due to cache / buffer usage +-- Tags: long, no-parallel, no-object-storage +-- no-object-storage: Avoid flakiness due to cache / buffer usage SET insert_keeper_fault_injection_probability=0; -- to succeed this test can require too many retries due to 100 partitions, so disable fault injections -- regression for MEMORY_LIMIT_EXCEEDED error because of deferred final part flush diff --git a/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql b/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql index 8bc75040e5a..48af5ae0031 100644 --- a/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql +++ b/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql index ee92931ec54..b791ee18e82 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +-- Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings -- { echo } diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql index a609702f22a..40c80e04697 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +-- Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings -- { echo } diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index 57b8cec7864..8faf0a08f1f 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index 1028fba76f5..f8e7b7e7e72 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh index 7a665d81eab..fe016f5a27f 100755 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02263_lazy_mark_load.sh b/tests/queries/0_stateless/02263_lazy_mark_load.sh index 5f80d9d7f6d..f1602e47e01 100755 --- a/tests/queries/0_stateless/02263_lazy_mark_load.sh +++ b/tests/queries/0_stateless/02263_lazy_mark_load.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-parallel +# Tags: no-object-storage, no-random-settings, no-parallel set -eo pipefail CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index a2c9352b7aa..32c9e9cb060 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh index fbaec1ffaa7..b54e3d7f805 100755 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02336_sparse_columns_s3.sql b/tests/queries/0_stateless/02336_sparse_columns_s3.sql index bf4622adedc..1dc1e980846 100644 --- a/tests/queries/0_stateless/02336_sparse_columns_s3.sql +++ b/tests/queries/0_stateless/02336_sparse_columns_s3.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage +-- Tags: no-parallel, no-fasttest, no-object-storage DROP TABLE IF EXISTS t_sparse_s3; diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.sql b/tests/queries/0_stateless/02343_aggregation_pipeline.sql index d73ac66763e..0f9dbd0247d 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.sql +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage -- produces different pipeline if enabled set enable_memory_bound_merging_of_aggregation_results = 0; diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index a5a3da82324..105fb500461 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-s3-storage +-- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-object-storage DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; diff --git a/tests/queries/0_stateless/02361_fsync_profile_events.sh b/tests/queries/0_stateless/02361_fsync_profile_events.sh index e150d70b896..98c9cf9b7b4 100755 --- a/tests/queries/0_stateless/02361_fsync_profile_events.sh +++ b/tests/queries/0_stateless/02361_fsync_profile_events.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-merge-tree-settings -# Tag no-s3-storage: s3 does not have fsync +# Tags: no-object-storage, no-random-merge-tree-settings +# Tag no-object-storage: s3 does not have fsync CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02381_client_prints_server_side_time.sh b/tests/queries/0_stateless/02381_client_prints_server_side_time.sh index e6cd63da95d..81376ee3791 100755 --- a/tests/queries/0_stateless/02381_client_prints_server_side_time.sh +++ b/tests/queries/0_stateless/02381_client_prints_server_side_time.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-s3-storage +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql index 6cb1c0774aa..a2d46cf6d1b 100644 --- a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql +++ b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-replicated-database +-- Tags: no-object-storage, no-replicated-database DROP TABLE IF EXISTS test; diff --git a/tests/queries/0_stateless/02497_trace_events_stress_long.sh b/tests/queries/0_stateless/02497_trace_events_stress_long.sh index c111ed40a29..dfd2f12b55b 100755 --- a/tests/queries/0_stateless/02497_trace_events_stress_long.sh +++ b/tests/queries/0_stateless/02497_trace_events_stress_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-tsan, no-asan, no-debug, no-s3-storage, no-fasttest, no-replicated-database +# Tags: long, no-parallel, no-tsan, no-asan, no-debug, no-object-storage, no-fasttest, no-replicated-database set -e diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh index 4f3fd0e54f6..5aeab4c746e 100755 --- a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +# Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none diff --git a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql index 55723360c38..b4d31e234d8 100644 --- a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql +++ b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage +-- Tags: long, no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index 4afc6d4496b..71f42e2a6db 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-debug -# - no-s3-storage - S3 has additional logging +# Tags: no-object-storage, no-debug +# - no-object-storage - S3 has additional logging # - no-debug - debug builds also has additional logging CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql index 0891f1aa8a2..f926b9037d2 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_grouping_sets_predicate; diff --git a/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql b/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql index 785fb10f70b..361305bac6d 100644 --- a/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql +++ b/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage drop table if exists tvm; create table tvm (c0 UInt64, c1 UInt64, c2 UInt64, c3 UInt64, c4 UInt64, c5 UInt64, c6 UInt64, c7 UInt64, c8 UInt64, c9 UInt64, c10 UInt64, c11 UInt64, c12 UInt64, c13 UInt64, c14 UInt64, c15 UInt64, c16 UInt64, c17 UInt64, c18 UInt64, c19 UInt64, c20 UInt64, c21 UInt64, c22 UInt64, c23 UInt64, c24 UInt64, c25 UInt64, c26 UInt64, c27 UInt64, c28 UInt64, c29 UInt64, c30 UInt64, c31 UInt64, c32 UInt64, c33 UInt64, c34 UInt64, c35 UInt64, c36 UInt64, c37 UInt64, c38 UInt64, c39 UInt64, c40 UInt64, c41 UInt64, c42 UInt64, c43 UInt64, c44 UInt64, c45 UInt64, c46 UInt64, c47 UInt64, c48 UInt64, c49 UInt64, c50 UInt64, c51 UInt64, c52 UInt64, c53 UInt64, c54 UInt64, c55 UInt64, c56 UInt64, c57 UInt64, c58 UInt64, c59 UInt64, c60 UInt64, c61 UInt64, c62 UInt64, c63 UInt64, c64 UInt64, c65 UInt64, c66 UInt64, c67 UInt64, c68 UInt64, c69 UInt64, c70 UInt64, c71 UInt64, c72 UInt64, c73 UInt64, c74 UInt64, c75 UInt64, c76 UInt64, c77 UInt64, c78 UInt64, c79 UInt64, c80 UInt64, c81 UInt64, c82 UInt64, c83 UInt64, c84 UInt64, c85 UInt64, c86 UInt64, c87 UInt64, c88 UInt64, c89 UInt64, c90 UInt64, c91 UInt64, c92 UInt64, c93 UInt64, c94 UInt64, c95 UInt64, c96 UInt64, c97 UInt64, c98 UInt64, c99 UInt64, c100 UInt64, c101 UInt64, c102 UInt64, c103 UInt64, c104 UInt64, c105 UInt64, c106 UInt64, c107 UInt64, c108 UInt64, c109 UInt64, c110 UInt64, c111 UInt64, c112 UInt64, c113 UInt64, c114 UInt64, c115 UInt64, c116 UInt64, c117 UInt64, c118 UInt64, c119 UInt64, c120 UInt64, c121 UInt64, c122 UInt64, c123 UInt64, c124 UInt64, c125 UInt64, c126 UInt64, c127 UInt64, c128 UInt64, c129 UInt64, c130 UInt64, c131 UInt64, c132 UInt64, c133 UInt64, c134 UInt64, c135 UInt64, c136 UInt64, c137 UInt64, c138 UInt64, c139 UInt64, c140 UInt64, c141 UInt64, c142 UInt64, c143 UInt64, c144 UInt64, c145 UInt64, c146 UInt64, c147 UInt64, c148 UInt64, c149 UInt64, c150 UInt64, c151 UInt64, c152 UInt64, c153 UInt64, c154 UInt64, c155 UInt64, c156 UInt64, c157 UInt64, c158 UInt64, c159 UInt64, c160 UInt64, c161 UInt64, c162 UInt64, c163 UInt64, c164 UInt64, c165 UInt64, c166 UInt64, c167 UInt64, c168 UInt64, c169 UInt64, c170 UInt64, c171 UInt64, c172 UInt64, c173 UInt64, c174 UInt64, c175 UInt64, c176 UInt64, c177 UInt64, c178 UInt64, c179 UInt64, c180 UInt64, c181 UInt64, c182 UInt64, c183 UInt64, c184 UInt64, c185 UInt64, c186 UInt64, c187 UInt64, c188 UInt64, c189 UInt64, c190 UInt64, c191 UInt64, c192 UInt64, c193 UInt64, c194 UInt64, c195 UInt64, c196 UInt64, c197 UInt64, c198 UInt64, c199 UInt64, c200 UInt64, c201 UInt64, c202 UInt64, c203 UInt64, c204 UInt64, c205 UInt64, c206 UInt64, c207 UInt64, c208 UInt64, c209 UInt64, c210 UInt64, c211 UInt64, c212 UInt64, c213 UInt64, c214 UInt64, c215 UInt64, c216 UInt64, c217 UInt64, c218 UInt64, c219 UInt64, c220 UInt64, c221 UInt64, c222 UInt64, c223 UInt64, c224 UInt64, c225 UInt64, c226 UInt64, c227 UInt64, c228 UInt64, c229 UInt64, c230 UInt64, c231 UInt64, c232 UInt64, c233 UInt64, c234 UInt64, c235 UInt64, c236 UInt64, c237 UInt64, c238 UInt64, c239 UInt64, c240 UInt64, c241 UInt64, c242 UInt64, c243 UInt64, c244 UInt64, c245 UInt64, c246 UInt64, c247 UInt64, c248 UInt64, c249 UInt64, c250 UInt64, c251 UInt64, c252 UInt64, c253 UInt64, c254 UInt64, c255 UInt64, c256 UInt64, c257 UInt64, c258 UInt64, c259 UInt64, c260 UInt64, c261 UInt64, c262 UInt64, c263 UInt64, c264 UInt64, c265 UInt64, c266 UInt64, c267 UInt64, c268 UInt64, c269 UInt64, c270 UInt64, c271 UInt64, c272 UInt64, c273 UInt64, c274 UInt64, c275 UInt64, c276 UInt64, c277 UInt64, c278 UInt64, c279 UInt64, c280 UInt64, c281 UInt64, c282 UInt64, c283 UInt64, c284 UInt64, c285 UInt64, c286 UInt64, c287 UInt64, c288 UInt64, c289 UInt64, c290 UInt64, c291 UInt64, c292 UInt64, c293 UInt64, c294 UInt64, c295 UInt64, c296 UInt64, c297 UInt64, c298 UInt64, c299 UInt64) engine = MergeTree order by tuple() settings min_rows_for_wide_part = 10, min_bytes_for_wide_part=0, vertical_merge_algorithm_min_rows_to_activate=1; diff --git a/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql index cb6b1b6083e..406cab82183 100644 --- a/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql +++ b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh index c78cd202f1b..6f43c1ae869 100755 --- a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-random-merge-tree-settings +# Tags: no-object-storage, no-random-settings, no-random-merge-tree-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh index 31cf6e9606e..4f6a300c5b3 100755 --- a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage +# Tags: no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh index 748bf856deb..8cb03a93a7a 100755 --- a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh +++ b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-random-merge-tree-settings +# Tags: no-object-storage, no-random-settings, no-random-merge-tree-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02725_memory-for-merges.sql b/tests/queries/0_stateless/02725_memory-for-merges.sql index 1a8402dff4b..8e4d4f5b3e0 100644 --- a/tests/queries/0_stateless/02725_memory-for-merges.sql +++ b/tests/queries/0_stateless/02725_memory-for-merges.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings -- We allocate a lot of memory for buffers when reading or writing to S3 DROP TABLE IF EXISTS 02725_memory_for_merges SYNC; diff --git a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh index eef52002e36..78659b70129 100755 --- a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh +++ b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-s3-storage +# Tags: no-fasttest, no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 index eee236ff681..1ca5cc0bb7e 100644 --- a/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 +++ b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 @@ -1,5 +1,5 @@ --- Tags: no-s3-storage --- Tag: no-s3-storage because S3 updates metadata for the virtual link file on metadata disk (see CreateHardlinkOperation::execute() for details) +-- Tags: no-object-storage +-- Tag: no-object-storage because S3 updates metadata for the virtual link file on metadata disk (see CreateHardlinkOperation::execute() for details) set mutations_sync=1; diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh index b44f9e50513..8a4a2e906b0 100755 --- a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh +++ b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql index 3a751294cba..da2f050cf38 100644 --- a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql +++ b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings, no-s3-storage +-- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings, no-object-storage drop table if exists t_multi_prewhere; drop row policy if exists policy_02834 on t_multi_prewhere; diff --git a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh index edfed206d87..07d2ee27d22 100755 --- a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh +++ b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage +# Tags: zookeeper, no-object-storage # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) diff --git a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh index ddad7a1904b..76ada756f47 100755 --- a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh +++ b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage +# Tags: no-fasttest, no-parallel, no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh index 2e344a6b6e5..6f454da40da 100755 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.sh b/tests/queries/0_stateless/03008_local_plain_rewritable.sh index 5fac964a219..d51e180efc9 100755 --- a/tests/queries/0_stateless/03008_local_plain_rewritable.sh +++ b/tests/queries/0_stateless/03008_local_plain_rewritable.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-random-settings, no-s3-storage, no-replicated-database, no-shared-merge-tree +# Tags: no-random-settings, no-object-storage, no-replicated-database, no-shared-merge-tree # Tag no-random-settings: enable after root causing flakiness CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh index 526c4f84030..09bdd7f6b56 100755 --- a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh +++ b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From ebacab6c986fd4bbd98ebf4761f082948d22b3cc Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 14:17:58 +0200 Subject: [PATCH 083/141] Bump From 452201caf943451f15cc14fb6dfb21de33166376 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 14:21:39 +0200 Subject: [PATCH 084/141] Black --- tests/clickhouse-test | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c581d35a289..8486e3a885f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1230,7 +1230,11 @@ class TestCase: return FailureReason.S3_STORAGE elif tags and ("no-azure-blob-storage" in tags) and args.azure_blob_storage: return FailureReason.AZURE_BLOB_STORAGE - elif tags and ("no-object-storage" in tags) and (args.azure_blob_storage or args.s3_storage): + elif ( + tags + and ("no-object-storage" in tags) + and (args.azure_blob_storage or args.s3_storage) + ): return FailureReason.OBJECT_STORAGE elif ( tags From 06e235024f7b33c21be1ef2dc6210b40aabe7921 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 2 Jul 2024 15:16:57 +0200 Subject: [PATCH 085/141] work with review --- .../Transforms/ApplySquashingTransform.h | 2 +- .../DeduplicationTokenTransforms.cpp | 29 ++++++++----------- .../Transforms/DeduplicationTokenTransforms.h | 19 ++++++------ .../Transforms/SquashingTransform.cpp | 8 ++--- .../Transforms/buildPushingToViewsChain.cpp | 16 +++++----- src/Server/TCPHandler.cpp | 4 +-- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 10 ++++--- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 6 ++-- src/Storages/MergeTree/MutateTask.cpp | 4 +-- .../MergeTree/ReplicatedMergeTreeSink.cpp | 5 ++-- src/Storages/WindowView/StorageWindowView.cpp | 6 ++-- 12 files changed, 55 insertions(+), 56 deletions(-) diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 94b890198d4..49a6581e685 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -32,7 +32,7 @@ public: protected: void onConsume(Chunk chunk) override { - cur_chunk = DB::Squashing::squash(std::move(chunk)); + cur_chunk = Squashing::squash(std::move(chunk)); } GenerateResult onGenerate() override diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 374a6495f79..f50e69e730f 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -56,7 +56,7 @@ String TokenInfo::debugToken() const void TokenInfo::addChunkHash(String part) { - if (stage == UNDEFINED) + if (stage == UNDEFINED && empty()) stage = DEFINE_SOURCE_WITH_HASHES; if (stage != DEFINE_SOURCE_WITH_HASHES) @@ -65,7 +65,7 @@ void TokenInfo::addChunkHash(String part) addTokenPart(std::move(part)); } -void TokenInfo::defineSourceWithChunkHashes() +void TokenInfo::finishChunkHashes() { if (stage == UNDEFINED && empty()) stage = DEFINE_SOURCE_WITH_HASHES; @@ -78,7 +78,7 @@ void TokenInfo::defineSourceWithChunkHashes() void TokenInfo::setUserToken(const String & token) { - if (stage == UNDEFINED) + if (stage == UNDEFINED && empty()) stage = DEFINE_SOURCE_USER_TOKEN; if (stage != DEFINE_SOURCE_USER_TOKEN) @@ -87,7 +87,7 @@ void TokenInfo::setUserToken(const String & token) addTokenPart(fmt::format("user-token-{}", token)); } -void TokenInfo::defineSourceWithUserToken(size_t block_number) +void TokenInfo::setSourceWithUserToken(size_t block_number) { if (stage != DEFINE_SOURCE_USER_TOKEN) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); @@ -108,7 +108,7 @@ void TokenInfo::setViewID(const String & id) addTokenPart(fmt::format("view-id-{}", id)); } -void TokenInfo::defineViewID(size_t block_number) +void TokenInfo::setViewBlockNumber(size_t block_number) { if (stage != DEFINE_VIEW) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); @@ -138,6 +138,7 @@ size_t TokenInfo::getTotalSize() const for (const auto & part : parts) size += part.size(); + // we reserve more size here to be able to add delimenter between parts. return size + parts.size() - 1; } @@ -149,17 +150,11 @@ void CheckTokenTransform::transform(Chunk & chunk) if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); - if (!must_be_present) - { - LOG_DEBUG(log, "{}, no token required, token {}", debug, token_info->debugToken()); - return; - } - LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken()); } #endif -String SetInitialTokenTransform::getChunkHash(const Chunk & chunk) +String DefineSourceWithChunkHashesTransform::getChunkHash(const Chunk & chunk) { SipHash hash; for (const auto & colunm : chunk.getColumns()) @@ -170,20 +165,20 @@ String SetInitialTokenTransform::getChunkHash(const Chunk & chunk) } -void SetInitialTokenTransform::transform(Chunk & chunk) +void DefineSourceWithChunkHashesTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); if (!token_info) throw Exception( ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetInitialTokenTransform"); + "TokenInfo is expected for consumed chunk in DefineSourceWithChunkHashesTransform"); if (token_info->isDefined()) return; token_info->addChunkHash(getChunkHash(chunk)); - token_info->defineSourceWithChunkHashes(); + token_info->finishChunkHashes(); } void SetUserTokenTransform::transform(Chunk & chunk) @@ -203,7 +198,7 @@ void SetSourceBlockNumberTransform::transform(Chunk & chunk) throw Exception( ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); - token_info->defineSourceWithUserToken(block_number++); + token_info->setSourceWithUserToken(block_number++); } void SetViewIDTransform::transform(Chunk & chunk) @@ -223,7 +218,7 @@ void SetViewBlockNumberTransform::transform(Chunk & chunk) throw Exception( ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); - token_info->defineViewID(block_number++); + token_info->setViewBlockNumber(block_number++); } void ResetTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 9d087536a38..79d168d1000 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -42,13 +42,13 @@ namespace DeduplicationToken bool isDefined() const { return stage == DEFINED; } void addChunkHash(String part); - void defineSourceWithChunkHashes(); + void finishChunkHashes(); void setUserToken(const String & token); - void defineSourceWithUserToken(size_t block_number); + void setSourceWithUserToken(size_t block_number); void setViewID(const String & id); - void defineViewID(size_t block_number); + void setViewBlockNumber(size_t block_number); void reset(); @@ -98,10 +98,9 @@ namespace DeduplicationToken class CheckTokenTransform : public ISimpleTransform { public: - CheckTokenTransform(String debug_, bool must_be_present_, const Block & header_) + CheckTokenTransform(String debug_, const Block & header_) : ISimpleTransform(header_, header_, true) , debug(std::move(debug_)) - , must_be_present(must_be_present_) { } @@ -112,7 +111,6 @@ namespace DeduplicationToken private: String debug; LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform"); - bool must_be_present = false; }; #endif @@ -134,16 +132,19 @@ namespace DeduplicationToken }; - class SetInitialTokenTransform : public ISimpleTransform + class DefineSourceWithChunkHashesTransform : public ISimpleTransform { public: - explicit SetInitialTokenTransform(const Block & header_) + explicit DefineSourceWithChunkHashesTransform(const Block & header_) : ISimpleTransform(header_, header_, true) { } - String getName() const override { return "DeduplicationToken::SetInitialTokenTransform"; } + String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; } + // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with heshes from the parts. + // But if there is some table with different engine, we still need to define the source of the data in deduplication token + // We use that transform to define the source as a hash of entire block in deduplication token void transform(Chunk & chunk) override; static String getChunkHash(const Chunk & chunk); diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index e457a262681..1fb4433240a 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -18,7 +18,7 @@ SquashingTransform::SquashingTransform( void SquashingTransform::onConsume(Chunk chunk) { - cur_chunk = DB::Squashing::squash(squashing.add(std::move(chunk))); + cur_chunk = Squashing::squash(squashing.add(std::move(chunk))); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -31,7 +31,7 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - finish_chunk = DB::Squashing::squash(squashing.flush()); + finish_chunk = Squashing::squash(squashing.flush()); } void SquashingTransform::work() @@ -63,14 +63,14 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - chunk = DB::Squashing::squash(squashing.add(std::move(chunk))); + chunk = Squashing::squash(squashing.add(std::move(chunk))); } else { if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - chunk = DB::Squashing::squash(squashing.flush()); + chunk = Squashing::squash(squashing.flush()); } } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 713ab25600f..8d38396ecd5 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -357,7 +357,7 @@ std::optional generateViewChain( } #ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Before squashing", out.getInputHeader())); #endif auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); @@ -403,7 +403,7 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { #ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Right after Inner query", out.getInputHeader())); #endif auto executing_inner_query = std::make_shared( @@ -413,7 +413,7 @@ std::optional generateViewChain( out.addSource(std::move(executing_inner_query)); #ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Right before Inner query", out.getInputHeader())); #endif } @@ -547,7 +547,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { @@ -555,7 +555,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (dynamic_cast(storage.get())) { @@ -564,7 +564,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) @@ -573,13 +573,13 @@ Chain buildPushingToViewsChain( metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); - result_chain.addSource(std::make_shared(sink->getHeader())); + result_chain.addSource(std::make_shared(sink->getHeader())); result_chain.addSource(std::move(sink)); } else { - result_chain.addSource(std::make_shared(storage_header)); + result_chain.addSource(std::make_shared(storage_header)); } if (result_chain.empty()) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a705ae2e013..ee38b7242b1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -889,7 +889,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { squashing.setHeader(state.block_for_insert.cloneEmpty()); - auto result_chunk = DB::Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); + auto result_chunk = Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); if (result_chunk) { auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); @@ -901,7 +901,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - Chunk result_chunk = DB::Squashing::squash(squashing.flush()); + Chunk result_chunk = Squashing::squash(squashing.flush()); if (!result_chunk) { return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 63858ce601d..429fd8b67c5 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2322,12 +2322,11 @@ String IMergeTreeDataPart::getUniqueId() const return getDataPartStorage().getUniqueId(); } -String IMergeTreeDataPart::getPartBlockIDHash() const +UInt128 IMergeTreeDataPart::getPartBlockIDHash() const { SipHash hash; checksums.computeTotalChecksumDataOnly(hash); - const auto hash_value = hash.get128(); - return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); + return hash.get128(); } String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const @@ -2336,7 +2335,10 @@ String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get block id for non zero level part {}", name); if (token.empty()) - return info.partition_id + "_" + getPartBlockIDHash(); + { + const auto hash_value = getPartBlockIDHash(); + return info.partition_id + "_" + toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); + } SipHash hash; hash.update(token.data(), token.size()); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 204dfdaad0a..dbb1df3cfe8 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -210,7 +210,7 @@ public: /// Compute part block id for zero level part. Otherwise throws an exception. /// If token is not empty, block id is calculated based on it instead of block data - String getPartBlockIDHash() const; + UInt128 getPartBlockIDHash() const; String getZeroLevelPartBlockID(std::string_view token) const; void setName(const String & new_name); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 7bc04c05a1c..4a1163d2317 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -126,7 +126,8 @@ void MergeTreeSink::consume(Chunk & chunk) if (!token_info->isDefined()) { chassert(temp_part.part); - token_info->addChunkHash(temp_part.part->getPartBlockIDHash()); + const auto hash_value = temp_part.part->getPartBlockIDHash(); + token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); } if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) @@ -167,7 +168,7 @@ void MergeTreeSink::consume(Chunk & chunk) if (!token_info->isDefined()) { - token_info->defineSourceWithChunkHashes(); + token_info->finishChunkHashes(); } finishDelayedChunk(); @@ -206,7 +207,6 @@ void MergeTreeSink::finishDelayedChunk() if (settings.insert_deduplicate && deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); - auto res = deduplication_log->addPart(block_id, part->info); if (!res.second) { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 5da36b6ee3b..3dbcb5e5bda 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1317,7 +1317,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() Block block_to_squash = projection.calculate(cur_block, ctx->context); projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); - Chunk squashed_chunk = DB::Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); + Chunk squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); if (squashed_chunk) { auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); @@ -1341,7 +1341,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squashes[i]; - auto squashed_chunk = DB::Squashing::squash(projection_squash_plan.flush()); + auto squashed_chunk = Squashing::squash(projection_squash_plan.flush()); if (squashed_chunk) { auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 228b5c596ab..3677f5b02ab 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -374,7 +374,8 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (!token_info->isDefined()) { chassert(temp_part.part); - token_info->addChunkHash(temp_part.part->getPartBlockIDHash()); + const auto hash_value = temp_part.part->getPartBlockIDHash(); + token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); } } @@ -423,7 +424,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (!token_info->isDefined()) { - token_info->defineSourceWithChunkHashes(); + token_info->finishChunkHashes(); } finishDelayedChunk(zookeeper); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index ccb6259da00..e36247103c7 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1558,7 +1558,7 @@ void StorageWindowView::writeIntoWindowView( #ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared("StorageWindowView: Afrer tmp table before squashing", true, stream_header); + return std::make_shared("StorageWindowView: Afrer tmp table before squashing", stream_header); }); #endif @@ -1604,7 +1604,7 @@ void StorageWindowView::writeIntoWindowView( #ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared("StorageWindowView: Afrer WatermarkTransform", true, stream_header); + return std::make_shared("StorageWindowView: Afrer WatermarkTransform", stream_header); }); #endif @@ -1630,7 +1630,7 @@ void StorageWindowView::writeIntoWindowView( #ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared("StorageWindowView: Before out", true, stream_header); + return std::make_shared("StorageWindowView: Before out", stream_header); }); #endif From f6a2c3156bd81fba8a48a04df5e5095fb8b5a384 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 2 Jul 2024 15:24:29 +0200 Subject: [PATCH 086/141] rename transform --- .../Transforms/DeduplicationTokenTransforms.cpp | 4 ++-- .../Transforms/DeduplicationTokenTransforms.h | 4 ++-- src/Processors/Transforms/buildPushingToViewsChain.cpp | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index f50e69e730f..6786f76cbef 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -154,7 +154,7 @@ void CheckTokenTransform::transform(Chunk & chunk) } #endif -String DefineSourceWithChunkHashesTransform::getChunkHash(const Chunk & chunk) +String DefineSourceWithChunkHashTransform::getChunkHash(const Chunk & chunk) { SipHash hash; for (const auto & colunm : chunk.getColumns()) @@ -165,7 +165,7 @@ String DefineSourceWithChunkHashesTransform::getChunkHash(const Chunk & chunk) } -void DefineSourceWithChunkHashesTransform::transform(Chunk & chunk) +void DefineSourceWithChunkHashTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 79d168d1000..94287dc4487 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -132,10 +132,10 @@ namespace DeduplicationToken }; - class DefineSourceWithChunkHashesTransform : public ISimpleTransform + class DefineSourceWithChunkHashTransform : public ISimpleTransform { public: - explicit DefineSourceWithChunkHashesTransform(const Block & header_) + explicit DefineSourceWithChunkHashTransform(const Block & header_) : ISimpleTransform(header_, header_, true) { } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 8d38396ecd5..312b333ab33 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -547,7 +547,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { @@ -555,7 +555,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (dynamic_cast(storage.get())) { @@ -564,7 +564,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) @@ -573,13 +573,13 @@ Chain buildPushingToViewsChain( metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); - result_chain.addSource(std::make_shared(sink->getHeader())); + result_chain.addSource(std::make_shared(sink->getHeader())); result_chain.addSource(std::move(sink)); } else { - result_chain.addSource(std::make_shared(storage_header)); + result_chain.addSource(std::make_shared(storage_header)); } if (result_chain.empty()) From ce19dc5cd91a1424a172959d62d629646dfd7b38 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Tue, 2 Jul 2024 14:37:33 +0000 Subject: [PATCH 087/141] fix test --- .../0_stateless/03161_lightweight_delete_projection.reference | 2 +- .../queries/0_stateless/03161_lightweight_delete_projection.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index 15832d4cdfa..c5a6cbab0bc 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,2 +1,2 @@ -8888 Alice 50 1231 John 33 +8888 Alice 50 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 70a069df1bc..b189388e356 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -26,6 +26,6 @@ SELECT FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users'); -SELECT * FROM users; +SELECT * FROM users ORDER BY uid; DROP TABLE users; From d2cade4aa38be9f33715d593cb2e0d549c9f565e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 2 Jul 2024 20:11:06 +0200 Subject: [PATCH 088/141] Relax the check in 02982_aggregation_states_destruction --- .../queries/0_stateless/02982_aggregation_states_destruction.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh index 263a4535c0e..84183606d48 100755 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh @@ -11,4 +11,4 @@ $CLICKHOUSE_CLIENT --query_id $query_id --log_query_threads 1 --query="select nu $CLICKHOUSE_CLIENT -q "system flush logs;" -$CLICKHOUSE_CLIENT -q "select count() > 0, (countIf(thread_name = 'AggregDestruct') as aggs) > 0, aggs > 1 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase();" +$CLICKHOUSE_CLIENT -q "select count() > 0 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase() and thread_name = 'AggregDestruct';" From bcf8a93a52204cb80a867c237f302221bb51c272 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 3 Jul 2024 01:34:25 -0400 Subject: [PATCH 089/141] `max_query_length` argument for the fuzzQuery --- .../table-functions/fuzzQuery.md | 3 ++- src/Storages/StorageFuzzQuery.cpp | 20 +++++++++++++------ src/Storages/StorageFuzzQuery.h | 1 + .../03031_table_function_fuzzquery.sql | 4 ++-- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/docs/en/sql-reference/table-functions/fuzzQuery.md b/docs/en/sql-reference/table-functions/fuzzQuery.md index ff8cfd1cd3b..e15f8a40156 100644 --- a/docs/en/sql-reference/table-functions/fuzzQuery.md +++ b/docs/en/sql-reference/table-functions/fuzzQuery.md @@ -9,12 +9,13 @@ sidebar_label: fuzzQuery Perturbs the given query string with random variations. ``` sql -fuzzQuery(query[, random_seed]) +fuzzQuery(query[, max_query_length[, random_seed]]) ``` **Arguments** - `query` (String) - The source query to perform the fuzzing on. +- `max_query_length` (UInt64) - A maximum length the query can get during the fuzzing process. - `random_seed` (UInt64) - A random seed for producing stable results. **Returned Value** diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index 5e29a04427b..229ae1af7c1 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -47,7 +47,7 @@ ColumnPtr FuzzQuerySource::createColumn() size_t data_len = data.size(); /// AST is too long, will start from the original query. - if (data_len > 500) + if (config.max_query_length > 500) { fuzz_base = query; continue; @@ -120,10 +120,11 @@ StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine // Supported signatures: // - // FuzzQuery('query') - // FuzzQuery('query', 'random_seed') - if (engine_args.empty() || engine_args.size() > 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 2 arguments: query, random_seed"); + // FuzzQuery(query) + // FuzzQuery(query, max_query_length) + // FuzzQuery(query, max_query_length, random_seed) + if (engine_args.empty() || engine_args.size() > 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 3 arguments: query, max_query_length, random_seed"); for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); @@ -131,9 +132,16 @@ StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); configuration.query = std::move(first_arg); - if (engine_args.size() == 2) + if (engine_args.size() >= 2) { const auto & literal = engine_args[1]->as(); + if (!literal.value.isNull()) + configuration.max_query_length = checkAndGetLiteralArgument(literal, "max_query_length"); + } + + if (engine_args.size() == 3) + { + const auto & literal = engine_args[2]->as(); if (!literal.value.isNull()) configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); } diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h index 3ae506fdfb8..125ef960e74 100644 --- a/src/Storages/StorageFuzzQuery.h +++ b/src/Storages/StorageFuzzQuery.h @@ -18,6 +18,7 @@ public: struct Configuration : public StatelessTableEngineConfiguration { String query; + UInt64 max_query_length = 500; UInt64 random_seed = randomSeed(); }; diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql index 5821e2e5111..b26096f7f0e 100644 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql @@ -1,5 +1,5 @@ -SELECT * FROM fuzzQuery('SELECT 1', 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; +SELECT * FROM fuzzQuery('SELECT 1', 500, 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; SELECT * FROM fuzzQuery('SELECT * FROM ( @@ -15,4 +15,4 @@ FROM ( ) AS r ON l.item_id = r.item_id ORDER BY 1,2,3; -', 8956) LIMIT 10 FORMAT NULL; +', 500, 8956) LIMIT 10 FORMAT NULL; From f2ffd727f002702ab29dff7c2d18ceaaf8e09e6e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Jul 2024 11:04:04 +0200 Subject: [PATCH 090/141] Bump From 5f53a73457f0851f78b82e2d5559f4b654eaf6e7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 3 Jul 2024 12:05:49 +0200 Subject: [PATCH 091/141] Fix config merging for from_env with replace overrides Without this patch new test fails with: Exception: Failed to preprocess config '/etc/clickhouse-server/config.xml': Exception: Element has value and does not have 'replace' attribute, can't process from_env substitution. Stack trace: Signed-off-by: Azat Khuzhin --- src/Common/Config/ConfigProcessor.cpp | 1 - .../configs/000-server_overrides.xml | 3 ++ ...subst.xml => 000-users_with_env_subst.xml} | 0 .../configs/010-server_with_env_subst.xml | 3 ++ .../test_config_substitutions/test.py | 30 ++++++++++++++++--- 5 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 tests/integration/test_config_substitutions/configs/000-server_overrides.xml rename tests/integration/test_config_substitutions/configs/{000-config_with_env_subst.xml => 000-users_with_env_subst.xml} (100%) create mode 100644 tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index c9832e8efd5..67d6036aa51 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -316,7 +316,6 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, } else if (replace) { - with_element.removeAttribute("replace"); NodePtr new_node = config->importNode(with_node, true); config_root->replaceChild(new_node, config_node); } diff --git a/tests/integration/test_config_substitutions/configs/000-server_overrides.xml b/tests/integration/test_config_substitutions/configs/000-server_overrides.xml new file mode 100644 index 00000000000..9335f663d68 --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/000-server_overrides.xml @@ -0,0 +1,3 @@ + + 10000 + diff --git a/tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml b/tests/integration/test_config_substitutions/configs/000-users_with_env_subst.xml similarity index 100% rename from tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml rename to tests/integration/test_config_substitutions/configs/000-users_with_env_subst.xml diff --git a/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml b/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml new file mode 100644 index 00000000000..ea91f066a21 --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/integration/test_config_substitutions/test.py b/tests/integration/test_config_substitutions/test.py index faceab6fbcd..124dbcaedf7 100644 --- a/tests/integration/test_config_substitutions/test.py +++ b/tests/integration/test_config_substitutions/test.py @@ -39,9 +39,13 @@ node6 = cluster.add_instance( node7 = cluster.add_instance( "node7", user_configs=[ - "configs/000-config_with_env_subst.xml", + "configs/000-users_with_env_subst.xml", "configs/010-env_subst_override.xml", ], + main_configs=[ + "configs/000-server_overrides.xml", + "configs/010-server_with_env_subst.xml", + ], env_variables={ # overridden with 424242 "MAX_QUERY_SIZE": "121212", @@ -126,9 +130,9 @@ def test_config(start_cluster): ) -def test_config_invalid_overrides(start_cluster): +def test_config_from_env_overrides(start_cluster): node7.replace_config( - "/etc/clickhouse-server/users.d/000-config_with_env_subst.xml", + "/etc/clickhouse-server/users.d/000-users_with_env_subst.xml", """ @@ -156,7 +160,7 @@ def test_config_invalid_overrides(start_cluster): ): node7.query("SYSTEM RELOAD CONFIG") node7.replace_config( - "/etc/clickhouse-server/users.d/000-config_with_env_subst.xml", + "/etc/clickhouse-server/users.d/000-users_with_env_subst.xml", """ @@ -181,6 +185,24 @@ def test_config_invalid_overrides(start_cluster): node7.query("SYSTEM RELOAD CONFIG") +def test_config_merge_from_env_overrides(start_cluster): + assert ( + node7.query( + "SELECT value FROM system.server_settings WHERE name='max_thread_pool_size'" + ) + == "10000\n" + ) + node7.replace_config( + "/etc/clickhouse-server/config.d/010-server_with_env_subst.xml", + """ + + 9000 + +""", + ) + node7.query("SYSTEM RELOAD CONFIG") + + def test_include_config(start_cluster): # assert node4.query("select 1") From 10d48afc20d23043c8e89bd804259b19247dc03c Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 13:16:01 +0200 Subject: [PATCH 092/141] token_info is defined always --- .../MergeTree/ReplicatedMergeTreeSink.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 3677f5b02ab..dedb4a9ddae 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -297,16 +297,13 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) String block_dedup_token; auto token_info = chunk.getChunkInfos().get(); - if constexpr (!async_insert) - { - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); - if (token_info->isDefined()) - block_dedup_token = token_info->getToken(); - } + if (token_info->isDefined()) + block_dedup_token = token_info->getToken(); auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); From c08293026570e00f7f4332d7b0d4b3eb646db1d5 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 13:43:27 +0200 Subject: [PATCH 093/141] rename to buildPreAndSinkChains --- src/Interpreters/InterpreterInsertQuery.cpp | 6 +++--- src/Interpreters/InterpreterInsertQuery.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 5e8b8601f08..2becea61b3a 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -397,7 +397,7 @@ Chain InterpreterInsertQuery::buildPreSinkChain( return out; } -std::pair, std::vector> InterpreterInsertQuery::buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) +std::pair, std::vector> InterpreterInsertQuery::buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) { chassert(presink_streams > 0); chassert(sink_streams > 0); @@ -612,7 +612,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & sink_streams_size = 1; } - auto [presink_chains, sink_chains] = buildPreAndSyncChains( + auto [presink_chains, sink_chains] = buildPreAndSinkChains( presink_streams_size, sink_streams_size, table, metadata_snapshot, query_sample_block); @@ -673,7 +673,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query Chain chain; { - auto [presink_chains, sink_chains] = buildPreAndSyncChains( + auto [presink_chains, sink_chains] = buildPreAndSinkChains( 1, 1, table, metadata_snapshot, query_sample_block); diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index b06bb9a3db2..894c7c42144 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -79,7 +79,7 @@ private: std::vector> owned_buffers; - std::pair, std::vector> buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); + std::pair, std::vector> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table); QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table); From 8777363670dcc8775037f7104e90eea05f0fa0b2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:47:45 +0200 Subject: [PATCH 094/141] Update src/Processors/Transforms/DeduplicationTokenTransforms.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Processors/Transforms/DeduplicationTokenTransforms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 94287dc4487..d6aff9e1370 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -142,7 +142,7 @@ namespace DeduplicationToken String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; } - // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with heshes from the parts. + // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with hashes from the parts. // But if there is some table with different engine, we still need to define the source of the data in deduplication token // We use that transform to define the source as a hash of entire block in deduplication token void transform(Chunk & chunk) override; From 5c88d5b48ad75cbe3f8e15e428d8d24380c23943 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:47:55 +0200 Subject: [PATCH 095/141] Update src/Interpreters/InterpreterInsertQuery.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Interpreters/InterpreterInsertQuery.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2becea61b3a..15b9b155d54 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -828,7 +828,13 @@ void registerInterpreterInsertQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) { - return std::make_unique(args.query, args.context, args.allow_materialized, false, false, false); + return std::make_unique( + args.query, + args.context, + args.allow_materialized, + /* no_squash */false, + /* no_destination */false, + /* async_insert */false); }; factory.registerInterpreter("InterpreterInsertQuery", create_fn); } From aee1289f2d89b83d5f5255792fe73784ec824ca1 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:48:04 +0200 Subject: [PATCH 096/141] Update src/Interpreters/InterpreterInsertQuery.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Interpreters/InterpreterInsertQuery.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 15b9b155d54..2581a368272 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -791,12 +791,8 @@ BlockIO InterpreterInsertQuery::execute() res.pipeline.addStorageHolder(table); - StoragePtr inner_table; if (const auto * mv = dynamic_cast(table.get())) - inner_table = mv->getTargetTable(); - - if (inner_table) - res.pipeline.addStorageHolder(inner_table); + res.pipeline.addStorageHolder(mv->getTargetTable()); return res; } From f0aa006461dd4118dada9c6262d53fc703d0af82 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:48:32 +0200 Subject: [PATCH 097/141] Update src/Interpreters/InterpreterInsertQuery.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2581a368272..f9b57f530f0 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -580,7 +580,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & if (!settings.insert_deduplication_token.value.empty()) { - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { return std::make_shared(settings.insert_deduplication_token.value, in_header); }); From c4207e9a6ef7c8ccd5e1c837535268e2f9f04b70 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:48:45 +0200 Subject: [PATCH 098/141] Update src/Interpreters/InterpreterInsertQuery.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index f9b57f530f0..333da81ced0 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -585,7 +585,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & return std::make_shared(settings.insert_deduplication_token.value, in_header); }); - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { return std::make_shared(in_header); }); From 913e97b1a5560536bfcdc722812a53395370a435 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 14:05:06 +0200 Subject: [PATCH 099/141] work with review comments --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 6 ++++-- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 6 ++++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 333da81ced0..d7f778f6678 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -674,7 +674,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query { auto [presink_chains, sink_chains] = buildPreAndSinkChains( - 1, 1, + /* presink_streams */1, /* sink_streams */1, table, metadata_snapshot, query_sample_block); chain = std::move(presink_chains.front()); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 4a1163d2317..d8cfce1ca99 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -95,6 +95,8 @@ void MergeTreeSink::consume(Chunk & chunk) "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); + const bool need_to_define_dedup_token = !token_info->isDefined(); + String block_dedup_token; if (token_info->isDefined()) block_dedup_token = token_info->getToken(); @@ -123,7 +125,7 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if (!token_info->isDefined()) + if (need_to_define_dedup_token) { chassert(temp_part.part); const auto hash_value = temp_part.part->getPartBlockIDHash(); @@ -166,7 +168,7 @@ void MergeTreeSink::consume(Chunk & chunk) }); } - if (!token_info->isDefined()) + if (need_to_define_dedup_token) { token_info->finishChunkHashes(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index dedb4a9ddae..bbae054fbed 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -302,6 +302,8 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); + const bool need_to_define_dedup_token = !token_info->isDefined(); + if (token_info->isDefined()) block_dedup_token = token_info->getToken(); @@ -368,7 +370,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - if (!token_info->isDefined()) + if (need_to_define_dedup_token) { chassert(temp_part.part); const auto hash_value = temp_part.part->getPartBlockIDHash(); @@ -419,7 +421,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } - if (!token_info->isDefined()) + if (need_to_define_dedup_token) { token_info->finishChunkHashes(); } From 3a09000e4448c921cc9faefd387ef8b383a89c1a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 14:33:21 +0200 Subject: [PATCH 100/141] remove trailing whitespaces --- src/Interpreters/InterpreterInsertQuery.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index d7f778f6678..2cbfc55d008 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -825,10 +825,10 @@ void registerInterpreterInsertQuery(InterpreterFactory & factory) auto create_fn = [] (const InterpreterFactory::Arguments & args) { return std::make_unique( - args.query, - args.context, - args.allow_materialized, - /* no_squash */false, + args.query, + args.context, + args.allow_materialized, + /* no_squash */false, /* no_destination */false, /* async_insert */false); }; From 4e6bdb15b0c58bd6d3457f21b2f9a493b698904e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Jul 2024 14:35:17 +0200 Subject: [PATCH 101/141] Azure policy --- docker/test/stress/run.sh | 40 +++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 96f8ecb2fab..323944591b1 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -110,6 +110,15 @@ start_server clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM test" +if [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + TEMP_POLICY="s3_cache" +elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + TEMP_POLICY="azure_cache" +else + TEMP_POLICY="default" +fi + + clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, @@ -135,7 +144,7 @@ clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnabl URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) - ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, @@ -161,7 +170,7 @@ clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable U URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) - ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, @@ -195,7 +204,7 @@ clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDat Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) - SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" @@ -216,13 +225,24 @@ export ZOOKEEPER_FAULT_INJECTION=1 export THREAD_POOL_FAULT_INJECTION=1 configure -# But we still need default disk because some tables loaded only into it -sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ - | sed "s|
s3
|
s3
default|" \ - > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp -mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +if [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + # But we still need default disk because some tables loaded only into it + sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ + | sed "s|
s3
|
s3
default|" \ + > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp + mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + # But we still need default disk because some tables loaded only into it + sudo cat /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml \ + | sed "s|
azure
|
azure
default|" \ + > /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml.tmp + mv /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml + sudo chown clickhouse /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml + sudo chgrp clickhouse /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml +fi + sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \ | sed "s|trace|test|" \ From 5fd36059e48c4377ea526343e7272009d2ccaf2a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 15:28:01 +0200 Subject: [PATCH 102/141] Try disabling background threads --- contrib/jemalloc-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 023fdcf103a..cc5a391676f 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -34,7 +34,7 @@ if (OS_LINUX) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false") else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() From 1c14a458e72bc9554e851c20ffff9bfa03e5446e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 16:11:02 +0200 Subject: [PATCH 103/141] Add profile events for regex cache --- src/Common/ProfileEvents.cpp | 4 ++++ src/Functions/Regexps.h | 22 +++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d98373b6c55..cd5f67fdff2 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -239,6 +239,10 @@ M(CannotRemoveEphemeralNode, "Number of times an error happened while trying to remove ephemeral node. This is not an issue, because our implementation of ZooKeeper library guarantee that the session will expire and the node will be removed.") \ \ M(RegexpCreated, "Compiled regular expressions. Identical regular expressions compiled just once and cached forever.") \ + M(RegexpGlobalCacheHit, "Number of times we fetched compiled regular expression from the global cache.") \ + M(RegexpGlobalCacheMiss, "Number of times we failed to fetch compiled regular expression from the global cache.") \ + M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from the local cache.") \ + M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from the local cache.") \ M(ContextLock, "Number of times the lock of Context was acquired or tried to acquire. This is global lock.") \ M(ContextLockWaitMicroseconds, "Context lock wait time in microseconds") \ \ diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index b6bd463212f..fff21fdb941 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -24,6 +24,10 @@ namespace ProfileEvents { extern const Event RegexpCreated; +extern const Event RegexpGlobalCacheHit; +extern const Event RegexpGlobalCacheMiss; +extern const Event RegexpLocalCacheHit; +extern const Event RegexpLocalCacheMiss; } @@ -72,18 +76,28 @@ public: Bucket & bucket = known_regexps[hasher(pattern) % CACHE_SIZE]; if (bucket.regexp == nullptr) [[unlikely]] + { /// insert new entry + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); bucket = {pattern, std::make_shared(createRegexp(pattern))}; + } else + { if (pattern != bucket.pattern) + { + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); /// replace existing entry bucket = {pattern, std::make_shared(createRegexp(pattern))}; + } + else + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheHit); + } return bucket.regexp; } private: - constexpr static size_t CACHE_SIZE = 100; /// collision probability + constexpr static size_t CACHE_SIZE = 1000; /// collision probability std::hash hasher; struct Bucket @@ -322,9 +336,11 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); + ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } else + { if (bucket.patterns != str_patterns || bucket.edit_distance != edit_distance) { /// replace existing entry @@ -333,8 +349,12 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); + ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } + else + ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheHit); + } return bucket.regexps; } From 8319d2579789aee45a8e02cac0131e7dc348eedd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 14:43:47 +0000 Subject: [PATCH 104/141] Minor updates --- src/Common/ProfileEvents.cpp | 11 ++++++----- src/Functions/Regexps.h | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index cd5f67fdff2..2e3984f8f10 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -238,11 +238,12 @@ \ M(CannotRemoveEphemeralNode, "Number of times an error happened while trying to remove ephemeral node. This is not an issue, because our implementation of ZooKeeper library guarantee that the session will expire and the node will be removed.") \ \ - M(RegexpCreated, "Compiled regular expressions. Identical regular expressions compiled just once and cached forever.") \ - M(RegexpGlobalCacheHit, "Number of times we fetched compiled regular expression from the global cache.") \ - M(RegexpGlobalCacheMiss, "Number of times we failed to fetch compiled regular expression from the global cache.") \ - M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from the local cache.") \ - M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from the local cache.") \ + M(RegexpWithMultipleNeedlesCreated, "Regular expressions with multiple needles (VectorScan library) compiled.") \ + M(RegexpWithMultipleNeedlesCacheHit, "Number of times we fetched compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpWithMultipleNeedlesCacheMiss, "Number of times we failed to fetch compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from a local cache.") \ + M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from a local cache.") \ + \ M(ContextLock, "Number of times the lock of Context was acquired or tried to acquire. This is global lock.") \ M(ContextLockWaitMicroseconds, "Context lock wait time in microseconds") \ \ diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index fff21fdb941..b317d786fab 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -23,11 +23,11 @@ namespace ProfileEvents { -extern const Event RegexpCreated; -extern const Event RegexpGlobalCacheHit; -extern const Event RegexpGlobalCacheMiss; -extern const Event RegexpLocalCacheHit; -extern const Event RegexpLocalCacheMiss; + extern const Event RegexpWithMultipleNeedlesCreated; + extern const Event RegexpWithMultipleNeedlesGlobalCacheHit; + extern const Event RegexpWithMultipleNeedlesGlobalCacheMiss; + extern const Event RegexpLocalCacheHit; + extern const Event RegexpLocalCacheMiss; } @@ -85,8 +85,8 @@ public: { if (pattern != bucket.pattern) { - ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); /// replace existing entry + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); bucket = {pattern, std::make_shared(createRegexp(pattern))}; } else @@ -97,7 +97,7 @@ public: } private: - constexpr static size_t CACHE_SIZE = 1000; /// collision probability + constexpr static size_t CACHE_SIZE = 1'000; /// collision probability std::hash hasher; struct Bucket @@ -258,7 +258,7 @@ inline Regexps constructRegexps(const std::vector & str_patterns, [[mayb throw Exception(ErrorCodes::BAD_ARGUMENTS, "Pattern '{}' failed with error '{}'", str_patterns[error->expression], String(error->message)); } - ProfileEvents::increment(ProfileEvents::RegexpCreated); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesCreated); /// We allocate the scratch space only once, then copy it across multiple threads with hs_clone_scratch /// function which is faster than allocating scratch space each time in each thread. @@ -336,7 +336,7 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); - ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheMiss); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } else @@ -349,11 +349,11 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); - ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheMiss); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } else - ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheHit); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheHit); } return bucket.regexps; From 07f51e02eda1c8194da28317e4d8452a5c52fc40 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 16:54:09 +0200 Subject: [PATCH 105/141] Reuse some checks --- programs/CMakeLists.txt | 4 +- programs/keeper/keeper_main.cpp | 266 +----------------------------- programs/main.cpp | 268 +------------------------------ src/Common/Coverage.cpp | 45 ++++++ src/Common/Coverage.h | 5 + src/Common/EnvironmentChecks.cpp | 234 +++++++++++++++++++++++++++ src/Common/EnvironmentChecks.h | 5 + 7 files changed, 297 insertions(+), 530 deletions(-) create mode 100644 src/Common/Coverage.cpp create mode 100644 src/Common/Coverage.h create mode 100644 src/Common/EnvironmentChecks.cpp create mode 100644 src/Common/EnvironmentChecks.h diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index b06290ae352..6b3a0b16624 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -73,9 +73,9 @@ else() endif() if (ENABLE_CLICKHOUSE_KEEPER) - message(STATUS "ClickHouse keeper mode: ON") + message(STATUS "ClickHouse Keeper: ON") else() - message(STATUS "ClickHouse keeper mode: OFF") + message(STATUS "ClickHouse Keeper: OFF") endif() if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) diff --git a/programs/keeper/keeper_main.cpp b/programs/keeper/keeper_main.cpp index ec9b84ce94b..a240f9699f2 100644 --- a/programs/keeper/keeper_main.cpp +++ b/programs/keeper/keeper_main.cpp @@ -1,11 +1,9 @@ -#include #include #include #include #include #include -#include #include #include /// pair @@ -14,6 +12,9 @@ #include "config.h" #include "config_tools.h" +#include +#include + #include #include #include @@ -59,270 +60,9 @@ int printHelp(int, char **) return -1; } - -enum class InstructionFail : uint8_t -{ - NONE = 0, - SSE3 = 1, - SSSE3 = 2, - SSE4_1 = 3, - SSE4_2 = 4, - POPCNT = 5, - AVX = 6, - AVX2 = 7, - AVX512 = 8 -}; - -auto instructionFailToString(InstructionFail fail) -{ - switch (fail) - { -#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) - case InstructionFail::NONE: - ret("NONE"); - case InstructionFail::SSE3: - ret("SSE3"); - case InstructionFail::SSSE3: - ret("SSSE3"); - case InstructionFail::SSE4_1: - ret("SSE4.1"); - case InstructionFail::SSE4_2: - ret("SSE4.2"); - case InstructionFail::POPCNT: - ret("POPCNT"); - case InstructionFail::AVX: - ret("AVX"); - case InstructionFail::AVX2: - ret("AVX2"); - case InstructionFail::AVX512: - ret("AVX512"); -#undef ret - } } -sigjmp_buf jmpbuf; - -[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) -{ - siglongjmp(jmpbuf, 1); -} - -/// Check if necessary SSE extensions are available by trying to execute some sse instructions. -/// If instruction is unavailable, SIGILL will be sent by kernel. -void checkRequiredInstructionsImpl(volatile InstructionFail & fail) -{ -#if defined(__SSE3__) - fail = InstructionFail::SSE3; - __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSSE3__) - fail = InstructionFail::SSSE3; - __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); - -#endif - -#if defined(__SSE4_1__) - fail = InstructionFail::SSE4_1; - __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSE4_2__) - fail = InstructionFail::SSE4_2; - __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); -#endif - - /// Defined by -msse4.2 -#if defined(__POPCNT__) - fail = InstructionFail::POPCNT; - { - uint64_t a = 0; - uint64_t b = 0; - __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); - } -#endif - -#if defined(__AVX__) - fail = InstructionFail::AVX; - __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX2__) - fail = InstructionFail::AVX2; - __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX512__) - fail = InstructionFail::AVX512; - __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); -#endif - - fail = InstructionFail::NONE; -} - -/// Macros to avoid using strlen(), since it may fail if SSE is not supported. -#define writeError(data) do \ - { \ - static_assert(__builtin_constant_p(data)); \ - if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ - _Exit(1); \ - } while (false) - -/// Check SSE and others instructions availability. Calls exit on fail. -/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. -void checkRequiredInstructions() -{ - struct sigaction sa{}; - struct sigaction sa_old{}; - sa.sa_sigaction = sigIllCheckHandler; - sa.sa_flags = SA_SIGINFO; - auto signal = SIGILL; - if (sigemptyset(&sa.sa_mask) != 0 - || sigaddset(&sa.sa_mask, signal) != 0 - || sigaction(signal, &sa, &sa_old) != 0) - { - /// You may wonder about strlen. - /// Typical implementation of strlen is using SSE4.2 or AVX2. - /// But this is not the case because it's compiler builtin and is executed at compile time. - - writeError("Can not set signal handler\n"); - _Exit(1); - } - - volatile InstructionFail fail = InstructionFail::NONE; - - if (sigsetjmp(jmpbuf, 1)) - { - writeError("Instruction check fail. The CPU does not support "); - if (!std::apply(writeRetry, instructionFailToString(fail))) - _Exit(1); - writeError(" instruction set.\n"); - _Exit(1); - } - - checkRequiredInstructionsImpl(fail); - - if (sigaction(signal, &sa_old, nullptr)) - { - writeError("Can not set signal handler\n"); - _Exit(1); - } -} - -struct Checker -{ - Checker() - { - checkRequiredInstructions(); - } -} checker -#ifndef OS_DARWIN - __attribute__((init_priority(101))) /// Run before other static initializers. -#endif -; - - -#if !defined(USE_MUSL) -/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. -void checkHarmfulEnvironmentVariables(char ** argv) -{ - std::initializer_list harmful_env_variables = { - /// The list is a selection from "man ld-linux". - "LD_PRELOAD", - "LD_LIBRARY_PATH", - "LD_ORIGIN_PATH", - "LD_AUDIT", - "LD_DYNAMIC_WEAK", - /// The list is a selection from "man dyld" (osx). - "DYLD_LIBRARY_PATH", - "DYLD_FALLBACK_LIBRARY_PATH", - "DYLD_VERSIONED_LIBRARY_PATH", - "DYLD_INSERT_LIBRARIES", - }; - - bool require_reexec = false; - for (const auto * var : harmful_env_variables) - { - if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) - { - /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful - if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently - { - fmt::print(stderr, "Cannot override {} environment variable", var); - _exit(1); - } - require_reexec = true; - } - } - - if (require_reexec) - { - /// Use execvp() over execv() to search in PATH. - /// - /// This should be safe, since: - /// - if argv[0] is relative path - it is OK - /// - if argv[0] has only basename, the it will search in PATH, like shell will do. - /// - /// Also note, that this (search in PATH) because there is no easy and - /// portable way to get absolute path of argv[0]. - /// - on linux there is /proc/self/exec and AT_EXECFN - /// - but on other OSes there is no such thing (especially on OSX). - /// - /// And since static linking will be done someday anyway, - /// let's not pollute the code base with special cases. - int error = execvp(argv[0], argv); - _exit(error); - } -} -#endif - - -#if defined(SANITIZE_COVERAGE) -__attribute__((no_sanitize("coverage"))) -void dumpCoverage() -{ - /// A user can request to dump the coverage information into files at exit. - /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, - /// that cannot introspect it with SQL functions at runtime. - - /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' - /// containing the list of addresses of covered . - - /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. - - if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) - { - auto dump = [](const std::string & name, auto span) - { - /// Write only non-zeros. - std::vector data; - data.reserve(span.size()); - for (auto addr : span) - if (addr) - data.push_back(addr); - - int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); - if (-1 == fd) - { - writeError("Cannot open a file to write the coverage data\n"); - } - else - { - if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) - writeError("Cannot write the coverage data to a file\n"); - if (0 != ::close(fd)) - writeError("Cannot close the file with coverage data\n"); - } - }; - - dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); - } -} -#endif - -} - bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) { /// Use app if the first arg 'app' is passed (the arg should be quietly removed) diff --git a/programs/main.cpp b/programs/main.cpp index 61e2bc18ed7..eecbe3a6876 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -1,5 +1,3 @@ -#include -#include #include #include @@ -7,7 +5,6 @@ #include #include #include -#include #include #include /// pair @@ -16,6 +13,9 @@ #include "config.h" #include "config_tools.h" + +#include +#include #include #include #include @@ -119,268 +119,6 @@ std::pair clickhouse_short_names[] = {"chc", "client"}, }; - -enum class InstructionFail : uint8_t -{ - NONE = 0, - SSE3 = 1, - SSSE3 = 2, - SSE4_1 = 3, - SSE4_2 = 4, - POPCNT = 5, - AVX = 6, - AVX2 = 7, - AVX512 = 8 -}; - -auto instructionFailToString(InstructionFail fail) -{ - switch (fail) - { -#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) - case InstructionFail::NONE: - ret("NONE"); - case InstructionFail::SSE3: - ret("SSE3"); - case InstructionFail::SSSE3: - ret("SSSE3"); - case InstructionFail::SSE4_1: - ret("SSE4.1"); - case InstructionFail::SSE4_2: - ret("SSE4.2"); - case InstructionFail::POPCNT: - ret("POPCNT"); - case InstructionFail::AVX: - ret("AVX"); - case InstructionFail::AVX2: - ret("AVX2"); - case InstructionFail::AVX512: - ret("AVX512"); -#undef ret - } -} - - -sigjmp_buf jmpbuf; - -[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) -{ - siglongjmp(jmpbuf, 1); -} - -/// Check if necessary SSE extensions are available by trying to execute some sse instructions. -/// If instruction is unavailable, SIGILL will be sent by kernel. -void checkRequiredInstructionsImpl(volatile InstructionFail & fail) -{ -#if defined(__SSE3__) - fail = InstructionFail::SSE3; - __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSSE3__) - fail = InstructionFail::SSSE3; - __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); - -#endif - -#if defined(__SSE4_1__) - fail = InstructionFail::SSE4_1; - __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSE4_2__) - fail = InstructionFail::SSE4_2; - __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); -#endif - - /// Defined by -msse4.2 -#if defined(__POPCNT__) - fail = InstructionFail::POPCNT; - { - uint64_t a = 0; - uint64_t b = 0; - __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); - } -#endif - -#if defined(__AVX__) - fail = InstructionFail::AVX; - __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX2__) - fail = InstructionFail::AVX2; - __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX512__) - fail = InstructionFail::AVX512; - __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); -#endif - - fail = InstructionFail::NONE; -} - -/// Macros to avoid using strlen(), since it may fail if SSE is not supported. -#define writeError(data) do \ - { \ - static_assert(__builtin_constant_p(data)); \ - if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ - _Exit(1); \ - } while (false) - -/// Check SSE and others instructions availability. Calls exit on fail. -/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. -void checkRequiredInstructions() -{ - struct sigaction sa{}; - struct sigaction sa_old{}; - sa.sa_sigaction = sigIllCheckHandler; - sa.sa_flags = SA_SIGINFO; - auto signal = SIGILL; - if (sigemptyset(&sa.sa_mask) != 0 - || sigaddset(&sa.sa_mask, signal) != 0 - || sigaction(signal, &sa, &sa_old) != 0) - { - /// You may wonder about strlen. - /// Typical implementation of strlen is using SSE4.2 or AVX2. - /// But this is not the case because it's compiler builtin and is executed at compile time. - - writeError("Can not set signal handler\n"); - _Exit(1); - } - - volatile InstructionFail fail = InstructionFail::NONE; - - if (sigsetjmp(jmpbuf, 1)) - { - writeError("Instruction check fail. The CPU does not support "); - if (!std::apply(writeRetry, instructionFailToString(fail))) - _Exit(1); - writeError(" instruction set.\n"); - _Exit(1); - } - - checkRequiredInstructionsImpl(fail); - - if (sigaction(signal, &sa_old, nullptr)) - { - writeError("Can not set signal handler\n"); - _Exit(1); - } -} - -struct Checker -{ - Checker() - { - checkRequiredInstructions(); - } -} checker -#ifndef OS_DARWIN - __attribute__((init_priority(101))) /// Run before other static initializers. -#endif -; - - -#if !defined(USE_MUSL) -/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. -void checkHarmfulEnvironmentVariables(char ** argv) -{ - std::initializer_list harmful_env_variables = { - /// The list is a selection from "man ld-linux". - "LD_PRELOAD", - "LD_LIBRARY_PATH", - "LD_ORIGIN_PATH", - "LD_AUDIT", - "LD_DYNAMIC_WEAK", - /// The list is a selection from "man dyld" (osx). - "DYLD_LIBRARY_PATH", - "DYLD_FALLBACK_LIBRARY_PATH", - "DYLD_VERSIONED_LIBRARY_PATH", - "DYLD_INSERT_LIBRARIES", - }; - - bool require_reexec = false; - for (const auto * var : harmful_env_variables) - { - if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) - { - /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful - if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently - { - fmt::print(stderr, "Cannot override {} environment variable", var); - _exit(1); - } - require_reexec = true; - } - } - - if (require_reexec) - { - /// Use execvp() over execv() to search in PATH. - /// - /// This should be safe, since: - /// - if argv[0] is relative path - it is OK - /// - if argv[0] has only basename, the it will search in PATH, like shell will do. - /// - /// Also note, that this (search in PATH) because there is no easy and - /// portable way to get absolute path of argv[0]. - /// - on linux there is /proc/self/exec and AT_EXECFN - /// - but on other OSes there is no such thing (especially on OSX). - /// - /// And since static linking will be done someday anyway, - /// let's not pollute the code base with special cases. - int error = execvp(argv[0], argv); - _exit(error); - } -} -#endif - - -#if defined(SANITIZE_COVERAGE) -__attribute__((no_sanitize("coverage"))) -void dumpCoverage() -{ - /// A user can request to dump the coverage information into files at exit. - /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, - /// that cannot introspect it with SQL functions at runtime. - - /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' - /// containing the list of addresses of covered . - - /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. - - if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) - { - auto dump = [](const std::string & name, auto span) - { - /// Write only non-zeros. - std::vector data; - data.reserve(span.size()); - for (auto addr : span) - if (addr) - data.push_back(addr); - - int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); - if (-1 == fd) - { - writeError("Cannot open a file to write the coverage data\n"); - } - else - { - if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) - writeError("Cannot write the coverage data to a file\n"); - if (0 != ::close(fd)) - writeError("Cannot close the file with coverage data\n"); - } - }; - - dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); - } -} -#endif - } bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) diff --git a/src/Common/Coverage.cpp b/src/Common/Coverage.cpp new file mode 100644 index 00000000000..fa8da1f9e15 --- /dev/null +++ b/src/Common/Coverage.cpp @@ -0,0 +1,45 @@ +#include + +#if defined(SANITIZE_COVERAGE) +__attribute__((no_sanitize("coverage"))) +void dumpCoverage() +{ + /// A user can request to dump the coverage information into files at exit. + /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, + /// that cannot introspect it with SQL functions at runtime. + + /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' + /// containing the list of addresses of covered . + + /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. + + if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) + { + auto dump = [](const std::string & name, auto span) + { + /// Write only non-zeros. + std::vector data; + data.reserve(span.size()); + for (auto addr : span) + if (addr) + data.push_back(addr); + + int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); + if (-1 == fd) + { + writeError("Cannot open a file to write the coverage data\n"); + } + else + { + if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) + writeError("Cannot write the coverage data to a file\n"); + if (0 != ::close(fd)) + writeError("Cannot close the file with coverage data\n"); + } + }; + + dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); + } +} +#endif + diff --git a/src/Common/Coverage.h b/src/Common/Coverage.h new file mode 100644 index 00000000000..aa6dd2825ed --- /dev/null +++ b/src/Common/Coverage.h @@ -0,0 +1,5 @@ +#pragma once + +#if defined(SANITIZE_COVERAGE) +void dumpCoverage(); +#endif diff --git a/src/Common/EnvironmentChecks.cpp b/src/Common/EnvironmentChecks.cpp new file mode 100644 index 00000000000..d69e8cbaa3d --- /dev/null +++ b/src/Common/EnvironmentChecks.cpp @@ -0,0 +1,234 @@ +#include +#include + +#include + +#include +#include +#include + +#include + +#include + +namespace +{ + +enum class InstructionFail : uint8_t +{ + NONE = 0, + SSE3 = 1, + SSSE3 = 2, + SSE4_1 = 3, + SSE4_2 = 4, + POPCNT = 5, + AVX = 6, + AVX2 = 7, + AVX512 = 8 +}; + +auto instructionFailToString(InstructionFail fail) +{ + switch (fail) + { +#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) + case InstructionFail::NONE: + ret("NONE"); + case InstructionFail::SSE3: + ret("SSE3"); + case InstructionFail::SSSE3: + ret("SSSE3"); + case InstructionFail::SSE4_1: + ret("SSE4.1"); + case InstructionFail::SSE4_2: + ret("SSE4.2"); + case InstructionFail::POPCNT: + ret("POPCNT"); + case InstructionFail::AVX: + ret("AVX"); + case InstructionFail::AVX2: + ret("AVX2"); + case InstructionFail::AVX512: + ret("AVX512"); +#undef ret + } +} + + +sigjmp_buf jmpbuf; + +[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) +{ + siglongjmp(jmpbuf, 1); +} + +/// Check if necessary SSE extensions are available by trying to execute some sse instructions. +/// If instruction is unavailable, SIGILL will be sent by kernel. +void checkRequiredInstructionsImpl(volatile InstructionFail & fail) +{ +#if defined(__SSE3__) + fail = InstructionFail::SSE3; + __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSSE3__) + fail = InstructionFail::SSSE3; + __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); + +#endif + +#if defined(__SSE4_1__) + fail = InstructionFail::SSE4_1; + __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSE4_2__) + fail = InstructionFail::SSE4_2; + __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); +#endif + + /// Defined by -msse4.2 +#if defined(__POPCNT__) + fail = InstructionFail::POPCNT; + { + uint64_t a = 0; + uint64_t b = 0; + __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); + } +#endif + +#if defined(__AVX__) + fail = InstructionFail::AVX; + __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX2__) + fail = InstructionFail::AVX2; + __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX512__) + fail = InstructionFail::AVX512; + __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); +#endif + + fail = InstructionFail::NONE; +} + +/// Macros to avoid using strlen(), since it may fail if SSE is not supported. +#define writeError(data) do \ + { \ + static_assert(__builtin_constant_p(data)); \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ + _Exit(1); \ + } while (false) + +/// Check SSE and others instructions availability. Calls exit on fail. +/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. +void checkRequiredInstructions() +{ + struct sigaction sa{}; + struct sigaction sa_old{}; + sa.sa_sigaction = sigIllCheckHandler; + sa.sa_flags = SA_SIGINFO; + auto signal = SIGILL; + if (sigemptyset(&sa.sa_mask) != 0 + || sigaddset(&sa.sa_mask, signal) != 0 + || sigaction(signal, &sa, &sa_old) != 0) + { + /// You may wonder about strlen. + /// Typical implementation of strlen is using SSE4.2 or AVX2. + /// But this is not the case because it's compiler builtin and is executed at compile time. + + writeError("Can not set signal handler\n"); + _Exit(1); + } + + volatile InstructionFail fail = InstructionFail::NONE; + + if (sigsetjmp(jmpbuf, 1)) + { + writeError("Instruction check fail. The CPU does not support "); + if (!std::apply(writeRetry, instructionFailToString(fail))) + _Exit(1); + writeError(" instruction set.\n"); + _Exit(1); + } + + checkRequiredInstructionsImpl(fail); + + if (sigaction(signal, &sa_old, nullptr)) + { + writeError("Can not set signal handler\n"); + _Exit(1); + } +} + +struct Checker +{ + Checker() + { + checkRequiredInstructions(); + } +} checker +#ifndef OS_DARWIN + __attribute__((init_priority(101))) /// Run before other static initializers. +#endif +; + +} + + +#if !defined(USE_MUSL) +/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. +void checkHarmfulEnvironmentVariables(char ** argv) +{ + std::initializer_list harmful_env_variables = { + /// The list is a selection from "man ld-linux". + "LD_PRELOAD", + "LD_LIBRARY_PATH", + "LD_ORIGIN_PATH", + "LD_AUDIT", + "LD_DYNAMIC_WEAK", + /// The list is a selection from "man dyld" (osx). + "DYLD_LIBRARY_PATH", + "DYLD_FALLBACK_LIBRARY_PATH", + "DYLD_VERSIONED_LIBRARY_PATH", + "DYLD_INSERT_LIBRARIES", + }; + + bool require_reexec = false; + for (const auto * var : harmful_env_variables) + { + if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) + { + /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful + if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently + { + fmt::print(stderr, "Cannot override {} environment variable", var); + _exit(1); + } + require_reexec = true; + } + } + + if (require_reexec) + { + /// Use execvp() over execv() to search in PATH. + /// + /// This should be safe, since: + /// - if argv[0] is relative path - it is OK + /// - if argv[0] has only basename, the it will search in PATH, like shell will do. + /// + /// Also note, that this (search in PATH) because there is no easy and + /// portable way to get absolute path of argv[0]. + /// - on linux there is /proc/self/exec and AT_EXECFN + /// - but on other OSes there is no such thing (especially on OSX). + /// + /// And since static linking will be done someday anyway, + /// let's not pollute the code base with special cases. + int error = execvp(argv[0], argv); + _exit(error); + } +} +#endif diff --git a/src/Common/EnvironmentChecks.h b/src/Common/EnvironmentChecks.h new file mode 100644 index 00000000000..6d355a69ff9 --- /dev/null +++ b/src/Common/EnvironmentChecks.h @@ -0,0 +1,5 @@ +#pragma once + +#if !defined(USE_MUSL) +void checkHarmfulEnvironmentVariables(char ** argv); +#endif From 39a371b27de3f9f1ee94f6da0ce1b78ab527d6f0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 15:07:31 +0000 Subject: [PATCH 106/141] Bump vectorscan --- contrib/vectorscan | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/vectorscan b/contrib/vectorscan index 38431d11178..4918f81ea3d 160000 --- a/contrib/vectorscan +++ b/contrib/vectorscan @@ -1 +1 @@ -Subproject commit 38431d111781843741a781a57a6381a527d900a4 +Subproject commit 4918f81ea3d1abd18905bac9876d4a1fe2ebdf07 From 9a023744a5825d349c1027e1c1d425956a3bc87f Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 3 Jul 2024 11:13:39 -0400 Subject: [PATCH 107/141] fix build --- src/Storages/StorageFuzzQuery.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index 229ae1af7c1..6e8f425f8dc 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -1,14 +1,12 @@ #include #include -#include #include #include #include #include #include #include -#include #include #include @@ -41,10 +39,8 @@ ColumnPtr FuzzQuerySource::createColumn() fuzzer.fuzzMain(new_query); auto fuzzed_text = new_query->formatForErrorMessage(); - WriteBufferFromOwnString out; - formatAST(*new_query, out, false); - auto data = out.str(); - size_t data_len = data.size(); + if (base_before_fuzz == fuzzed_text) + continue; /// AST is too long, will start from the original query. if (config.max_query_length > 500) @@ -53,12 +49,12 @@ ColumnPtr FuzzQuerySource::createColumn() continue; } - IColumn::Offset next_offset = offset + data_len + 1; + IColumn::Offset next_offset = offset + fuzzed_text.size() + 1; data_to.resize(next_offset); - std::copy(data.begin(), data.end(), &data_to[offset]); + std::copy(fuzzed_text.begin(), fuzzed_text.end(), &data_to[offset]); - data_to[offset + data_len] = 0; + data_to[offset + fuzzed_text.size()] = 0; offsets_to[row_num] = next_offset; offset = next_offset; From 6aa2f7d5a0997fd56432f4550b3353e5d7cc6898 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 3 Jul 2024 17:02:00 +0100 Subject: [PATCH 108/141] adjust reference file --- .../0_stateless/02982_aggregation_states_destruction.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference index 72749c905a3..d00491fd7e5 100644 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference @@ -1 +1 @@ -1 1 1 +1 From e8701dc4e4b1893dbe507d5180f8367a6d4b7689 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 3 Jul 2024 18:16:26 +0200 Subject: [PATCH 109/141] Fix shutdown in GRPCServer. --- src/Server/GRPCServer.cpp | 102 ++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 43 deletions(-) diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 10b59751b22..cb36df1efc0 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -1735,10 +1735,19 @@ namespace class GRPCServer::Runner { public: - explicit Runner(GRPCServer & owner_) : owner(owner_) {} + explicit Runner(GRPCServer & owner_) : owner(owner_), log(owner.log) {} ~Runner() { + try + { + stop(); + } + catch (...) + { + tryLogCurrentException(log, "~Runner"); + } + if (queue_thread.joinable()) queue_thread.join(); } @@ -1756,13 +1765,27 @@ public: } catch (...) { - tryLogCurrentException("GRPCServer"); + tryLogCurrentException(log, "run"); } }; queue_thread = ThreadFromGlobalPool{runner_function}; } - void stop() { stopReceivingNewCalls(); } + void stop() + { + std::lock_guard lock{mutex}; + should_stop = true; + + if (current_calls.empty()) + { + /// If there are no current calls then we call shutdownQueue() to signal the queue to stop waiting for next events. + /// The following line will make CompletionQueue::Next() stop waiting if the queue is empty and return false instead. + shutdownQueue(); + + /// If there are some current calls then we can't call shutdownQueue() right now because we want to let the current calls finish. + /// In this case function shutdownQueue() will be called later in run(). + } + } size_t getNumCurrentCalls() const { @@ -1789,12 +1812,6 @@ private: [this, call_type](bool ok) { onNewCall(call_type, ok); }); } - void stopReceivingNewCalls() - { - std::lock_guard lock{mutex}; - should_stop = true; - } - void onNewCall(CallType call_type, bool responder_started_ok) { std::lock_guard lock{mutex}; @@ -1827,38 +1844,47 @@ private: void run() { setThreadName("GRPCServerQueue"); - while (true) + + bool ok = false; + void * tag = nullptr; + + while (owner.queue->Next(&tag, &ok)) { - { - std::lock_guard lock{mutex}; - finished_calls.clear(); /// Destroy finished calls. - - /// If (should_stop == true) we continue processing until there is no active calls. - if (should_stop && current_calls.empty()) - { - bool all_responders_gone = std::all_of( - responders_for_new_calls.begin(), responders_for_new_calls.end(), - [](std::unique_ptr & responder) { return !responder; }); - if (all_responders_gone) - break; - } - } - - bool ok = false; - void * tag = nullptr; - if (!owner.queue->Next(&tag, &ok)) - { - /// Queue shutted down. - break; - } - auto & callback = *static_cast(tag); callback(ok); + + std::lock_guard lock{mutex}; + finished_calls.clear(); /// Destroy finished calls. + + /// If (should_stop == true) we continue processing while there are current calls. + if (should_stop && current_calls.empty()) + shutdownQueue(); } + + /// CompletionQueue::Next() returns false if the queue is fully drained and shut down. + } + + /// Shutdown the queue if that isn't done yet. + void shutdownQueue() + { + chassert(should_stop); + if (queue_is_shut_down) + return; + + queue_is_shut_down = true; + + /// Server should be shut down before CompletionQueue. + if (owner.grpc_server) + owner.grpc_server->Shutdown(); + + if (owner.queue) + owner.queue->Shutdown(); } GRPCServer & owner; + LoggerRawPtr log; ThreadFromGlobalPool queue_thread; + bool queue_is_shut_down = false; std::vector> responders_for_new_calls; std::map> current_calls; std::vector> finished_calls; @@ -1876,16 +1902,6 @@ GRPCServer::GRPCServer(IServer & iserver_, const Poco::Net::SocketAddress & addr GRPCServer::~GRPCServer() { - /// Server should be shutdown before CompletionQueue. - if (grpc_server) - grpc_server->Shutdown(); - - /// Completion Queue should be shutdown before destroying the runner, - /// because the runner is now probably executing CompletionQueue::Next() on queue_thread - /// which is blocked until an event is available or the queue is shutting down. - if (queue) - queue->Shutdown(); - runner.reset(); } From 8b14754005b52bea1422021aac0ed774f82a7946 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 16:29:47 +0000 Subject: [PATCH 110/141] Fix ARM build (upgrade sysroot) --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sysroot b/contrib/sysroot index 39c4713334f..cc385041b22 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit 39c4713334f9f156dbf508f548d510d9129a657c +Subproject commit cc385041b226d1fc28ead14dbab5d40a5f821dd8 From d0e3a6906015b34290d3ab3fdd0ab67716f55e29 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 18:41:16 +0200 Subject: [PATCH 111/141] make 03008_deduplication_mv_generates_several_blocks_nonreplicated thinner --- .../0_stateless/03008_deduplication.python | 14 +- ...tes_several_blocks_nonreplicated.reference | 256 +++++++++--------- 2 files changed, 135 insertions(+), 135 deletions(-) diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python index 89dbea97667..dd1058518c9 100644 --- a/tests/queries/0_stateless/03008_deduplication.python +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -390,14 +390,14 @@ def test_mv_generates_several_blocks(parser): SELECT throwIf( count() != 5 ) FROM table_a_b; - SELECT throwIf( count() != 47 ) + SELECT throwIf( count() != 9 ) FROM table_when_b_even_and_joined; """ assert_second_insert_statements = f""" SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) FROM table_a_b; - SELECT throwIf( count() != {47 if args.deduplicate_dst_table else 94} ) + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 18} ) FROM table_when_b_even_and_joined; """ else: @@ -406,14 +406,14 @@ def test_mv_generates_several_blocks(parser): SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) FROM table_a_b; - SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 45} ) + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 10} ) FROM table_when_b_even_and_joined; """ assert_second_insert_statements = f""" SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) FROM table_a_b; - SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 90} ) + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) FROM table_when_b_even_and_joined; """ else: @@ -421,14 +421,14 @@ def test_mv_generates_several_blocks(parser): SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) FROM table_a_b; - SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 45} ) + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 10} ) FROM table_when_b_even_and_joined; """ assert_second_insert_statements = f""" SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) FROM table_a_b; - SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 90} ) + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 20} ) FROM table_when_b_even_and_joined; """ @@ -451,7 +451,7 @@ def test_mv_generates_several_blocks(parser): ORDER BY (a_join, b); INSERT INTO table_for_join_with SELECT 'joined_' || toString(number), number - FROM numbers(9); + FROM numbers(1); {details_print_for_table_for_join_with} {create_table_a_b_statement} diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference index 76ef4cf6b2c..6e76ec46aa8 100644 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference @@ -3,13 +3,13 @@ Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -18,13 +18,13 @@ Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -33,13 +33,13 @@ Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -48,13 +48,13 @@ Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -63,13 +63,13 @@ Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -78,13 +78,13 @@ Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -93,13 +93,13 @@ Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -108,13 +108,13 @@ Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -123,13 +123,13 @@ Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -138,13 +138,13 @@ Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -153,13 +153,13 @@ Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -168,13 +168,13 @@ Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -183,13 +183,13 @@ Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -198,13 +198,13 @@ Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -213,13 +213,13 @@ Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -228,13 +228,13 @@ Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -243,13 +243,13 @@ Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -258,13 +258,13 @@ Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -273,13 +273,13 @@ Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -288,13 +288,13 @@ Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -303,13 +303,13 @@ Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -318,13 +318,13 @@ Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -333,13 +333,13 @@ Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -348,13 +348,13 @@ Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -363,13 +363,13 @@ Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -378,13 +378,13 @@ Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -393,13 +393,13 @@ Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -408,13 +408,13 @@ Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -423,13 +423,13 @@ Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -438,13 +438,13 @@ Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -453,13 +453,13 @@ Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -468,13 +468,13 @@ Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -483,13 +483,13 @@ Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -498,13 +498,13 @@ Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -513,13 +513,13 @@ Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -528,13 +528,13 @@ Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -543,13 +543,13 @@ Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -558,13 +558,13 @@ Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -573,13 +573,13 @@ Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -588,13 +588,13 @@ Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -603,13 +603,13 @@ Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -618,13 +618,13 @@ Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -633,13 +633,13 @@ Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -648,13 +648,13 @@ Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -663,13 +663,13 @@ Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -678,13 +678,13 @@ Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -693,13 +693,13 @@ Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -708,13 +708,13 @@ Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -723,13 +723,13 @@ Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -738,13 +738,13 @@ Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -753,13 +753,13 @@ Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -768,13 +768,13 @@ Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -783,13 +783,13 @@ Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -798,13 +798,13 @@ Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -813,13 +813,13 @@ Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -828,13 +828,13 @@ Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -843,13 +843,13 @@ Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -858,13 +858,13 @@ Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -873,13 +873,13 @@ Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -888,13 +888,13 @@ Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -903,13 +903,13 @@ Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -918,13 +918,13 @@ Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -933,13 +933,13 @@ Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -948,13 +948,13 @@ Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK From 5875694669acc70b07ea5422902f6a6549f4f62b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 18:48:32 +0200 Subject: [PATCH 112/141] Fix includes --- programs/main.cpp | 1 - src/Common/Coverage.cpp | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/programs/main.cpp b/programs/main.cpp index eecbe3a6876..02ea1471108 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -13,7 +13,6 @@ #include "config.h" #include "config_tools.h" - #include #include #include diff --git a/src/Common/Coverage.cpp b/src/Common/Coverage.cpp index fa8da1f9e15..a21efe62fb6 100644 --- a/src/Common/Coverage.cpp +++ b/src/Common/Coverage.cpp @@ -1,6 +1,26 @@ #include #if defined(SANITIZE_COVERAGE) + +#include +#include + +#include +#include + +#include +#include + +#include + +/// Macros to avoid using strlen(), since it may fail if SSE is not supported. +#define writeError(data) do \ + { \ + static_assert(__builtin_constant_p(data)); \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ + _Exit(1); \ + } while (false) + __attribute__((no_sanitize("coverage"))) void dumpCoverage() { From 438fd899236b15468828c3dec751081fd07325d6 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 18:59:07 +0200 Subject: [PATCH 113/141] adjust 03008_deduplication_mv_generates_several_blocks_replicated --- ...erates_several_blocks_replicated.reference | 256 +++++++++--------- 1 file changed, 128 insertions(+), 128 deletions(-) diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference index a84539df16b..a25e8713c61 100644 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference @@ -3,13 +3,13 @@ Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -18,13 +18,13 @@ Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -33,13 +33,13 @@ Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -48,13 +48,13 @@ Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -63,13 +63,13 @@ Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -78,13 +78,13 @@ Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -93,13 +93,13 @@ Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -108,13 +108,13 @@ Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -123,13 +123,13 @@ Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -138,13 +138,13 @@ Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -153,13 +153,13 @@ Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -168,13 +168,13 @@ Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -183,13 +183,13 @@ Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -198,13 +198,13 @@ Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -213,13 +213,13 @@ Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -228,13 +228,13 @@ Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -243,13 +243,13 @@ Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -258,13 +258,13 @@ Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -273,13 +273,13 @@ Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -288,13 +288,13 @@ Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -303,13 +303,13 @@ Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -318,13 +318,13 @@ Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -333,13 +333,13 @@ Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -348,13 +348,13 @@ Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -363,13 +363,13 @@ Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -378,13 +378,13 @@ Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -393,13 +393,13 @@ Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -408,13 +408,13 @@ Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -423,13 +423,13 @@ Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -438,13 +438,13 @@ Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -453,13 +453,13 @@ Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -468,13 +468,13 @@ Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -483,13 +483,13 @@ Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -498,13 +498,13 @@ Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -513,13 +513,13 @@ Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -528,13 +528,13 @@ Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -543,13 +543,13 @@ Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -558,13 +558,13 @@ Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -573,13 +573,13 @@ Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -588,13 +588,13 @@ Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -603,13 +603,13 @@ Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -618,13 +618,13 @@ Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -633,13 +633,13 @@ Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -648,13 +648,13 @@ Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -663,13 +663,13 @@ Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -678,13 +678,13 @@ Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -693,13 +693,13 @@ Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -708,13 +708,13 @@ Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -723,13 +723,13 @@ Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -738,13 +738,13 @@ Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -753,13 +753,13 @@ Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -768,13 +768,13 @@ Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -783,13 +783,13 @@ Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -798,13 +798,13 @@ Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -813,13 +813,13 @@ Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -828,13 +828,13 @@ Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -843,13 +843,13 @@ Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -858,13 +858,13 @@ Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -873,13 +873,13 @@ Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -888,13 +888,13 @@ Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -903,13 +903,13 @@ Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -918,13 +918,13 @@ Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -933,13 +933,13 @@ Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -948,13 +948,13 @@ Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK From d688d4114c6ac915aeb584587b985006a39c9f15 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 18:16:45 +0100 Subject: [PATCH 114/141] Rename events --- src/Common/ProfileEvents.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 2e3984f8f10..acd29a91450 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -239,8 +239,8 @@ M(CannotRemoveEphemeralNode, "Number of times an error happened while trying to remove ephemeral node. This is not an issue, because our implementation of ZooKeeper library guarantee that the session will expire and the node will be removed.") \ \ M(RegexpWithMultipleNeedlesCreated, "Regular expressions with multiple needles (VectorScan library) compiled.") \ - M(RegexpWithMultipleNeedlesCacheHit, "Number of times we fetched compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ - M(RegexpWithMultipleNeedlesCacheMiss, "Number of times we failed to fetch compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpWithMultipleNeedlesGlobalCacheHit, "Number of times we fetched compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpWithMultipleNeedlesGlobalCacheMiss, "Number of times we failed to fetch compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from a local cache.") \ M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from a local cache.") \ \ From 2ec0a9cfeaf850c31f1567da1884ad77fae0bcdd Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 3 Jul 2024 19:25:44 +0200 Subject: [PATCH 115/141] Fix test test_grpc_protocol/test.py::test_progress --- tests/integration/test_grpc_protocol/test.py | 60 ++++++++------------ 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 851da99acf3..1e4ae7f0288 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -369,47 +369,33 @@ def test_progress(): "SELECT number, sleep(0.31) FROM numbers(8) SETTINGS max_block_size=2, interactive_delay=100000", stream_output=True, ) - results = list(results) - for result in results: - result.time_zone = "" - result.query_id = "" - # print(results) - # Note: We can't convert those messages to string like `results = str(results)` and then compare it as a string - # because str() can serialize a protobuf message with any order of fields. - expected_results = [ - clickhouse_grpc_pb2.Result( - output_format="TabSeparated", - progress=clickhouse_grpc_pb2.Progress( - read_rows=2, read_bytes=16, total_rows_to_read=8 - ), - ), - clickhouse_grpc_pb2.Result(output=b"0\t0\n1\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"2\t0\n3\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"4\t0\n5\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"6\t0\n7\t0\n"), - clickhouse_grpc_pb2.Result( - stats=clickhouse_grpc_pb2.Stats( - rows=8, - blocks=4, - allocated_bytes=1092, - ) - ), + # Note: We can't compare results using a statement like `assert results == expected_results` + # because `results` can come in slightly different order. + # So we compare `outputs` and `progresses` separately and not `results` as a whole. + + outputs = [i.output for i in results if i.output] + progresses = [i.progress for i in results if i.HasField("progress")] + + # print(outputs) + # print(progresses) + + expected_outputs = [ + b"0\t0\n1\t0\n", + b"2\t0\n3\t0\n", + b"4\t0\n5\t0\n", + b"6\t0\n7\t0\n", ] - # Stats data can be returned, which broke the test - results = [i for i in results if not isinstance(i, clickhouse_grpc_pb2.Stats)] + expected_progresses = [ + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16, total_rows_to_read=8), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + ] - assert results == expected_results + assert outputs == expected_outputs + assert progresses == expected_progresses def test_session_settings(): From 9737c5bab4779708e6a51bbb6739d8da34fc87bd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 17:33:45 +0000 Subject: [PATCH 116/141] Probably fix tsan assert in test_mysql_killed_while_insert_8_0 --- contrib/openssl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/openssl b/contrib/openssl index 5d81fa7068f..ee2bb8513b2 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit 5d81fa7068fc8c07f4d0997d5b703f3c541a637c +Subproject commit ee2bb8513b28bf86b35404dd17a0e29305ca9e08 From 87dda31a2c234a7596b661ad5d63f74f3124ea25 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 3 Jul 2024 20:36:19 +0200 Subject: [PATCH 117/141] Add test for GRPCServer's shutdown. --- tests/integration/test_grpc_protocol/test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 851da99acf3..1ace5b361b8 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -39,6 +39,7 @@ node = cluster.add_instance( "TSAN_OPTIONS": "report_atomic_races=0 " + os.getenv("TSAN_OPTIONS", default="") }, ipv6_address=IPV6_ADDRESS, + stay_alive=True, ) main_channel = None @@ -763,3 +764,9 @@ def test_opentelemetry_context_propagation(): ) == "SELECT 1\tsome custom state\n" ) + + +def test_restart(): + assert query("SELECT 1") == "1\n" + node.restart_clickhouse() + assert query("SELECT 2") == "2\n" From 877445c88d71caffc79e6e7cd956298e84e9867e Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 19:21:12 +0000 Subject: [PATCH 118/141] Fix reading dynamic subcolumns from altered Memory table --- src/Interpreters/getColumnFromBlock.cpp | 30 +++++++++++++++++++ src/Interpreters/getColumnFromBlock.h | 1 + .../QueryPlan/ReadFromMemoryStorageStep.cpp | 20 +++++++++---- ...3200_memory_engine_alter_dynamic.reference | 10 +++++++ .../03200_memory_engine_alter_dynamic.sql | 7 +++++ 5 files changed, 62 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference create mode 100644 tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql diff --git a/src/Interpreters/getColumnFromBlock.cpp b/src/Interpreters/getColumnFromBlock.cpp index 972e109afb3..2e70a58b5a1 100644 --- a/src/Interpreters/getColumnFromBlock.cpp +++ b/src/Interpreters/getColumnFromBlock.cpp @@ -31,6 +31,36 @@ ColumnPtr tryGetColumnFromBlock(const Block & block, const NameAndTypePair & req return castColumn({elem_column, elem_type, ""}, requested_column.type); } +ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requested_column_type, const NameAndTypePair & requested_subcolumn) +{ + const auto * elem = block.findByName(requested_subcolumn.getNameInStorage()); + if (!elem) + return nullptr; + + auto subcolumn_name = requested_subcolumn.getSubcolumnName(); + /// If requested subcolumn is dynamic, we should first perform cast and then + /// extract the subcolumn, because the data of dynamic subcolumn can change after cast. + if (elem->type->hasDynamicSubcolumns() && !elem->type->equals(*requested_column_type)) + { + auto casted_column = castColumn({elem->column, elem->type, ""}, requested_column_type); + auto elem_column = requested_column_type->tryGetSubcolumn(subcolumn_name, casted_column); + auto elem_type = requested_column_type->tryGetSubcolumnType(subcolumn_name); + + if (!elem_type || !elem_column) + return nullptr; + + return elem_column; + } + + auto elem_column = elem->type->tryGetSubcolumn(subcolumn_name, elem->column); + auto elem_type = elem->type->tryGetSubcolumnType(subcolumn_name); + + if (!elem_type || !elem_column) + return nullptr; + + return castColumn({elem_column, elem_type, ""}, requested_subcolumn.type); +} + ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & requested_column) { auto result_column = tryGetColumnFromBlock(block, requested_column); diff --git a/src/Interpreters/getColumnFromBlock.h b/src/Interpreters/getColumnFromBlock.h index 26500cfdd17..737ce9db555 100644 --- a/src/Interpreters/getColumnFromBlock.h +++ b/src/Interpreters/getColumnFromBlock.h @@ -9,5 +9,6 @@ namespace DB ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & requested_column); ColumnPtr tryGetColumnFromBlock(const Block & block, const NameAndTypePair & requested_column); +ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requested_column_type, const NameAndTypePair & requested_subcolumn); } diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp index 2e7693b1b36..6dc0c021a14 100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp @@ -30,12 +30,15 @@ public: std::shared_ptr> parallel_execution_index_, InitializerFunc initializer_func_ = {}) : ISource(storage_snapshot->getSampleBlockForColumns(column_names_)) - , column_names_and_types(storage_snapshot->getColumnsByNames( + , requested_column_names_and_types(storage_snapshot->getColumnsByNames( GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects(), column_names_)) , data(data_) , parallel_execution_index(parallel_execution_index_) , initializer_func(std::move(initializer_func_)) { + auto all_column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects()); + for (const auto & [name, type] : all_column_names_and_types) + all_names_to_types[name] = type; } String getName() const override { return "Memory"; } @@ -59,17 +62,20 @@ protected: const Block & src = (*data)[current_index]; Columns columns; - size_t num_columns = column_names_and_types.size(); + size_t num_columns = requested_column_names_and_types.size(); columns.reserve(num_columns); - auto name_and_type = column_names_and_types.begin(); + auto name_and_type = requested_column_names_and_types.begin(); for (size_t i = 0; i < num_columns; ++i) { - columns.emplace_back(tryGetColumnFromBlock(src, *name_and_type)); + if (name_and_type->isSubcolumn()) + columns.emplace_back(tryGetSubcolumnFromBlock(src, all_names_to_types[name_and_type->getNameInStorage()], *name_and_type)); + else + columns.emplace_back(tryGetColumnFromBlock(src, *name_and_type)); ++name_and_type; } - fillMissingColumns(columns, src.rows(), column_names_and_types, column_names_and_types, {}, nullptr); + fillMissingColumns(columns, src.rows(), requested_column_names_and_types, requested_column_names_and_types, {}, nullptr); assert(std::all_of(columns.begin(), columns.end(), [](const auto & column) { return column != nullptr; })); return Chunk(std::move(columns), src.rows()); @@ -88,7 +94,9 @@ private: } } - const NamesAndTypesList column_names_and_types; + const NamesAndTypesList requested_column_names_and_types; + /// Map (name -> type) for all columns from the storage header. + std::unordered_map all_names_to_types; size_t execution_index = 0; std::shared_ptr data; std::shared_ptr> parallel_execution_index; diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference new file mode 100644 index 00000000000..6d2c1334d6e --- /dev/null +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference @@ -0,0 +1,10 @@ +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql new file mode 100644 index 00000000000..95823283812 --- /dev/null +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql @@ -0,0 +1,7 @@ +set allow_experimental_dynamic_type=1; +create table test (d Dynamic) engine=Memory; +insert into table test select * from numbers(5); +alter table test modify column d Dynamic(max_types=1); +select d.UInt64 from test settings allow_experimental_analyzer=1; +select d.UInt64 from test settings allow_experimental_analyzer=1; + From 1b6ef06a91cf31b2aa8dbe5e3494ec63d602e4c9 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 3 Jul 2024 21:14:28 +0100 Subject: [PATCH 119/141] review fixes --- src/Common/CgroupsMemoryUsageObserver.cpp | 26 ++++++++++++----------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 33393a8b9c6..d36c7fd08aa 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -1,5 +1,3 @@ -#include -#include #include #if defined(OS_LINUX) @@ -14,7 +12,9 @@ #include #include +#include #include +#include #include #include "config.h" @@ -59,9 +59,9 @@ uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & ke } assertChar(' ', buf); - uint64_t mem_usage = 0; - readIntText(mem_usage, buf); - return mem_usage; + uint64_t value = 0; + readIntText(value, buf); + return value; } throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName()); @@ -96,10 +96,12 @@ struct CgroupsV2Reader : ICgroupsReader current_buf.rewind(); stat_buf.rewind(); - uint64_t mem_usage = 0; + int64_t mem_usage = 0; /// memory.current contains a single number + /// the reason why we subtract it described here: https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667 readIntText(mem_usage, current_buf); mem_usage -= readMetricFromStatFile(stat_buf, "inactive_file"); + chassert(mem_usage >= 0, "Negative memory usage"); return mem_usage; } @@ -153,13 +155,13 @@ std::optional getCgroupsV1Path() std::pair getCgroupsPath() { - auto v2_file_name = getCgroupsV2Path(); - if (v2_file_name.has_value()) - return {*v2_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; + auto v2_path = getCgroupsV2Path(); + if (v2_path.has_value()) + return {*v2_path, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; - auto v1_file_name = getCgroupsV1Path(); - if (v1_file_name.has_value()) - return {*v1_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; + auto v1_path = getCgroupsV1Path(); + if (v1_path.has_value()) + return {*v1_path, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file"); } From 5fb0fa3c3d3e611944f83ac06aaac2d6e5c0d0db Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 3 Jul 2024 20:21:37 +0000 Subject: [PATCH 120/141] Automatic style fix --- tests/integration/test_memory_limit_observer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index 2840c830396..0eda165b1d2 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -76,4 +76,4 @@ def test_memory_usage_doesnt_include_page_cache_size(started_cluster): WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%' """ ).strip() - assert int(max_mem_usage_from_cgroup) < 2 * 2 ** 30 + assert int(max_mem_usage_from_cgroup) < 2 * 2**30 From eb7ab5128d009fe89ef1994e2f32ea10bea900b1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 21:07:59 +0000 Subject: [PATCH 121/141] Clean-up custom LLVM 15 patches --- contrib/llvm-project | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/llvm-project b/contrib/llvm-project index d2142eed980..2a8967b60cb 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit d2142eed98046a47ff7112e3cc1e197c8a5cd80f +Subproject commit 2a8967b60cbe5bc2df253712bac343cc5263c5fc From 33b7afc1b45e0a493f4139dae14025e4e3613c29 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Jul 2024 09:02:33 +0000 Subject: [PATCH 122/141] Bump vectorscan to 5.4.11 --- contrib/icu-cmake/CMakeLists.txt | 2 +- contrib/vectorscan | 2 +- contrib/vectorscan-cmake/CMakeLists.txt | 27 +++++--------------- contrib/vectorscan-cmake/common/hs_version.h | 6 ++++- 4 files changed, 14 insertions(+), 23 deletions(-) diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index a54bd8c1de2..0a650f2bcc0 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -5,7 +5,7 @@ else () endif () if (NOT ENABLE_ICU) - message(STATUS "Not using icu") + message(STATUS "Not using ICU") return() endif() diff --git a/contrib/vectorscan b/contrib/vectorscan index 4918f81ea3d..d29730e1cb9 160000 --- a/contrib/vectorscan +++ b/contrib/vectorscan @@ -1 +1 @@ -Subproject commit 4918f81ea3d1abd18905bac9876d4a1fe2ebdf07 +Subproject commit d29730e1cb9daaa66bda63426cdce83505d2c809 diff --git a/contrib/vectorscan-cmake/CMakeLists.txt b/contrib/vectorscan-cmake/CMakeLists.txt index d6c626c1612..35d5fd3dc82 100644 --- a/contrib/vectorscan-cmake/CMakeLists.txt +++ b/contrib/vectorscan-cmake/CMakeLists.txt @@ -1,11 +1,8 @@ -# We use vectorscan, a portable and API/ABI-compatible drop-in replacement for hyperscan. - +# Vectorscan is drop-in replacement for Hyperscan. if ((ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) OR ARCH_AARCH64) - option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES}) + option (ENABLE_VECTORSCAN "Enable vectorscan" ${ENABLE_LIBRARIES}) endif() -# TODO PPC should generally work but needs manual generation of ppc/config.h file on a PPC machine - if (NOT ENABLE_VECTORSCAN) message (STATUS "Not using vectorscan") return() @@ -272,34 +269,24 @@ if (ARCH_AARCH64) ) endif() -# TODO -# if (ARCH_PPC64LE) -# list(APPEND SRCS -# "${LIBRARY_DIR}/src/util/supervector/arch/ppc64el/impl.cpp" -# ) -# endif() - add_library (_vectorscan ${SRCS}) -target_compile_options (_vectorscan PRIVATE - -fno-sanitize=undefined # assume the library takes care of itself - -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # options from original build system -) # library has too much debug information if (OMIT_HEAVY_DEBUG_SYMBOLS) target_compile_options (_vectorscan PRIVATE -g0) endif() -# Include version header manually generated by running the original build system -target_include_directories (_vectorscan SYSTEM PRIVATE common) +target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") + +# Makes the version header visible. It was generated by running the native build system manually. +# Please update whenever you update vectorscan. +target_include_directories (_vectorscan SYSTEM PUBLIC common) # vectorscan inherited some patched in-source versions of boost headers to fix a bug in # boost 1.69. This bug has been solved long ago but vectorscan's source code still # points to the patched versions, so include it here. target_include_directories (_vectorscan SYSTEM PRIVATE "${LIBRARY_DIR}/include") -target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") - # Include platform-specific config header generated by manually running the original build system # Please regenerate these files if you update vectorscan. diff --git a/contrib/vectorscan-cmake/common/hs_version.h b/contrib/vectorscan-cmake/common/hs_version.h index 8315b44fb2a..3d266484095 100644 --- a/contrib/vectorscan-cmake/common/hs_version.h +++ b/contrib/vectorscan-cmake/common/hs_version.h @@ -32,8 +32,12 @@ /** * A version string to identify this release of Hyperscan. */ -#define HS_VERSION_STRING "5.4.7 2022-06-20" +#define HS_VERSION_STRING "5.4.11 2024-07-04" #define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (7 << 8) | 0) +#define HS_MAJOR 5 +#define HS_MINOR 4 +#define HS_PATCH 11 + #endif /* HS_VERSION_H_C6428FAF8E3713 */ From 4543ae3d6490a660c4df8b15bde0345735a540e0 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 4 Jul 2024 12:51:22 +0200 Subject: [PATCH 123/141] Update test --- tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql index 95823283812..a01a595dbb5 100644 --- a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql @@ -3,5 +3,5 @@ create table test (d Dynamic) engine=Memory; insert into table test select * from numbers(5); alter table test modify column d Dynamic(max_types=1); select d.UInt64 from test settings allow_experimental_analyzer=1; -select d.UInt64 from test settings allow_experimental_analyzer=1; +select d.UInt64 from test settings allow_experimental_analyzer=0; From 6ea4c101214edf678743833856e09f717f672c67 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 27 Jun 2024 17:19:52 +0200 Subject: [PATCH 124/141] Done --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f6d282792db..9da0b297e9e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -399,7 +399,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ + M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \ M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ From 6f89c4b9328b587d2342ca172da4eaebe0611a1c Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 28 Jun 2024 21:09:36 +0000 Subject: [PATCH 125/141] Bump the minimal version to keep compatibility --- tests/integration/helpers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 544b06cca1b..34f5c28fef8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -73,7 +73,7 @@ CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.lo # Minimum version we use in integration tests to check compatibility with old releases # Keep in mind that we only support upgrading between releases that are at most 1 year different. # This means that this minimum need to be, at least, 1 year older than the current release -CLICKHOUSE_CI_MIN_TESTED_VERSION = "22.8" +CLICKHOUSE_CI_MIN_TESTED_VERSION = "23.3" # to create docker-compose env file From d62454714b1e8ea760c6d6baa8df4ec185b8750c Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 1 Jul 2024 17:52:35 +0200 Subject: [PATCH 126/141] Make test error visible --- tests/ci/integration_tests_runner.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 87f721cfde7..7802dfa3c52 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -434,7 +434,14 @@ class ClickhouseIntegrationTestsRunner: "Getting all tests to the file %s with cmd: \n%s", out_file_full, cmd ) with open(out_file_full, "wb") as ofd: - subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) + try: + subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) + except subprocess.CalledProcessError as ex: + print("ERROR: Setting test plan failed. Output:") + with open(out_file_full, 'r') as file: + for line in file: + print(" " + line, end='') + raise ex all_tests = set() with open(out_file_full, "r", encoding="utf-8") as all_tests_fd: From e2553454ecbe890cecf90995565d805287ca0703 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 1 Jul 2024 16:02:06 +0000 Subject: [PATCH 127/141] Automatic style fix --- tests/ci/integration_tests_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 7802dfa3c52..a1c33cf22d9 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -438,9 +438,9 @@ class ClickhouseIntegrationTestsRunner: subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) except subprocess.CalledProcessError as ex: print("ERROR: Setting test plan failed. Output:") - with open(out_file_full, 'r') as file: + with open(out_file_full, "r") as file: for line in file: - print(" " + line, end='') + print(" " + line, end="") raise ex all_tests = set() From 782115efea35a60cd9627faf22c1684ad54a551d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 1 Jul 2024 18:06:47 +0200 Subject: [PATCH 128/141] Very bad change --- tests/integration/test_distributed_inter_server_secret/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 50d7be4d11e..3e656c9d776 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -28,7 +28,7 @@ def make_instance(name, *args, **kwargs): # DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits -assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" +assert CLICKHOUSE_CI_MIN_TESTED_VERSION <= "23.3" # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user From 2c37cc048c16a90d972c0f1c2b9c41727174cff3 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 1 Jul 2024 22:11:44 +0200 Subject: [PATCH 129/141] Style --- tests/ci/integration_tests_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index a1c33cf22d9..21f16d995a4 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -438,7 +438,7 @@ class ClickhouseIntegrationTestsRunner: subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) except subprocess.CalledProcessError as ex: print("ERROR: Setting test plan failed. Output:") - with open(out_file_full, "r") as file: + with open(out_file_full, "r", encoding="utf-8") as file: for line in file: print(" " + line, end="") raise ex From 6fcd0eed06af3a02d1e26f182eb7f2bbf16d6471 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Jul 2024 11:40:04 +0000 Subject: [PATCH 130/141] Remove tests which are no longer relevant --- .../test.py | 36 +------------------ 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 3e656c9d776..457590ac851 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -7,7 +7,7 @@ import uuid import time from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION +from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -26,10 +26,6 @@ def make_instance(name, *args, **kwargs): **kwargs, ) - -# DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits -assert CLICKHOUSE_CI_MIN_TESTED_VERSION <= "23.3" - # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( @@ -38,14 +34,6 @@ n1 = make_instance( user_configs=["configs/users.d/new_user.xml"], ) n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) -backward = make_instance( - "backward", - main_configs=["configs/remote_servers_backward.xml"], - image="clickhouse/clickhouse-server", - # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 - tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, - with_installed_binary=True, -) users = pytest.mark.parametrize( "user,password", @@ -427,28 +415,6 @@ def test_per_user_protocol_settings_secure_cluster(user, password): ) -@users -def test_user_secure_cluster_with_backward(user, password): - id_ = "with-backward-query-dist_secure-" + user - n1.query( - f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password - ) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - -@users -def test_user_secure_cluster_from_backward(user, password): - id_ = "from-backward-query-dist_secure-" + user - backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - assert n1.contains_in_log( - "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." - ) - - def test_secure_cluster_distributed_over_distributed_different_users(): # This works because we will have initial_user='default' n1.query( From 910065e42745e3a1299a596462d6197e4c36a50f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 2 Jul 2024 11:48:28 +0000 Subject: [PATCH 131/141] Automatic style fix --- tests/integration/test_distributed_inter_server_secret/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 457590ac851..7ecb2cda257 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -26,6 +26,7 @@ def make_instance(name, *args, **kwargs): **kwargs, ) + # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( From dd3eb538f6a0788365ff62e15ab167b28f3d76a1 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Jul 2024 22:51:21 +0000 Subject: [PATCH 132/141] Better --- src/Client/HedgedConnections.cpp | 6 ++ src/Client/MultiplexedConnections.cpp | 6 ++ src/Server/TCPHandler.cpp | 7 ++ .../test_analyzer_compatibility/__init__.py | 0 .../configs/remote_servers.xml | 17 ++++ .../test_analyzer_compatibility/test.py | 79 +++++++++++++++++++ 6 files changed, 115 insertions(+) create mode 100644 tests/integration/test_analyzer_compatibility/__init__.py create mode 100644 tests/integration/test_analyzer_compatibility/configs/remote_servers.xml create mode 100644 tests/integration/test_analyzer_compatibility/test.py diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 8c993f906e0..51cbe6f3d6f 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -195,6 +195,12 @@ void HedgedConnections::sendQuery( modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset; } + /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting. + /// Make the analyzer being set, so it will be effectively applied on the remote server. + /// In other words, the initiator always controls whether the analyzer enabled or not for + /// all servers involved in the distributed query processing. + modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); + replica.connection->sendQuery(timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout); replica.packet_receiver->setTimeout(hedged_connections_factory.getConnectionTimeouts().receive_timeout); diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 5d0fc8fd39e..99bdd706d8b 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -150,6 +150,12 @@ void MultiplexedConnections::sendQuery( } } + /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting. + /// Make the analyzer being set, so it will be effectively applied on the remote server. + /// In other words, the initiator always controls whether the analyzer enabled or not for + /// all servers involved in the distributed query processing. + modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; size_t num_replicas = replica_states.size(); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a522a3f8782..cfb41be0c27 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1890,6 +1890,13 @@ void TCPHandler::receiveQuery() /// /// Settings /// + + /// FIXME: Remove when allow_experimental_analyzer will become obsolete. + /// Even if allow_experimental_analyzer setting wasn't explicitly changed on the initiator server, it might be disabled there + /// So we just force ourselves to act in the same way. + if (query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + passed_settings.set("allow_experimental_analyzer", static_cast(passed_settings.allow_experimental_analyzer)); + auto settings_changes = passed_settings.changes(); query_kind = query_context->getClientInfo().query_kind; if (query_kind == ClientInfo::QueryKind::INITIAL_QUERY) diff --git a/tests/integration/test_analyzer_compatibility/__init__.py b/tests/integration/test_analyzer_compatibility/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml b/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml new file mode 100644 index 00000000000..0a50dab7fd3 --- /dev/null +++ b/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml @@ -0,0 +1,17 @@ + + + + + true + + current + 9000 + + + backward + 9000 + + + + + diff --git a/tests/integration/test_analyzer_compatibility/test.py b/tests/integration/test_analyzer_compatibility/test.py new file mode 100644 index 00000000000..0ba7f248606 --- /dev/null +++ b/tests/integration/test_analyzer_compatibility/test.py @@ -0,0 +1,79 @@ +import uuid + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +CLICKHOUSE_MAX_VERSION_WITH_ANALYZER_DISABLED_BY_DEFAULT = "24.2" + +cluster = ClickHouseCluster(__file__) +# Here analyzer is enabled by default +current = cluster.add_instance( + "current", + main_configs=["configs/remote_servers.xml"], +) +# Here analyzer is disabled by default +backward = cluster.add_instance( + "backward", + use_old_analyzer=True, + main_configs=["configs/remote_servers.xml"], + image="clickhouse/clickhouse-server", + tag=CLICKHOUSE_MAX_VERSION_WITH_ANALYZER_DISABLED_BY_DEFAULT, + with_installed_binary=True, +) + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_two_new_versions(start_cluster): + # Two new versions (both know about the analyzer) + # One have it enabled by default, another one - disabled. + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + query_id = str(uuid.uuid4()) + current.query("SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables);", query_id=query_id) + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + assert current.query(""" +SELECT hostname() AS h, getSetting('allow_experimental_analyzer') +FROM clusterAllReplicas('test_cluster_mixed', system.one) +ORDER BY h;""") == TSV([["backward", "true"], ["current", "true"]]) + + # Should be enabled everywhere + analyzer_enabled = current.query(f""" +SELECT +DISTINCT Settings['allow_experimental_analyzer'] +FROM clusterAllReplicas('test_cluster_mixed', system.query_log) +WHERE initial_query_id = '{query_id}';""") + + assert TSV(analyzer_enabled) == TSV("1") + + query_id = str(uuid.uuid4()) + backward.query("SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables)", query_id=query_id) + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + assert backward.query(""" +SELECT hostname() AS h, getSetting('allow_experimental_analyzer') +FROM clusterAllReplicas('test_cluster_mixed', system.one) +ORDER BY h;""") == TSV([["backward", "false"], ["current", "false"]]) + + # Should be disabled everywhere + analyzer_enabled = backward.query(f""" +SELECT +DISTINCT Settings['allow_experimental_analyzer'] +FROM clusterAllReplicas('test_cluster_mixed', system.query_log) +WHERE initial_query_id = '{query_id}';""") + + assert TSV(analyzer_enabled) == TSV("0") From 7bd283764cc7c9350c376c3dc4f4b5c1bba0deee Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 2 Jul 2024 22:59:22 +0000 Subject: [PATCH 133/141] Automatic style fix --- .../test_analyzer_compatibility/test.py | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_analyzer_compatibility/test.py b/tests/integration/test_analyzer_compatibility/test.py index 0ba7f248606..d4ded420c61 100644 --- a/tests/integration/test_analyzer_compatibility/test.py +++ b/tests/integration/test_analyzer_compatibility/test.py @@ -22,6 +22,7 @@ backward = cluster.add_instance( with_installed_binary=True, ) + @pytest.fixture(scope="module") def start_cluster(): try: @@ -39,41 +40,61 @@ def test_two_new_versions(start_cluster): backward.query("SYSTEM FLUSH LOGS") query_id = str(uuid.uuid4()) - current.query("SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables);", query_id=query_id) + current.query( + "SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables);", + query_id=query_id, + ) current.query("SYSTEM FLUSH LOGS") backward.query("SYSTEM FLUSH LOGS") - assert current.query(""" + assert ( + current.query( + """ SELECT hostname() AS h, getSetting('allow_experimental_analyzer') FROM clusterAllReplicas('test_cluster_mixed', system.one) -ORDER BY h;""") == TSV([["backward", "true"], ["current", "true"]]) +ORDER BY h;""" + ) + == TSV([["backward", "true"], ["current", "true"]]) + ) # Should be enabled everywhere - analyzer_enabled = current.query(f""" + analyzer_enabled = current.query( + f""" SELECT DISTINCT Settings['allow_experimental_analyzer'] FROM clusterAllReplicas('test_cluster_mixed', system.query_log) -WHERE initial_query_id = '{query_id}';""") +WHERE initial_query_id = '{query_id}';""" + ) assert TSV(analyzer_enabled) == TSV("1") query_id = str(uuid.uuid4()) - backward.query("SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables)", query_id=query_id) + backward.query( + "SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables)", + query_id=query_id, + ) current.query("SYSTEM FLUSH LOGS") backward.query("SYSTEM FLUSH LOGS") - assert backward.query(""" + assert ( + backward.query( + """ SELECT hostname() AS h, getSetting('allow_experimental_analyzer') FROM clusterAllReplicas('test_cluster_mixed', system.one) -ORDER BY h;""") == TSV([["backward", "false"], ["current", "false"]]) +ORDER BY h;""" + ) + == TSV([["backward", "false"], ["current", "false"]]) + ) # Should be disabled everywhere - analyzer_enabled = backward.query(f""" + analyzer_enabled = backward.query( + f""" SELECT DISTINCT Settings['allow_experimental_analyzer'] FROM clusterAllReplicas('test_cluster_mixed', system.query_log) -WHERE initial_query_id = '{query_id}';""") +WHERE initial_query_id = '{query_id}';""" + ) assert TSV(analyzer_enabled) == TSV("0") From d57375181d7de82f28ce5fcd40580cf04c2411b8 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 3 Jul 2024 15:49:52 +0000 Subject: [PATCH 134/141] Better --- src/Core/Settings.h | 2 +- tests/integration/helpers/cluster.py | 2 +- .../test.py | 35 ++++++++++++++++++- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9da0b297e9e..f6d282792db 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -399,7 +399,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \ + M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 34f5c28fef8..544b06cca1b 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -73,7 +73,7 @@ CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.lo # Minimum version we use in integration tests to check compatibility with old releases # Keep in mind that we only support upgrading between releases that are at most 1 year different. # This means that this minimum need to be, at least, 1 year older than the current release -CLICKHOUSE_CI_MIN_TESTED_VERSION = "23.3" +CLICKHOUSE_CI_MIN_TESTED_VERSION = "22.8" # to create docker-compose env file diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 7ecb2cda257..50d7be4d11e 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -7,7 +7,7 @@ import uuid import time from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION cluster = ClickHouseCluster(__file__) @@ -27,6 +27,9 @@ def make_instance(name, *args, **kwargs): ) +# DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits +assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" + # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( @@ -35,6 +38,14 @@ n1 = make_instance( user_configs=["configs/users.d/new_user.xml"], ) n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) +backward = make_instance( + "backward", + main_configs=["configs/remote_servers_backward.xml"], + image="clickhouse/clickhouse-server", + # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 + tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, + with_installed_binary=True, +) users = pytest.mark.parametrize( "user,password", @@ -416,6 +427,28 @@ def test_per_user_protocol_settings_secure_cluster(user, password): ) +@users +def test_user_secure_cluster_with_backward(user, password): + id_ = "with-backward-query-dist_secure-" + user + n1.query( + f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password + ) + assert get_query_user_info(n1, id_) == [user, user] + assert get_query_user_info(backward, id_) == [user, user] + + +@users +def test_user_secure_cluster_from_backward(user, password): + id_ = "from-backward-query-dist_secure-" + user + backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) + assert get_query_user_info(n1, id_) == [user, user] + assert get_query_user_info(backward, id_) == [user, user] + + assert n1.contains_in_log( + "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." + ) + + def test_secure_cluster_distributed_over_distributed_different_users(): # This works because we will have initial_user='default' n1.query( From fcabefa8f3e5a86aad6f5c2b79ef8eabbc349b9d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 3 Jul 2024 16:42:01 +0000 Subject: [PATCH 135/141] Automatically disabling --- src/Server/TCPHandler.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index cfb41be0c27..443cc99475f 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1892,10 +1892,13 @@ void TCPHandler::receiveQuery() /// /// FIXME: Remove when allow_experimental_analyzer will become obsolete. - /// Even if allow_experimental_analyzer setting wasn't explicitly changed on the initiator server, it might be disabled there - /// So we just force ourselves to act in the same way. - if (query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - passed_settings.set("allow_experimental_analyzer", static_cast(passed_settings.allow_experimental_analyzer)); + /// Analyzer became Beta in 24.3 and started to be enabled by default. + /// We have to disable it for ourselves to make sure we don't have different settings on + /// different servers. + if (query_kind == ClientInfo::QueryKind::SECONDARY_QUERY + && client_info.getVersionNumber() < VersionNumber(23, 3, 0) + && !passed_settings.allow_experimental_analyzer.changed) + passed_settings.set("allow_experimental_analyzer", false); auto settings_changes = passed_settings.changes(); query_kind = query_context->getClientInfo().query_kind; From fe6a875c7473d814011f4ae202942232e0801427 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 3 Jul 2024 21:51:40 +0000 Subject: [PATCH 136/141] Make the setting back IMPORTANT + fix build --- src/Core/Settings.h | 2 +- src/Server/TCPHandler.cpp | 2 +- tests/integration/helpers/cluster.py | 2 +- .../test.py | 35 +------------------ 4 files changed, 4 insertions(+), 37 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f6d282792db..9da0b297e9e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -399,7 +399,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ + M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \ M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 443cc99475f..ac1423f87c1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1875,7 +1875,7 @@ void TCPHandler::receiveQuery() #endif } - query_context = session->makeQueryContext(std::move(client_info)); + query_context = session->makeQueryContext(client_info); /// Sets the default database if it wasn't set earlier for the session context. if (is_interserver_mode && !default_database.empty()) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 544b06cca1b..34f5c28fef8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -73,7 +73,7 @@ CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.lo # Minimum version we use in integration tests to check compatibility with old releases # Keep in mind that we only support upgrading between releases that are at most 1 year different. # This means that this minimum need to be, at least, 1 year older than the current release -CLICKHOUSE_CI_MIN_TESTED_VERSION = "22.8" +CLICKHOUSE_CI_MIN_TESTED_VERSION = "23.3" # to create docker-compose env file diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 50d7be4d11e..7ecb2cda257 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -7,7 +7,7 @@ import uuid import time from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION +from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -27,9 +27,6 @@ def make_instance(name, *args, **kwargs): ) -# DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits -assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" - # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( @@ -38,14 +35,6 @@ n1 = make_instance( user_configs=["configs/users.d/new_user.xml"], ) n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) -backward = make_instance( - "backward", - main_configs=["configs/remote_servers_backward.xml"], - image="clickhouse/clickhouse-server", - # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 - tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, - with_installed_binary=True, -) users = pytest.mark.parametrize( "user,password", @@ -427,28 +416,6 @@ def test_per_user_protocol_settings_secure_cluster(user, password): ) -@users -def test_user_secure_cluster_with_backward(user, password): - id_ = "with-backward-query-dist_secure-" + user - n1.query( - f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password - ) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - -@users -def test_user_secure_cluster_from_backward(user, password): - id_ = "from-backward-query-dist_secure-" + user - backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - assert n1.contains_in_log( - "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." - ) - - def test_secure_cluster_distributed_over_distributed_different_users(): # This works because we will have initial_user='default' n1.query( From c93d8cbb66ad7ae5a1e9c4f46f0c351944ff04e9 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 4 Jul 2024 13:57:47 +0200 Subject: [PATCH 137/141] Fixes --- contrib/jemalloc-cmake/CMakeLists.txt | 6 +++++- programs/keeper/Keeper.cpp | 5 +++++ programs/server/Server.cpp | 5 +++++ src/Common/Jemalloc.cpp | 26 +++++++++++++++++++++++++- src/Common/Jemalloc.h | 4 ++++ 5 files changed, 44 insertions(+), 2 deletions(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index cc5a391676f..38ebcc8f680 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -34,7 +34,11 @@ if (OS_LINUX) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false") + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") + else() + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") + endif() else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index f14ef2e5552..fc3778593a6 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -27,6 +27,8 @@ #include #include +#include + #include #include @@ -277,6 +279,9 @@ HTTPContextPtr httpContext() int Keeper::main(const std::vector & /*args*/) try { +#if USE_JEMALLOC + setJemallocBackgroundThreads(true); +#endif Poco::Logger * log = &logger(); UseSSL use_ssl; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 4cb3b5f45c7..1277249b462 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -656,6 +657,10 @@ static void initializeAzureSDKLogger( int Server::main(const std::vector & /*args*/) try { +#if USE_JEMALLOC + setJemallocBackgroundThreads(true); +#endif + Stopwatch startup_watch; Poco::Logger * log = &logger(); diff --git a/src/Common/Jemalloc.cpp b/src/Common/Jemalloc.cpp index fbe2f62c944..d7cc246db6a 100644 --- a/src/Common/Jemalloc.cpp +++ b/src/Common/Jemalloc.cpp @@ -46,6 +46,20 @@ void checkJemallocProfilingEnabled() "set: MALLOC_CONF=background_thread:true,prof:true"); } +template +void setJemallocValue(const char * name, T value) +{ + T old_value; + size_t old_value_size = sizeof(T); + if (mallctl(name, &old_value, &old_value_size, reinterpret_cast(&value), sizeof(T))) + { + LOG_WARNING(getLogger("Jemalloc"), "mallctl for {} failed", name); + return; + } + + LOG_INFO(getLogger("Jemalloc"), "Value for {} set to {} (from {})", name, value, old_value); +} + void setJemallocProfileActive(bool value) { checkJemallocProfilingEnabled(); @@ -58,7 +72,7 @@ void setJemallocProfileActive(bool value) return; } - mallctl("prof.active", nullptr, nullptr, &value, sizeof(bool)); + setJemallocValue("prof.active", value); LOG_TRACE(getLogger("SystemJemalloc"), "Profiling is {}", value ? "enabled" : "disabled"); } @@ -84,6 +98,16 @@ std::string flushJemallocProfile(const std::string & file_prefix) return profile_dump_path; } +void setJemallocBackgroundThreads(bool enabled) +{ + setJemallocValue("background_thread", enabled); +} + +void setJemallocMaxBackgroundThreads(size_t max_threads) +{ + setJemallocValue("max_background_threads", max_threads); +} + } #endif diff --git a/src/Common/Jemalloc.h b/src/Common/Jemalloc.h index 80ff0f1a319..499a906fd3d 100644 --- a/src/Common/Jemalloc.h +++ b/src/Common/Jemalloc.h @@ -17,6 +17,10 @@ void setJemallocProfileActive(bool value); std::string flushJemallocProfile(const std::string & file_prefix); +void setJemallocBackgroundThreads(bool enabled); + +void setJemallocMaxBackgroundThreads(size_t max_threads); + } #endif From da5f3c1efd940488413b34d3e5f8855460f0ce80 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 4 Jul 2024 13:01:57 +0000 Subject: [PATCH 138/141] Fix test --- .../test_functions.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index 758dda655da..e5023c062ff 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -130,10 +130,13 @@ def test_string_functions(start_cluster): functions = map(lambda x: x.strip(), functions) excludes = [ + # The argument of this function is not a seed, but an arbitrary expression needed for bypassing common subexpression elimination. "rand", "rand64", "randConstant", + "randCanonical", "generateUUIDv4", + "generateULID", # Syntax error otherwise "position", "substring", @@ -153,6 +156,16 @@ def test_string_functions(start_cluster): "tryBase64Decode", # Removed in 23.9 "meiliMatch", + # These functions require more than one argument. + "parseDateTimeInJodaSyntaxOrZero", + "parseDateTimeInJodaSyntaxOrNull", + "parseDateTimeOrNull", + "parseDateTimeOrZero", + "parseDateTime", + # The argument is effectively a disk name (and we don't have one with name foo) + "filesystemUnreserved", + "filesystemCapacity", + "filesystemAvailable", ] functions = filter(lambda x: x not in excludes, functions) @@ -205,6 +218,9 @@ def test_string_functions(start_cluster): # Function X takes exactly one parameter: # The function 'X' can only be used as a window function "BAD_ARGUMENTS", + # String foo is obviously not a valid IP address. + "CANNOT_PARSE_IPV4", + "CANNOT_PARSE_IPV6", ] if any(map(lambda x: x in error_message, allowed_errors)): logging.info("Skipping %s", function) From dbfe6323821739af4c8856e799cb5d861377439d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 4 Jul 2024 13:11:13 +0000 Subject: [PATCH 139/141] Correct the test to exclude farmHash for now --- tests/integration/test_backward_compatibility/test_functions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index e5023c062ff..fc03a77030e 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -166,6 +166,8 @@ def test_string_functions(start_cluster): "filesystemUnreserved", "filesystemCapacity", "filesystemAvailable", + # Exclude it for now. Looks like the result depends on the build type. + "farmHash64", ] functions = filter(lambda x: x not in excludes, functions) From 24ff0f601d5b8d474429d67b5ed8702c662c58ec Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 4 Jul 2024 17:15:32 +0200 Subject: [PATCH 140/141] update keeper bench example config file --- utils/keeper-bench/example.yaml | 67 +++++++++++++++++---------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/utils/keeper-bench/example.yaml b/utils/keeper-bench/example.yaml index e800e923482..c3a62a01eac 100644 --- a/utils/keeper-bench/example.yaml +++ b/utils/keeper-bench/example.yaml @@ -18,45 +18,46 @@ connections: host: "localhost:9181" -generator: - setup: +setup: + node: + name: "test3" + node: + name: "test_create" + node: + name: "test4" + node: + name: "test" + data: "somedata" node: - name: "test3" + repeat: 4 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 node: - name: "test_create" - node: - name: "test4" - node: - name: "test" - data: "somedata" - node: - repeat: 4 - name: - random_string: - size: 15 - data: - random_string: - size: - min_value: 10 - max_value: 20 + repeat: 2 node: repeat: 2 - node: - repeat: 2 - name: - random_string: - size: 12 name: random_string: - size: 15 - data: - random_string: - size: - min_value: 10 - max_value: 20 - node: - name: "test2" - data: "somedata" + size: 12 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 + node: + name: "test2" + data: "somedata" + +generator: requests: create: path: "/test_create" From 6dd13dd34ab397d5e18d01820a064f18ed25595a Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 4 Jul 2024 17:59:07 +0200 Subject: [PATCH 141/141] fix clean-up process --- utils/keeper-bench/Runner.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 5ae4c7a0b1c..587e015b340 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1311,9 +1311,9 @@ void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & pa while (!children_span.empty()) { Coordination::Requests ops; - for (size_t i = 0; i < 1000 && !children.empty(); ++i) + for (size_t i = 0; i < 1000 && !children_span.empty(); ++i) { - removeRecursive(zookeeper, fs::path(path) / children.back()); + removeRecursive(zookeeper, fs::path(path) / children_span.back()); ops.emplace_back(zkutil::makeRemoveRequest(fs::path(path) / children_span.back(), -1)); children_span = children_span.subspan(0, children_span.size() - 1); }