From a1c83e2f51117a69d484c7ae7884c3bc5dd98129 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 23 Jan 2024 15:26:04 +0100 Subject: [PATCH] Revert "Allow to attach partition from table with different partition expression when destination partition expression doesn't re-partition" --- .../statements/alter/partition.md | 2 +- src/Interpreters/MonotonicityCheckVisitor.h | 102 +--- src/Interpreters/applyFunction.cpp | 43 -- src/Interpreters/applyFunction.h | 16 - src/Parsers/queryToString.cpp | 5 - src/Parsers/queryToString.h | 1 - src/Storages/MergeTree/IMergeTreeDataPart.cpp | 37 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 - src/Storages/MergeTree/KeyCondition.cpp | 85 ++- src/Storages/MergeTree/MergeTreeData.cpp | 350 ++++++++----- src/Storages/MergeTree/MergeTreeData.h | 18 - .../MergeTree/MergeTreeDataPartCloner.cpp | 320 ------------ .../MergeTree/MergeTreeDataPartCloner.h | 43 -- src/Storages/MergeTree/MergeTreePartition.cpp | 39 -- src/Storages/MergeTree/MergeTreePartition.h | 10 +- ...ergeTreePartitionCompatibilityVerifier.cpp | 91 ---- .../MergeTreePartitionCompatibilityVerifier.h | 30 -- ...TreePartitionGlobalMinMaxIdxCalculator.cpp | 25 - ...geTreePartitionGlobalMinMaxIdxCalculator.h | 24 - src/Storages/StorageMergeTree.cpp | 93 +--- src/Storages/StorageReplicatedMergeTree.cpp | 135 +---- .../__init__.py | 0 .../configs/remote_servers.xml | 17 - .../test.py | 214 -------- ...artition_different_partition_exp.reference | 467 ----------------- ...tach_partition_different_partition_exp.sql | 485 ------------------ 26 files changed, 342 insertions(+), 2311 deletions(-) delete mode 100644 src/Interpreters/applyFunction.cpp delete mode 100644 src/Interpreters/applyFunction.h delete mode 100644 src/Storages/MergeTree/MergeTreeDataPartCloner.cpp delete mode 100644 src/Storages/MergeTree/MergeTreeDataPartCloner.h delete mode 100644 src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.cpp delete mode 100644 src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h delete mode 100644 src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.cpp delete mode 100644 src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h delete mode 100644 tests/integration/test_attach_partition_distinct_expression_replicated/__init__.py delete mode 100644 tests/integration/test_attach_partition_distinct_expression_replicated/configs/remote_servers.xml delete mode 100644 tests/integration/test_attach_partition_distinct_expression_replicated/test.py delete mode 100644 tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.reference delete mode 100644 tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.sql diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 5659a0565c5..114b8d5ffe3 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -112,7 +112,7 @@ Note that: For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. -- Both tables must have the same order by key and the same primary key. +- Both tables must have the same partition key, the same order by key and the same primary key. - Both tables must have the same indices and projections. - Both tables must have the same storage policy. diff --git a/src/Interpreters/MonotonicityCheckVisitor.h b/src/Interpreters/MonotonicityCheckVisitor.h index 4e71bd56851..cc386825024 100644 --- a/src/Interpreters/MonotonicityCheckVisitor.h +++ b/src/Interpreters/MonotonicityCheckVisitor.h @@ -1,17 +1,13 @@ #pragma once #include -#include #include -#include #include #include -#include #include -#include +#include #include #include -#include #include #include #include @@ -37,8 +33,6 @@ public: ASTIdentifier * identifier = nullptr; DataTypePtr arg_data_type = {}; - Range range = Range::createWholeUniverse(); - void reject() { monotonicity.is_monotonic = false; } bool isRejected() const { return !monotonicity.is_monotonic; } @@ -103,30 +97,13 @@ public: if (data.isRejected()) return; - /// Monotonicity check only works for functions that contain at most two arguments and one of them must be a constant. - if (!ast_function.arguments) + /// TODO: monotonicity for functions of several arguments + if (!ast_function.arguments || ast_function.arguments->children.size() != 1) { data.reject(); return; } - auto arguments_size = ast_function.arguments->children.size(); - - if (arguments_size == 0 || arguments_size > 2) - { - data.reject(); - return; - } - else if (arguments_size == 2) - { - /// If the function has two arguments, then one of them must be a constant. - if (!ast_function.arguments->children[0]->as() && !ast_function.arguments->children[1]->as()) - { - data.reject(); - return; - } - } - if (!data.canOptimize(ast_function)) { data.reject(); @@ -147,33 +124,14 @@ public: return; } - auto function_arguments = getFunctionArguments(ast_function, data); - - auto function_base = function->build(function_arguments); + ColumnsWithTypeAndName args; + args.emplace_back(data.arg_data_type, "tmp"); + auto function_base = function->build(args); if (function_base && function_base->hasInformationAboutMonotonicity()) { bool is_positive = data.monotonicity.is_positive; - data.monotonicity = function_base->getMonotonicityForRange(*data.arg_data_type, data.range.left, data.range.right); - - auto & key_range = data.range; - - /// If we apply function to open interval, we can get empty intervals in result. - /// E.g. for ('2020-01-03', '2020-01-20') after applying 'toYYYYMM' we will get ('202001', '202001'). - /// To avoid this we make range left and right included. - /// Any function that treats NULL specially is not monotonic. - /// Thus we can safely use isNull() as an -Inf/+Inf indicator here. - if (!key_range.left.isNull()) - { - key_range.left = applyFunction(function_base, data.arg_data_type, key_range.left); - key_range.left_included = true; - } - - if (!key_range.right.isNull()) - { - key_range.right = applyFunction(function_base, data.arg_data_type, key_range.right); - key_range.right_included = true; - } + data.monotonicity = function_base->getMonotonicityForRange(*data.arg_data_type, Field(), Field()); if (!is_positive) data.monotonicity.is_positive = !data.monotonicity.is_positive; @@ -185,53 +143,13 @@ public: static bool needChildVisit(const ASTPtr & parent, const ASTPtr &) { - /// Multi-argument functions with all but one constant arguments can be monotonic. + /// Currently we check monotonicity only for single-argument functions. + /// Although, multi-argument functions with all but one constant arguments can also be monotonic. if (const auto * func = typeid_cast(parent.get())) - return func->arguments->children.size() <= 2; + return func->arguments->children.size() < 2; return true; } - - static ColumnWithTypeAndName extractLiteralColumnAndTypeFromAstLiteral(const ASTLiteral * literal) - { - ColumnWithTypeAndName result; - - result.type = applyVisitor(FieldToDataType(), literal->value); - result.column = result.type->createColumnConst(0, literal->value); - - return result; - } - - static ColumnsWithTypeAndName getFunctionArguments(const ASTFunction & ast_function, const Data & data) - { - ColumnsWithTypeAndName args; - - auto arguments_size = ast_function.arguments->children.size(); - - chassert(arguments_size == 1 || arguments_size == 2); - - if (arguments_size == 2) - { - if (ast_function.arguments->children[0]->as()) - { - const auto * literal = ast_function.arguments->children[0]->as(); - args.push_back(extractLiteralColumnAndTypeFromAstLiteral(literal)); - args.emplace_back(data.arg_data_type, "tmp"); - } - else - { - const auto * literal = ast_function.arguments->children[1]->as(); - args.emplace_back(data.arg_data_type, "tmp"); - args.push_back(extractLiteralColumnAndTypeFromAstLiteral(literal)); - } - } - else - { - args.emplace_back(data.arg_data_type, "tmp"); - } - - return args; - } }; using MonotonicityCheckVisitor = ConstInDepthNodeVisitor; diff --git a/src/Interpreters/applyFunction.cpp b/src/Interpreters/applyFunction.cpp deleted file mode 100644 index a53f14f0381..00000000000 --- a/src/Interpreters/applyFunction.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include - -#include -#include - -namespace DB -{ - -static Field applyFunctionForField(const FunctionBasePtr & func, const DataTypePtr & arg_type, const Field & arg_value) -{ - ColumnsWithTypeAndName columns{ - {arg_type->createColumnConst(1, arg_value), arg_type, "x"}, - }; - - auto col = func->execute(columns, func->getResultType(), 1); - return (*col)[0]; -} - -FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field) -{ - /// Fallback for fields without block reference. - if (field.isExplicit()) - return applyFunctionForField(func, current_type, field); - - String result_name = "_" + func->getName() + "_" + toString(field.column_idx); - const auto & columns = field.columns; - size_t result_idx = columns->size(); - - for (size_t i = 0; i < result_idx; ++i) - if ((*columns)[i].name == result_name) - result_idx = i; - - if (result_idx == columns->size()) - { - ColumnsWithTypeAndName args{(*columns)[field.column_idx]}; - field.columns->emplace_back(ColumnWithTypeAndName{nullptr, func->getResultType(), result_name}); - (*columns)[result_idx].column = func->execute(args, (*columns)[result_idx].type, columns->front().column->size()); - } - - return {field.columns, field.row_idx, result_idx}; -} - -} diff --git a/src/Interpreters/applyFunction.h b/src/Interpreters/applyFunction.h deleted file mode 100644 index 9b8ae43a53c..00000000000 --- a/src/Interpreters/applyFunction.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include - -namespace DB -{ -struct FieldRef; - -class IFunctionBase; -class IDataType; - -using DataTypePtr = std::shared_ptr; -using FunctionBasePtr = std::shared_ptr; - -FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field); -} diff --git a/src/Parsers/queryToString.cpp b/src/Parsers/queryToString.cpp index 4a1903393f6..9721aa1f128 100644 --- a/src/Parsers/queryToString.cpp +++ b/src/Parsers/queryToString.cpp @@ -3,11 +3,6 @@ namespace DB { - String queryToStringNullable(const ASTPtr & query) - { - return query ? queryToString(query) : ""; - } - String queryToString(const ASTPtr & query) { return queryToString(*query); diff --git a/src/Parsers/queryToString.h b/src/Parsers/queryToString.h index 3acd560b1e2..873de218293 100644 --- a/src/Parsers/queryToString.h +++ b/src/Parsers/queryToString.h @@ -6,5 +6,4 @@ namespace DB { String queryToString(const ASTPtr & query); String queryToString(const IAST & query); - String queryToStringNullable(const ASTPtr & query); } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index f3057a8254f..87f23b0da2a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -81,7 +81,6 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); size_t minmax_idx_size = minmax_column_types.size(); - hyperrectangle.clear(); hyperrectangle.reserve(minmax_idx_size); for (size_t i = 0; i < minmax_idx_size; ++i) { @@ -105,39 +104,6 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par initialized = true; } -Block IMergeTreeDataPart::MinMaxIndex::getBlock(const MergeTreeData & data) const -{ - if (!initialized) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to get block from uninitialized MinMax index."); - - Block block; - - const auto metadata_snapshot = data.getInMemoryMetadataPtr(); - const auto & partition_key = metadata_snapshot->getPartitionKey(); - - const auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); - const auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); - const auto minmax_idx_size = minmax_column_types.size(); - - for (size_t i = 0; i < minmax_idx_size; ++i) - { - const auto & data_type = minmax_column_types[i]; - const auto & column_name = minmax_column_names[i]; - - const auto column = data_type->createColumn(); - - const auto min_val = hyperrectangle.at(i).left; - const auto max_val = hyperrectangle.at(i).right; - - column->insert(min_val); - column->insert(max_val); - - block.insert(ColumnWithTypeAndName(column->getPtr(), data_type, column_name)); - } - - return block; -} - IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::store( const MergeTreeData & data, IDataPartStorage & part_storage, Checksums & out_checksums) const { @@ -219,7 +185,8 @@ void IMergeTreeDataPart::MinMaxIndex::merge(const MinMaxIndex & other) if (!initialized) { - *this = other; + hyperrectangle = other.hyperrectangle; + initialized = true; } else { diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 29f0f54d419..640a1f1d0a3 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -336,7 +336,6 @@ public: } void load(const MergeTreeData & data, const PartMetadataManagerPtr & manager); - Block getBlock(const MergeTreeData & data) const; using WrittenFiles = std::vector>; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index e5bcb11091f..d5922ae1bc2 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1,37 +1,36 @@ -#include -#include +#include +#include +#include #include #include #include #include -#include #include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include -#include +#include +#include #include -#include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -837,6 +836,21 @@ bool KeyCondition::getConstant(const ASTPtr & expr, Block & block_with_constants return node.tryGetConstant(out_value, out_type); } + +static Field applyFunctionForField( + const FunctionBasePtr & func, + const DataTypePtr & arg_type, + const Field & arg_value) +{ + ColumnsWithTypeAndName columns + { + { arg_type->createColumnConst(1, arg_value), arg_type, "x" }, + }; + + auto col = func->execute(columns, func->getResultType(), 1); + return (*col)[0]; +} + /// The case when arguments may have types different than in the primary key. static std::pair applyFunctionForFieldOfUnknownType( const FunctionBasePtr & func, @@ -876,6 +890,33 @@ static std::pair applyBinaryFunctionForFieldOfUnknownType( return {std::move(result), std::move(return_type)}; } + +static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field) +{ + /// Fallback for fields without block reference. + if (field.isExplicit()) + return applyFunctionForField(func, current_type, field); + + String result_name = "_" + func->getName() + "_" + toString(field.column_idx); + const auto & columns = field.columns; + size_t result_idx = columns->size(); + + for (size_t i = 0; i < result_idx; ++i) + { + if ((*columns)[i].name == result_name) + result_idx = i; + } + + if (result_idx == columns->size()) + { + ColumnsWithTypeAndName args{(*columns)[field.column_idx]}; + field.columns->emplace_back(ColumnWithTypeAndName {nullptr, func->getResultType(), result_name}); + (*columns)[result_idx].column = func->execute(args, (*columns)[result_idx].type, columns->front().column->size()); + } + + return {field.columns, field.row_idx, result_idx}; +} + /** When table's key has expression with these functions from a column, * and when a column in a query is compared with a constant, such as: * CREATE TABLE (x String) ORDER BY toDate(x) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c3e348a549a..61332a4ff38 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8,6 +8,21 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -28,20 +43,19 @@ #include #include #include +#include +#include #include #include #include #include #include #include -#include -#include #include -#include #include -#include -#include #include +#include +#include #include #include #include @@ -50,41 +64,26 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include -#include #include #include #include #include #include -#include #include #include +#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include @@ -198,50 +197,6 @@ namespace ErrorCodes extern const int LIMIT_EXCEEDED; } -static size_t getPartitionAstFieldsCount(const ASTPartition & partition_ast, ASTPtr partition_value_ast) -{ - if (partition_ast.fields_count.has_value()) - return *partition_ast.fields_count; - - if (partition_value_ast->as()) - return 1; - - const auto * tuple_ast = partition_value_ast->as(); - - if (!tuple_ast) - { - throw Exception( - ErrorCodes::INVALID_PARTITION_VALUE, "Expected literal or tuple for partition key, got {}", partition_value_ast->getID()); - } - - if (tuple_ast->name != "tuple") - { - if (!isFunctionCast(tuple_ast)) - throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); - - if (tuple_ast->arguments->as()->children.empty()) - throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); - - auto first_arg = tuple_ast->arguments->as()->children.at(0); - if (const auto * inner_tuple = first_arg->as(); inner_tuple && inner_tuple->name == "tuple") - { - const auto * arguments_ast = tuple_ast->arguments->as(); - return arguments_ast ? arguments_ast->children.size() : 0; - } - else if (const auto * inner_literal_tuple = first_arg->as(); inner_literal_tuple) - { - return inner_literal_tuple->value.getType() == Field::Types::Tuple ? inner_literal_tuple->value.safeGet().size() : 1; - } - - throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); - } - else - { - const auto * arguments_ast = tuple_ast->arguments->as(); - return arguments_ast ? arguments_ast->children.size() : 0; - } -} - static void checkSuspiciousIndices(const ASTFunction * index_function) { std::unordered_set unique_index_expression_hashes; @@ -4899,7 +4854,7 @@ void MergeTreeData::removePartContributionToColumnAndSecondaryIndexSizes(const D } void MergeTreeData::checkAlterPartitionIsPossible( - const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings, ContextPtr) const + const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings, ContextPtr local_context) const { for (const auto & command : commands) { @@ -4927,15 +4882,7 @@ void MergeTreeData::checkAlterPartitionIsPossible( throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DROP/DETACH PARTITION ALL currently"); } else - { - // The below `getPartitionIDFromQuery` call will not work for attach / replace because it assumes the partition expressions - // are the same and deliberately uses this storage. Later on, `MergeTreeData::replaceFrom` is called, and it makes the right - // call to `getPartitionIDFromQuery` using source storage. - // Note: `PartitionCommand::REPLACE_PARTITION` is used both for `REPLACE PARTITION` and `ATTACH PARTITION FROM` queries. - // But not for `ATTACH PARTITION` queries. - if (command.type != PartitionCommand::REPLACE_PARTITION) - getPartitionIDFromQuery(command.partition, getContext()); - } + getPartitionIDFromQuery(command.partition, local_context); } } } @@ -5669,8 +5616,69 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc MergeTreePartInfo::validatePartitionID(partition_ast.id->clone(), format_version); return partition_ast.id->as()->value.safeGet(); } + size_t partition_ast_fields_count = 0; ASTPtr partition_value_ast = partition_ast.value->clone(); - auto partition_ast_fields_count = getPartitionAstFieldsCount(partition_ast, partition_value_ast); + if (!partition_ast.fields_count.has_value()) + { + if (partition_value_ast->as()) + { + partition_ast_fields_count = 1; + } + else if (const auto * tuple_ast = partition_value_ast->as()) + { + if (tuple_ast->name != "tuple") + { + if (isFunctionCast(tuple_ast)) + { + if (tuple_ast->arguments->as()->children.empty()) + { + throw Exception( + ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); + } + auto first_arg = tuple_ast->arguments->as()->children.at(0); + if (const auto * inner_tuple = first_arg->as(); inner_tuple && inner_tuple->name == "tuple") + { + const auto * arguments_ast = tuple_ast->arguments->as(); + if (arguments_ast) + partition_ast_fields_count = arguments_ast->children.size(); + else + partition_ast_fields_count = 0; + } + else if (const auto * inner_literal_tuple = first_arg->as(); inner_literal_tuple) + { + if (inner_literal_tuple->value.getType() == Field::Types::Tuple) + partition_ast_fields_count = inner_literal_tuple->value.safeGet().size(); + else + partition_ast_fields_count = 1; + } + else + { + throw Exception( + ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); + } + } + else + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); + } + else + { + const auto * arguments_ast = tuple_ast->arguments->as(); + if (arguments_ast) + partition_ast_fields_count = arguments_ast->children.size(); + else + partition_ast_fields_count = 0; + } + } + else + { + throw Exception( + ErrorCodes::INVALID_PARTITION_VALUE, "Expected literal or tuple for partition key, got {}", partition_value_ast->getID()); + } + } + else + { + partition_ast_fields_count = *partition_ast.fields_count; + } if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { @@ -7006,35 +7014,23 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour if (my_snapshot->getColumns().getAllPhysical().sizeOfDifference(src_snapshot->getColumns().getAllPhysical())) throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Tables have different structure"); - if (queryToStringNullable(my_snapshot->getSortingKeyAST()) != queryToStringNullable(src_snapshot->getSortingKeyAST())) + auto query_to_string = [] (const ASTPtr & ast) + { + return ast ? queryToString(ast) : ""; + }; + + if (query_to_string(my_snapshot->getSortingKeyAST()) != query_to_string(src_snapshot->getSortingKeyAST())) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different ordering"); + if (query_to_string(my_snapshot->getPartitionKeyAST()) != query_to_string(src_snapshot->getPartitionKeyAST())) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different partition key"); + if (format_version != src_data->format_version) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different format_version"); - if (queryToStringNullable(my_snapshot->getPrimaryKeyAST()) != queryToStringNullable(src_snapshot->getPrimaryKeyAST())) + if (query_to_string(my_snapshot->getPrimaryKeyAST()) != query_to_string(src_snapshot->getPrimaryKeyAST())) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different primary key"); - const auto is_a_subset_of = [](const auto & lhs, const auto & rhs) - { - if (lhs.size() > rhs.size()) - return false; - - const auto rhs_set = NameSet(rhs.begin(), rhs.end()); - for (const auto & lhs_element : lhs) - if (!rhs_set.contains(lhs_element)) - return false; - - return true; - }; - - if (!is_a_subset_of(my_snapshot->getColumnsRequiredForPartitionKey(), src_snapshot->getColumnsRequiredForPartitionKey())) - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Destination table partition expression columns must be a subset of source table partition expression columns"); - } - const auto check_definitions = [](const auto & my_descriptions, const auto & src_descriptions) { if (my_descriptions.size() != src_descriptions.size()) @@ -7075,56 +7071,128 @@ std::pair MergeTreeData::cloneAn const ReadSettings & read_settings, const WriteSettings & write_settings) { - return MergeTreeDataPartCloner::clone( - this, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, require_part_metadata, params, read_settings, write_settings); -} + /// Check that the storage policy contains the disk where the src_part is located. + bool does_storage_policy_allow_same_disk = false; + for (const DiskPtr & disk : getStoragePolicy()->getDisks()) + { + if (disk->getName() == src_part->getDataPartStorage().getDiskName()) + { + does_storage_policy_allow_same_disk = true; + break; + } + } + if (!does_storage_policy_allow_same_disk) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Could not clone and load part {} because disk does not belong to storage policy", + quoteString(src_part->getDataPartStorage().getFullPath())); -std::pair MergeTreeData::cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( - const MergeTreeData::DataPartPtr & src_part, - const MergeTreePartition & new_partition, - const String & partition_id, - const IMergeTreeDataPart::MinMaxIndex & min_max_index, - const String & tmp_part_prefix, - const StorageMetadataPtr & my_metadata_snapshot, - const IDataPartStorage::ClonePartParams & clone_params, - ContextPtr local_context, - Int64 min_block, - Int64 max_block -) -{ - MergeTreePartInfo dst_part_info(partition_id, min_block, max_block, src_part->info.level); + String dst_part_name = src_part->getNewName(dst_part_info); + String tmp_dst_part_name = tmp_part_prefix + dst_part_name; + auto temporary_directory_lock = getTemporaryPartDirectoryHolder(tmp_dst_part_name); - return MergeTreeDataPartCloner::cloneWithDistinctPartitionExpression( - this, - src_part, - my_metadata_snapshot, - dst_part_info, - tmp_part_prefix, - local_context->getReadSettings(), - local_context->getWriteSettings(), - new_partition, - min_max_index, - false, - clone_params); -} + /// Why it is needed if we only hardlink files? + auto reservation = src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk()); + auto src_part_storage = src_part->getDataPartStoragePtr(); -std::pair MergeTreeData::createPartitionAndMinMaxIndexFromSourcePart( - const MergeTreeData::DataPartPtr & src_part, - const StorageMetadataPtr & metadata_snapshot, - ContextPtr local_context) -{ - const auto & src_data = src_part->storage; + scope_guard src_flushed_tmp_dir_lock; + MergeTreeData::MutableDataPartPtr src_flushed_tmp_part; - auto metadata_manager = std::make_shared(src_part.get()); - IMergeTreeDataPart::MinMaxIndex min_max_index; + /// If source part is in memory, flush it to disk and clone it already in on-disk format + /// Protect tmp dir from removing by cleanup thread with src_flushed_tmp_dir_lock + /// Construct src_flushed_tmp_part in order to delete part with its directory at destructor + if (auto src_part_in_memory = asInMemoryPart(src_part)) + { + auto flushed_part_path = *src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix); - min_max_index.load(src_data, metadata_manager); + auto tmp_src_part_file_name = fs::path(tmp_dst_part_name).filename(); + src_flushed_tmp_dir_lock = src_part->storage.getTemporaryPartDirectoryHolder(tmp_src_part_file_name); - MergeTreePartition new_partition; + auto flushed_part_storage = src_part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot); - new_partition.create(metadata_snapshot, min_max_index.getBlock(src_data), 0u, local_context); + src_flushed_tmp_part = MergeTreeDataPartBuilder(*this, src_part->name, flushed_part_storage) + .withPartInfo(src_part->info) + .withPartFormatFromDisk() + .build(); - return {new_partition, min_max_index}; + src_flushed_tmp_part->is_temp = true; + src_part_storage = flushed_part_storage; + } + + String with_copy; + if (params.copy_instead_of_hardlink) + with_copy = " (copying data)"; + + auto dst_part_storage = src_part_storage->freeze( + relative_data_path, + tmp_dst_part_name, + read_settings, + write_settings, + /* save_metadata_callback= */ {}, + params); + + if (params.metadata_version_to_write.has_value()) + { + chassert(!params.keep_metadata_version); + auto out_metadata = dst_part_storage->writeFile(IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, getContext()->getWriteSettings()); + writeText(metadata_snapshot->getMetadataVersion(), *out_metadata); + out_metadata->finalize(); + if (getSettings()->fsync_after_insert) + out_metadata->sync(); + } + + LOG_DEBUG(log, "Clone{} part {} to {}{}", + src_flushed_tmp_part ? " flushed" : "", + src_part_storage->getFullPath(), + std::string(fs::path(dst_part_storage->getFullRootPath()) / tmp_dst_part_name), + with_copy); + + auto dst_data_part = MergeTreeDataPartBuilder(*this, dst_part_name, dst_part_storage) + .withPartFormatFromDisk() + .build(); + + if (!params.copy_instead_of_hardlink && params.hardlinked_files) + { + params.hardlinked_files->source_part_name = src_part->name; + params.hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID(); + + for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next()) + { + if (!params.files_to_copy_instead_of_hardlinks.contains(it->name()) + && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED + && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) + { + params.hardlinked_files->hardlinks_from_source_part.insert(it->name()); + } + } + + auto projections = src_part->getProjectionParts(); + for (const auto & [name, projection_part] : projections) + { + const auto & projection_storage = projection_part->getDataPartStorage(); + for (auto it = projection_storage.iterate(); it->isValid(); it->next()) + { + auto file_name_with_projection_prefix = fs::path(projection_storage.getPartDirectory()) / it->name(); + if (!params.files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix) + && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED + && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) + { + params.hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix); + } + } + } + } + + /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. + TransactionID tid = params.txn ? params.txn->tid : Tx::PrehistoricTID; + dst_data_part->version.setCreationTID(tid, nullptr); + dst_data_part->storeVersionMetadata(); + + dst_data_part->is_temp = true; + + dst_data_part->loadColumnsChecksumsIndexes(require_part_metadata, true); + dst_data_part->modification_time = dst_part_storage->getLastModified().epochTime(); + return std::make_pair(dst_data_part, std::move(temporary_directory_lock)); } String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 9c433e11b84..f0dbaf0e307 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -231,7 +231,6 @@ public: } }; - using DataParts = std::set; using MutableDataParts = std::set; using DataPartsVector = std::vector; @@ -849,23 +848,6 @@ public: const ReadSettings & read_settings, const WriteSettings & write_settings); - std::pair cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( - const MergeTreeData::DataPartPtr & src_part, - const MergeTreePartition & new_partition, - const String & partition_id, - const IMergeTreeDataPart::MinMaxIndex & min_max_index, - const String & tmp_part_prefix, - const StorageMetadataPtr & my_metadata_snapshot, - const IDataPartStorage::ClonePartParams & clone_params, - ContextPtr local_context, - Int64 min_block, - Int64 max_block); - - static std::pair createPartitionAndMinMaxIndexFromSourcePart( - const MergeTreeData::DataPartPtr & src_part, - const StorageMetadataPtr & metadata_snapshot, - ContextPtr local_context); - virtual std::vector getMutationsStatus() const = 0; /// Returns true if table can create new parts with adaptive granularity diff --git a/src/Storages/MergeTree/MergeTreeDataPartCloner.cpp b/src/Storages/MergeTree/MergeTreeDataPartCloner.cpp deleted file mode 100644 index 78cb9aa0624..00000000000 --- a/src/Storages/MergeTree/MergeTreeDataPartCloner.cpp +++ /dev/null @@ -1,320 +0,0 @@ -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - -static Poco::Logger * log = &Poco::Logger::get("MergeTreeDataPartCloner"); - -namespace DistinctPartitionExpression -{ -std::unique_ptr updatePartitionFile( - const MergeTreeData & merge_tree_data, - const MergeTreePartition & partition, - const MergeTreeData::MutableDataPartPtr & dst_part, - IDataPartStorage & storage) -{ - storage.removeFile("partition.dat"); - // Leverage already implemented MergeTreePartition::store to create & store partition.dat. - // Checksum is re-calculated later. - return partition.store(merge_tree_data, storage, dst_part->checksums); -} - -IMergeTreeDataPart::MinMaxIndex::WrittenFiles updateMinMaxFiles( - const MergeTreeData & merge_tree_data, - const MergeTreeData::MutableDataPartPtr & dst_part, - IDataPartStorage & storage, - const StorageMetadataPtr & metadata_snapshot) -{ - for (const auto & column_name : MergeTreeData::getMinMaxColumnsNames(metadata_snapshot->partition_key)) - { - auto file = "minmax_" + escapeForFileName(column_name) + ".idx"; - storage.removeFile(file); - } - - return dst_part->minmax_idx->store(merge_tree_data, storage, dst_part->checksums); -} - -void finalizeNewFiles(const std::vector> & files, bool sync_new_files) -{ - for (const auto & file : files) - { - file->finalize(); - if (sync_new_files) - file->sync(); - } -} - -void updateNewPartFiles( - const MergeTreeData & merge_tree_data, - const MergeTreeData::MutableDataPartPtr & dst_part, - const MergeTreePartition & new_partition, - const IMergeTreeDataPart::MinMaxIndex & new_min_max_index, - const StorageMetadataPtr & src_metadata_snapshot, - bool sync_new_files) -{ - auto & storage = dst_part->getDataPartStorage(); - - *dst_part->minmax_idx = new_min_max_index; - - auto partition_file = updatePartitionFile(merge_tree_data, new_partition, dst_part, storage); - - auto min_max_files = updateMinMaxFiles(merge_tree_data, dst_part, storage, src_metadata_snapshot); - - IMergeTreeDataPart::MinMaxIndex::WrittenFiles written_files; - - if (partition_file) - written_files.emplace_back(std::move(partition_file)); - - written_files.insert(written_files.end(), std::make_move_iterator(min_max_files.begin()), std::make_move_iterator(min_max_files.end())); - - finalizeNewFiles(written_files, sync_new_files); - - // MergeTreeDataPartCloner::finalize_part calls IMergeTreeDataPart::loadColumnsChecksumsIndexes, which will re-create - // the checksum file if it doesn't exist. Relying on that is cumbersome, but this refactoring is simply a code extraction - // with small improvements. It can be further improved in the future. - storage.removeFile("checksums.txt"); -} -} - -namespace -{ -bool doesStoragePolicyAllowSameDisk(MergeTreeData * merge_tree_data, const MergeTreeData::DataPartPtr & src_part) -{ - for (const DiskPtr & disk : merge_tree_data->getStoragePolicy()->getDisks()) - if (disk->getName() == src_part->getDataPartStorage().getDiskName()) - return true; - return false; -} - -DataPartStoragePtr flushPartStorageToDiskIfInMemory( - MergeTreeData * merge_tree_data, - const MergeTreeData::DataPartPtr & src_part, - const StorageMetadataPtr & metadata_snapshot, - const String & tmp_part_prefix, - const String & tmp_dst_part_name, - scope_guard & src_flushed_tmp_dir_lock, - MergeTreeData::MutableDataPartPtr src_flushed_tmp_part) -{ - if (auto src_part_in_memory = asInMemoryPart(src_part)) - { - auto flushed_part_path = src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix); - auto tmp_src_part_file_name = fs::path(tmp_dst_part_name).filename(); - - src_flushed_tmp_dir_lock = src_part->storage.getTemporaryPartDirectoryHolder(tmp_src_part_file_name); - - auto flushed_part_storage = src_part_in_memory->flushToDisk(*flushed_part_path, metadata_snapshot); - - src_flushed_tmp_part = MergeTreeDataPartBuilder(*merge_tree_data, src_part->name, flushed_part_storage) - .withPartInfo(src_part->info) - .withPartFormatFromDisk() - .build(); - - src_flushed_tmp_part->is_temp = true; - - return flushed_part_storage; - } - - return src_part->getDataPartStoragePtr(); -} - -std::shared_ptr hardlinkAllFiles( - MergeTreeData * merge_tree_data, - const DB::ReadSettings & read_settings, - const DB::WriteSettings & write_settings, - const DataPartStoragePtr & storage, - const String & path, - const DB::IDataPartStorage::ClonePartParams & params) -{ - return storage->freeze( - merge_tree_data->getRelativeDataPath(), - path, - read_settings, - write_settings, - /*save_metadata_callback=*/{}, - params); -} - -std::pair cloneSourcePart( - MergeTreeData * merge_tree_data, - const MergeTreeData::DataPartPtr & src_part, - const StorageMetadataPtr & metadata_snapshot, - const MergeTreePartInfo & dst_part_info, - const String & tmp_part_prefix, - const ReadSettings & read_settings, - const WriteSettings & write_settings, - const DB::IDataPartStorage::ClonePartParams & params) -{ - const auto dst_part_name = src_part->getNewName(dst_part_info); - - const auto tmp_dst_part_name = tmp_part_prefix + dst_part_name; - - auto temporary_directory_lock = merge_tree_data->getTemporaryPartDirectoryHolder(tmp_dst_part_name); - - src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk()); - - scope_guard src_flushed_tmp_dir_lock; - MergeTreeData::MutableDataPartPtr src_flushed_tmp_part; - - auto src_part_storage = flushPartStorageToDiskIfInMemory( - merge_tree_data, src_part, metadata_snapshot, tmp_part_prefix, tmp_dst_part_name, src_flushed_tmp_dir_lock, src_flushed_tmp_part); - - auto dst_part_storage = hardlinkAllFiles(merge_tree_data, read_settings, write_settings, src_part_storage, tmp_dst_part_name, params); - - if (params.metadata_version_to_write.has_value()) - { - chassert(!params.keep_metadata_version); - auto out_metadata = dst_part_storage->writeFile( - IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, merge_tree_data->getContext()->getWriteSettings()); - writeText(metadata_snapshot->getMetadataVersion(), *out_metadata); - out_metadata->finalize(); - if (merge_tree_data->getSettings()->fsync_after_insert) - out_metadata->sync(); - } - - LOG_DEBUG( - log, - "Clone {} part {} to {}{}", - src_flushed_tmp_part ? "flushed" : "", - src_part_storage->getFullPath(), - std::string(fs::path(dst_part_storage->getFullRootPath()) / tmp_dst_part_name), - false); - - - auto part = MergeTreeDataPartBuilder(*merge_tree_data, dst_part_name, dst_part_storage).withPartFormatFromDisk().build(); - - return std::make_pair(part, std::move(temporary_directory_lock)); -} - -void handleHardLinkedParameterFiles(const MergeTreeData::DataPartPtr & src_part, const DB::IDataPartStorage::ClonePartParams & params) -{ - const auto & hardlinked_files = params.hardlinked_files; - - hardlinked_files->source_part_name = src_part->name; - hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID(); - - for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next()) - { - if (!params.files_to_copy_instead_of_hardlinks.contains(it->name()) - && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED - && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) - { - hardlinked_files->hardlinks_from_source_part.insert(it->name()); - } - } -} - -void handleProjections(const MergeTreeData::DataPartPtr & src_part, const DB::IDataPartStorage::ClonePartParams & params) -{ - auto projections = src_part->getProjectionParts(); - for (const auto & [name, projection_part] : projections) - { - const auto & projection_storage = projection_part->getDataPartStorage(); - for (auto it = projection_storage.iterate(); it->isValid(); it->next()) - { - auto file_name_with_projection_prefix = fs::path(projection_storage.getPartDirectory()) / it->name(); - if (!params.files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix) - && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED - && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) - { - params.hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix); - } - } - } -} - -MergeTreeData::MutableDataPartPtr finalizePart( - const MergeTreeData::MutableDataPartPtr & dst_part, const DB::IDataPartStorage::ClonePartParams & params, bool require_part_metadata) -{ - /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. - TransactionID tid = params.txn ? params.txn->tid : Tx::PrehistoricTID; - dst_part->version.setCreationTID(tid, nullptr); - dst_part->storeVersionMetadata(); - - dst_part->is_temp = true; - - dst_part->loadColumnsChecksumsIndexes(require_part_metadata, true); - - dst_part->modification_time = dst_part->getDataPartStorage().getLastModified().epochTime(); - - return dst_part; -} - -std::pair cloneAndHandleHardlinksAndProjections( - MergeTreeData * merge_tree_data, - const DataPartPtr & src_part, - const StorageMetadataPtr & metadata_snapshot, - const MergeTreePartInfo & dst_part_info, - const String & tmp_part_prefix, - const ReadSettings & read_settings, - const WriteSettings & write_settings, - const IDataPartStorage::ClonePartParams & params) -{ - if (!doesStoragePolicyAllowSameDisk(merge_tree_data, src_part)) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Could not clone and load part {} because disk does not belong to storage policy", - quoteString(src_part->getDataPartStorage().getFullPath())); - - auto [destination_part, temporary_directory_lock] = cloneSourcePart( - merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params); - - if (!params.copy_instead_of_hardlink && params.hardlinked_files) - { - handleHardLinkedParameterFiles(src_part, params); - handleProjections(src_part, params); - } - - return std::make_pair(destination_part, std::move(temporary_directory_lock)); -} -} - -std::pair MergeTreeDataPartCloner::clone( - MergeTreeData * merge_tree_data, - const DataPartPtr & src_part, - const StorageMetadataPtr & metadata_snapshot, - const MergeTreePartInfo & dst_part_info, - const String & tmp_part_prefix, - bool require_part_metadata, - const IDataPartStorage::ClonePartParams & params, - const ReadSettings & read_settings, - const WriteSettings & write_settings) -{ - auto [destination_part, temporary_directory_lock] = cloneAndHandleHardlinksAndProjections( - merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params); - - return std::make_pair(finalizePart(destination_part, params, require_part_metadata), std::move(temporary_directory_lock)); -} - -std::pair MergeTreeDataPartCloner::cloneWithDistinctPartitionExpression( - MergeTreeData * merge_tree_data, - const DataPartPtr & src_part, - const StorageMetadataPtr & metadata_snapshot, - const MergeTreePartInfo & dst_part_info, - const String & tmp_part_prefix, - const ReadSettings & read_settings, - const WriteSettings & write_settings, - const MergeTreePartition & new_partition, - const IMergeTreeDataPart::MinMaxIndex & new_min_max_index, - bool sync_new_files, - const IDataPartStorage::ClonePartParams & params) -{ - auto [destination_part, temporary_directory_lock] = cloneAndHandleHardlinksAndProjections( - merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params); - - DistinctPartitionExpression::updateNewPartFiles( - *merge_tree_data, destination_part, new_partition, new_min_max_index, src_part->storage.getInMemoryMetadataPtr(), sync_new_files); - - return std::make_pair(finalizePart(destination_part, params, false), std::move(temporary_directory_lock)); -} - -} diff --git a/src/Storages/MergeTree/MergeTreeDataPartCloner.h b/src/Storages/MergeTree/MergeTreeDataPartCloner.h deleted file mode 100644 index 53585f20b7f..00000000000 --- a/src/Storages/MergeTree/MergeTreeDataPartCloner.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -namespace DB -{ - -struct StorageInMemoryMetadata; -using StorageMetadataPtr = std::shared_ptr; -struct MergeTreePartition; -class IMergeTreeDataPart; - -class MergeTreeDataPartCloner -{ -public: - using DataPart = IMergeTreeDataPart; - using MutableDataPartPtr = std::shared_ptr; - using DataPartPtr = std::shared_ptr; - - static std::pair clone( - MergeTreeData * merge_tree_data, - const DataPartPtr & src_part, - const StorageMetadataPtr & metadata_snapshot, - const MergeTreePartInfo & dst_part_info, - const String & tmp_part_prefix, - bool require_part_metadata, - const IDataPartStorage::ClonePartParams & params, - const ReadSettings & read_settings, - const WriteSettings & write_settings); - - static std::pair cloneWithDistinctPartitionExpression( - MergeTreeData * merge_tree_data, - const DataPartPtr & src_part, - const StorageMetadataPtr & metadata_snapshot, - const MergeTreePartInfo & dst_part_info, - const String & tmp_part_prefix, - const ReadSettings & read_settings, - const WriteSettings & write_settings, - const MergeTreePartition & new_partition, - const IMergeTreeDataPart::MinMaxIndex & new_min_max_index, - bool sync_new_files, - const IDataPartStorage::ClonePartParams & params); -}; - -} diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 76ef3be25b3..ddeaf69136a 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -467,45 +467,6 @@ void MergeTreePartition::create(const StorageMetadataPtr & metadata_snapshot, Bl } } -void MergeTreePartition::createAndValidateMinMaxPartitionIds( - const StorageMetadataPtr & metadata_snapshot, Block block_with_min_max_partition_ids, ContextPtr context) -{ - if (!metadata_snapshot->hasPartitionKey()) - return; - - auto partition_key_names_and_types = executePartitionByExpression(metadata_snapshot, block_with_min_max_partition_ids, context); - value.resize(partition_key_names_and_types.size()); - - /// Executing partition_by expression adds new columns to passed block according to partition functions. - /// The block is passed by reference and is used afterwards. `moduloLegacy` needs to be substituted back - /// with just `modulo`, because it was a temporary substitution. - static constexpr std::string_view modulo_legacy_function_name = "moduloLegacy"; - - size_t i = 0; - for (const auto & element : partition_key_names_and_types) - { - auto & partition_column = block_with_min_max_partition_ids.getByName(element.name); - - if (element.name.starts_with(modulo_legacy_function_name)) - partition_column.name.replace(0, modulo_legacy_function_name.size(), "modulo"); - - Field extracted_min_partition_id_field; - Field extracted_max_partition_id_field; - - partition_column.column->get(0, extracted_min_partition_id_field); - partition_column.column->get(1, extracted_max_partition_id_field); - - if (extracted_min_partition_id_field != extracted_max_partition_id_field) - { - throw Exception( - ErrorCodes::INVALID_PARTITION_VALUE, - "Can not create the partition. A partition can not contain values that have different partition ids"); - } - - partition_column.column->get(0u, value[i++]); - } -} - NamesAndTypesList MergeTreePartition::executePartitionByExpression(const StorageMetadataPtr & metadata_snapshot, Block & block, ContextPtr context) { auto adjusted_partition_key = adjustPartitionKey(metadata_snapshot, context); diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index fd7ae02cde4..78b141f26ec 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -1,12 +1,11 @@ #pragma once -#include +#include #include #include #include #include -#include -#include +#include namespace DB { @@ -52,11 +51,6 @@ public: void create(const StorageMetadataPtr & metadata_snapshot, Block block, size_t row, ContextPtr context); - /// Copy of MergeTreePartition::create, but also validates if min max partition keys are equal. If they are different, - /// it means the partition can't be created because the data doesn't belong to the same partition. - void createAndValidateMinMaxPartitionIds( - const StorageMetadataPtr & metadata_snapshot, Block block_with_min_max_partition_ids, ContextPtr context); - static void appendFiles(const MergeTreeData & storage, Strings & files); /// Adjust partition key and execute its expression on block. Return sample block according to used expression. diff --git a/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.cpp b/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.cpp deleted file mode 100644 index 21bcdb84a96..00000000000 --- a/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - -namespace -{ -bool isDestinationPartitionExpressionMonotonicallyIncreasing( - const std::vector & hyperrectangle, const MergeTreeData & destination_storage) -{ - auto destination_table_metadata = destination_storage.getInMemoryMetadataPtr(); - - auto key_description = destination_table_metadata->getPartitionKey(); - auto definition_ast = key_description.definition_ast->clone(); - - auto table_identifier = std::make_shared(destination_storage.getStorageID().getTableName()); - auto table_with_columns - = TableWithColumnNamesAndTypes{DatabaseAndTableWithAlias(table_identifier), destination_table_metadata->getColumns().getOrdinary()}; - - auto expression_list = extractKeyExpressionList(definition_ast); - - MonotonicityCheckVisitor::Data data{{table_with_columns}, destination_storage.getContext(), /*group_by_function_hashes*/ {}}; - - for (auto i = 0u; i < expression_list->children.size(); i++) - { - data.range = hyperrectangle[i]; - - MonotonicityCheckVisitor(data).visit(expression_list->children[i]); - - if (!data.monotonicity.is_monotonic || !data.monotonicity.is_positive) - return false; - } - - return true; -} - -bool isExpressionDirectSubsetOf(const ASTPtr source, const ASTPtr destination) -{ - auto source_expression_list = extractKeyExpressionList(source); - auto destination_expression_list = extractKeyExpressionList(destination); - - std::unordered_set source_columns; - - for (auto i = 0u; i < source_expression_list->children.size(); ++i) - source_columns.insert(source_expression_list->children[i]->getColumnName()); - - for (auto i = 0u; i < destination_expression_list->children.size(); ++i) - if (!source_columns.contains(destination_expression_list->children[i]->getColumnName())) - return false; - - return true; -} -} - -void MergeTreePartitionCompatibilityVerifier::verify( - const MergeTreeData & source_storage, const MergeTreeData & destination_storage, const DataPartsVector & source_parts) -{ - const auto source_metadata = source_storage.getInMemoryMetadataPtr(); - const auto destination_metadata = destination_storage.getInMemoryMetadataPtr(); - - const auto source_partition_key_ast = source_metadata->getPartitionKeyAST(); - const auto destination_partition_key_ast = destination_metadata->getPartitionKeyAST(); - - // If destination partition expression columns are a subset of source partition expression columns, - // there is no need to check for monotonicity. - if (isExpressionDirectSubsetOf(source_partition_key_ast, destination_partition_key_ast)) - return; - - const auto src_global_min_max_indexes = MergeTreePartitionGlobalMinMaxIdxCalculator::calculate(source_parts, destination_storage); - - assert(!src_global_min_max_indexes.hyperrectangle.empty()); - - if (!isDestinationPartitionExpressionMonotonicallyIncreasing(src_global_min_max_indexes.hyperrectangle, destination_storage)) - throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Destination table partition expression is not monotonically increasing"); - - MergeTreePartition().createAndValidateMinMaxPartitionIds( - destination_storage.getInMemoryMetadataPtr(), - src_global_min_max_indexes.getBlock(destination_storage), - destination_storage.getContext()); -} - -} diff --git a/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h b/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h deleted file mode 100644 index 1682add3ebd..00000000000 --- a/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -/* - * Verifies that source and destination partitions are compatible. - * To be compatible, one of the following criteria must be met: - * 1. Destination partition expression columns are a subset of source partition columns; or - * 2. Destination partition expression is monotonic on the source global min_max idx Range AND the computer partition id for - * the source global min_max idx range is the same. - * - * If not, an exception is thrown. - * */ - -class MergeTreePartitionCompatibilityVerifier -{ -public: - using DataPart = IMergeTreeDataPart; - using DataPartPtr = std::shared_ptr; - using DataPartsVector = std::vector; - - static void - verify(const MergeTreeData & source_storage, const MergeTreeData & destination_storage, const DataPartsVector & source_parts); -}; - -} diff --git a/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.cpp b/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.cpp deleted file mode 100644 index 0871efadf0c..00000000000 --- a/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include - -namespace DB -{ - -IMergeTreeDataPart::MinMaxIndex -MergeTreePartitionGlobalMinMaxIdxCalculator::calculate(const DataPartsVector & parts, const MergeTreeData & storage) -{ - IMergeTreeDataPart::MinMaxIndex global_min_max_indexes; - - for (const auto & part : parts) - { - auto metadata_manager = std::make_shared(part.get()); - - auto local_min_max_index = MergeTreeData::DataPart::MinMaxIndex(); - - local_min_max_index.load(storage, metadata_manager); - - global_min_max_indexes.merge(local_min_max_index); - } - - return global_min_max_indexes; -} - -} diff --git a/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h b/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h deleted file mode 100644 index 4f271177246..00000000000 --- a/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include - -#include -#include - -namespace DB -{ - -/* - * Calculates global min max indexes for a given set of parts on given storage. - * */ -class MergeTreePartitionGlobalMinMaxIdxCalculator -{ - using DataPart = IMergeTreeDataPart; - using DataPartPtr = std::shared_ptr; - using DataPartsVector = std::vector; - -public: - static IMergeTreeDataPart::MinMaxIndex calculate(const DataPartsVector & parts, const MergeTreeData & storage); -}; - -} diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index fd5354a00a9..4761ccd8b58 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -5,9 +5,9 @@ #include #include +#include #include #include -#include #include "Common/Exception.h" #include #include @@ -20,30 +20,25 @@ #include #include #include -#include #include +#include #include #include #include #include #include -#include #include -#include +#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -2044,74 +2039,42 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con ProfileEventsScope profile_events_scope; MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, my_metadata_snapshot); - String partition_id = src_data.getPartitionIDFromQuery(partition, local_context); + String partition_id = getPartitionIDFromQuery(partition, local_context); DataPartsVector src_parts = src_data.getVisibleDataPartsVectorInPartition(local_context, partition_id); - - bool attach_empty_partition = !replace && src_parts.empty(); - if (attach_empty_partition) - return; - MutableDataPartsVector dst_parts; std::vector dst_parts_locks; static const String TMP_PREFIX = "tmp_replace_from_"; - const auto my_partition_expression = my_metadata_snapshot->getPartitionKeyAST(); - const auto src_partition_expression = source_metadata_snapshot->getPartitionKeyAST(); - const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression); - - if (is_partition_exp_different && !src_parts.empty()) - MergeTreePartitionCompatibilityVerifier::verify(src_data, /* destination_storage */ *this, src_parts); - - for (DataPartPtr & src_part : src_parts) + for (const DataPartPtr & src_part : src_parts) { if (!canReplacePartition(src_part)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot replace partition '{}' because part '{}' has inconsistent granularity with table", partition_id, src_part->name); - IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; /// This will generate unique name in scope of current server process. - auto index = insert_increment.get(); + Int64 temp_index = insert_increment.get(); + MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); - if (is_partition_exp_different) - { - auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart( - src_part, my_metadata_snapshot, local_context); - - auto [dst_part, part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( - src_part, - new_partition, - new_partition.getID(*this), - new_min_max_index, - TMP_PREFIX, - my_metadata_snapshot, - clone_params, - local_context, - index, - index); - - dst_parts.emplace_back(std::move(dst_part)); - dst_parts_locks.emplace_back(std::move(part_lock)); - } - else - { - MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( - src_part, - TMP_PREFIX, - dst_part_info, - my_metadata_snapshot, - clone_params, - local_context->getReadSettings(), - local_context->getWriteSettings()); - dst_parts.emplace_back(std::move(dst_part)); - dst_parts_locks.emplace_back(std::move(part_lock)); - } + IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( + src_part, + TMP_PREFIX, + dst_part_info, + my_metadata_snapshot, + clone_params, + local_context->getReadSettings(), + local_context->getWriteSettings()); + dst_parts.emplace_back(std::move(dst_part)); + dst_parts_locks.emplace_back(std::move(part_lock)); } + /// ATTACH empty part set + if (!replace && dst_parts.empty()) + return; + MergeTreePartInfo drop_range; if (replace) { diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 512811e39d7..f7e6783dbc2 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -26,21 +26,22 @@ #include +#include #include #include #include #include #include +#include #include #include +#include #include #include #include #include #include -#include #include -#include #include #include #include @@ -52,11 +53,9 @@ #include #include #include -#include #include #include #include -#include #include #include @@ -2714,48 +2713,16 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || ((our_zero_copy_enabled || source_zero_copy_enabled) && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - - const auto my_partition_expression = metadata_snapshot->getPartitionKeyAST(); - const auto src_partition_expression = source_table->getInMemoryMetadataPtr()->getPartitionKeyAST(); - - const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression); - - if (is_partition_exp_different) - { - auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart( - part_desc->src_table_part, metadata_snapshot, getContext()); - - auto partition_id = new_partition.getID(*this); - - auto [res_part, temporary_part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( - part_desc->src_table_part, - new_partition, - partition_id, - new_min_max_index, - TMP_PREFIX + "clone_", - metadata_snapshot, - clone_params, - getContext(), - part_desc->new_part_info.min_block, - part_desc->new_part_info.max_block); - - part_desc->res_part = std::move(res_part); - part_desc->temporary_part_lock = std::move(temporary_part_lock); - } - else - { - auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( - part_desc->src_table_part, - TMP_PREFIX + "clone_", - part_desc->new_part_info, - metadata_snapshot, - clone_params, - getContext()->getReadSettings(), - getContext()->getWriteSettings()); - - part_desc->res_part = std::move(res_part); - part_desc->temporary_part_lock = std::move(temporary_part_lock); - } + auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( + part_desc->src_table_part, + TMP_PREFIX + "clone_", + part_desc->new_part_info, + metadata_snapshot, + clone_params, + getContext()->getReadSettings(), + getContext()->getWriteSettings()); + part_desc->res_part = std::move(res_part); + part_desc->temporary_part_lock = std::move(temporary_part_lock); } else if (!part_desc->replica.empty()) { @@ -7885,22 +7852,11 @@ void StorageReplicatedMergeTree::replacePartitionFrom( ProfileEventsScope profile_events_scope; MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot); - String partition_id = src_data.getPartitionIDFromQuery(partition, query_context); + String partition_id = getPartitionIDFromQuery(partition, query_context); /// NOTE: Some covered parts may be missing in src_all_parts if corresponding log entries are not executed yet. DataPartsVector src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id); - bool attach_empty_partition = !replace && src_all_parts.empty(); - if (attach_empty_partition) - return; - - const auto my_partition_expression = metadata_snapshot->getPartitionKeyAST(); - const auto src_partition_expression = source_metadata_snapshot->getPartitionKeyAST(); - const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression); - - if (is_partition_exp_different && !src_all_parts.empty()) - MergeTreePartitionCompatibilityVerifier::verify(src_data, /* destination_storage */ *this, src_all_parts); - LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size()); static const String TMP_PREFIX = "tmp_replace_from_"; @@ -7955,18 +7911,6 @@ void StorageReplicatedMergeTree::replacePartitionFrom( "Cannot replace partition '{}' because part '{}" "' has inconsistent granularity with table", partition_id, src_part->name); - IMergeTreeDataPart::MinMaxIndex min_max_index = *src_part->minmax_idx; - MergeTreePartition merge_tree_partition = src_part->partition; - - if (is_partition_exp_different) - { - auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart(src_part, metadata_snapshot, query_context); - - merge_tree_partition = new_partition; - min_max_index = new_min_max_index; - partition_id = merge_tree_partition.getID(*this); - } - String hash_hex = src_part->checksums.getTotalChecksumHex(); const bool is_duplicated_part = replaced_parts.contains(hash_hex); replaced_parts.insert(hash_hex); @@ -7985,52 +7929,27 @@ void StorageReplicatedMergeTree::replacePartitionFrom( continue; } + UInt64 index = lock->getNumber(); + MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); + bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; - - UInt64 index = lock->getNumber(); - IDataPartStorage::ClonePartParams clone_params { .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - - if (is_partition_exp_different) - { - auto [dst_part, part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( - src_part, - merge_tree_partition, - partition_id, - min_max_index, - TMP_PREFIX, - metadata_snapshot, - clone_params, - query_context, - index, - index); - - dst_parts.emplace_back(dst_part); - dst_parts_locks.emplace_back(std::move(part_lock)); - } - else - { - MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( - src_part, - TMP_PREFIX, - dst_part_info, - metadata_snapshot, - clone_params, - query_context->getReadSettings(), - query_context->getWriteSettings()); - - dst_parts.emplace_back(dst_part); - dst_parts_locks.emplace_back(std::move(part_lock)); - } - + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( + src_part, + TMP_PREFIX, + dst_part_info, + metadata_snapshot, + clone_params, + query_context->getReadSettings(), + query_context->getWriteSettings()); src_parts.emplace_back(src_part); + dst_parts.emplace_back(dst_part); + dst_parts_locks.emplace_back(std::move(part_lock)); ephemeral_locks.emplace_back(std::move(*lock)); block_id_paths.emplace_back(block_id_path); part_checksums.emplace_back(hash_hex); diff --git a/tests/integration/test_attach_partition_distinct_expression_replicated/__init__.py b/tests/integration/test_attach_partition_distinct_expression_replicated/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_attach_partition_distinct_expression_replicated/configs/remote_servers.xml b/tests/integration/test_attach_partition_distinct_expression_replicated/configs/remote_servers.xml deleted file mode 100644 index b40730e9f7d..00000000000 --- a/tests/integration/test_attach_partition_distinct_expression_replicated/configs/remote_servers.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - true - - replica1 - 9000 - - - replica2 - 9000 - - - - - diff --git a/tests/integration/test_attach_partition_distinct_expression_replicated/test.py b/tests/integration/test_attach_partition_distinct_expression_replicated/test.py deleted file mode 100644 index 1d8ac4e9e37..00000000000 --- a/tests/integration/test_attach_partition_distinct_expression_replicated/test.py +++ /dev/null @@ -1,214 +0,0 @@ -import pytest -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry - -cluster = ClickHouseCluster(__file__) - -replica1 = cluster.add_instance( - "replica1", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] -) -replica2 = cluster.add_instance( - "replica2", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] -) - - -@pytest.fixture(scope="module") -def start_cluster(): - try: - cluster.start() - yield cluster - except Exception as ex: - print(ex) - finally: - cluster.shutdown() - - -def cleanup(nodes): - for node in nodes: - node.query("DROP TABLE IF EXISTS source SYNC") - node.query("DROP TABLE IF EXISTS destination SYNC") - - -def create_table(node, table_name, replicated): - replica = node.name - engine = ( - f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')" - if replicated - else "MergeTree()" - ) - partition_expression = ( - "toYYYYMMDD(timestamp)" if table_name == "source" else "toYYYYMM(timestamp)" - ) - node.query_with_retry( - """ - CREATE TABLE {table_name}(timestamp DateTime) - ENGINE = {engine} - ORDER BY tuple() PARTITION BY {partition_expression} - SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; - """.format( - table_name=table_name, - engine=engine, - partition_expression=partition_expression, - ) - ) - - -def test_both_replicated(start_cluster): - for node in [replica1, replica2]: - create_table(node, "source", True) - create_table(node, "destination", True) - - replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')") - replica1.query("SYSTEM SYNC REPLICA source") - replica1.query("SYSTEM SYNC REPLICA destination") - replica1.query( - f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source" - ) - - assert_eq_with_retry( - replica1, f"SELECT * FROM destination", "2010-03-02 02:01:01\n" - ) - assert_eq_with_retry( - replica1, - f"SELECT * FROM destination", - replica2.query(f"SELECT * FROM destination"), - ) - - cleanup([replica1, replica2]) - - -def test_only_destination_replicated(start_cluster): - create_table(replica1, "source", False) - create_table(replica1, "destination", True) - create_table(replica2, "destination", True) - - replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')") - replica1.query("SYSTEM SYNC REPLICA destination") - replica1.query( - f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source" - ) - - assert_eq_with_retry( - replica1, f"SELECT * FROM destination", "2010-03-02 02:01:01\n" - ) - assert_eq_with_retry( - replica1, - f"SELECT * FROM destination", - replica2.query(f"SELECT * FROM destination"), - ) - - cleanup([replica1, replica2]) - - -def test_both_replicated_partitioned_to_unpartitioned(start_cluster): - def create_tables(nodes): - for node in nodes: - source_engine = ( - f"ReplicatedMergeTree('/clickhouse/tables/1/source', '{node.name}')" - ) - node.query( - """ - CREATE TABLE source(timestamp DateTime) - ENGINE = {engine} - ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp) - SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; - """.format( - engine=source_engine, - ) - ) - - destination_engine = f"ReplicatedMergeTree('/clickhouse/tables/1/destination', '{node.name}')" - node.query( - """ - CREATE TABLE destination(timestamp DateTime) - ENGINE = {engine} - ORDER BY tuple() PARTITION BY tuple() - SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; - """.format( - engine=destination_engine, - ) - ) - - create_tables([replica1, replica2]) - - replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')") - replica1.query("INSERT INTO source VALUES ('2010-03-03 02:01:01')") - replica1.query("SYSTEM SYNC REPLICA source") - replica1.query("SYSTEM SYNC REPLICA destination") - - replica1.query( - f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source" - ) - replica1.query( - f"ALTER TABLE destination ATTACH PARTITION ID '20100303' FROM source" - ) - - assert_eq_with_retry( - replica1, - f"SELECT * FROM destination ORDER BY timestamp", - "2010-03-02 02:01:01\n2010-03-03 02:01:01\n", - ) - assert_eq_with_retry( - replica1, - f"SELECT * FROM destination ORDER BY timestamp", - replica2.query(f"SELECT * FROM destination ORDER BY timestamp"), - ) - - cleanup([replica1, replica2]) - - -def test_both_replicated_different_exp_same_id(start_cluster): - def create_tables(nodes): - for node in nodes: - source_engine = ( - f"ReplicatedMergeTree('/clickhouse/tables/1/source', '{node.name}')" - ) - node.query( - """ - CREATE TABLE source(a UInt16,b UInt16,c UInt16,extra UInt64,Path String,Time DateTime,Value Float64,Timestamp Int64,sign Int8) - ENGINE = {engine} - ORDER BY tuple() PARTITION BY a % 3 - SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; - """.format( - engine=source_engine, - ) - ) - - destination_engine = f"ReplicatedMergeTree('/clickhouse/tables/1/destination', '{node.name}')" - node.query( - """ - CREATE TABLE destination(a UInt16,b UInt16,c UInt16,extra UInt64,Path String,Time DateTime,Value Float64,Timestamp Int64,sign Int8) - ENGINE = {engine} - ORDER BY tuple() PARTITION BY a - SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; - """.format( - engine=destination_engine, - ) - ) - - create_tables([replica1, replica2]) - - replica1.query( - "INSERT INTO source (a, b, c, extra, sign) VALUES (1, 5, 9, 1000, 1)" - ) - replica1.query( - "INSERT INTO source (a, b, c, extra, sign) VALUES (2, 6, 10, 1000, 1)" - ) - replica1.query("SYSTEM SYNC REPLICA source") - replica1.query("SYSTEM SYNC REPLICA destination") - - replica1.query(f"ALTER TABLE destination ATTACH PARTITION 1 FROM source") - replica1.query(f"ALTER TABLE destination ATTACH PARTITION 2 FROM source") - - assert_eq_with_retry( - replica1, - f"SELECT * FROM destination ORDER BY a", - "1\t5\t9\t1000\t\t1970-01-01 00:00:00\t0\t0\t1\n2\t6\t10\t1000\t\t1970-01-01 00:00:00\t0\t0\t1\n", - ) - assert_eq_with_retry( - replica1, - f"SELECT * FROM destination ORDER BY a", - replica2.query(f"SELECT * FROM destination ORDER BY a"), - ) - - cleanup([replica1, replica2]) diff --git a/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.reference b/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.reference deleted file mode 100644 index f1d036b08bf..00000000000 --- a/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.reference +++ /dev/null @@ -1,467 +0,0 @@ --- { echoOn } --- Should be allowed since destination partition expr is monotonically increasing and compatible -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); -ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -201003 -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION '20100302' FROM source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -201003 --- Should be allowed since destination partition expr is monotonically increasing and compatible. Note that even though --- the destination partition expression is more granular, the data would still fall in the same partition. Thus, it is valid -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); -ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -20100302 -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION '201003' FROM source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -20100302 --- Should be allowed since destination partition expr is monotonically increasing and compatible for those specific values -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6); -CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A; -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 1); -ALTER TABLE destination ATTACH PARTITION ID '0' FROM source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 1 -2010-03-02 02:01:03 1 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 1 -2010-03-02 02:01:03 1 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -1 -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION 0 FROM source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 1 -2010-03-02 02:01:03 1 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 1 -2010-03-02 02:01:03 1 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -1 --- Should be allowed because dst partition exp is monot inc and data is not split -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(category); -CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); -INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); -INSERT INTO TABLE source VALUES ('rice', 'food'); -ALTER TABLE destination ATTACH PARTITION ID '17908065610379824077' from source; -SELECT * FROM source ORDER BY productName; -mop general -rice food -spaghetti food -SELECT * FROM destination ORDER BY productName; -rice food -spaghetti food -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -59532f3c39a412a413f0f014c7750a9d -59532f3c39a412a413f0f014c7750a9d -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION '17908065610379824077' from source; -SELECT * FROM source ORDER BY productName; -mop general -rice food -spaghetti food -SELECT * FROM destination ORDER BY productName; -rice food -spaghetti food -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -59532f3c39a412a413f0f014c7750a9d -59532f3c39a412a413f0f014c7750a9d --- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747574133 - -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY intDiv(timestamp, 86400000); -CREATE TABLE destination (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY toYear(toDateTime(intDiv(timestamp, 1000))); -INSERT INTO TABLE source VALUES (1267495261123); -ALTER TABLE destination ATTACH PARTITION ID '14670' FROM source; -SELECT * FROM source ORDER BY timestamp; -1267495261123 -SELECT * FROM destination ORDER BY timestamp; -1267495261123 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -2010 -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION '14670' from source; -SELECT * FROM source ORDER BY timestamp; -1267495261123 -SELECT * FROM destination ORDER BY timestamp; -1267495261123 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -2010 --- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747511726 - -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY toYear(timestamp); -CREATE TABLE destination (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY (intDiv(toUInt32(timestamp),86400)); -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01',1,1),('2010-03-02 02:01:01',1,1),('2011-02-02 02:01:03',1,1); -ALTER TABLE destination ATTACH PARTITION ID '2010' FROM source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 1 1 -2010-03-02 02:01:01 1 1 -2011-02-02 02:01:03 1 1 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 1 1 -2010-03-02 02:01:01 1 1 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -14670 -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION '2010' from source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 1 1 -2010-03-02 02:01:01 1 1 -2011-02-02 02:01:03 1 1 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 1 1 -2010-03-02 02:01:01 1 1 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -14670 --- Should be allowed, partitioned table to unpartitioned. Since the destination is unpartitioned, parts would ultimately --- fall into the same partition. --- Destination partition by expression is omitted, which causes StorageMetadata::getPartitionKeyAST() to be nullptr. -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple(); -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); -ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -all -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION '201003' from source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -all --- Same as above, but destination partition by expression is explicitly defined. Test case required to validate that --- partition by tuple() is accepted. -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY tuple(); -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); -ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -all -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION '201003' from source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -all --- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns --- Columns in this case refer to the expression elements, not to the actual table columns -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c); -CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b); -INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4); -ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source; -SELECT * FROM source ORDER BY (a, b, c); -1 2 3 -1 2 4 -SELECT * FROM destination ORDER BY (a, b, c); -1 2 3 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -1-2 -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source; -SELECT * FROM source ORDER BY (a, b, c); -1 2 3 -1 2 4 -SELECT * FROM destination ORDER BY (a, b, c); -1 2 3 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -1-2 --- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns --- Columns in this case refer to the expression elements, not to the actual table columns -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c); -CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY a; -INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4); -ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source; -SELECT * FROM source ORDER BY (a, b, c); -1 2 3 -1 2 4 -SELECT * FROM destination ORDER BY (a, b, c); -1 2 3 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -1 -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source; -SELECT * FROM source ORDER BY (a, b, c); -1 2 3 -1 2 4 -SELECT * FROM destination ORDER BY (a, b, c); -1 2 3 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -1 --- Should be allowed. Special test case, tricky to explain. First column of source partition expression is --- timestamp, while first column of destination partition expression is `A`. One of the previous implementations --- would not match the columns, which could lead to `timestamp` min max being used to calculate monotonicity of `A`. -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY tuple(toYYYYMM(timestamp), intDiv(A, 6)) ORDER BY timestamp; -CREATE TABLE destination (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY A ORDER BY timestamp; -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 5); -ALTER TABLE destination ATTACH PARTITION ID '201003-0' FROM source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 5 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 5 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -5 -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION (201003, 0) from source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 5 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 5 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -5 --- Should be allowed. Destination partition expression contains multiple expressions, but all of them are monotonically --- increasing in the source partition min max indexes. -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple(); -CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple(); -INSERT INTO TABLE source VALUES (6, 12); -ALTER TABLE destination ATTACH PARTITION ID '6-12' FROM source; -SELECT * FROM source ORDER BY A; -6 12 -SELECT * FROM destination ORDER BY A; -6 12 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -3-6 -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION (6, 12) from source; -SELECT * FROM source ORDER BY A; -6 12 -SELECT * FROM destination ORDER BY A; -6 12 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -3-6 --- Should be allowed. The same scenario as above, but partition expressions inverted. -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple(); -CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple(); -INSERT INTO TABLE source VALUES (6, 12); -ALTER TABLE destination ATTACH PARTITION ID '3-6' FROM source; -SELECT * FROM source ORDER BY A; -6 12 -SELECT * FROM destination ORDER BY A; -6 12 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -6-12 -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION (3, 6) from source; -SELECT * FROM source ORDER BY A; -6 12 -SELECT * FROM destination ORDER BY A; -6 12 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -6-12 --- Should be allowed, it is a local operation, no different than regular attach. Replicated to replicated. -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE - source(timestamp DateTime) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/source_replicated_to_replicated_distinct_expression', '1') - PARTITION BY toYYYYMMDD(timestamp) - ORDER BY tuple(); -CREATE TABLE - destination(timestamp DateTime) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_replicated_to_replicated_distinct_expression', '1') - PARTITION BY toYYYYMM(timestamp) - ORDER BY tuple(); -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); -ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -201003 -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION '20100302' from source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -201003 --- Should be allowed, it is a local operation, no different than regular attach. Non replicated to replicated -DROP TABLE IF EXISTS source SYNC; -DROP TABLE IF EXISTS destination SYNC; -CREATE TABLE source(timestamp DateTime) ENGINE = MergeTree() PARTITION BY toYYYYMMDD(timestamp) ORDER BY tuple(); -CREATE TABLE - destination(timestamp DateTime) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_non_replicated_to_replicated_distinct_expression', '1') - PARTITION BY toYYYYMM(timestamp) - ORDER BY tuple(); -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); -ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -201003 -TRUNCATE TABLE destination; -ALTER TABLE destination ATTACH PARTITION '20100302' from source; -SELECT * FROM source ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT * FROM destination ORDER BY timestamp; -2010-03-02 02:01:01 -2010-03-02 02:01:03 -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; -201003 --- Should not be allowed because data would be split into two different partitions -DROP TABLE IF EXISTS source SYNC; -DROP TABLE IF EXISTS destination SYNC; -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-03 02:01:03'); -ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; -- { serverError 248 } -ALTER TABLE destination ATTACH PARTITION '201003' from source; -- { serverError 248 } --- Should not be allowed because data would be split into two different partitions -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6); -CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A; -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 2); -ALTER TABLE destination ATTACH PARTITION ID '0' FROM source; -- { serverError 248 } -ALTER TABLE destination ATTACH PARTITION 0 FROM source; -- { serverError 248 } --- Should not be allowed because dst partition exp takes more than two arguments, so it's not considered monotonically inc -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); -CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY substring(category, 1, 2); -INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); -INSERT INTO TABLE source VALUES ('rice', 'food'); -ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 } -ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 } --- Should not be allowed because dst partition exp depends on a different set of columns -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); -CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(productName); -INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); -INSERT INTO TABLE source VALUES ('rice', 'food'); -ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 } -ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 } --- Should not be allowed because dst partition exp is not monotonically increasing -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY left(productName, 2); -CREATE TABLE destination (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(productName); -INSERT INTO TABLE source VALUES ('bread'), ('mop'); -INSERT INTO TABLE source VALUES ('broccoli'); -ALTER TABLE destination ATTACH PARTITION ID '4589453b7ee96ce9de1265bd57674496' from source; -- { serverError 36 } -ALTER TABLE destination ATTACH PARTITION 'br' from source; -- { serverError 36 } --- Empty/ non-existent partition, same partition expression. Nothing should happen -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -ALTER TABLE destination ATTACH PARTITION ID '1' FROM source; -ALTER TABLE destination ATTACH PARTITION 1 FROM source; -SELECT * FROM destination; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; --- Empty/ non-existent partition, different partition expression. Nothing should happen --- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -ALTER TABLE destination ATTACH PARTITION ID '1' FROM source; -ALTER TABLE destination ATTACH PARTITION 1 FROM source; -SELECT * FROM destination; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; --- Replace instead of attach. Empty/ non-existent partition, same partition expression. Nothing should happen --- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -ALTER TABLE destination REPLACE PARTITION '1' FROM source; -SELECT * FROM destination; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; --- Replace instead of attach. Empty/ non-existent partition to non-empty partition, same partition id. --- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A; -CREATE TABLE destination (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A; -INSERT INTO TABLE destination VALUES (1); -ALTER TABLE destination REPLACE PARTITION '1' FROM source; -SELECT * FROM destination; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; diff --git a/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.sql b/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.sql deleted file mode 100644 index 9547d6ae249..00000000000 --- a/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.sql +++ /dev/null @@ -1,485 +0,0 @@ --- { echoOn } --- Should be allowed since destination partition expr is monotonically increasing and compatible -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); - -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); - -ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION '20100302' FROM source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should be allowed since destination partition expr is monotonically increasing and compatible. Note that even though --- the destination partition expression is more granular, the data would still fall in the same partition. Thus, it is valid -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); - -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); - -ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION '201003' FROM source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should be allowed since destination partition expr is monotonically increasing and compatible for those specific values -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6); - -CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A; - -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 1); - -ALTER TABLE destination ATTACH PARTITION ID '0' FROM source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION 0 FROM source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should be allowed because dst partition exp is monot inc and data is not split -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(category); -CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); - -INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); -INSERT INTO TABLE source VALUES ('rice', 'food'); - -ALTER TABLE destination ATTACH PARTITION ID '17908065610379824077' from source; - -SELECT * FROM source ORDER BY productName; -SELECT * FROM destination ORDER BY productName; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION '17908065610379824077' from source; - -SELECT * FROM source ORDER BY productName; -SELECT * FROM destination ORDER BY productName; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747574133 - -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY intDiv(timestamp, 86400000); -CREATE TABLE destination (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY toYear(toDateTime(intDiv(timestamp, 1000))); - -INSERT INTO TABLE source VALUES (1267495261123); - -ALTER TABLE destination ATTACH PARTITION ID '14670' FROM source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION '14670' from source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747511726 - -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY toYear(timestamp); -CREATE TABLE destination (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY (intDiv(toUInt32(timestamp),86400)); - -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01',1,1),('2010-03-02 02:01:01',1,1),('2011-02-02 02:01:03',1,1); - -ALTER TABLE destination ATTACH PARTITION ID '2010' FROM source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION '2010' from source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should be allowed, partitioned table to unpartitioned. Since the destination is unpartitioned, parts would ultimately --- fall into the same partition. --- Destination partition by expression is omitted, which causes StorageMetadata::getPartitionKeyAST() to be nullptr. -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple(); - -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); - -ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION '201003' from source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Same as above, but destination partition by expression is explicitly defined. Test case required to validate that --- partition by tuple() is accepted. -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY tuple(); - -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); - -ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION '201003' from source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns --- Columns in this case refer to the expression elements, not to the actual table columns -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c); -CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b); - -INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4); - -ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source; - -SELECT * FROM source ORDER BY (a, b, c); -SELECT * FROM destination ORDER BY (a, b, c); -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source; - -SELECT * FROM source ORDER BY (a, b, c); -SELECT * FROM destination ORDER BY (a, b, c); -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns --- Columns in this case refer to the expression elements, not to the actual table columns -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c); -CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY a; - -INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4); - -ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source; - -SELECT * FROM source ORDER BY (a, b, c); -SELECT * FROM destination ORDER BY (a, b, c); -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source; - -SELECT * FROM source ORDER BY (a, b, c); -SELECT * FROM destination ORDER BY (a, b, c); -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should be allowed. Special test case, tricky to explain. First column of source partition expression is --- timestamp, while first column of destination partition expression is `A`. One of the previous implementations --- would not match the columns, which could lead to `timestamp` min max being used to calculate monotonicity of `A`. -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY tuple(toYYYYMM(timestamp), intDiv(A, 6)) ORDER BY timestamp; -CREATE TABLE destination (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY A ORDER BY timestamp; - -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 5); - -ALTER TABLE destination ATTACH PARTITION ID '201003-0' FROM source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION (201003, 0) from source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should be allowed. Destination partition expression contains multiple expressions, but all of them are monotonically --- increasing in the source partition min max indexes. -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple(); -CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple(); - -INSERT INTO TABLE source VALUES (6, 12); - -ALTER TABLE destination ATTACH PARTITION ID '6-12' FROM source; - -SELECT * FROM source ORDER BY A; -SELECT * FROM destination ORDER BY A; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION (6, 12) from source; - -SELECT * FROM source ORDER BY A; -SELECT * FROM destination ORDER BY A; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should be allowed. The same scenario as above, but partition expressions inverted. -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple(); -CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple(); - -INSERT INTO TABLE source VALUES (6, 12); - -ALTER TABLE destination ATTACH PARTITION ID '3-6' FROM source; - -SELECT * FROM source ORDER BY A; -SELECT * FROM destination ORDER BY A; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION (3, 6) from source; - -SELECT * FROM source ORDER BY A; -SELECT * FROM destination ORDER BY A; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should be allowed, it is a local operation, no different than regular attach. Replicated to replicated. -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; -CREATE TABLE - source(timestamp DateTime) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/source_replicated_to_replicated_distinct_expression', '1') - PARTITION BY toYYYYMMDD(timestamp) - ORDER BY tuple(); - -CREATE TABLE - destination(timestamp DateTime) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_replicated_to_replicated_distinct_expression', '1') - PARTITION BY toYYYYMM(timestamp) - ORDER BY tuple(); - -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); - -ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION '20100302' from source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should be allowed, it is a local operation, no different than regular attach. Non replicated to replicated -DROP TABLE IF EXISTS source SYNC; -DROP TABLE IF EXISTS destination SYNC; -CREATE TABLE source(timestamp DateTime) ENGINE = MergeTree() PARTITION BY toYYYYMMDD(timestamp) ORDER BY tuple(); - -CREATE TABLE - destination(timestamp DateTime) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_non_replicated_to_replicated_distinct_expression', '1') - PARTITION BY toYYYYMM(timestamp) - ORDER BY tuple(); - -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); - -ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - -TRUNCATE TABLE destination; - -ALTER TABLE destination ATTACH PARTITION '20100302' from source; - -SELECT * FROM source ORDER BY timestamp; -SELECT * FROM destination ORDER BY timestamp; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Should not be allowed because data would be split into two different partitions -DROP TABLE IF EXISTS source SYNC; -DROP TABLE IF EXISTS destination SYNC; - -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); - -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-03 02:01:03'); - -ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; -- { serverError 248 } -ALTER TABLE destination ATTACH PARTITION '201003' from source; -- { serverError 248 } - --- Should not be allowed because data would be split into two different partitions -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6); - -CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A; - -INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 2); - -ALTER TABLE destination ATTACH PARTITION ID '0' FROM source; -- { serverError 248 } -ALTER TABLE destination ATTACH PARTITION 0 FROM source; -- { serverError 248 } - --- Should not be allowed because dst partition exp takes more than two arguments, so it's not considered monotonically inc -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); -CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY substring(category, 1, 2); - -INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); -INSERT INTO TABLE source VALUES ('rice', 'food'); - -ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 } -ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 } - --- Should not be allowed because dst partition exp depends on a different set of columns -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); -CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(productName); - -INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); -INSERT INTO TABLE source VALUES ('rice', 'food'); - -ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 } -ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 } - --- Should not be allowed because dst partition exp is not monotonically increasing -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY left(productName, 2); -CREATE TABLE destination (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(productName); - -INSERT INTO TABLE source VALUES ('bread'), ('mop'); -INSERT INTO TABLE source VALUES ('broccoli'); - -ALTER TABLE destination ATTACH PARTITION ID '4589453b7ee96ce9de1265bd57674496' from source; -- { serverError 36 } -ALTER TABLE destination ATTACH PARTITION 'br' from source; -- { serverError 36 } - --- Empty/ non-existent partition, same partition expression. Nothing should happen -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); - -ALTER TABLE destination ATTACH PARTITION ID '1' FROM source; -ALTER TABLE destination ATTACH PARTITION 1 FROM source; - -SELECT * FROM destination; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Empty/ non-existent partition, different partition expression. Nothing should happen --- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); - -ALTER TABLE destination ATTACH PARTITION ID '1' FROM source; -ALTER TABLE destination ATTACH PARTITION 1 FROM source; - -SELECT * FROM destination; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Replace instead of attach. Empty/ non-existent partition, same partition expression. Nothing should happen --- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); -CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); - -ALTER TABLE destination REPLACE PARTITION '1' FROM source; - -SELECT * FROM destination; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; - --- Replace instead of attach. Empty/ non-existent partition to non-empty partition, same partition id. --- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 -DROP TABLE IF EXISTS source; -DROP TABLE IF EXISTS destination; - -CREATE TABLE source (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A; -CREATE TABLE destination (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A; - -INSERT INTO TABLE destination VALUES (1); - -ALTER TABLE destination REPLACE PARTITION '1' FROM source; - -SELECT * FROM destination; -SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1;