From 9c8afbeb5394f0b38aed5451ef1d0bba0f98cc5e Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 1 Mar 2021 12:59:19 +0300 Subject: [PATCH 1/4] Fix alter modify query for partition key and other metadata fields --- src/Storages/MergeTree/MergeTreeData.cpp | 13 ++-- src/Storages/StorageReplicatedMergeTree.cpp | 54 ++++++++++++---- ...ter_partition_key_enum_zookeeper.reference | 6 ++ ...747_alter_partition_key_enum_zookeeper.sql | 63 +++++++++++++++++++ 4 files changed, 119 insertions(+), 17 deletions(-) create mode 100644 tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.reference create mode 100644 tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d9e24581c0c..0cedae9a2f4 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -165,7 +165,7 @@ MergeTreeData::MergeTreeData( { try { - checkPartitionKeyAndInitMinMax(metadata_.partition_key); + setProperties(metadata_, metadata_, attach); if (minmax_idx_date_column_pos == -1) throw Exception("Could not find Date column", ErrorCodes::BAD_TYPE_OF_FIELD); } @@ -179,12 +179,10 @@ MergeTreeData::MergeTreeData( else { is_custom_partitioned = true; - checkPartitionKeyAndInitMinMax(metadata_.partition_key); + setProperties(metadata_, metadata_, attach); min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } - setProperties(metadata_, metadata_, attach); - /// NOTE: using the same columns list as is read when performing actual merges. merging_params.check(metadata_); @@ -398,6 +396,7 @@ void MergeTreeData::checkProperties( void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach) { checkProperties(new_metadata, old_metadata, attach); + checkPartitionKeyAndInitMinMax(new_metadata.partition_key); setInMemoryMetadata(new_metadata); } @@ -440,6 +439,12 @@ void MergeTreeData::checkPartitionKeyAndInitMinMax(const KeyDescription & new_pa checkKeyExpression(*new_partition_key.expression, new_partition_key.sample_block, "Partition", allow_nullable_key); + /// Reset filled fields + minmax_idx_columns.clear(); + minmax_idx_column_types.clear(); + minmax_idx_date_column_pos = -1; + minmax_idx_time_column_pos = -1; + /// Add all columns used in the partition key to the min-max index. const NamesAndTypesList & minmax_idx_columns_with_types = new_partition_key.expression->getRequiredColumnsWithTypes(); minmax_idx_expr = std::make_shared(std::make_shared(minmax_idx_columns_with_types)); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 1a99f320365..c8b91f9533a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -897,21 +897,8 @@ void StorageReplicatedMergeTree::setTableStructure( StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); if (new_columns != new_metadata.columns) - { new_metadata.columns = new_columns; - new_metadata.column_ttls_by_name.clear(); - for (const auto & [name, ast] : new_metadata.columns.getColumnTTLs()) - { - auto new_ttl_entry = TTLDescription::getTTLFromAST(ast, new_metadata.columns, global_context, new_metadata.primary_key); - new_metadata.column_ttls_by_name[name] = new_ttl_entry; - } - - /// The type of partition key expression may change - if (new_metadata.partition_key.definition_ast != nullptr) - new_metadata.partition_key.recalculateWithNewColumns(new_metadata.columns, global_context); - } - if (!metadata_diff.empty()) { auto parse_key_expr = [] (const String & key_expr) @@ -977,6 +964,47 @@ void StorageReplicatedMergeTree::setTableStructure( } } + /// Changes in columns may affect following metadata fields + if (new_metadata.columns != old_metadata.columns) + { + new_metadata.column_ttls_by_name.clear(); + for (const auto & [name, ast] : new_metadata.columns.getColumnTTLs()) + { + auto new_ttl_entry = TTLDescription::getTTLFromAST(ast, new_metadata.columns, global_context, new_metadata.primary_key); + new_metadata.column_ttls_by_name[name] = new_ttl_entry; + } + + if (new_metadata.partition_key.definition_ast != nullptr) + new_metadata.partition_key.recalculateWithNewColumns(new_metadata.columns, global_context); + + if (!metadata_diff.sorting_key_changed) /// otherwise already updated + new_metadata.sorting_key.recalculateWithNewColumns(new_metadata.columns, global_context); + + /// Primary key is special, it exists even if not defined + if (new_metadata.primary_key.definition_ast != nullptr) + { + new_metadata.primary_key.recalculateWithNewColumns(new_metadata.columns, global_context); + } + else + { + new_metadata.primary_key = KeyDescription::getKeyFromAST(new_metadata.sorting_key.definition_ast, new_metadata.columns, global_context); + new_metadata.primary_key.definition_ast = nullptr; + } + + if (!metadata_diff.sampling_expression_changed && new_metadata.sampling_key.definition_ast != nullptr) + new_metadata.sampling_key.recalculateWithNewColumns(new_metadata.columns, global_context); + + if (!metadata_diff.skip_indices_changed) /// otherwise already updated + { + for (auto & index : new_metadata.secondary_indices) + index.recalculateWithNewColumns(new_metadata.columns, global_context); + } + + if (!metadata_diff.ttl_table_changed && new_metadata.table_ttl.definition_ast != nullptr) + new_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( + new_metadata.table_ttl.definition_ast, new_metadata.columns, global_context, new_metadata.primary_key); + } + /// Even if the primary/sorting/partition keys didn't change we must reinitialize it /// because primary/partition key column types might have changed. checkTTLExpressions(new_metadata, old_metadata); diff --git a/tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.reference b/tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.reference new file mode 100644 index 00000000000..02359f0f98b --- /dev/null +++ b/tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.reference @@ -0,0 +1,6 @@ +IU lada 2101 1970-04-19 15:00:00 +PS jeep Grand Cherokee 2005-10-03 15:00:00 +PS jeep Grand Cherokee 2005-10-03 15:00:00 +IU lada 2101 1970-04-19 15:00:00 +PS jeep Grand Cherokee 2005-10-03 15:00:00 +PS jeep Grand Cherokee 2005-10-03 15:00:00 diff --git a/tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.sql b/tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.sql new file mode 100644 index 00000000000..759c8ba3a0b --- /dev/null +++ b/tests/queries/0_stateless/01747_alter_partition_key_enum_zookeeper.sql @@ -0,0 +1,63 @@ +DROP TABLE IF EXISTS report; + +CREATE TABLE report +( + `product` Enum8('IU' = 1, 'WS' = 2), + `machine` String, + `branch` String, + `generated_time` DateTime +) +ENGINE = MergeTree +PARTITION BY (product, toYYYYMM(generated_time)) +ORDER BY (product, machine, branch, generated_time); + +INSERT INTO report VALUES ('IU', 'lada', '2101', toDateTime('1970-04-19 15:00:00')); + +SELECT * FROM report WHERE product = 'IU'; + +ALTER TABLE report MODIFY COLUMN product Enum8('IU' = 1, 'WS' = 2, 'PS' = 3); + +SELECT * FROM report WHERE product = 'PS'; + +INSERT INTO report VALUES ('PS', 'jeep', 'Grand Cherokee', toDateTime('2005-10-03 15:00:00')); + +SELECT * FROM report WHERE product = 'PS'; + +DETACH TABLE report; +ATTACH TABLE report; + +SELECT * FROM report WHERE product = 'PS'; + +DROP TABLE IF EXISTS report; + +DROP TABLE IF EXISTS replicated_report; + +CREATE TABLE replicated_report +( + `product` Enum8('IU' = 1, 'WS' = 2), + `machine` String, + `branch` String, + `generated_time` DateTime +) +ENGINE = ReplicatedMergeTree('/clickhouse/01747_alter_partition_key/t', '1') +PARTITION BY (product, toYYYYMM(generated_time)) +ORDER BY (product, machine, branch, generated_time); + +INSERT INTO replicated_report VALUES ('IU', 'lada', '2101', toDateTime('1970-04-19 15:00:00')); + +SELECT * FROM replicated_report WHERE product = 'IU'; + +ALTER TABLE replicated_report MODIFY COLUMN product Enum8('IU' = 1, 'WS' = 2, 'PS' = 3) SETTINGS replication_alter_partitions_sync=2; + +SELECT * FROM replicated_report WHERE product = 'PS'; + +INSERT INTO replicated_report VALUES ('PS', 'jeep', 'Grand Cherokee', toDateTime('2005-10-03 15:00:00')); + +SELECT * FROM replicated_report WHERE product = 'PS'; + +DETACH TABLE replicated_report; +ATTACH TABLE replicated_report; + +SELECT * FROM replicated_report WHERE product = 'PS'; + +DROP TABLE IF EXISTS replicated_report; From 9ebf1b4fad1cca6e8b3bd990273ff56f1a5389af Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Mar 2021 13:33:54 +0300 Subject: [PATCH 2/4] Get rid of separate minmax index fields --- src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 24 ++++++--- src/Storages/MergeTree/MergeTreeData.cpp | 53 ++++++++++++------- src/Storages/MergeTree/MergeTreeData.h | 7 +-- .../MergeTree/MergeTreeDataMergerMutator.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 21 +++++--- .../MergeTree/MergeTreeDataSelectExecutor.h | 2 + .../MergeTree/MergeTreeDataWriter.cpp | 2 +- .../MergeTree/MergeTreeWriteAheadLog.cpp | 2 +- .../ReplicatedMergeTreeTableMetadata.cpp | 5 +- tests/queries/skip_list.json | 4 +- 11 files changed, 82 insertions(+), 42 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index f80020991b0..de7f3b6c0f4 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -363,7 +363,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( new_data_part->uuid = part_uuid; new_data_part->is_temp = true; new_data_part->setColumns(block.getNamesAndTypesList()); - new_data_part->minmax_idx.update(block, data.minmax_idx_columns); + new_data_part->minmax_idx.update(block, data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); new_data_part->partition.create(metadata_snapshot, block, 0); MergedBlockOutputStream part_out(new_data_part, metadata_snapshot, block.getNamesAndTypesList(), {}, CompressionCodecFactory::instance().get("NONE", {})); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 2f6513bbb12..1568ca16254 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -57,13 +57,18 @@ static std::unique_ptr openForReading(const DiskPtr & di void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const DiskPtr & disk_, const String & part_path) { - size_t minmax_idx_size = data.minmax_idx_column_types.size(); + auto metadata_snapshot = data.getInMemoryMetadataPtr(); + const auto & partition_key = metadata_snapshot->getPartitionKey(); + + auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); + auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); + size_t minmax_idx_size = minmax_column_types.size(); hyperrectangle.reserve(minmax_idx_size); for (size_t i = 0; i < minmax_idx_size; ++i) { - String file_name = part_path + "minmax_" + escapeForFileName(data.minmax_idx_columns[i]) + ".idx"; + String file_name = part_path + "minmax_" + escapeForFileName(minmax_column_names[i]) + ".idx"; auto file = openForReading(disk_, file_name); - const DataTypePtr & data_type = data.minmax_idx_column_types[i]; + const DataTypePtr & data_type = minmax_column_types[i]; Field min_val; data_type->deserializeBinary(min_val, *file); @@ -78,7 +83,13 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Dis void IMergeTreeDataPart::MinMaxIndex::store( const MergeTreeData & data, const DiskPtr & disk_, const String & part_path, Checksums & out_checksums) const { - store(data.minmax_idx_columns, data.minmax_idx_column_types, disk_, part_path, out_checksums); + auto metadata_snapshot = data.getInMemoryMetadataPtr(); + const auto & partition_key = metadata_snapshot->getPartitionKey(); + + auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); + auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); + + store(minmax_column_names, minmax_column_types, disk_, part_path, out_checksums); } void IMergeTreeDataPart::MinMaxIndex::store( @@ -1168,6 +1179,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const auto metadata_snapshot = storage.getInMemoryMetadataPtr(); const auto & pk = metadata_snapshot->getPrimaryKey(); + const auto & partition_key = metadata_snapshot->getPartitionKey(); if (!checksums.empty()) { if (!pk.column_names.empty() && !checksums.files.count("primary.idx")) @@ -1183,7 +1195,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const if (!isEmpty()) { - for (const String & col_name : storage.minmax_idx_columns) + for (const String & col_name : storage.getMinMaxColumnsNames(partition_key)) { if (!checksums.files.count("minmax_" + escapeForFileName(col_name) + ".idx")) throw Exception("No minmax idx file checksum for column " + col_name, ErrorCodes::NO_FILE_IN_DATA_PART); @@ -1214,7 +1226,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const if (metadata_snapshot->hasPartitionKey()) check_file_not_empty(volume->getDisk(), path + "partition.dat"); - for (const String & col_name : storage.minmax_idx_columns) + for (const String & col_name : storage.getMinMaxColumnsNames(partition_key)) check_file_not_empty(volume->getDisk(), path + "minmax_" + escapeForFileName(col_name) + ".idx"); } } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0cedae9a2f4..caba9709785 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -165,6 +165,8 @@ MergeTreeData::MergeTreeData( { try { + + checkPartitionKeyAndInitMinMax(metadata_.partition_key); setProperties(metadata_, metadata_, attach); if (minmax_idx_date_column_pos == -1) throw Exception("Could not find Date column", ErrorCodes::BAD_TYPE_OF_FIELD); @@ -179,9 +181,10 @@ MergeTreeData::MergeTreeData( else { is_custom_partitioned = true; - setProperties(metadata_, metadata_, attach); + checkPartitionKeyAndInitMinMax(metadata_.partition_key); min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } + setProperties(metadata_, metadata_, attach); /// NOTE: using the same columns list as is read when performing actual merges. merging_params.check(metadata_); @@ -396,7 +399,6 @@ void MergeTreeData::checkProperties( void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach) { checkProperties(new_metadata, old_metadata, attach); - checkPartitionKeyAndInitMinMax(new_metadata.partition_key); setInMemoryMetadata(new_metadata); } @@ -421,6 +423,29 @@ ExpressionActionsPtr getCombinedIndicesExpression( } +ExpressionActionsPtr MergeTreeData::getMinMaxExpr(const KeyDescription & partition_key) const +{ + NamesAndTypesList partition_key_columns; + if (!partition_key.column_names.empty()) + partition_key_columns = partition_key.expression->getRequiredColumnsWithTypes(); + + return std::make_shared(std::make_shared(partition_key_columns)); +} + +Names MergeTreeData::getMinMaxColumnsNames(const KeyDescription & partition_key) const +{ + if (!partition_key.column_names.empty()) + return partition_key.expression->getRequiredColumns(); + return {}; +} + +DataTypes MergeTreeData::getMinMaxColumnsTypes(const KeyDescription & partition_key) const +{ + if (!partition_key.column_names.empty()) + return partition_key.expression->getRequiredColumnsWithTypes().getTypes(); + return {}; +} + ExpressionActionsPtr MergeTreeData::getPrimaryKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const { return getCombinedIndicesExpression(metadata_snapshot->getPrimaryKey(), metadata_snapshot->getSecondaryIndices(), metadata_snapshot->getColumns(), global_context); @@ -439,26 +464,14 @@ void MergeTreeData::checkPartitionKeyAndInitMinMax(const KeyDescription & new_pa checkKeyExpression(*new_partition_key.expression, new_partition_key.sample_block, "Partition", allow_nullable_key); - /// Reset filled fields - minmax_idx_columns.clear(); - minmax_idx_column_types.clear(); - minmax_idx_date_column_pos = -1; - minmax_idx_time_column_pos = -1; - /// Add all columns used in the partition key to the min-max index. - const NamesAndTypesList & minmax_idx_columns_with_types = new_partition_key.expression->getRequiredColumnsWithTypes(); - minmax_idx_expr = std::make_shared(std::make_shared(minmax_idx_columns_with_types)); - for (const NameAndTypePair & column : minmax_idx_columns_with_types) - { - minmax_idx_columns.emplace_back(column.name); - minmax_idx_column_types.emplace_back(column.type); - } + DataTypes minmax_idx_columns_types = getMinMaxColumnsTypes(new_partition_key); /// Try to find the date column in columns used by the partition key (a common case). bool encountered_date_column = false; - for (size_t i = 0; i < minmax_idx_column_types.size(); ++i) + for (size_t i = 0; i < minmax_idx_columns_types.size(); ++i) { - if (typeid_cast(minmax_idx_column_types[i].get())) + if (typeid_cast(minmax_idx_columns_types[i].get())) { if (!encountered_date_column) { @@ -474,9 +487,9 @@ void MergeTreeData::checkPartitionKeyAndInitMinMax(const KeyDescription & new_pa } if (!encountered_date_column) { - for (size_t i = 0; i < minmax_idx_column_types.size(); ++i) + for (size_t i = 0; i < minmax_idx_columns_types.size(); ++i) { - if (typeid_cast(minmax_idx_column_types[i].get())) + if (typeid_cast(minmax_idx_columns_types[i].get())) { if (!encountered_date_column) { @@ -3508,7 +3521,7 @@ bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions( if (column_name == name) return true; - for (const auto & name : minmax_idx_columns) + for (const auto & name : getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())) if (column_name == name) return true; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index f03f3f1dd8c..6f0ca34e753 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -692,12 +692,13 @@ public: bool is_custom_partitioned = false; - ExpressionActionsPtr minmax_idx_expr; - Names minmax_idx_columns; - DataTypes minmax_idx_column_types; Int64 minmax_idx_date_column_pos = -1; /// In a common case minmax index includes a date column. Int64 minmax_idx_time_column_pos = -1; /// In other cases, minmax index often includes a dateTime column. + ExpressionActionsPtr getMinMaxExpr(const KeyDescription & partition_key) const; + Names getMinMaxColumnsNames(const KeyDescription & partition_key) const; + DataTypes getMinMaxColumnsTypes(const KeyDescription & partition_key) const; + ExpressionActionsPtr getPrimaryKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const; ExpressionActionsPtr getSortingKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index c571a53d4c8..f2f8172837c 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -1779,7 +1779,7 @@ void MergeTreeDataMergerMutator::mutateAllPartColumns( Block block; while (checkOperationIsNotCanceled(merge_entry) && (block = mutating_stream->read())) { - minmax_idx.update(block, data.minmax_idx_columns); + minmax_idx.update(block, data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); out.write(block); merge_entry->rows_written += block.rows(); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index d23413f4a84..c309bdd533f 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -242,16 +242,21 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( std::optional minmax_idx_condition; std::optional partition_pruner; - if (data.minmax_idx_expr) + DataTypes minmax_columns_types; + if (metadata_snapshot->hasPartitionKey()) { - minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr); + const auto & partition_key = metadata_snapshot->getPartitionKey(); + auto minmax_columns_names = data.getMinMaxColumnsNames(partition_key); + minmax_columns_types = data.getMinMaxColumnsTypes(partition_key); + + minmax_idx_condition.emplace(query_info, context, minmax_columns_names, data.getMinMaxExpr(partition_key)); partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context, false /* strict */); if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless())) { String msg = "Neither MinMax index by columns ("; bool first = true; - for (const String & col : data.minmax_idx_columns) + for (const String & col : minmax_columns_names) { if (first) first = false; @@ -268,9 +273,9 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( const Context & query_context = context.hasQueryContext() ? context.getQueryContext() : context; if (query_context.getSettingsRef().allow_experimental_query_deduplication) - selectPartsToReadWithUUIDFilter(parts, part_values, minmax_idx_condition, partition_pruner, max_block_numbers_to_read, query_context); + selectPartsToReadWithUUIDFilter(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read, query_context); else - selectPartsToRead(parts, part_values, minmax_idx_condition, partition_pruner, max_block_numbers_to_read); + selectPartsToRead(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read); /// Sampling. @@ -1885,6 +1890,7 @@ void MergeTreeDataSelectExecutor::selectPartsToRead( MergeTreeData::DataPartsVector & parts, const std::unordered_set & part_values, const std::optional & minmax_idx_condition, + const DataTypes & minmax_columns_types, std::optional & partition_pruner, const PartitionIdToMaxBlock * max_block_numbers_to_read) const { @@ -1900,7 +1906,7 @@ void MergeTreeDataSelectExecutor::selectPartsToRead( continue; if (minmax_idx_condition && !minmax_idx_condition->checkInHyperrectangle( - part->minmax_idx.hyperrectangle, data.minmax_idx_column_types).can_be_true) + part->minmax_idx.hyperrectangle, minmax_columns_types).can_be_true) continue; if (partition_pruner) @@ -1924,6 +1930,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( MergeTreeData::DataPartsVector & parts, const std::unordered_set & part_values, const std::optional & minmax_idx_condition, + const DataTypes & minmax_columns_types, std::optional & partition_pruner, const PartitionIdToMaxBlock * max_block_numbers_to_read, const Context & query_context) const @@ -1950,7 +1957,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( continue; if (minmax_idx_condition - && !minmax_idx_condition->checkInHyperrectangle(part->minmax_idx.hyperrectangle, data.minmax_idx_column_types) + && !minmax_idx_condition->checkInHyperrectangle(part->minmax_idx.hyperrectangle, minmax_columns_types) .can_be_true) continue; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 7692424dfb5..8efdcf35082 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -123,6 +123,7 @@ private: MergeTreeData::DataPartsVector & parts, const std::unordered_set & part_values, const std::optional & minmax_idx_condition, + const DataTypes & minmax_columns_types, std::optional & partition_pruner, const PartitionIdToMaxBlock * max_block_numbers_to_read) const; @@ -131,6 +132,7 @@ private: MergeTreeData::DataPartsVector & parts, const std::unordered_set & part_values, const std::optional & minmax_idx_condition, + const DataTypes & minmax_columns_types, std::optional & partition_pruner, const PartitionIdToMaxBlock * max_block_numbers_to_read, const Context & query_context) const; diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 5a9bdd90bc8..f478cdba40a 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -268,7 +268,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa Int64 temp_index = data.insert_increment.get(); IMergeTreeDataPart::MinMaxIndex minmax_idx; - minmax_idx.update(block, data.minmax_idx_columns); + minmax_idx.update(block, data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); MergeTreePartition partition(std::move(block_with_partition.partition)); diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 7ddc8d93b03..4ca20572e90 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -191,7 +191,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor { MergedBlockOutputStream part_out(part, metadata_snapshot, block.getNamesAndTypesList(), {}, CompressionCodecFactory::instance().get("NONE", {})); - part->minmax_idx.update(block, storage.minmax_idx_columns); + part->minmax_idx.update(block, storage.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); part->partition.create(metadata_snapshot, block, 0); if (metadata_snapshot->hasSortingKey()) metadata_snapshot->getSortingKey().expression->execute(block); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index d06706f9109..ac1c92849d5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -26,7 +26,10 @@ static String formattedAST(const ASTPtr & ast) ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTreeData & data, const StorageMetadataPtr & metadata_snapshot) { if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) - date_column = data.minmax_idx_columns[data.minmax_idx_date_column_pos]; + { + auto minmax_idx_column_names = data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey()); + date_column = minmax_idx_column_names[data.minmax_idx_date_column_pos]; + } const auto data_settings = data.getSettings(); sampling_expression = formattedAST(metadata_snapshot->getSamplingKeyAST()); diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 36cca55779d..45d569fc131 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -261,7 +261,8 @@ "00116_storage_set", "00083_create_merge_tree_zookeeper", "00062_replicated_merge_tree_alter_zookeeper", - "01720_constraints_complex_types" + "01720_constraints_complex_types", + "01747_alter_partition_key_enum_zookeeper" ], "polymorphic-parts": [ "01508_partition_pruning_long", /// bug, shoud be fixed @@ -748,6 +749,7 @@ "01676_dictget_in_default_expression", "01700_system_zookeeper_path_in", "01715_background_checker_blather_zookeeper", + "01747_alter_partition_key_enum_zookeeper", "attach", "ddl_dictionaries", "dictionary", From 4e33587043f9cc7b637207771c8a49da7c7c71be Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Mar 2021 13:57:09 +0300 Subject: [PATCH 3/4] Comments --- src/Storages/MergeTree/MergeTreeData.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 6f0ca34e753..e629536a106 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -692,11 +692,15 @@ public: bool is_custom_partitioned = false; + /// Used only for old syntax tables. Never changes after init. Int64 minmax_idx_date_column_pos = -1; /// In a common case minmax index includes a date column. Int64 minmax_idx_time_column_pos = -1; /// In other cases, minmax index often includes a dateTime column. + /// Get partition key expression on required columns ExpressionActionsPtr getMinMaxExpr(const KeyDescription & partition_key) const; + /// Get column names required for partition key Names getMinMaxColumnsNames(const KeyDescription & partition_key) const; + /// Get column types required for partition key DataTypes getMinMaxColumnsTypes(const KeyDescription & partition_key) const; ExpressionActionsPtr getPrimaryKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const; From c29d7c7f4959c406c67f4fdca8901c7333f4f7fb Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Mar 2021 19:13:36 +0300 Subject: [PATCH 4/4] Shutup clang tidy --- src/Storages/MergeTree/MergeTreeData.cpp | 6 +++--- src/Storages/MergeTree/MergeTreeData.h | 6 +++--- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataSelectExecutor.h | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index caba9709785..277f525f4d9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -423,7 +423,7 @@ ExpressionActionsPtr getCombinedIndicesExpression( } -ExpressionActionsPtr MergeTreeData::getMinMaxExpr(const KeyDescription & partition_key) const +ExpressionActionsPtr MergeTreeData::getMinMaxExpr(const KeyDescription & partition_key) { NamesAndTypesList partition_key_columns; if (!partition_key.column_names.empty()) @@ -432,14 +432,14 @@ ExpressionActionsPtr MergeTreeData::getMinMaxExpr(const KeyDescription & partiti return std::make_shared(std::make_shared(partition_key_columns)); } -Names MergeTreeData::getMinMaxColumnsNames(const KeyDescription & partition_key) const +Names MergeTreeData::getMinMaxColumnsNames(const KeyDescription & partition_key) { if (!partition_key.column_names.empty()) return partition_key.expression->getRequiredColumns(); return {}; } -DataTypes MergeTreeData::getMinMaxColumnsTypes(const KeyDescription & partition_key) const +DataTypes MergeTreeData::getMinMaxColumnsTypes(const KeyDescription & partition_key) { if (!partition_key.column_names.empty()) return partition_key.expression->getRequiredColumnsWithTypes().getTypes(); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index e629536a106..bfc1a5f6b6f 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -697,11 +697,11 @@ public: Int64 minmax_idx_time_column_pos = -1; /// In other cases, minmax index often includes a dateTime column. /// Get partition key expression on required columns - ExpressionActionsPtr getMinMaxExpr(const KeyDescription & partition_key) const; + static ExpressionActionsPtr getMinMaxExpr(const KeyDescription & partition_key); /// Get column names required for partition key - Names getMinMaxColumnsNames(const KeyDescription & partition_key) const; + static Names getMinMaxColumnsNames(const KeyDescription & partition_key); /// Get column types required for partition key - DataTypes getMinMaxColumnsTypes(const KeyDescription & partition_key) const; + static DataTypes getMinMaxColumnsTypes(const KeyDescription & partition_key); ExpressionActionsPtr getPrimaryKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const; ExpressionActionsPtr getSortingKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index c309bdd533f..b1f3f524beb 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1892,7 +1892,7 @@ void MergeTreeDataSelectExecutor::selectPartsToRead( const std::optional & minmax_idx_condition, const DataTypes & minmax_columns_types, std::optional & partition_pruner, - const PartitionIdToMaxBlock * max_block_numbers_to_read) const + const PartitionIdToMaxBlock * max_block_numbers_to_read) { auto prev_parts = parts; parts.clear(); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 8efdcf35082..634719639ad 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -119,13 +119,13 @@ private: /// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`, /// as well as `max_block_number_to_read`. - void selectPartsToRead( + static void selectPartsToRead( MergeTreeData::DataPartsVector & parts, const std::unordered_set & part_values, const std::optional & minmax_idx_condition, const DataTypes & minmax_columns_types, std::optional & partition_pruner, - const PartitionIdToMaxBlock * max_block_numbers_to_read) const; + const PartitionIdToMaxBlock * max_block_numbers_to_read); /// Same as previous but also skip parts uuids if any to the query context, or skip parts which uuids marked as excluded. void selectPartsToReadWithUUIDFilter(