From 1da393b2180bedc1827bdd7b8c6356e06db7a993 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 15:07:09 +0300 Subject: [PATCH] Sampling key in StorageInMemoryMetadata --- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Storages/IStorage.cpp | 23 ------------------- src/Storages/IStorage.h | 14 +---------- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 ++-- .../ReplicatedMergeTreeTableMetadata.cpp | 2 +- src/Storages/StorageInMemoryMetadata.cpp | 23 +++++++++++++++++++ src/Storages/StorageInMemoryMetadata.h | 11 +++++++++ src/Storages/System/StorageSystemColumns.cpp | 2 +- src/Storages/System/StorageSystemTables.cpp | 2 +- 9 files changed, 41 insertions(+), 42 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 28aa42877d6..9d9a403ab59 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1062,7 +1062,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (storage && (query.sampleSize() || settings.parallel_replicas_count > 1)) { - Names columns_for_sampling = storage->getColumnsRequiredForSampling(); + Names columns_for_sampling = metadata_snapshot->getColumnsRequiredForSampling(); additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(), columns_for_sampling.begin(), columns_for_sampling.end()); } diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 0c2ddc09da8..0a50cf95559 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -348,35 +348,12 @@ Names IStorage::getPrimaryKeyColumns() const return {}; } -const KeyDescription & IStorage::getSamplingKey() const -{ - return metadata->sampling_key; -} - -bool IStorage::isSamplingKeyDefined() const -{ - return metadata->sampling_key.definition_ast != nullptr; -} - -bool IStorage::hasSamplingKey() const -{ - return !metadata->sampling_key.column_names.empty(); -} - -Names IStorage::getColumnsRequiredForSampling() const -{ - if (hasSamplingKey()) - return metadata->sampling_key.expression->getRequiredColumns(); - return {}; -} - TTLTableDescription IStorage::getTableTTLs() const { std::lock_guard lock(ttl_mutex); return metadata->table_ttl; } - bool IStorage::hasAnyTableTTL() const { return hasAnyMoveTTL() || hasRowsTTL(); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4e1ca81dd10..5aa595b1375 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -101,7 +101,7 @@ public: virtual bool isView() const { return false; } /// Returns true if the storage supports queries with the SAMPLE section. - virtual bool supportsSampling() const { return hasSamplingKey(); } + virtual bool supportsSampling() const { return getInMemoryMetadataPtr()->hasSamplingKey(); } /// Returns true if the storage supports queries with the FINAL section. virtual bool supportsFinal() const { return false; } @@ -442,18 +442,6 @@ public: /// * y', 'toStartOfMonth(date)', etc. Names getPrimaryKeyColumns() const; - /// Returns structure with sampling key. - const KeyDescription & getSamplingKey() const; - /// Returns sampling expression AST for storage or nullptr if there is none. - ASTPtr getSamplingKeyAST() const { return metadata->sampling_key.definition_ast; } - /// Storage has user-defined (in CREATE query) sampling key. - bool isSamplingKeyDefined() const; - /// Storage has sampling key. - bool hasSamplingKey() const; - /// Returns column names that need to be read to calculate sampling key. - Names getColumnsRequiredForSampling() const; - - /// Returns storage policy if storage supports it. virtual StoragePolicyPtr getStoragePolicy() const { return {}; } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 9d198c7ed65..58214bae5ca 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -390,7 +390,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( used_sample_factor = 1.0 / boost::rational_cast(relative_sample_size); RelativeSize size_of_universum = 0; - const auto & sampling_key = data.getSamplingKey(); + const auto & sampling_key = metadata_snapshot->getSamplingKey(); DataTypePtr sampling_column_type = sampling_key.data_types[0]; if (typeid_cast(sampling_column_type.get())) @@ -459,7 +459,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( /// If sample and final are used together no need to calculate sampling expression twice. /// The first time it was calculated for final, because sample key is a part of the PK. /// So, assume that we already have calculated column. - ASTPtr sampling_key_ast = data.getSamplingKeyAST(); + ASTPtr sampling_key_ast = metadata_snapshot->getSamplingKeyAST(); if (select.final()) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 81366db5b2a..c3d91f4a5a9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -29,7 +29,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr date_column = data.minmax_idx_columns[data.minmax_idx_date_column_pos]; const auto data_settings = data.getSettings(); - sampling_expression = formattedAST(data.getSamplingKeyAST()); + sampling_expression = formattedAST(metadata_snapshot->getSamplingKeyAST()); index_granularity = data_settings->index_granularity; merging_params_mode = static_cast(data.merging_params.mode); sign_column = data.merging_params.sign_column; diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 3c7f474c482..6c5429fc556 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -342,4 +342,27 @@ Names StorageInMemoryMetadata::getSortingKeyColumns() const return sorting_key.column_names; return {}; } + +const KeyDescription & StorageInMemoryMetadata::getSamplingKey() const +{ + return sampling_key; +} + +bool StorageInMemoryMetadata::isSamplingKeyDefined() const +{ + return sampling_key.definition_ast != nullptr; +} + +bool StorageInMemoryMetadata::hasSamplingKey() const +{ + return !sampling_key.column_names.empty(); +} + +Names StorageInMemoryMetadata::getColumnsRequiredForSampling() const +{ + if (hasSamplingKey()) + return sampling_key.expression->getRequiredColumns(); + return {}; +} + } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index f7370057410..1abea7d250c 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -140,6 +140,17 @@ struct StorageInMemoryMetadata /// Returns column names that need to be read for FINAL to work. Names getColumnsRequiredForFinal() const { return getColumnsRequiredForSortingKey(); } + + /// Returns structure with sampling key. + const KeyDescription & getSamplingKey() const; + /// Returns sampling expression AST for storage or nullptr if there is none. + ASTPtr getSamplingKeyAST() const { return sampling_key.definition_ast; } + /// Storage has user-defined (in CREATE query) sampling key. + bool isSamplingKeyDefined() const; + /// Storage has sampling key. + bool hasSamplingKey() const; + /// Returns column names that need to be read to calculate sampling key. + Names getColumnsRequiredForSampling() const; }; using StorageMetadataPtr = std::shared_ptr; diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 016d52ffdcb..f998dc27cab 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -128,7 +128,7 @@ protected: cols_required_for_partition_key = metadata_snapshot->getColumnsRequiredForPartitionKey(); cols_required_for_sorting_key = metadata_snapshot->getColumnsRequiredForSortingKey(); cols_required_for_primary_key = storage->getColumnsRequiredForPrimaryKey(); - cols_required_for_sampling = storage->getColumnsRequiredForSampling(); + cols_required_for_sampling = metadata_snapshot->getColumnsRequiredForSampling(); column_sizes = storage->getColumnSizes(); } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 50982e3c4b7..0852a96e8ba 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -393,7 +393,7 @@ protected: if (columns_mask[src_index++]) { assert(table != nullptr); - if ((expression_ptr = table->getSamplingKeyAST())) + if ((expression_ptr = metadata_snapshot->getSamplingKeyAST())) res_columns[res_index++]->insert(queryToString(expression_ptr)); else res_columns[res_index++]->insertDefault();