Sampling key in StorageInMemoryMetadata

This commit is contained in:
alesapin 2020-06-17 15:07:09 +03:00
parent eca6e9087e
commit 1da393b218
9 changed files with 41 additions and 42 deletions

View File

@ -1062,7 +1062,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
if (storage && (query.sampleSize() || settings.parallel_replicas_count > 1))
{
Names columns_for_sampling = storage->getColumnsRequiredForSampling();
Names columns_for_sampling = metadata_snapshot->getColumnsRequiredForSampling();
additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(),
columns_for_sampling.begin(), columns_for_sampling.end());
}

View File

@ -348,35 +348,12 @@ Names IStorage::getPrimaryKeyColumns() const
return {};
}
/// Gives read-only access to the sampling key description stored in the storage metadata.
const KeyDescription & IStorage::getSamplingKey() const
{
    const KeyDescription & key = metadata->sampling_key;
    return key;
}
/// Tells whether the sampling key carries a definition AST (definition_ast is not null).
bool IStorage::isSamplingKeyDefined() const
{
    const auto & definition = metadata->sampling_key.definition_ast;
    return nullptr != definition;
}
/// Tells whether the sampling key lists at least one column name.
bool IStorage::hasSamplingKey() const
{
    const auto & key_columns = metadata->sampling_key.column_names;
    return !key_columns.empty();
}
/// Collects the columns the sampling key expression needs as input.
/// Yields an empty list when the storage has no sampling key.
Names IStorage::getColumnsRequiredForSampling() const
{
    /// No sampling key means there is nothing to read for it.
    if (!hasSamplingKey())
        return {};

    return metadata->sampling_key.expression->getRequiredColumns();
}
/// Returns a copy of the table TTL description from the storage metadata.
TTLTableDescription IStorage::getTableTTLs() const
{
    /// The copy is taken while ttl_mutex is held.
    std::lock_guard guard(ttl_mutex);
    TTLTableDescription ttl_copy = metadata->table_ttl;
    return ttl_copy;
}
bool IStorage::hasAnyTableTTL() const
{
return hasAnyMoveTTL() || hasRowsTTL();

View File

@ -101,7 +101,7 @@ public:
virtual bool isView() const { return false; }
/// Returns true if the storage supports queries with the SAMPLE section.
virtual bool supportsSampling() const { return hasSamplingKey(); }
virtual bool supportsSampling() const { return getInMemoryMetadataPtr()->hasSamplingKey(); }
/// Returns true if the storage supports queries with the FINAL section.
virtual bool supportsFinal() const { return false; }
@ -442,18 +442,6 @@ public:
/// * y', 'toStartOfMonth(date)', etc.
Names getPrimaryKeyColumns() const;
/// Returns structure with sampling key.
const KeyDescription & getSamplingKey() const;
/// Returns sampling expression AST for storage or nullptr if there is none.
ASTPtr getSamplingKeyAST() const { return metadata->sampling_key.definition_ast; }
/// Storage has user-defined (in CREATE query) sampling key.
bool isSamplingKeyDefined() const;
/// Storage has sampling key.
bool hasSamplingKey() const;
/// Returns column names that need to be read to calculate sampling key.
Names getColumnsRequiredForSampling() const;
/// Returns storage policy if storage supports it.
virtual StoragePolicyPtr getStoragePolicy() const { return {}; }

View File

@ -390,7 +390,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
used_sample_factor = 1.0 / boost::rational_cast<Float64>(relative_sample_size);
RelativeSize size_of_universum = 0;
const auto & sampling_key = data.getSamplingKey();
const auto & sampling_key = metadata_snapshot->getSamplingKey();
DataTypePtr sampling_column_type = sampling_key.data_types[0];
if (typeid_cast<const DataTypeUInt64 *>(sampling_column_type.get()))
@ -459,7 +459,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
/// If sample and final are used together no need to calculate sampling expression twice.
/// The first time it was calculated for final, because sample key is a part of the PK.
/// So, assume that we already have calculated column.
ASTPtr sampling_key_ast = data.getSamplingKeyAST();
ASTPtr sampling_key_ast = metadata_snapshot->getSamplingKeyAST();
if (select.final())
{

View File

@ -29,7 +29,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr
date_column = data.minmax_idx_columns[data.minmax_idx_date_column_pos];
const auto data_settings = data.getSettings();
sampling_expression = formattedAST(data.getSamplingKeyAST());
sampling_expression = formattedAST(metadata_snapshot->getSamplingKeyAST());
index_granularity = data_settings->index_granularity;
merging_params_mode = static_cast<int>(data.merging_params.mode);
sign_column = data.merging_params.sign_column;

View File

@ -342,4 +342,27 @@ Names StorageInMemoryMetadata::getSortingKeyColumns() const
return sorting_key.column_names;
return {};
}
/// Gives read-only access to the sampling key description kept in this metadata object.
const KeyDescription & StorageInMemoryMetadata::getSamplingKey() const
{
    const KeyDescription & key = sampling_key;
    return key;
}
/// Tells whether the sampling key carries a definition AST (definition_ast is not null).
bool StorageInMemoryMetadata::isSamplingKeyDefined() const
{
    const auto & definition = sampling_key.definition_ast;
    return nullptr != definition;
}
/// Tells whether the sampling key lists at least one column name.
bool StorageInMemoryMetadata::hasSamplingKey() const
{
    const auto & key_columns = sampling_key.column_names;
    return !key_columns.empty();
}
/// Collects the columns the sampling key expression needs as input.
/// Yields an empty list when no sampling key is present.
Names StorageInMemoryMetadata::getColumnsRequiredForSampling() const
{
    /// No sampling key means there is nothing to read for it.
    if (!hasSamplingKey())
        return {};

    return sampling_key.expression->getRequiredColumns();
}
}

View File

@ -140,6 +140,17 @@ struct StorageInMemoryMetadata
/// Returns column names that need to be read for FINAL to work.
Names getColumnsRequiredForFinal() const { return getColumnsRequiredForSortingKey(); }
/// Returns structure with sampling key.
const KeyDescription & getSamplingKey() const;
/// Returns sampling expression AST for storage or nullptr if there is none.
ASTPtr getSamplingKeyAST() const { return sampling_key.definition_ast; }
/// Storage has user-defined (in CREATE query) sampling key.
bool isSamplingKeyDefined() const;
/// Storage has sampling key.
bool hasSamplingKey() const;
/// Returns column names that need to be read to calculate sampling key.
Names getColumnsRequiredForSampling() const;
};
using StorageMetadataPtr = std::shared_ptr<StorageInMemoryMetadata>;

View File

@ -128,7 +128,7 @@ protected:
cols_required_for_partition_key = metadata_snapshot->getColumnsRequiredForPartitionKey();
cols_required_for_sorting_key = metadata_snapshot->getColumnsRequiredForSortingKey();
cols_required_for_primary_key = storage->getColumnsRequiredForPrimaryKey();
cols_required_for_sampling = storage->getColumnsRequiredForSampling();
cols_required_for_sampling = metadata_snapshot->getColumnsRequiredForSampling();
column_sizes = storage->getColumnSizes();
}

View File

@ -393,7 +393,7 @@ protected:
if (columns_mask[src_index++])
{
assert(table != nullptr);
if ((expression_ptr = table->getSamplingKeyAST()))
if ((expression_ptr = metadata_snapshot->getSamplingKeyAST()))
res_columns[res_index++]->insert(queryToString(expression_ptr));
else
res_columns[res_index++]->insertDefault();