Compilable sorting key in metadata

This commit is contained in:
alesapin 2020-06-17 14:05:11 +03:00
parent ab61abccc1
commit ba04d02f1e
21 changed files with 104 additions and 88 deletions

View File

@ -1011,6 +1011,7 @@ ExpressionActionsPtr SelectQueryExpressionAnalyzer::simpleSelectActions()
ExpressionAnalysisResult::ExpressionAnalysisResult(
SelectQueryExpressionAnalyzer & query_analyzer,
const StorageMetadataPtr & metadata_snapshot,
bool first_stage_,
bool second_stage_,
bool only_types,
@ -1068,7 +1069,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
if (storage && query.final())
{
Names columns_for_final = storage->getColumnsRequiredForFinal();
Names columns_for_final = metadata_snapshot->getColumnsRequiredForFinal();
additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(),
columns_for_final.begin(), columns_for_final.end());
}

View File

@ -204,6 +204,7 @@ struct ExpressionAnalysisResult
ExpressionAnalysisResult(
SelectQueryExpressionAnalyzer & query_analyzer,
const StorageMetadataPtr & metadata_snapshot,
bool first_stage,
bool second_stage,
bool only_types,

View File

@ -492,8 +492,12 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
bool second_stage = from_stage <= QueryProcessingStage::WithMergeableState
&& options.to_stage > QueryProcessingStage::WithMergeableState;
Names columns_required_for_sampling;
Names columns_required_for_;
analysis_result = ExpressionAnalysisResult(
*query_analyzer,
metadata_snapshot,
first_stage,
second_stage,
options.only_analyze,
@ -1329,7 +1333,7 @@ void InterpreterSelectQuery::executeFetchColumns(
getSortDescriptionFromGroupBy(query),
query_info.syntax_analyzer_result);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage, metadata_snapshot);
}
Pipes pipes = storage->read(required_columns, metadata_snapshot, query_info, *context, processing_stage, max_block_size, max_streams);

View File

@ -225,7 +225,7 @@ static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPt
for (const String & col : metadata_snapshot->getColumnsRequiredForPartitionKey())
key_columns.insert(col);
for (const String & col : merge_tree_data->getColumnsRequiredForSortingKey())
for (const String & col : metadata_snapshot->getColumnsRequiredForSortingKey())
key_columns.insert(col);
/// We don't process sample_by_ast separately because it must be among the primary key columns.
@ -731,7 +731,7 @@ size_t MutationsInterpreter::evaluateCommandsSize()
std::optional<SortDescription> MutationsInterpreter::getStorageSortDescriptionIfPossible(const Block & header) const
{
Names sort_columns = storage->getSortingKeyColumns();
Names sort_columns = metadata_snapshot->getSortingKeyColumns();
SortDescription sort_description;
size_t sort_columns_size = sort_columns.size();
sort_description.reserve(sort_columns_size);

View File

@ -319,35 +319,6 @@ NamesAndTypesList IStorage::getVirtuals() const
return {};
}
/// Gives read-only access to the sorting key description held in the storage metadata.
const KeyDescription & IStorage::getSortingKey() const
{
    const KeyDescription & key_description = metadata->sorting_key;
    return key_description;
}
/// True when the sorting key in the storage metadata carries a non-null definition AST.
bool IStorage::isSortingKeyDefined() const
{
    const auto & definition = metadata->sorting_key.definition_ast;
    return definition != nullptr;
}
/// True when the sorting key in the storage metadata lists at least one column name.
bool IStorage::hasSortingKey() const
{
    const auto & key_column_names = metadata->sorting_key.column_names;
    return !key_column_names.empty();
}
/// Names of the columns that the sorting key expression requires as input.
/// Yields an empty list when the storage has no sorting key.
Names IStorage::getColumnsRequiredForSortingKey() const
{
    /// Without a sorting key there is no expression to ask.
    if (!hasSortingKey())
        return {};

    const auto & key_expression = metadata->sorting_key.expression;
    return key_expression->getRequiredColumns();
}
/// Copy of the column names recorded in the sorting key,
/// or an empty list when the storage has no sorting key.
Names IStorage::getSortingKeyColumns() const
{
    if (!hasSortingKey())
        return {};

    return metadata->sorting_key.column_names;
}
const KeyDescription & IStorage::getPrimaryKey() const
{
return metadata->primary_key;

View File

@ -427,20 +427,6 @@ public:
/// Returns data paths if storage supports it, empty vector otherwise.
virtual Strings getDataPaths() const { return {}; }
/// Returns structure with sorting key.
const KeyDescription & getSortingKey() const;
/// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none.
ASTPtr getSortingKeyAST() const { return metadata->sorting_key.definition_ast; }
/// Storage has user-defined (in CREATE query) sorting key.
bool isSortingKeyDefined() const;
/// Storage has sorting key. It means, that it contains at least one column.
bool hasSortingKey() const;
/// Returns column names that need to be read to calculate sorting key.
Names getColumnsRequiredForSortingKey() const;
/// Returns columns names in sorting key specified by user in ORDER BY
/// expression. For example: 'a', 'x * y', 'toStartOfMonth(date)', etc.
Names getSortingKeyColumns() const;
/// Returns structure with primary key.
const KeyDescription & getPrimaryKey() const;
/// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none.
@ -467,8 +453,6 @@ public:
/// Returns column names that need to be read to calculate sampling key.
Names getColumnsRequiredForSampling() const;
/// Returns column names that need to be read for FINAL to work.
Names getColumnsRequiredForFinal() const { return getColumnsRequiredForSortingKey(); }
/// Returns storage policy if storage supports it.
virtual StoragePolicyPtr getStoragePolicy() const { return {}; }

View File

@ -274,7 +274,7 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam
}
}
void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & /*old_metadata*/, bool attach) const
void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach) const
{
if (!new_metadata.sorting_key.definition_ast)
throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS);
@ -312,7 +312,7 @@ void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata
auto all_columns = new_metadata.columns.getAllPhysical();
/// Order by check AST
if (hasSortingKey())
if (old_metadata.hasSortingKey())
{
/// This is ALTER, not CREATE/ATTACH TABLE. Let us check that all new columns used in the sorting key
/// expression have just been added (so that the sorting order is guaranteed to be valid with the new key).
@ -321,7 +321,7 @@ void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata
Names new_sorting_key_columns = new_sorting_key.column_names;
ASTPtr added_key_column_expr_list = std::make_shared<ASTExpressionList>();
const auto & old_sorting_key_columns = getSortingKeyColumns();
const auto & old_sorting_key_columns = old_metadata.getSortingKeyColumns();
for (size_t new_i = 0, old_i = 0; new_i < sorting_key_size; ++new_i)
{
if (old_i < old_sorting_key_columns.size())
@ -342,7 +342,7 @@ void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata
NamesAndTypesList deleted_columns;
NamesAndTypesList added_columns;
getColumns().getAllPhysical().getDifference(all_columns, deleted_columns, added_columns);
old_metadata.getColumns().getAllPhysical().getDifference(all_columns, deleted_columns, added_columns);
for (const String & col : used_columns)
{
@ -415,7 +415,7 @@ ExpressionActionsPtr MergeTreeData::getPrimaryKeyAndSkipIndicesExpression(const
ExpressionActionsPtr MergeTreeData::getSortingKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const
{
return getCombinedIndicesExpression(getSortingKey(), metadata_snapshot->getSecondaryIndices(), metadata_snapshot->getColumns(), global_context);
return getCombinedIndicesExpression(metadata_snapshot->getSortingKey(), metadata_snapshot->getSecondaryIndices(), metadata_snapshot->getColumns(), global_context);
}
@ -487,8 +487,8 @@ void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_meta
for (const auto & col : old_metadata.getColumnsRequiredForPartitionKey())
columns_ttl_forbidden.insert(col);
if (hasSortingKey())
for (const auto & col : getColumnsRequiredForSortingKey())
if (old_metadata.hasSortingKey())
for (const auto & col : old_metadata.getColumnsRequiredForSortingKey())
columns_ttl_forbidden.insert(col);
for (const auto & [name, ttl_description] : new_column_ttls)
@ -1266,9 +1266,9 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S
columns_alter_type_forbidden.insert(col);
}
if (hasSortingKey())
if (old_metadata.hasSortingKey())
{
auto sorting_key_expr = getSortingKey().expression;
auto sorting_key_expr = old_metadata.getSortingKey().expression;
for (const ExpressionAction & action : sorting_key_expr->getActions())
{
auto action_columns = action.getNeededColumns();
@ -2981,7 +2981,7 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour
return ast ? queryToString(ast) : "";
};
if (query_to_string(getSortingKeyAST()) != query_to_string(src_data->getSortingKeyAST()))
if (query_to_string(my_snapshot->getSortingKeyAST()) != query_to_string(src_snapshot->getSortingKeyAST()))
throw Exception("Tables have different ordering", ErrorCodes::BAD_ARGUMENTS);
if (query_to_string(my_snapshot->getPartitionKeyAST()) != query_to_string(src_snapshot->getPartitionKeyAST()))

View File

@ -612,8 +612,14 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
NamesAndTypesList merging_columns;
Names gathering_column_names, merging_column_names;
extractMergingAndGatheringColumns(
storage_columns, data.getSortingKey().expression, metadata_snapshot->getSecondaryIndices(),
data.merging_params, gathering_columns, gathering_column_names, merging_columns, merging_column_names);
storage_columns,
metadata_snapshot->getSortingKey().expression,
metadata_snapshot->getSecondaryIndices(),
data.merging_params,
gathering_columns,
gathering_column_names,
merging_columns,
merging_column_names);
auto single_disk_volume = std::make_shared<SingleDiskVolume>("volume_" + future_part.name, disk);
MergeTreeData::MutableDataPartPtr new_data_part = data.createPart(
@ -719,16 +725,16 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
Pipe pipe(std::move(input));
if (data.hasSortingKey())
if (metadata_snapshot->hasSortingKey())
{
auto expr = std::make_shared<ExpressionTransform>(pipe.getHeader(), data.getSortingKey().expression);
auto expr = std::make_shared<ExpressionTransform>(pipe.getHeader(), metadata_snapshot->getSortingKey().expression);
pipe.addSimpleTransform(std::move(expr));
}
pipes.emplace_back(std::move(pipe));
}
Names sort_columns = data.getSortingKeyColumns();
Names sort_columns = metadata_snapshot->getSortingKeyColumns();
SortDescription sort_description;
size_t sort_columns_size = sort_columns.size();
sort_description.reserve(sort_columns_size);

View File

@ -617,7 +617,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
if (select.final())
{
/// Add columns needed to calculate the sorting expression and the sign.
std::vector<String> add_columns = data.getColumnsRequiredForSortingKey();
std::vector<String> add_columns = metadata_snapshot->getColumnsRequiredForSortingKey();
column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end());
if (!data.merging_params.sign_column.empty())
@ -644,7 +644,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
else if ((settings.optimize_read_in_order || settings.optimize_aggregation_in_order) && query_info.input_order_info)
{
size_t prefix_size = query_info.input_order_info->order_key_prefix_descr.size();
auto order_key_prefix_ast = data.getSortingKey().expression_list_ast->clone();
auto order_key_prefix_ast = metadata_snapshot->getSortingKey().expression_list_ast->clone();
order_key_prefix_ast->children.resize(prefix_size);
auto syntax_result = SyntaxAnalyzer(context).analyze(order_key_prefix_ast, data.getColumns().getAllPhysical());
@ -1064,7 +1064,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
{
SortDescription sort_description;
for (size_t j = 0; j < input_order_info->order_key_prefix_descr.size(); ++j)
sort_description.emplace_back(data.getSortingKey().column_names[j],
sort_description.emplace_back(metadata_snapshot->getSortingKey().column_names[j],
input_order_info->direction, 1);
/// Drop temporary columns, added by 'sorting_key_prefix_expr'
@ -1138,11 +1138,11 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
if (!out_projection)
out_projection = createProjection(pipe, data);
pipe.addSimpleTransform(std::make_shared<ExpressionTransform>(pipe.getHeader(), data.getSortingKey().expression));
pipe.addSimpleTransform(std::make_shared<ExpressionTransform>(pipe.getHeader(), metadata_snapshot->getSortingKey().expression));
pipes.emplace_back(std::move(pipe));
}
Names sort_columns = data.getSortingKeyColumns();
Names sort_columns = metadata_snapshot->getSortingKeyColumns();
SortDescription sort_description;
size_t sort_columns_size = sort_columns.size();
sort_description.reserve(sort_columns_size);

View File

@ -262,10 +262,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
new_data_part->volume->getDisk()->createDirectories(full_path);
/// If we need to calculate some columns to sort.
if (data.hasSortingKey() || metadata_snapshot->hasSecondaryIndices())
if (metadata_snapshot->hasSortingKey() || metadata_snapshot->hasSecondaryIndices())
data.getSortingKeyAndSkipIndicesExpression(metadata_snapshot)->execute(block);
Names sort_columns = data.getSortingKeyColumns();
Names sort_columns = metadata_snapshot->getSortingKeyColumns();
SortDescription sort_description;
size_t sort_columns_size = sort_columns.size();
sort_description.reserve(sort_columns_size);

View File

@ -41,11 +41,11 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr
/// - When we have only ORDER BY, then store it in "primary key:" row of /metadata
/// - When we have both, then store PRIMARY KEY in "primary key:" row and ORDER BY in "sorting key:" row of /metadata
if (!data.isPrimaryKeyDefined())
primary_key = formattedAST(data.getSortingKey().expression_list_ast);
primary_key = formattedAST(metadata_snapshot->getSortingKey().expression_list_ast);
else
{
primary_key = formattedAST(data.getPrimaryKey().expression_list_ast);
sorting_key = formattedAST(data.getSortingKey().expression_list_ast);
sorting_key = formattedAST(metadata_snapshot->getSortingKey().expression_list_ast);
}
data_format_version = data.format_version;

View File

@ -30,20 +30,20 @@ ReadInOrderOptimizer::ReadInOrderOptimizer(
forbidden_columns.insert(elem.first);
}
InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & storage) const
InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot) const
{
Names sorting_key_columns;
if (const auto * merge_tree = dynamic_cast<const MergeTreeData *>(storage.get()))
{
if (!merge_tree->hasSortingKey())
if (!metadata_snapshot->hasSortingKey())
return {};
sorting_key_columns = merge_tree->getSortingKeyColumns();
sorting_key_columns = metadata_snapshot->getSortingKeyColumns();
}
else if (const auto * part = dynamic_cast<const StorageFromMergeTreeDataPart *>(storage.get()))
{
if (!part->hasSortingKey())
if (!metadata_snapshot->hasSortingKey())
return {};
sorting_key_columns = part->getSortingKeyColumns();
sorting_key_columns = metadata_snapshot->getSortingKeyColumns();
}
else /// Inapplicable storage type
{

View File

@ -20,7 +20,7 @@ public:
const SortDescription & required_sort_description,
const SyntaxAnalyzerResultPtr & syntax_result);
InputOrderInfoPtr getInputOrder(const StoragePtr & storage) const;
InputOrderInfoPtr getInputOrder(const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot) const;
private:
/// Actions for every element of order expression to analyze functions for monotonicity

View File

@ -179,7 +179,7 @@ Pipes StorageBuffer::read(
if (dst_has_same_structure)
{
if (query_info.order_optimizer)
query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination, metadata_snapshot);
/// The destination table has the same structure of the requested columns and we can simply read blocks from there.
pipes_from_dst = destination->read(

View File

@ -312,4 +312,34 @@ Names StorageInMemoryMetadata::getColumnsRequiredForPartitionKey() const
return partition_key.expression->getRequiredColumns();
return {};
}
/// Gives read-only access to the sorting key description stored in this metadata object.
const KeyDescription & StorageInMemoryMetadata::getSortingKey() const
{
    const KeyDescription & key_description = sorting_key;
    return key_description;
}
/// True when the sorting key carries a non-null definition AST.
bool StorageInMemoryMetadata::isSortingKeyDefined() const
{
    const auto & definition = sorting_key.definition_ast;
    return definition != nullptr;
}
/// True when the sorting key lists at least one column name.
bool StorageInMemoryMetadata::hasSortingKey() const
{
    const auto & key_column_names = sorting_key.column_names;
    return !key_column_names.empty();
}
/// Names of the columns that the sorting key expression requires as input.
/// Yields an empty list when there is no sorting key.
Names StorageInMemoryMetadata::getColumnsRequiredForSortingKey() const
{
    /// Without a sorting key there is no expression to ask.
    if (!hasSortingKey())
        return {};

    const auto & key_expression = sorting_key.expression;
    return key_expression->getRequiredColumns();
}
/// Copy of the column names recorded in the sorting key,
/// or an empty list when there is no sorting key.
Names StorageInMemoryMetadata::getSortingKeyColumns() const
{
    if (!hasSortingKey())
        return {};

    return sorting_key.column_names;
}
}

View File

@ -123,6 +123,23 @@ struct StorageInMemoryMetadata
bool hasPartitionKey() const;
/// Returns column names that need to be read to calculate partition key.
Names getColumnsRequiredForPartitionKey() const;
/// Returns structure with sorting key.
const KeyDescription & getSortingKey() const;
/// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none.
ASTPtr getSortingKeyAST() const { return sorting_key.definition_ast; }
/// Storage has user-defined (in CREATE query) sorting key.
bool isSortingKeyDefined() const;
/// Storage has sorting key. It means, that it contains at least one column.
bool hasSortingKey() const;
/// Returns column names that need to be read to calculate sorting key.
Names getColumnsRequiredForSortingKey() const;
/// Returns columns names in sorting key specified by user in ORDER BY
/// expression. For example: 'a', 'x * y', 'toStartOfMonth(date)', etc.
Names getSortingKeyColumns() const;
/// Returns column names that need to be read for FINAL to work.
Names getColumnsRequiredForFinal() const { return getColumnsRequiredForSortingKey(); }
};
using StorageMetadataPtr = std::shared_ptr<StorageInMemoryMetadata>;

View File

@ -121,7 +121,7 @@ Pipes StorageMaterializedView::read(
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
if (query_info.order_optimizer)
query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage, metadata_snapshot);
Pipes pipes = storage->read(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams);

View File

@ -184,7 +184,9 @@ Pipes StorageMerge::read(
{
for (auto it = selected_tables.begin(); it != selected_tables.end(); ++it)
{
auto current_info = query_info.order_optimizer->getInputOrder(std::get<0>(*it));
auto storage_ptr = std::get<0>(*it);
auto storage_metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
auto current_info = query_info.order_optimizer->getInputOrder(storage_ptr, storage_metadata_snapshot);
if (it == selected_tables.begin())
input_sorting_info = current_info;
else if (!current_info || (input_sorting_info && *current_info != *input_sorting_info))

View File

@ -796,7 +796,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column
{
/// Primary and sorting key become independent after this ALTER so we have to
/// save the old ORDER BY expression as the new primary key.
auto old_sorting_key_ast = getSortingKey().definition_ast;
auto old_sorting_key_ast = old_metadata.getSortingKey().definition_ast;
primary_key = KeyDescription::getKeyFromAST(
old_sorting_key_ast, new_metadata.columns, global_context);
}

View File

@ -122,11 +122,11 @@ protected:
throw;
}
auto metadadata_snapshot = storage->getInMemoryMetadataPtr();
columns = metadadata_snapshot->getColumns();
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
columns = metadata_snapshot->getColumns();
cols_required_for_partition_key = metadadata_snapshot->getColumnsRequiredForPartitionKey();
cols_required_for_sorting_key = storage->getColumnsRequiredForSortingKey();
cols_required_for_partition_key = metadata_snapshot->getColumnsRequiredForPartitionKey();
cols_required_for_sorting_key = metadata_snapshot->getColumnsRequiredForSortingKey();
cols_required_for_primary_key = storage->getColumnsRequiredForPrimaryKey();
cols_required_for_sampling = storage->getColumnsRequiredForSampling();
column_sizes = storage->getColumnSizes();

View File

@ -375,7 +375,7 @@ protected:
if (columns_mask[src_index++])
{
assert(table != nullptr);
if ((expression_ptr = table->getSortingKey().expression_list_ast))
if ((expression_ptr = metadata_snapshot->getSortingKey().expression_list_ast))
res_columns[res_index++]->insert(queryToString(expression_ptr));
else
res_columns[res_index++]->insertDefault();