Merge pull request #20153 from zlobober/customize_prewhere_optimizer

Allow using MergeTreeWhereOptimizer not only with MergeTree-based storages
This commit is contained in:
alexey-milovidov 2021-02-07 01:10:11 +03:00 committed by GitHub
commit 66b868d865
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 19 deletions

View File

@ -69,7 +69,6 @@
#include <Processors/Transforms/FilterTransform.h>
#include <Processors/Transforms/JoiningTransform.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <Storages/IStorage.h>
#include <Storages/StorageView.h>
@ -390,13 +389,18 @@ InterpreterSelectQuery::InterpreterSelectQuery(
if (try_move_to_prewhere && storage && !row_policy_filter && query.where() && !query.prewhere() && !query.final())
{
/// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable
if (const auto * merge_tree = dynamic_cast<const MergeTreeData *>(storage.get()))
if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty())
{
/// Extract column compressed sizes.
std::unordered_map<std::string, UInt64> column_compressed_sizes;
for (const auto & [name, sizes] : column_sizes)
column_compressed_sizes[name] = sizes.data_compressed;
SelectQueryInfo current_info;
current_info.query = query_ptr;
current_info.syntax_analyzer_result = syntax_analyzer_result;
MergeTreeWhereOptimizer{current_info, *context, *merge_tree, metadata_snapshot, syntax_analyzer_result->requiredSourceColumns(), log};
MergeTreeWhereOptimizer{current_info, *context, std::move(column_compressed_sizes), metadata_snapshot, syntax_analyzer_result->requiredSourceColumns(), log};
}
}

View File

@ -30,7 +30,7 @@ static constexpr auto threshold = 2;
MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
SelectQueryInfo & query_info,
const Context & context,
const MergeTreeData & data,
std::unordered_map<std::string, UInt64> column_sizes_,
const StorageMetadataPtr & metadata_snapshot,
const Names & queried_columns_,
Poco::Logger * log_)
@ -39,28 +39,20 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
, queried_columns{queried_columns_}
, block_with_constants{KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context)}
, log{log_}
, column_sizes{std::move(column_sizes_)}
{
const auto & primary_key = metadata_snapshot->getPrimaryKey();
if (!primary_key.column_names.empty())
first_primary_key_column = primary_key.column_names[0];
calculateColumnSizes(data, queried_columns);
for (const auto & [_, size] : column_sizes)
total_size_of_queried_columns += size;
determineArrayJoinedNames(query_info.query->as<ASTSelectQuery &>());
optimize(query_info.query->as<ASTSelectQuery &>());
}
void MergeTreeWhereOptimizer::calculateColumnSizes(const MergeTreeData & data, const Names & column_names)
{
for (const auto & column_name : column_names)
{
UInt64 size = data.getColumnCompressedSize(column_name);
column_sizes[column_name] = size;
total_size_of_queried_columns += size;
}
}
static void collectIdentifiersNoSubqueries(const ASTPtr & ast, NameSet & set)
{
if (auto opt_name = tryGetIdentifierName(ast))

View File

@ -33,7 +33,7 @@ public:
MergeTreeWhereOptimizer(
SelectQueryInfo & query_info,
const Context & context,
const MergeTreeData & data,
std::unordered_map<std::string, UInt64> column_sizes_,
const StorageMetadataPtr & metadata_snapshot,
const Names & queried_columns_,
Poco::Logger * log_);
@ -75,8 +75,6 @@ private:
/// Transform Conditions list to WHERE or PREWHERE expression.
static ASTPtr reconstruct(const Conditions & conditions);
void calculateColumnSizes(const MergeTreeData & data, const Names & column_names);
void optimizeConjunction(ASTSelectQuery & select, ASTFunction * const fun) const;
void optimizeArbitrary(ASTSelectQuery & select) const;