Prune partitions when the partition expression appears verbatim in the query.

This commit is contained in:
Amos Bird 2020-10-22 14:18:10 +08:00
parent ccf57300eb
commit 30bf5e6d26
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
6 changed files with 85 additions and 41 deletions

View File

@ -6,6 +6,7 @@
#include <Poco/File.h>
#include <Common/FieldVisitors.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
#include <Storages/MergeTree/MergeTreeSelectProcessor.h>
#include <Storages/MergeTree/MergeTreeReverseSelectProcessor.h>
@ -226,13 +227,15 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
}
std::optional<KeyCondition> minmax_idx_condition;
std::optional<PartitionPruner> partition_pruner;
if (data.minmax_idx_expr)
{
minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr);
partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context);
if (settings.force_index_by_date && minmax_idx_condition->alwaysUnknownOrTrue())
if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless()))
{
String msg = "MinMax index by columns (";
String msg = "Neither MinMax index by columns (";
bool first = true;
for (const String & col : data.minmax_idx_columns)
{
@ -242,7 +245,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
msg += ", ";
msg += col;
}
msg += ") is not used and setting 'force_index_by_date' is set";
msg += ") nor partition expr is used and setting 'force_index_by_date' is set";
throw Exception(msg, ErrorCodes::INDEX_NOT_USED);
}
@ -266,6 +269,12 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
part->minmax_idx.hyperrectangle, data.minmax_idx_column_types).can_be_true)
continue;
if (partition_pruner)
{
if (partition_pruner->canBePruned(part))
continue;
}
if (max_block_numbers_to_read)
{
auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id);

View File

@ -0,0 +1,53 @@
#pragma once
#include <unordered_map>
#include <Storages/KeyDescription.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Storages/MergeTree/KeyCondition.h>
namespace DB
{
class PartitionPruner
{
private:
std::unordered_map<String, bool> partition_filter_map;
const KeyDescription & partition_key;
KeyCondition partition_condition;
bool useless;
using DataPart = IMergeTreeDataPart;
using DataPartPtr = std::shared_ptr<const DataPart>;
public:
PartitionPruner(const KeyDescription & partition_key_, const SelectQueryInfo & query_info, const Context & context)
: partition_key(partition_key_)
, partition_condition(
query_info, context, partition_key.column_names, partition_key.expression, true /* single_point */, true /* strict */)
, useless(partition_condition.alwaysUnknownOrTrue())
{
}
bool canBePruned(DataPartPtr part)
{
if (part->isEmpty())
return true;
const auto & partition_id = part->info.partition_id;
bool is_valid;
if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end())
is_valid = it->second;
else
{
const auto & partition_value = part->partition.value;
std::vector<FieldRef> index_value(partition_value.begin(), partition_value.end());
is_valid = partition_condition.mayBeTrueInRange(
partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types);
partition_filter_map.emplace(partition_id, is_valid);
}
return !is_valid;
}
bool isUseless() const { return useless; }
};
}

View File

@ -21,6 +21,7 @@
#include <Storages/PartitionCommands.h>
#include <Storages/MergeTree/MergeTreeBlockOutputStream.h>
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Disks/StoragePolicy.h>
#include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/checkDataPart.h>
@ -192,31 +193,14 @@ std::optional<UInt64> StorageMergeTree::totalRows() const
std::optional<UInt64> StorageMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const
{
auto metadata_snapshot = getInMemoryMetadataPtr();
const auto & partition_key = metadata_snapshot->getPartitionKey();
Names partition_key_columns = partition_key.column_names;
KeyCondition key_condition(
query_info, context, partition_key_columns, partition_key.expression, true /* single_point */, true /* strict */);
if (key_condition.alwaysUnknownOrTrue())
PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context);
if (partition_pruner.isUseless())
return {};
std::unordered_map<String, bool> partition_filter_map;
size_t res = 0;
auto lock = lockParts();
for (const auto & part : getDataPartsStateRange(DataPartState::Committed))
{
if (part->isEmpty())
continue;
const auto & partition_id = part->info.partition_id;
bool is_valid;
if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end())
is_valid = it->second;
else
{
const auto & partition_value = part->partition.value;
std::vector<FieldRef> index_value(partition_value.begin(), partition_value.end());
is_valid = key_condition.mayBeTrueInRange(partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types);
partition_filter_map.emplace(partition_id, is_valid);
}
if (is_valid)
if (!partition_pruner.canBePruned(part))
res += part->rows_count;
}
return res;

View File

@ -17,6 +17,7 @@
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h>
#include <Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h>
#include <Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h>
@ -3662,28 +3663,13 @@ std::optional<UInt64> StorageReplicatedMergeTree::totalRows() const
std::optional<UInt64> StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const
{
auto metadata_snapshot = getInMemoryMetadataPtr();
const auto & partition_key = metadata_snapshot->getPartitionKey();
Names partition_key_columns = partition_key.column_names;
KeyCondition key_condition(
query_info, context, partition_key_columns, partition_key.expression, true /* single_point */, true /* strict */);
if (key_condition.alwaysUnknownOrTrue())
PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context);
if (partition_pruner.isUseless())
return {};
std::unordered_map<String, bool> partition_filter_map;
size_t res = 0;
foreachCommittedParts([&](auto & part)
{
const auto & partition_id = part->info.partition_id;
bool is_valid;
if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end())
is_valid = it->second;
else
{
const auto & partition_value = part->partition.value;
std::vector<FieldRef> index_value(partition_value.begin(), partition_value.end());
is_valid = key_condition.mayBeTrueInRange(partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types);
partition_filter_map.emplace(partition_id, is_valid);
}
if (is_valid)
if (!partition_pruner.canBePruned(part))
res += part->rows_count;
});
return res;

View File

@ -0,0 +1,11 @@
-- Regression test: a filter written with the same expression as the partition key
-- should be usable for skipping partitions.
drop table if exists xy;
-- The partition key is an expression over x; the sorting key (y) does not involve x.
create table xy(x int, y int) engine MergeTree partition by intHash64(x) % 100 order by y settings index_granularity = 1;
-- Two rows; presumably they hash into different partitions (not verifiable from this script alone).
insert into xy values (1, 2), (2, 3);
-- Limit reads to a single row for the query below.
SET max_rows_to_read = 1;
-- The left-hand side of the filter repeats the partition expression verbatim.
select * from xy where intHash64(x) % 100 = intHash64(1) % 100;
drop table if exists xy;