Prune partitions when the query uses the partition expression verbatim.

This commit is contained in:
Amos Bird 2020-10-22 14:18:10 +08:00
parent ccf57300eb
commit 30bf5e6d26
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
6 changed files with 85 additions and 41 deletions

View File

@ -6,6 +6,7 @@
#include <Poco/File.h> #include <Poco/File.h>
#include <Common/FieldVisitors.h> #include <Common/FieldVisitors.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h> #include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
#include <Storages/MergeTree/MergeTreeSelectProcessor.h> #include <Storages/MergeTree/MergeTreeSelectProcessor.h>
#include <Storages/MergeTree/MergeTreeReverseSelectProcessor.h> #include <Storages/MergeTree/MergeTreeReverseSelectProcessor.h>
@ -226,13 +227,15 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
} }
std::optional<KeyCondition> minmax_idx_condition; std::optional<KeyCondition> minmax_idx_condition;
std::optional<PartitionPruner> partition_pruner;
if (data.minmax_idx_expr) if (data.minmax_idx_expr)
{ {
minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr); minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr);
partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context);
if (settings.force_index_by_date && minmax_idx_condition->alwaysUnknownOrTrue()) if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless()))
{ {
String msg = "MinMax index by columns ("; String msg = "Neither MinMax index by columns (";
bool first = true; bool first = true;
for (const String & col : data.minmax_idx_columns) for (const String & col : data.minmax_idx_columns)
{ {
@ -242,7 +245,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
msg += ", "; msg += ", ";
msg += col; msg += col;
} }
msg += ") is not used and setting 'force_index_by_date' is set"; msg += ") nor partition expr is used and setting 'force_index_by_date' is set";
throw Exception(msg, ErrorCodes::INDEX_NOT_USED); throw Exception(msg, ErrorCodes::INDEX_NOT_USED);
} }
@ -266,6 +269,12 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
part->minmax_idx.hyperrectangle, data.minmax_idx_column_types).can_be_true) part->minmax_idx.hyperrectangle, data.minmax_idx_column_types).can_be_true)
continue; continue;
if (partition_pruner)
{
if (partition_pruner->canBePruned(part))
continue;
}
if (max_block_numbers_to_read) if (max_block_numbers_to_read)
{ {
auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id); auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id);

View File

@ -0,0 +1,53 @@
#pragma once
#include <unordered_map>
#include <Storages/KeyDescription.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Storages/MergeTree/KeyCondition.h>
namespace DB
{
class PartitionPruner
{
private:
std::unordered_map<String, bool> partition_filter_map;
const KeyDescription & partition_key;
KeyCondition partition_condition;
bool useless;
using DataPart = IMergeTreeDataPart;
using DataPartPtr = std::shared_ptr<const DataPart>;
public:
PartitionPruner(const KeyDescription & partition_key_, const SelectQueryInfo & query_info, const Context & context)
: partition_key(partition_key_)
, partition_condition(
query_info, context, partition_key.column_names, partition_key.expression, true /* single_point */, true /* strict */)
, useless(partition_condition.alwaysUnknownOrTrue())
{
}
bool canBePruned(DataPartPtr part)
{
if (part->isEmpty())
return true;
const auto & partition_id = part->info.partition_id;
bool is_valid;
if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end())
is_valid = it->second;
else
{
const auto & partition_value = part->partition.value;
std::vector<FieldRef> index_value(partition_value.begin(), partition_value.end());
is_valid = partition_condition.mayBeTrueInRange(
partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types);
partition_filter_map.emplace(partition_id, is_valid);
}
return !is_valid;
}
bool isUseless() const { return useless; }
};
}

View File

@ -21,6 +21,7 @@
#include <Storages/PartitionCommands.h> #include <Storages/PartitionCommands.h>
#include <Storages/MergeTree/MergeTreeBlockOutputStream.h> #include <Storages/MergeTree/MergeTreeBlockOutputStream.h>
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h> #include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Disks/StoragePolicy.h> #include <Disks/StoragePolicy.h>
#include <Storages/MergeTree/MergeList.h> #include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/checkDataPart.h> #include <Storages/MergeTree/checkDataPart.h>
@ -192,31 +193,14 @@ std::optional<UInt64> StorageMergeTree::totalRows() const
std::optional<UInt64> StorageMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const std::optional<UInt64> StorageMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const
{ {
auto metadata_snapshot = getInMemoryMetadataPtr(); auto metadata_snapshot = getInMemoryMetadataPtr();
const auto & partition_key = metadata_snapshot->getPartitionKey(); PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context);
Names partition_key_columns = partition_key.column_names; if (partition_pruner.isUseless())
KeyCondition key_condition(
query_info, context, partition_key_columns, partition_key.expression, true /* single_point */, true /* strict */);
if (key_condition.alwaysUnknownOrTrue())
return {}; return {};
std::unordered_map<String, bool> partition_filter_map;
size_t res = 0; size_t res = 0;
auto lock = lockParts(); auto lock = lockParts();
for (const auto & part : getDataPartsStateRange(DataPartState::Committed)) for (const auto & part : getDataPartsStateRange(DataPartState::Committed))
{ {
if (part->isEmpty()) if (!partition_pruner.canBePruned(part))
continue;
const auto & partition_id = part->info.partition_id;
bool is_valid;
if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end())
is_valid = it->second;
else
{
const auto & partition_value = part->partition.value;
std::vector<FieldRef> index_value(partition_value.begin(), partition_value.end());
is_valid = key_condition.mayBeTrueInRange(partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types);
partition_filter_map.emplace(partition_id, is_valid);
}
if (is_valid)
res += part->rows_count; res += part->rows_count;
} }
return res; return res;

View File

@ -17,6 +17,7 @@
#include <Storages/StorageReplicatedMergeTree.h> #include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h> #include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Storages/MergeTree/MergeList.h> #include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h> #include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h>
#include <Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h> #include <Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h>
#include <Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h> #include <Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h>
@ -3662,28 +3663,13 @@ std::optional<UInt64> StorageReplicatedMergeTree::totalRows() const
std::optional<UInt64> StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const std::optional<UInt64> StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const
{ {
auto metadata_snapshot = getInMemoryMetadataPtr(); auto metadata_snapshot = getInMemoryMetadataPtr();
const auto & partition_key = metadata_snapshot->getPartitionKey(); PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context);
Names partition_key_columns = partition_key.column_names; if (partition_pruner.isUseless())
KeyCondition key_condition(
query_info, context, partition_key_columns, partition_key.expression, true /* single_point */, true /* strict */);
if (key_condition.alwaysUnknownOrTrue())
return {}; return {};
std::unordered_map<String, bool> partition_filter_map;
size_t res = 0; size_t res = 0;
foreachCommittedParts([&](auto & part) foreachCommittedParts([&](auto & part)
{ {
const auto & partition_id = part->info.partition_id; if (!partition_pruner.canBePruned(part))
bool is_valid;
if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end())
is_valid = it->second;
else
{
const auto & partition_value = part->partition.value;
std::vector<FieldRef> index_value(partition_value.begin(), partition_value.end());
is_valid = key_condition.mayBeTrueInRange(partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types);
partition_filter_map.emplace(partition_id, is_valid);
}
if (is_valid)
res += part->rows_count; res += part->rows_count;
}); });
return res; return res;

View File

@ -0,0 +1,11 @@
-- Test: a WHERE clause that repeats the table's partition expression verbatim.
DROP TABLE IF EXISTS xy;

CREATE TABLE xy (x int, y int)
ENGINE = MergeTree
PARTITION BY intHash64(x) % 100
ORDER BY y
SETTINGS index_granularity = 1;

INSERT INTO xy VALUES (1, 2), (2, 3);

-- Two rows were inserted; the limit below is set to 1 before the SELECT runs.
-- NOTE(review): presumably the SELECT stays within the limit only if the partition
-- holding x = 2 is not read - confirm against the expected-output file.
SET max_rows_to_read = 1;

SELECT * FROM xy WHERE intHash64(x) % 100 = intHash64(1) % 100;

DROP TABLE IF EXISTS xy;