mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Prune partition in verbatim way.
This commit is contained in:
parent
ccf57300eb
commit
30bf5e6d26
@ -6,6 +6,7 @@
|
||||
#include <Poco/File.h>
|
||||
|
||||
#include <Common/FieldVisitors.h>
|
||||
#include <Storages/MergeTree/PartitionPruner.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
|
||||
#include <Storages/MergeTree/MergeTreeSelectProcessor.h>
|
||||
#include <Storages/MergeTree/MergeTreeReverseSelectProcessor.h>
|
||||
@ -226,13 +227,15 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
|
||||
}
|
||||
|
||||
std::optional<KeyCondition> minmax_idx_condition;
|
||||
std::optional<PartitionPruner> partition_pruner;
|
||||
if (data.minmax_idx_expr)
|
||||
{
|
||||
minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr);
|
||||
partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context);
|
||||
|
||||
if (settings.force_index_by_date && minmax_idx_condition->alwaysUnknownOrTrue())
|
||||
if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless()))
|
||||
{
|
||||
String msg = "MinMax index by columns (";
|
||||
String msg = "Neither MinMax index by columns (";
|
||||
bool first = true;
|
||||
for (const String & col : data.minmax_idx_columns)
|
||||
{
|
||||
@ -242,7 +245,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
|
||||
msg += ", ";
|
||||
msg += col;
|
||||
}
|
||||
msg += ") is not used and setting 'force_index_by_date' is set";
|
||||
msg += ") nor partition expr is used and setting 'force_index_by_date' is set";
|
||||
|
||||
throw Exception(msg, ErrorCodes::INDEX_NOT_USED);
|
||||
}
|
||||
@ -266,6 +269,12 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
|
||||
part->minmax_idx.hyperrectangle, data.minmax_idx_column_types).can_be_true)
|
||||
continue;
|
||||
|
||||
if (partition_pruner)
|
||||
{
|
||||
if (partition_pruner->canBePruned(part))
|
||||
continue;
|
||||
}
|
||||
|
||||
if (max_block_numbers_to_read)
|
||||
{
|
||||
auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id);
|
||||
|
53
src/Storages/MergeTree/PartitionPruner.h
Normal file
53
src/Storages/MergeTree/PartitionPruner.h
Normal file
@ -0,0 +1,53 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include <Storages/KeyDescription.h>
|
||||
#include <Storages/MergeTree/IMergeTreeDataPart.h>
|
||||
#include <Storages/MergeTree/KeyCondition.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class PartitionPruner
|
||||
{
|
||||
private:
|
||||
std::unordered_map<String, bool> partition_filter_map;
|
||||
const KeyDescription & partition_key;
|
||||
KeyCondition partition_condition;
|
||||
bool useless;
|
||||
using DataPart = IMergeTreeDataPart;
|
||||
using DataPartPtr = std::shared_ptr<const DataPart>;
|
||||
|
||||
public:
|
||||
PartitionPruner(const KeyDescription & partition_key_, const SelectQueryInfo & query_info, const Context & context)
|
||||
: partition_key(partition_key_)
|
||||
, partition_condition(
|
||||
query_info, context, partition_key.column_names, partition_key.expression, true /* single_point */, true /* strict */)
|
||||
, useless(partition_condition.alwaysUnknownOrTrue())
|
||||
{
|
||||
}
|
||||
|
||||
bool canBePruned(DataPartPtr part)
|
||||
{
|
||||
if (part->isEmpty())
|
||||
return true;
|
||||
const auto & partition_id = part->info.partition_id;
|
||||
bool is_valid;
|
||||
if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end())
|
||||
is_valid = it->second;
|
||||
else
|
||||
{
|
||||
const auto & partition_value = part->partition.value;
|
||||
std::vector<FieldRef> index_value(partition_value.begin(), partition_value.end());
|
||||
is_valid = partition_condition.mayBeTrueInRange(
|
||||
partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types);
|
||||
partition_filter_map.emplace(partition_id, is_valid);
|
||||
}
|
||||
return !is_valid;
|
||||
}
|
||||
|
||||
bool isUseless() const { return useless; }
|
||||
};
|
||||
|
||||
}
|
@ -21,6 +21,7 @@
|
||||
#include <Storages/PartitionCommands.h>
|
||||
#include <Storages/MergeTree/MergeTreeBlockOutputStream.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
|
||||
#include <Storages/MergeTree/PartitionPruner.h>
|
||||
#include <Disks/StoragePolicy.h>
|
||||
#include <Storages/MergeTree/MergeList.h>
|
||||
#include <Storages/MergeTree/checkDataPart.h>
|
||||
@ -192,31 +193,14 @@ std::optional<UInt64> StorageMergeTree::totalRows() const
|
||||
std::optional<UInt64> StorageMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const
|
||||
{
|
||||
auto metadata_snapshot = getInMemoryMetadataPtr();
|
||||
const auto & partition_key = metadata_snapshot->getPartitionKey();
|
||||
Names partition_key_columns = partition_key.column_names;
|
||||
KeyCondition key_condition(
|
||||
query_info, context, partition_key_columns, partition_key.expression, true /* single_point */, true /* strict */);
|
||||
if (key_condition.alwaysUnknownOrTrue())
|
||||
PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context);
|
||||
if (partition_pruner.isUseless())
|
||||
return {};
|
||||
std::unordered_map<String, bool> partition_filter_map;
|
||||
size_t res = 0;
|
||||
auto lock = lockParts();
|
||||
for (const auto & part : getDataPartsStateRange(DataPartState::Committed))
|
||||
{
|
||||
if (part->isEmpty())
|
||||
continue;
|
||||
const auto & partition_id = part->info.partition_id;
|
||||
bool is_valid;
|
||||
if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end())
|
||||
is_valid = it->second;
|
||||
else
|
||||
{
|
||||
const auto & partition_value = part->partition.value;
|
||||
std::vector<FieldRef> index_value(partition_value.begin(), partition_value.end());
|
||||
is_valid = key_condition.mayBeTrueInRange(partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types);
|
||||
partition_filter_map.emplace(partition_id, is_valid);
|
||||
}
|
||||
if (is_valid)
|
||||
if (!partition_pruner.canBePruned(part))
|
||||
res += part->rows_count;
|
||||
}
|
||||
return res;
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/MergeTree/IMergeTreeDataPart.h>
|
||||
#include <Storages/MergeTree/MergeList.h>
|
||||
#include <Storages/MergeTree/PartitionPruner.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h>
|
||||
@ -3662,28 +3663,13 @@ std::optional<UInt64> StorageReplicatedMergeTree::totalRows() const
|
||||
std::optional<UInt64> StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const
|
||||
{
|
||||
auto metadata_snapshot = getInMemoryMetadataPtr();
|
||||
const auto & partition_key = metadata_snapshot->getPartitionKey();
|
||||
Names partition_key_columns = partition_key.column_names;
|
||||
KeyCondition key_condition(
|
||||
query_info, context, partition_key_columns, partition_key.expression, true /* single_point */, true /* strict */);
|
||||
if (key_condition.alwaysUnknownOrTrue())
|
||||
PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context);
|
||||
if (partition_pruner.isUseless())
|
||||
return {};
|
||||
std::unordered_map<String, bool> partition_filter_map;
|
||||
size_t res = 0;
|
||||
foreachCommittedParts([&](auto & part)
|
||||
{
|
||||
const auto & partition_id = part->info.partition_id;
|
||||
bool is_valid;
|
||||
if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end())
|
||||
is_valid = it->second;
|
||||
else
|
||||
{
|
||||
const auto & partition_value = part->partition.value;
|
||||
std::vector<FieldRef> index_value(partition_value.begin(), partition_value.end());
|
||||
is_valid = key_condition.mayBeTrueInRange(partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types);
|
||||
partition_filter_map.emplace(partition_id, is_valid);
|
||||
}
|
||||
if (is_valid)
|
||||
if (!partition_pruner.canBePruned(part))
|
||||
res += part->rows_count;
|
||||
});
|
||||
return res;
|
||||
|
@ -0,0 +1 @@
|
||||
1 2
|
@ -0,0 +1,11 @@
|
||||
-- Regression test for pruning partitions by the partition expression written verbatim in WHERE.
-- Start from a clean state in case a previous run left the table behind.
drop table if exists xy;
|
||||
|
||||
-- The table is partitioned by an expression over x, not by x itself.
create table xy(x int, y int) engine MergeTree partition by intHash64(x) % 100 order by y settings index_granularity = 1;
|
||||
|
||||
-- Two rows; presumably they land in different partitions (depends on intHash64 values - TODO confirm).
insert into xy values (1, 2), (2, 3);
|
||||
|
||||
-- Allow at most one row to be read, so the select below presumably fails unless the other partition is skipped.
SET max_rows_to_read = 1;
|
||||
|
||||
-- The WHERE clause repeats the partition expression verbatim and compares it with a constant.
select * from xy where intHash64(x) % 100 = intHash64(1) % 100;
|
||||
|
||||
-- Clean up.
drop table if exists xy;
|
Loading…
Reference in New Issue
Block a user