#pragma once

#include <unordered_map>

#include <Storages/KeyDescription.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Storages/MergeTree/KeyCondition.h>

namespace DB
{

/// Pruning partitions in verbatim way using KeyCondition
|
2020-10-22 06:18:10 +00:00
|
|
|
class PartitionPruner
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
std::unordered_map<String, bool> partition_filter_map;
|
2021-05-21 16:14:01 +00:00
|
|
|
|
|
|
|
/// partition_key is adjusted here (with substitution from modulo to moduloLegacy).
|
|
|
|
KeyDescription partition_key;
|
|
|
|
|
2020-10-22 06:18:10 +00:00
|
|
|
KeyCondition partition_condition;
|
|
|
|
bool useless;
|
|
|
|
using DataPart = IMergeTreeDataPart;
|
|
|
|
using DataPartPtr = std::shared_ptr<const DataPart>;
|
|
|
|
|
|
|
|
public:
|
2021-05-21 16:14:01 +00:00
|
|
|
PartitionPruner(const StorageMetadataPtr & metadata, const SelectQueryInfo & query_info, ContextPtr context, bool strict)
|
|
|
|
: partition_key(MergeTreePartition::adjustPartitionKey(metadata, context))
|
2022-07-28 09:40:09 +00:00
|
|
|
, partition_condition(query_info, context, partition_key.column_names, partition_key.expression, true /* single_point */, strict)
|
Fix optimize_trivial_count_query with partition predicate
Consider the following example:
CREATE TABLE test(p DateTime, k int) ENGINE MergeTree PARTITION BY toDate(p) ORDER BY k;
INSERT INTO test VALUES ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), ('2020-09-02 00:01:03', 3);
- SELECT count() FROM test WHERE toDate(p) >= '2020-09-01' AND p <= '2020-09-01 00:00:00'
In this case rpn will be (FUNCTION_IN_RANGE, FUNCTION_UNKNOWN (due to strict), FUNCTION_AND)
and for optimize_trivial_count_query we cannot use index if there is at least one FUNCTION_UNKNOWN.
since there is no post processing and return count() based on only the first predicate is wrong.
Before this patch FUNCTION_UNKNOWN was allowed for optimize_trivial_count_query, and the result was wrong.
And two examples above just to show the difference, the behaviour hadn't been changed with this patch:
- SELECT * FROM test WHERE toDate(p) >= '2020-09-01' AND p <= '2020-09-01 00:00:00'
In this case will be (FUNCTION_IN_RANGE, FUNCTION_IN_RANGE (due to non-strict), FUNCTION_AND)
so it will prune everything out and nothing will be read.
- SELECT * FROM test WHERE toDate(p) >= '2020-09-01' AND toUnixTimestamp(p)%5==0
In this case will be (FUNCTION_IN_RANGE, FUNCTION_UNKNOWN, FUNCTION_AND)
and all, two, partitions will be scanned, but due to filtering later none of rows will be matched.
2020-11-06 18:14:36 +00:00
|
|
|
, useless(strict ? partition_condition.anyUnknownOrAlwaysTrue() : partition_condition.alwaysUnknownOrTrue())
|
2020-10-22 06:18:10 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2021-02-10 14:12:49 +00:00
|
|
|
bool canBePruned(const DataPart & part);
|
2020-10-22 06:18:10 +00:00
|
|
|
|
|
|
|
bool isUseless() const { return useless; }
|
2021-04-08 08:19:04 +00:00
|
|
|
|
2021-04-16 09:42:23 +00:00
|
|
|
const KeyCondition & getKeyCondition() const { return partition_condition; }
|
2020-10-22 06:18:10 +00:00
|
|
|
};

}