2020-10-22 06:18:10 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <unordered_map>
|
|
|
|
|
|
|
|
#include <Storages/KeyDescription.h>
|
|
|
|
#include <Storages/MergeTree/IMergeTreeDataPart.h>
|
|
|
|
#include <Storages/MergeTree/KeyCondition.h>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2020-11-06 03:50:58 +00:00
|
|
|
/// Pruning partitions in verbatim way using KeyCondition
|
2020-10-22 06:18:10 +00:00
|
|
|
class PartitionPruner
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
std::unordered_map<String, bool> partition_filter_map;
|
|
|
|
const KeyDescription & partition_key;
|
|
|
|
KeyCondition partition_condition;
|
|
|
|
bool useless;
|
|
|
|
using DataPart = IMergeTreeDataPart;
|
|
|
|
using DataPartPtr = std::shared_ptr<const DataPart>;
|
|
|
|
|
|
|
|
public:
|
2020-11-06 11:18:42 +00:00
|
|
|
PartitionPruner(const KeyDescription & partition_key_, const SelectQueryInfo & query_info, const Context & context, bool strict)
|
2020-10-22 06:18:10 +00:00
|
|
|
: partition_key(partition_key_)
|
|
|
|
, partition_condition(
|
2020-11-06 11:18:42 +00:00
|
|
|
query_info, context, partition_key.column_names, partition_key.expression, true /* single_point */, strict)
|
Fix optimize_trivial_count_query with partition predicate
Consider the following example:
CREATE TABLE test(p DateTime, k int) ENGINE MergeTree PARTITION BY toDate(p) ORDER BY k;
INSERT INTO test VALUES ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), ('2020-09-02 00:01:03', 3);
- SELECT count() FROM test WHERE toDate(p) >= '2020-09-01' AND p <= '2020-09-01 00:00:00'
In this case rpn will be (FUNCTION_IN_RANGE, FUNCTION_UNKNOWN (due to strict), FUNCTION_AND)
and for optimize_trivial_count_query we cannot use index if there is at least one FUNCTION_UNKNOWN.
since there is no post processing and return count() based on only the first predicate is wrong.
Before this patch FUNCTION_UNKNOWN was allowed for optimize_trivial_count_query, and the result was wrong.
And two examples above just to show the difference, the behaviour hadn't been changed with this patch:
- SELECT * FROM test WHERE toDate(p) >= '2020-09-01' AND p <= '2020-09-01 00:00:00'
In this case will be (FUNCTION_IN_RANGE, FUNCTION_IN_RANGE (due to non-strict), FUNCTION_AND)
so it will prune everything out and nothing will be read.
- SELECT * FROM test WHERE toDate(p) >= '2020-09-01' AND toUnixTimestamp(p)%5==0
In this case will be (FUNCTION_IN_RANGE, FUNCTION_UNKNOWN, FUNCTION_AND)
and all, two, partitions will be scanned, but due to filtering later none of rows will be matched.
2020-11-06 18:14:36 +00:00
|
|
|
, useless(strict ? partition_condition.anyUnknownOrAlwaysTrue() : partition_condition.alwaysUnknownOrTrue())
|
2020-10-22 06:18:10 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2020-11-06 03:50:58 +00:00
|
|
|
bool canBePruned(const DataPartPtr & part);
|
2020-10-22 06:18:10 +00:00
|
|
|
|
|
|
|
bool isUseless() const { return useless; }
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|