This commit is contained in:
Nikita Vasilev 2021-05-02 22:16:40 +03:00
parent e565bc47a8
commit 564a484642
12 changed files with 487 additions and 139 deletions

View File

@ -69,7 +69,7 @@ add_subdirectory (Coordination)
set(dbms_headers)
set(dbms_sources Interpreters/ConstraintMatcherVisitor.cpp Interpreters/ConstraintMatcherVisitor.h Interpreters/WhereConstraintsOptimizer.cpp Interpreters/WhereConstraintsOptimizer.h Interpreters/TreeCNFConverter.cpp Interpreters/TreeCNFConverter.h Interpreters/ComparisonGraph.cpp Interpreters/ComparisonGraph.h Storages/MergeTree/SubstituteColumnOptimizer.cpp Storages/MergeTree/SubstituteColumnOptimizer.h Storages/MergeTree/MergeTreeIndexHypothesis.cpp Storages/MergeTree/MergeTreeIndexHypothesis.h Interpreters/AddIndexConstraintsOptimizer.cpp Interpreters/AddIndexConstraintsOptimizer.h)
set(dbms_sources Interpreters/ConstraintMatcherVisitor.cpp Interpreters/ConstraintMatcherVisitor.h Interpreters/WhereConstraintsOptimizer.cpp Interpreters/WhereConstraintsOptimizer.h Interpreters/TreeCNFConverter.cpp Interpreters/TreeCNFConverter.h Interpreters/ComparisonGraph.cpp Interpreters/ComparisonGraph.h Storages/MergeTree/SubstituteColumnOptimizer.cpp Storages/MergeTree/SubstituteColumnOptimizer.h Storages/MergeTree/MergeTreeIndexHypothesis.cpp Storages/MergeTree/MergeTreeIndexHypothesis.h Interpreters/AddIndexConstraintsOptimizer.cpp Interpreters/AddIndexConstraintsOptimizer.h Storages/MergeTree/MergeTreeIndexMergedCondition.cpp Storages/MergeTree/MergeTreeIndexMergedCondition.h)
add_headers_and_sources(clickhouse_common_io Common)
add_headers_and_sources(clickhouse_common_io Common/HashTable)

View File

@ -202,6 +202,54 @@ ComparisonGraph::CompareResult ComparisonGraph::compare(const ASTPtr & left, con
return CompareResult::UNKNOWN;
}
/// Returns true when the relation `left <expected> right` can still hold,
/// given the relation between `left` and `right` that compare() reports.
/// An UNKNOWN expectation or an UNKNOWN compare() answer never rules anything out.
bool ComparisonGraph::isPossibleCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const
{
    const auto result = compare(left, right);

    if (expected == CompareResult::UNKNOWN || result == CompareResult::UNKNOWN)
        return true;

    if (expected == result)
        return true;

    /// {expected, known} pairs whose value sets intersect although the relations differ.
    /// LESS_OR_EQUAL vs GREATER_OR_EQUAL intersect in the "equal" case, so they are possible too.
    static const std::set<std::pair<CompareResult, CompareResult>> possible_pairs = {
        {CompareResult::EQUAL, CompareResult::LESS_OR_EQUAL},
        {CompareResult::EQUAL, CompareResult::GREATER_OR_EQUAL},
        {CompareResult::LESS_OR_EQUAL, CompareResult::LESS},
        {CompareResult::LESS_OR_EQUAL, CompareResult::EQUAL},
        {CompareResult::LESS_OR_EQUAL, CompareResult::GREATER_OR_EQUAL},
        {CompareResult::GREATER_OR_EQUAL, CompareResult::GREATER},
        {CompareResult::GREATER_OR_EQUAL, CompareResult::EQUAL},
        {CompareResult::GREATER_OR_EQUAL, CompareResult::LESS_OR_EQUAL},
        {CompareResult::LESS, CompareResult::LESS_OR_EQUAL},
        {CompareResult::GREATER, CompareResult::GREATER_OR_EQUAL},
    };

    return possible_pairs.contains({expected, result});
}
/// Returns true when the relation reported by compare() guarantees `left <expected> right`.
/// Anything involving UNKNOWN is never guaranteed.
bool ComparisonGraph::isAlwaysCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const
{
    const auto known = compare(left, right);

    const bool anything_unknown = expected == CompareResult::UNKNOWN || known == CompareResult::UNKNOWN;
    if (anything_unknown)
        return false;

    if (known == expected)
        return true;

    /// A non-strict expectation is also implied by the strict relation of the same direction and by equality.
    if (expected == CompareResult::LESS_OR_EQUAL)
        return known == CompareResult::LESS || known == CompareResult::EQUAL;

    if (expected == CompareResult::GREATER_OR_EQUAL)
        return known == CompareResult::GREATER || known == CompareResult::EQUAL;

    return false;
}
std::vector<ASTPtr> ComparisonGraph::getEqual(const ASTPtr & ast) const
{
const auto res = getComponentId(ast);

View File

@ -18,8 +18,6 @@ class ComparisonGraph
public:
ComparisonGraph(const std::vector<ASTPtr> & atomic_formulas);
/// Works for string and num.
/// For other -- only eq.
enum class CompareResult
{
LESS,
@ -32,6 +30,12 @@ public:
CompareResult compare(const ASTPtr & left, const ASTPtr & right) const;
/// It's possible that left <expected> right
bool isPossibleCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const;
/// It's always true that left <expected> right
bool isAlwaysCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const;
std::vector<ASTPtr> getEqual(const ASTPtr & ast) const;
std::optional<ASTPtr> getEqualConst(const ASTPtr & ast) const;

View File

@ -81,7 +81,7 @@ public:
}
template <typename F>
CNFQuery & iterateGroups(F func)
const CNFQuery & iterateGroups(F func) const
{
for (const auto & group : statements)
func(group);

View File

@ -128,23 +128,7 @@ bool checkIfGroupAlwaysTrueGraph(const CNFQuery::OrGroup & group, const Comparis
if (func && func->arguments->children.size() == 2)
{
const auto expected = getExpectedCompare(atom);
const auto result = graph.compare(func->arguments->children[0], func->arguments->children[1]);
Poco::Logger::get("GRAPH REASON").information("neg: " + std::to_string(atom.negative));
Poco::Logger::get("GRAPH REASON").information(atom.ast->dumpTree());
Poco::Logger::get("GRAPH REASON").information(std::to_string(static_cast<int>(expected)) + " " + std::to_string(static_cast<int>(result)));
if (expected == ComparisonGraph::CompareResult::UNKNOWN || result == ComparisonGraph::CompareResult::UNKNOWN)
return false;
if (expected == result)
return true;
if (result == ComparisonGraph::CompareResult::EQUAL &&
(expected == ComparisonGraph::CompareResult::LESS_OR_EQUAL || expected == ComparisonGraph::CompareResult::GREATER_OR_EQUAL))
return true;
if (result == ComparisonGraph::CompareResult::LESS && expected == ComparisonGraph::CompareResult::LESS_OR_EQUAL)
return true;
if (result == ComparisonGraph::CompareResult::GREATER && expected == ComparisonGraph::CompareResult::GREATER_OR_EQUAL)
return true;
return graph.isAlwaysCompare(expected, func->arguments->children[0], func->arguments->children[1]);
}
}
return false;
@ -177,25 +161,7 @@ bool checkIfAtomAlwaysFalseGraph(const CNFQuery::AtomicFormula & atom, const Com
{
/// TODO: special support for !=
const auto expected = getExpectedCompare(atom);
const auto result = graph.compare(func->arguments->children[0], func->arguments->children[1]);
Poco::Logger::get("GRAPH REASON F").information("neg: " + std::to_string(atom.negative));
Poco::Logger::get("GRAPH REASON F").information(atom.ast->dumpTree());
Poco::Logger::get("GRAPH REASON F").information(std::to_string(static_cast<int>(expected)) + " " + std::to_string(static_cast<int>(result)));
if (expected == ComparisonGraph::CompareResult::UNKNOWN || result == ComparisonGraph::CompareResult::UNKNOWN)
return false;
if (expected == result)
return false;
else if (result == ComparisonGraph::CompareResult::EQUAL &&
(expected == ComparisonGraph::CompareResult::LESS_OR_EQUAL || expected == ComparisonGraph::CompareResult::GREATER_OR_EQUAL))
return false;
else if (result == ComparisonGraph::CompareResult::LESS && expected == ComparisonGraph::CompareResult::LESS_OR_EQUAL)
return false;
else if (result == ComparisonGraph::CompareResult::GREATER && expected == ComparisonGraph::CompareResult::GREATER_OR_EQUAL)
return false;
else
return true;
return !graph.isPossibleCompare(expected, func->arguments->children[0], func->arguments->children[1]);
}
return false;

View File

@ -611,14 +611,52 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
{
}
};
/// A set of mergeable skipping indices evaluated together through one merged condition,
/// plus counters of how many parts/granules were examined and dropped.
/// The counters are atomics because they are updated with fetch_add while parts are filtered.
struct MergedDataSkippingIndexAndCondition
{
    std::vector<MergeTreeIndexPtr> indices;
    MergeTreeIndexMergedConditionPtr condition;
    std::atomic<size_t> total_granules{0};
    std::atomic<size_t> granules_dropped{0};
    std::atomic<size_t> total_parts{0};
    std::atomic<size_t> parts_dropped{0};

    /// explicit: a condition pointer must not silently convert into this aggregate.
    explicit MergedDataSkippingIndexAndCondition(MergeTreeIndexMergedConditionPtr condition_)
        : condition(condition_)
    {
    }

    /// Registers the index both in the local list and in the merged condition,
    /// so that their order stays identical.
    void addIndex(const MergeTreeIndexPtr & index)
    {
        indices.push_back(index);
        condition->addIndex(indices.back());
    }
};
std::list<DataSkippingIndexAndCondition> useful_indices;
std::unordered_map<size_t, std::shared_ptr<MergedDataSkippingIndexAndCondition>> merged_indices;
for (const auto & index : metadata_snapshot->getSecondaryIndices())
{
auto index_helper = MergeTreeIndexFactory::instance().get(index);
auto condition = index_helper->createIndexCondition(query_info, context);
if (!condition->alwaysUnknownOrTrue())
useful_indices.emplace_back(index_helper, condition);
if (index_helper->isMergeable())
{
if (!merged_indices.contains(index_helper->getGranularity()))
{
merged_indices.emplace(
index_helper->getGranularity(),
std::make_shared<MergedDataSkippingIndexAndCondition>(
std::make_shared<MergeTreeIndexMergedCondition>(query_info, context, index_helper->getGranularity())));
merged_indices.at(index_helper->getGranularity())->condition->addConstraints(metadata_snapshot->getConstraints());
}
merged_indices.at(index_helper->getGranularity())->addIndex(index_helper);
}
else
{
auto condition = index_helper->createIndexCondition(query_info, context);
if (!condition->alwaysUnknownOrTrue())
useful_indices.emplace_back(index_helper, condition);
}
}
if (settings.force_data_skipping_indices.changed)
@ -718,6 +756,29 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
index_and_condition.parts_dropped.fetch_add(1, std::memory_order_relaxed);
}
for (auto & [granularity, indices_and_condition] : merged_indices)
{
if (ranges.ranges.empty())
break;
indices_and_condition->total_parts.fetch_add(1, std::memory_order_relaxed);
size_t total_granules = 0;
size_t granules_dropped = 0;
ranges.ranges = filterMarksUsingMergedIndex(
indices_and_condition->indices, indices_and_condition->condition,
part, ranges.ranges,
settings, reader_settings,
total_granules, granules_dropped,
log);
indices_and_condition->total_granules.fetch_add(total_granules, std::memory_order_relaxed);
indices_and_condition->granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed);
if (ranges.ranges.empty())
indices_and_condition->parts_dropped.fetch_add(1, std::memory_order_relaxed);
}
if (!ranges.ranges.empty())
{
if (limits.max_rows || leaf_limits.max_rows)
@ -811,6 +872,23 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
.num_granules_after = index_and_condition.total_granules - index_and_condition.granules_dropped});
}
for (const auto & [granularity, index_and_condition] : merged_indices)
{
const auto & index_name = "Merged";
LOG_DEBUG(log, "Index {} has dropped {}/{} granules.",
backQuote(index_name),
index_and_condition->granules_dropped, index_and_condition->total_granules);
std::string description = "MERGED GRANULARITY " + std::to_string(granularity);
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
.type = ReadFromMergeTree::IndexType::Skip,
.name = index_name,
.description = std::move(description),
.num_parts_after = index_and_condition->total_parts - index_and_condition->parts_dropped,
.num_granules_after = index_and_condition->total_granules - index_and_condition->granules_dropped});
}
LOG_DEBUG(log, "Selected {}/{} parts by partition key, {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges",
parts.size(), total_parts, parts_with_ranges.size(),
sum_marks_pk.load(std::memory_order_relaxed),
@ -1865,6 +1943,96 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
return res;
}
/// Filters the mark ranges of one part using several indices evaluated together.
///
/// For every index granule touched by `ranges`, one granule per index is read and the whole set
/// is passed to `condition`; data marks of granules the condition rejects are left out of the result.
/// `total_granules` and `granules_dropped` are incremented (not reset) by the number of index granules
/// examined and rejected. If the `.idx` file of any index is missing in the part, or `indices` is empty,
/// `ranges` is returned unchanged.
MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
    MergeTreeIndices indices,
    MergeTreeIndexMergedConditionPtr condition,
    MergeTreeData::DataPartPtr part,
    const MarkRanges & ranges,
    const Settings & settings,
    const MergeTreeReaderSettings & reader_settings,
    size_t & total_granules,
    size_t & granules_dropped,
    Poco::Logger * log)
{
    /// Without indices there is nothing to filter with (and indices.front() below would be invalid).
    if (indices.empty())
        return ranges;

    for (const auto & index_helper : indices)
    {
        if (!part->volume->getDisk()->exists(part->getFullRelativePath() + index_helper->getFileName() + ".idx"))
        {
            LOG_DEBUG(log, "File for index {} does not exist. Skipping it.", backQuote(index_helper->index.name));
            return ranges;
        }
    }

    /// The granularity of the first index is used for all of them.
    auto index_granularity = indices.front()->index.granularity;

    const size_t min_marks_for_seek = roundRowsOrBytesToMarks(
        settings.merge_tree_min_rows_for_seek,
        settings.merge_tree_min_bytes_for_seek,
        part->index_granularity_info.fixed_index_granularity,
        part->index_granularity_info.index_granularity_bytes);

    size_t marks_count = part->getMarksCount();
    size_t final_mark = part->index_granularity.hasFinalMark();
    size_t index_marks_count = (marks_count - final_mark + index_granularity - 1) / index_granularity;

    std::vector<MergeTreeIndexReader> readers;
    for (const auto & index_helper : indices)
    {
        readers.emplace_back(
            index_helper,
            part,
            index_marks_count,
            ranges,
            reader_settings);
    }

    MarkRanges res;

    /// Some granules can cover two or more ranges,
    /// this variable is stored to avoid reading the same granule twice.
    MergeTreeIndexGranules granules(indices.size(), nullptr);
    bool granules_filled = false;
    size_t last_index_mark = 0;

    for (const auto & range : ranges)
    {
        MarkRange index_range(
            range.begin / index_granularity,
            (range.end + index_granularity - 1) / index_granularity);

        /// Seek unless the readers already stand right after the granule this range starts with.
        if (last_index_mark != index_range.begin || !granules_filled)
            for (auto & reader : readers)
                reader.seek(index_range.begin);

        total_granules += index_range.end - index_range.begin;

        for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark)
        {
            /// The first granule of the range is reused when it is the last one read for the previous range.
            if (index_mark != index_range.begin || !granules_filled || last_index_mark != index_range.begin)
            {
                for (size_t i = 0; i < readers.size(); ++i)
                    granules[i] = readers[i].read();

                /// Remember that `granules` now holds real data; without this the reuse path is never taken.
                granules_filled = true;
            }

            MarkRange data_range(
                std::max(range.begin, index_mark * index_granularity),
                std::min(range.end, (index_mark + 1) * index_granularity));

            if (!condition->mayBeTrueOnGranule(granules))
            {
                ++granules_dropped;
                continue;
            }

            /// NOTE(review): both operands are unsigned and data_range.begin is normally >= res.back().end,
            /// so this difference looks like it wraps around for any non-zero gap, which would mean only
            /// exactly adjacent ranges get merged. The operands may be swapped; confirm before changing.
            if (res.empty() || res.back().end - data_range.begin > min_marks_for_seek)
                res.push_back(data_range);
            else
                res.back().end = data_range.end;
        }

        last_index_mark = index_range.end - 1;
    }

    return res;
}
void MergeTreeDataSelectExecutor::selectPartsToRead(
MergeTreeData::DataPartsVector & parts,
const std::unordered_set<String> & part_values,

View File

@ -6,6 +6,7 @@
#include <Storages/MergeTree/RangesInDataPart.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Processors/QueryPlan/ReadFromMergeTree.h>
#include <Storages/MergeTree/MergeTreeIndexMergedCondition.h>
namespace DB
@ -125,6 +126,17 @@ private:
size_t & granules_dropped,
Poco::Logger * log);
/// Filters the mark ranges of a part using several indices evaluated together by `condition`.
/// `total_granules` and `granules_dropped` are incremented by the number of index granules
/// examined and rejected.
static MarkRanges filterMarksUsingMergedIndex(
    MergeTreeIndices indices,
    MergeTreeIndexMergedConditionPtr condition,
    MergeTreeData::DataPartPtr part,
    const MarkRanges & ranges,
    const Settings & settings,
    const MergeTreeReaderSettings & reader_settings,
    size_t & total_granules,
    size_t & granules_dropped,
    Poco::Logger * log);
struct PartFilterCounters
{
size_t num_initial_selected_parts = 0;

View File

@ -70,66 +70,6 @@ void MergeTreeIndexAggregatorHypothesis::update(const Block & block, size_t * po
*pos += rows_read;
}
/// Remembers the index and column names and captures the query filter:
/// WHERE and PREWHERE are cloned and, when both are present, joined with `and`.
/// When the query has neither, expression_ast is left unset.
MergeTreeIndexConditionHypothesis::MergeTreeIndexConditionHypothesis(
    const String & index_name_,
    const String & column_name_,
    const SelectQueryInfo & query_,
    ContextPtr)
    : index_name(index_name_)
    , column_name(column_name_)
{
    const auto & select = query_.query->as<ASTSelectQuery &>();

    const bool has_where = static_cast<bool>(select.where());
    const bool has_prewhere = static_cast<bool>(select.prewhere());

    if (!has_where && !has_prewhere)
        return;

    if (!has_prewhere)
    {
        expression_ast = select.where()->clone();
        return;
    }

    if (!has_where)
    {
        expression_ast = select.prewhere()->clone();
        return;
    }

    expression_ast = makeASTFunction(
        "and",
        select.where()->clone(),
        select.prewhere()->clone());
}
std::pair<bool, bool> MergeTreeIndexConditionHypothesis::mayBeTrue(const ASTPtr & ast, const bool value) const
{
if (ast->getColumnName() == column_name)
return {value, !value};
auto * func = ast->as<ASTFunction>();
if (!func)
return {true, true};
auto & args = func->arguments->children;
if (func->name == "not")
{
const auto res = mayBeTrue(args[0], value);
return {res.second, res.first};
}
/*else if (func->name == "or")
{
}
else if (func->name == "and")
{
}*/
else
{
return {true, true};
}
}
/// Decides whether the query filter may be true for rows covered by `idx_granule`.
/// An empty granule never excludes anything. Throws LOGICAL_ERROR when the granule
/// is not a MergeTreeIndexGranuleHypothesis.
bool MergeTreeIndexConditionHypothesis::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
{
    if (idx_granule->empty())
        return true;

    auto granule = std::dynamic_pointer_cast<MergeTreeIndexGranuleHypothesis>(idx_granule);
    if (!granule)
        throw Exception(
            "Hypothesis index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR);

    /// First element of the pair is "may be true".
    return mayBeTrue(expression_ast, granule->met).first;
}
MergeTreeIndexGranulePtr MergeTreeIndexHypothesis::createIndexGranule() const
{
return std::make_shared<MergeTreeIndexGranuleHypothesis>(index.name);
@ -141,9 +81,9 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexHypothesis::createIndexAggregator() co
}
MergeTreeIndexConditionPtr MergeTreeIndexHypothesis::createIndexCondition(
const SelectQueryInfo & query, ContextPtr context) const
const SelectQueryInfo &, ContextPtr) const
{
return std::make_shared<MergeTreeIndexConditionHypothesis>(index.name, index.sample_block.getNames().front(), query, context);
return nullptr;
}
bool MergeTreeIndexHypothesis::mayBenefitFromIndexForIn(const ASTPtr &) const
@ -156,8 +96,10 @@ MergeTreeIndexPtr hypothesisIndexCreator(const IndexDescription & index)
return std::make_shared<MergeTreeIndexHypothesis>(index);
}
void hypothesisIndexValidator(const IndexDescription &, bool /*attach*/)
/// Rejects hypothesis index definitions whose expression list does not contain exactly one expression.
void hypothesisIndexValidator(const IndexDescription & index, bool /*attach*/)
{
    const auto expressions_count = index.expression_list_ast->children.size();
    if (expressions_count == 1)
        return;

    throw Exception("Hypothesis index needs exactly one expression", ErrorCodes::LOGICAL_ERROR);
}

View File

@ -2,12 +2,7 @@
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Interpreters/SetVariants.h>
#include <memory>
#include <set>
namespace DB
{
@ -30,7 +25,7 @@ struct MergeTreeIndexGranuleHypothesis : public IMergeTreeIndexGranule
~MergeTreeIndexGranuleHypothesis() override = default;
String index_name;
const String & index_name;
bool is_empty = true;
bool met = true;
};
@ -50,39 +45,13 @@ struct MergeTreeIndexAggregatorHypothesis : IMergeTreeIndexAggregator
void update(const Block & block, size_t * pos, size_t limit) override;
private:
String index_name;
const String & index_name;
String column_name;
bool met = true;
bool is_empty = true;
};
/// Index condition for a single hypothesis index: checks the query filter
/// against the boolean stored in a hypothesis granule.
class MergeTreeIndexConditionHypothesis : public IMergeTreeIndexCondition
{
public:
/// Stores the names and captures the filter (WHERE / PREWHERE) of `query`.
MergeTreeIndexConditionHypothesis(
const String & index_name_,
const String & column_name_,
const SelectQueryInfo & query,
ContextPtr context);
/// This condition always reports itself as potentially useful.
bool alwaysUnknownOrTrue() const override { return false; }
/// Returns false only when the filter cannot be true for the given granule.
bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override;
~MergeTreeIndexConditionHypothesis() override = default;
private:
/// Returns {may be true, may be false} for `ast`, given the hypothesis value of the column.
std::pair<bool, bool> mayBeTrue(const ASTPtr & ast, const bool value) const;
String index_name;
String column_name;
/// Query filter; stays unset when the query has neither WHERE nor PREWHERE.
ASTPtr expression_ast;
};
class MergeTreeIndexHypothesis : public IMergeTreeIndex
{
public:
@ -93,6 +62,8 @@ public:
~MergeTreeIndexHypothesis() override = default;
bool isMergeable() const override { return true; }
MergeTreeIndexGranulePtr createIndexGranule() const override;
MergeTreeIndexAggregatorPtr createIndexAggregator() const override;

View File

@ -0,0 +1,186 @@
#include <Storages/MergeTree/MergeTreeIndexMergedCondition.h>
#include <Storages/MergeTree/MergeTreeIndexHypothesis.h>
#include <Interpreters/TreeCNFConverter.h>
#include <Interpreters/ComparisonGraph.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTFunction.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int INCORRECT_QUERY;
}
/// Captures the query filter for indices of the given granularity.
/// WHERE and PREWHERE are cloned and, when both are present, joined with `and`;
/// the result is converted to CNF and stored in expression_cnf.
MergeTreeIndexMergedCondition::MergeTreeIndexMergedCondition(
    const SelectQueryInfo & query_,
    ContextPtr /*context_*/,
    const size_t granularity_)
    : granularity(granularity_)
{
    const auto & select = query_.query->as<ASTSelectQuery &>();

    if (select.where() && select.prewhere())
        expression_ast = makeASTFunction(
            "and",
            select.where()->clone(),
            select.prewhere()->clone());
    else if (select.where())
        expression_ast = select.where()->clone();
    else if (select.prewhere())
        expression_ast = select.prewhere()->clone();

    /// A query without WHERE and PREWHERE leaves expression_ast null; do not hand a null AST to the
    /// CNF converter. An empty CNF has no groups to iterate, so mayBeTrueOnGranule rejects nothing.
    /// NOTE(review): assumes CNFQuery can be constructed from an empty CNFQuery::AndGroup — confirm.
    if (expression_ast)
        expression_cnf = std::make_unique<CNFQuery>(TreeCNFConverter::toCNF(expression_ast));
    else
        expression_cnf = std::make_unique<CNFQuery>(CNFQuery::AndGroup{});
}
void MergeTreeIndexMergedCondition::addIndex(const MergeTreeIndexPtr & index)
{
if (!index->isMergeable() || index->getGranularity() != granularity)
throw Exception("Index can not be merged",
ErrorCodes::LOGICAL_ERROR);
const auto hypothesis_index = std::dynamic_pointer_cast<const MergeTreeIndexHypothesis>(index);
if (!hypothesis_index)
throw Exception(
"Only hypothesis index is supported here.", ErrorCodes::LOGICAL_ERROR);
static const std::set<std::string> relations = {
"equals", "less", "lessOrEquals", "greaterOrEquals", "greater"};
// TODO: move to index hypothesis
std::vector<ASTPtr> compare_hypotheses_data;
std::vector<CNFQuery::OrGroup> hypotheses_data;
const auto cnf = TreeCNFConverter::toCNF(hypothesis_index->index.expression_list_ast->children.front()).pullNotOutFunctions();
for (const auto & group : cnf.getStatements()) {
hypotheses_data.push_back(group);
if (group.size() == 1)
{
CNFQuery::AtomicFormula atom = *group.begin();
pushNotIn(atom);
if (atom.negative)
throw Exception("negative atom", ErrorCodes::LOGICAL_ERROR);
auto * func = atom.ast->as<ASTFunction>();
if (func && relations.count(func->name))
compare_hypotheses_data.push_back(atom.ast);
}
}
index_to_compare_atomic_hypotheses.push_back(compare_hypotheses_data);
index_to_atomic_hypotheses.push_back(hypotheses_data);
}
/// Adds the atomic constraints of the table: each atom goes through pushNotIn
/// and its AST is appended to atomic_constraints.
void MergeTreeIndexMergedCondition::addConstraints(const ConstraintsDescription & constraints_description)
{
    auto constraint_atoms = constraints_description.getAtomicConstraintData();

    for (auto it = constraint_atoms.begin(); it != constraint_atoms.end(); ++it)
    {
        auto & constraint_atom = *it;
        pushNotIn(constraint_atom);
        atomic_constraints.push_back(constraint_atom.ast);
    }
}
namespace
{

/// Translates a (possibly negated) comparison atom into the relation it requires
/// between its arguments. Atoms that are not one of the six comparison functions,
/// as well as `notEquals` and negated `equals`, map to UNKNOWN.
ComparisonGraph::CompareResult getExpectedCompare(const CNFQuery::AtomicFormula & atom)
{
    using Result = ComparisonGraph::CompareResult;

    struct Expectation
    {
        Result direct;  /// relation required by the function as written
        Result negated; /// relation required when the atom is negated
    };

    static const std::map<std::string, Expectation> expectations = {
        {"equals", {Result::EQUAL, Result::UNKNOWN}},
        {"less", {Result::LESS, Result::GREATER_OR_EQUAL}},
        {"lessOrEquals", {Result::LESS_OR_EQUAL, Result::GREATER}},
        {"notEquals", {Result::UNKNOWN, Result::EQUAL}},
        {"greaterOrEquals", {Result::GREATER_OR_EQUAL, Result::LESS}},
        {"greater", {Result::GREATER, Result::LESS_OR_EQUAL}},
    };

    const auto * function = atom.ast->as<ASTFunction>();
    if (!function)
        return Result::UNKNOWN;

    const auto found = expectations.find(function->name);
    if (found == expectations.end())
        return Result::UNKNOWN;

    return atom.negative ? found->second.negated : found->second.direct;
}

}
bool MergeTreeIndexMergedCondition::mayBeTrueOnGranule(const MergeTreeIndexGranules & granules) const
{
std::vector<bool> values;
for (const auto & index_granule : granules)
{
const auto granule = std::dynamic_pointer_cast<const MergeTreeIndexGranuleHypothesis>(index_granule);
if (!granule)
throw Exception("Only hypothesis index is supported here.", ErrorCodes::LOGICAL_ERROR);
values.push_back(granule->met);
}
const auto & graph = getGraph(values);
bool always_false = false;
expression_cnf->iterateGroups(
[&](const CNFQuery::OrGroup & or_group)
{
if (always_false)
return;
for (auto atom : or_group)
{
pushNotIn(atom);
Poco::Logger::get("KEK").information(atom.ast->dumpTree());
const auto * func = atom.ast->as<ASTFunction>();
if (func && func->arguments->children.size() == 2)
{
const auto expected = getExpectedCompare(atom);
if (graph.isPossibleCompare(
expected,
func->arguments->children[0],
func->arguments->children[1]))
{
return;
}
}
}
always_false = true;
});
return !always_false;
}
std::unique_ptr<ComparisonGraph> MergeTreeIndexMergedCondition::buildGraph(const std::vector<bool> & values) const
{
Poco::Logger::get("MergeTreeIndexMergedCondition").information("New graph");
std::vector<ASTPtr> active_atomic_formulas(atomic_constraints);
for (size_t i = 0; i < values.size(); ++i)
{
if (values[i])
active_atomic_formulas.insert(
std::end(active_atomic_formulas),
std::begin(index_to_compare_atomic_hypotheses[i]),
std::end(index_to_compare_atomic_hypotheses[i]));
}
return std::make_unique<ComparisonGraph>(active_atomic_formulas);
}
/// Returns the graph for the given combination of hypothesis values, building and caching it on first use.
/// NOTE(review): the cache is modified from a const method and no locking is visible here;
/// confirm that a shared condition is never queried from several threads at once.
const ComparisonGraph & MergeTreeIndexMergedCondition::getGraph(const std::vector<bool> & values) const
{
    auto cached = graphCache.find(values);
    if (cached == graphCache.end())
    {
        graphCache[values] = buildGraph(values);
        cached = graphCache.find(values);
    }
    return *cached->second;
}
}

View File

@ -0,0 +1,47 @@
#pragma once
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Interpreters/ComparisonGraph.h>
namespace DB
{
/*
* IndexCondition checking several indexes at the same time.
* Works only for hypotheses. (will also support minmax soon).
*/
/// Condition that is evaluated against the granules of several indices at once.
/// Indices are registered with addIndex(); mayBeTrueOnGranule() expects one granule per
/// registered index, in the same order.
class MergeTreeIndexMergedCondition
{
public:
/// Captures the filter (WHERE / PREWHERE) of `query`; `granularity` is the only
/// index granularity accepted by addIndex().
MergeTreeIndexMergedCondition(
const SelectQueryInfo & query,
ContextPtr context,
const size_t granularity);
/// Registers a hypothesis index; throws if the index cannot be merged into this condition.
void addIndex(const MergeTreeIndexPtr & index);
/// Adds the atomic constraints of the table as facts that always hold.
void addConstraints(const ConstraintsDescription & constraints_description);
bool alwaysUnknownOrTrue() const { return false; } // TODO: replace < -> <=, > -> >= and assume all hypotheses are true + check path exists
/// Returns false only when the filter is proved impossible for the given granules.
bool mayBeTrueOnGranule(const MergeTreeIndexGranules & granules) const;
//TODO: add constraints
private:
/// Builds a graph from the constraints and the hypotheses of indices whose value is true.
std::unique_ptr<ComparisonGraph> buildGraph(const std::vector<bool> & values) const;
/// Cached access to buildGraph(); fills graphCache on a miss.
const ComparisonGraph & getGraph(const std::vector<bool> & values) const;
const size_t granularity;
/// Query filter as an AST and the same filter in CNF.
ASTPtr expression_ast;
std::unique_ptr<CNFQuery> expression_cnf;
/// One graph per combination of hypothesis values; mutable because it is filled from const methods.
mutable std::unordered_map<std::vector<bool>, std::unique_ptr<ComparisonGraph>> graphCache;
/// Per added index: single-atom comparison hypotheses, and all OR-groups of the index expression.
std::vector<std::vector<ASTPtr>> index_to_compare_atomic_hypotheses;
std::vector<std::vector<CNFQuery::OrGroup>> index_to_atomic_hypotheses;
/// ASTs of the atomic table constraints added by addConstraints().
std::vector<ASTPtr> atomic_constraints;
};
using MergeTreeIndexMergedConditionPtr = std::shared_ptr<MergeTreeIndexMergedCondition>;
using MergeTreeIndexMergedConditions = std::vector<MergeTreeIndexMergedConditionPtr>;
}

View File

@ -62,6 +62,7 @@ public:
};
using MergeTreeIndexConditionPtr = std::shared_ptr<IMergeTreeIndexCondition>;
using MergeTreeIndexConditions = std::vector<MergeTreeIndexConditionPtr>;
struct IMergeTreeIndex
@ -75,6 +76,9 @@ struct IMergeTreeIndex
/// gets filename without extension
String getFileName() const { return INDEX_FILE_PREFIX + index.name; }
size_t getGranularity() const { return index.granularity; }
virtual bool isMergeable() const { return false; }
/// Checks whether the column is in data skipping index.
virtual bool mayBenefitFromIndexForIn(const ASTPtr & node) const = 0;