From 564a48464252d249d6278aab52b00c3ab8c3f4bc Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 2 May 2021 22:16:40 +0300 Subject: [PATCH] fix --- src/CMakeLists.txt | 2 +- src/Interpreters/ComparisonGraph.cpp | 48 +++++ src/Interpreters/ComparisonGraph.h | 8 +- src/Interpreters/TreeCNFConverter.h | 2 +- .../WhereConstraintsOptimizer.cpp | 38 +--- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 174 +++++++++++++++- .../MergeTree/MergeTreeDataSelectExecutor.h | 12 ++ .../MergeTree/MergeTreeIndexHypothesis.cpp | 68 +------ .../MergeTree/MergeTreeIndexHypothesis.h | 37 +--- .../MergeTreeIndexMergedCondition.cpp | 186 ++++++++++++++++++ .../MergeTree/MergeTreeIndexMergedCondition.h | 47 +++++ src/Storages/MergeTree/MergeTreeIndices.h | 4 + 12 files changed, 487 insertions(+), 139 deletions(-) create mode 100644 src/Storages/MergeTree/MergeTreeIndexMergedCondition.cpp create mode 100644 src/Storages/MergeTree/MergeTreeIndexMergedCondition.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 16cdd989964..f6f4d5b6199 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -69,7 +69,7 @@ add_subdirectory (Coordination) set(dbms_headers) -set(dbms_sources Interpreters/ConstraintMatcherVisitor.cpp Interpreters/ConstraintMatcherVisitor.h Interpreters/WhereConstraintsOptimizer.cpp Interpreters/WhereConstraintsOptimizer.h Interpreters/TreeCNFConverter.cpp Interpreters/TreeCNFConverter.h Interpreters/ComparisonGraph.cpp Interpreters/ComparisonGraph.h Storages/MergeTree/SubstituteColumnOptimizer.cpp Storages/MergeTree/SubstituteColumnOptimizer.h Storages/MergeTree/MergeTreeIndexHypothesis.cpp Storages/MergeTree/MergeTreeIndexHypothesis.h Interpreters/AddIndexConstraintsOptimizer.cpp Interpreters/AddIndexConstraintsOptimizer.h) +set(dbms_sources Interpreters/ConstraintMatcherVisitor.cpp Interpreters/ConstraintMatcherVisitor.h Interpreters/WhereConstraintsOptimizer.cpp Interpreters/WhereConstraintsOptimizer.h Interpreters/TreeCNFConverter.cpp Interpreters/TreeCNFConverter.h Interpreters/ComparisonGraph.cpp Interpreters/ComparisonGraph.h Storages/MergeTree/SubstituteColumnOptimizer.cpp Storages/MergeTree/SubstituteColumnOptimizer.h Storages/MergeTree/MergeTreeIndexHypothesis.cpp Storages/MergeTree/MergeTreeIndexHypothesis.h Interpreters/AddIndexConstraintsOptimizer.cpp Interpreters/AddIndexConstraintsOptimizer.h Storages/MergeTree/MergeTreeIndexMergedCondition.cpp Storages/MergeTree/MergeTreeIndexMergedCondition.h) add_headers_and_sources(clickhouse_common_io Common) add_headers_and_sources(clickhouse_common_io Common/HashTable) diff --git a/src/Interpreters/ComparisonGraph.cpp b/src/Interpreters/ComparisonGraph.cpp index 57d1c398d05..8a4d9f0e0be 100644 --- a/src/Interpreters/ComparisonGraph.cpp +++ b/src/Interpreters/ComparisonGraph.cpp @@ -202,6 +202,54 @@ ComparisonGraph::CompareResult ComparisonGraph::compare(const ASTPtr & left, con return CompareResult::UNKNOWN; } +bool ComparisonGraph::isPossibleCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const +{ + const auto result = compare(left, right); + + if (expected == CompareResult::UNKNOWN || result == CompareResult::UNKNOWN) + { + Poco::Logger::get("isPossibleCompare").information("unknonw"); + return true; + } + if (expected == result) + return true; + + static const std::set> possible_pairs = { + {CompareResult::EQUAL, CompareResult::LESS_OR_EQUAL}, + {CompareResult::EQUAL, CompareResult::GREATER_OR_EQUAL}, + {CompareResult::LESS_OR_EQUAL, CompareResult::LESS}, + {CompareResult::LESS_OR_EQUAL, CompareResult::EQUAL}, + {CompareResult::GREATER_OR_EQUAL, CompareResult::GREATER}, + {CompareResult::GREATER_OR_EQUAL, CompareResult::EQUAL}, + {CompareResult::LESS, CompareResult::LESS}, + {CompareResult::LESS, CompareResult::LESS_OR_EQUAL}, + {CompareResult::GREATER, CompareResult::GREATER}, + {CompareResult::GREATER, CompareResult::GREATER_OR_EQUAL}, + }; + + return possible_pairs.contains({expected, result}); +} + +bool ComparisonGraph::isAlwaysCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const +{ + const auto result = compare(left, right); + + if (expected == CompareResult::UNKNOWN || result == CompareResult::UNKNOWN) + return false; + if (expected == result) + return true; + + static const std::set> possible_pairs = { + {CompareResult::LESS_OR_EQUAL, CompareResult::LESS}, + {CompareResult::LESS_OR_EQUAL, CompareResult::EQUAL}, + {CompareResult::GREATER_OR_EQUAL, CompareResult::GREATER}, + {CompareResult::GREATER_OR_EQUAL, CompareResult::EQUAL}, + }; + + return possible_pairs.contains({expected, result}); +} + + std::vector ComparisonGraph::getEqual(const ASTPtr & ast) const { const auto res = getComponentId(ast); diff --git a/src/Interpreters/ComparisonGraph.h b/src/Interpreters/ComparisonGraph.h index 6eb396d2879..9fee991c399 100644 --- a/src/Interpreters/ComparisonGraph.h +++ b/src/Interpreters/ComparisonGraph.h @@ -18,8 +18,6 @@ class ComparisonGraph public: ComparisonGraph(const std::vector & atomic_formulas); - /// Works for string and num. - /// For other -- only eq. enum class CompareResult { LESS, @@ -32,6 +30,12 @@ public: CompareResult compare(const ASTPtr & left, const ASTPtr & right) const; + /// It's possible that left right + bool isPossibleCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const; + + /// It's always true that left right + bool isAlwaysCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const; + std::vector getEqual(const ASTPtr & ast) const; std::optional getEqualConst(const ASTPtr & ast) const; diff --git a/src/Interpreters/TreeCNFConverter.h b/src/Interpreters/TreeCNFConverter.h index 1398f0314b6..af5659af678 100644 --- a/src/Interpreters/TreeCNFConverter.h +++ b/src/Interpreters/TreeCNFConverter.h @@ -81,7 +81,7 @@ public: } template - CNFQuery & iterateGroups(F func) + const CNFQuery & iterateGroups(F func) const { for (const auto & group : statements) func(group); diff --git a/src/Interpreters/WhereConstraintsOptimizer.cpp b/src/Interpreters/WhereConstraintsOptimizer.cpp index 75a2380986c..d3034ae4316 100644 --- a/src/Interpreters/WhereConstraintsOptimizer.cpp +++ b/src/Interpreters/WhereConstraintsOptimizer.cpp @@ -128,23 +128,7 @@ bool checkIfGroupAlwaysTrueGraph(const CNFQuery::OrGroup & group, const Comparis if (func && func->arguments->children.size() == 2) { const auto expected = getExpectedCompare(atom); - const auto result = graph.compare(func->arguments->children[0], func->arguments->children[1]); - Poco::Logger::get("GRAPH REASON").information("neg: " + std::to_string(atom.negative)); - Poco::Logger::get("GRAPH REASON").information(atom.ast->dumpTree()); - Poco::Logger::get("GRAPH REASON").information(std::to_string(static_cast(expected)) + " " + std::to_string(static_cast(result))); - - if (expected == ComparisonGraph::CompareResult::UNKNOWN || result == ComparisonGraph::CompareResult::UNKNOWN) - return false; - - if (expected == result) - return true; - if (result == ComparisonGraph::CompareResult::EQUAL && - (expected == ComparisonGraph::CompareResult::LESS_OR_EQUAL || expected == ComparisonGraph::CompareResult::GREATER_OR_EQUAL)) - return true; - if (result == ComparisonGraph::CompareResult::LESS && expected == ComparisonGraph::CompareResult::LESS_OR_EQUAL) - return true; - if (result == ComparisonGraph::CompareResult::GREATER && expected == ComparisonGraph::CompareResult::GREATER_OR_EQUAL) - return true; + return graph.isAlwaysCompare(expected, func->arguments->children[0], func->arguments->children[1]); } } return false; @@ -177,25 +161,7 @@ bool checkIfAtomAlwaysFalseGraph(const CNFQuery::AtomicFormula & atom, const Com { /// TODO: special support for != const auto expected = getExpectedCompare(atom); - const auto result = graph.compare(func->arguments->children[0], func->arguments->children[1]); - Poco::Logger::get("GRAPH REASON F").information("neg: " + std::to_string(atom.negative)); - Poco::Logger::get("GRAPH REASON F").information(atom.ast->dumpTree()); - Poco::Logger::get("GRAPH REASON F").information(std::to_string(static_cast(expected)) + " " + std::to_string(static_cast(result))); - - if (expected == ComparisonGraph::CompareResult::UNKNOWN || result == ComparisonGraph::CompareResult::UNKNOWN) - return false; - - if (expected == result) - return false; - else if (result == ComparisonGraph::CompareResult::EQUAL && - (expected == ComparisonGraph::CompareResult::LESS_OR_EQUAL || expected == ComparisonGraph::CompareResult::GREATER_OR_EQUAL)) - return false; - else if (result == ComparisonGraph::CompareResult::LESS && expected == ComparisonGraph::CompareResult::LESS_OR_EQUAL) - return false; - else if (result == ComparisonGraph::CompareResult::GREATER && expected == ComparisonGraph::CompareResult::GREATER_OR_EQUAL) - return false; - else - return true; + return !graph.isPossibleCompare(expected, func->arguments->children[0], func->arguments->children[1]); } return false; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 8245364d87a..ae692439038 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -611,14 +611,52 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( { } }; + + struct MergedDataSkippingIndexAndCondition + { + std::vector indices; + MergeTreeIndexMergedConditionPtr condition; + std::atomic total_granules{0}; + std::atomic granules_dropped{0}; + std::atomic total_parts{0}; + std::atomic parts_dropped{0}; + + MergedDataSkippingIndexAndCondition(MergeTreeIndexMergedConditionPtr condition_) + : condition(condition_) + { + } + + void addIndex(const MergeTreeIndexPtr & index) + { + indices.push_back(index); + condition->addIndex(indices.back()); + } + }; + std::list useful_indices; + std::unordered_map> merged_indices; for (const auto & index : metadata_snapshot->getSecondaryIndices()) { auto index_helper = MergeTreeIndexFactory::instance().get(index); - auto condition = index_helper->createIndexCondition(query_info, context); - if (!condition->alwaysUnknownOrTrue()) - useful_indices.emplace_back(index_helper, condition); + if (index_helper->isMergeable()) + { + if (!merged_indices.contains(index_helper->getGranularity())) + { + merged_indices.emplace( + index_helper->getGranularity(), + std::make_shared( + std::make_shared(query_info, context, index_helper->getGranularity()))); + merged_indices.at(index_helper->getGranularity())->condition->addConstraints(metadata_snapshot->getConstraints()); + } + merged_indices.at(index_helper->getGranularity())->addIndex(index_helper); + } + else + { + auto condition = index_helper->createIndexCondition(query_info, context); + if (!condition->alwaysUnknownOrTrue()) + useful_indices.emplace_back(index_helper, condition); + } } if (settings.force_data_skipping_indices.changed) @@ -718,6 +756,29 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( index_and_condition.parts_dropped.fetch_add(1, std::memory_order_relaxed); } + for (auto & [granularity, indices_and_condition] : merged_indices) + { + if (ranges.ranges.empty()) + break; + + indices_and_condition->total_parts.fetch_add(1, std::memory_order_relaxed); + + size_t total_granules = 0; + size_t granules_dropped = 0; + ranges.ranges = filterMarksUsingMergedIndex( + indices_and_condition->indices, indices_and_condition->condition, + part, ranges.ranges, + settings, reader_settings, + total_granules, granules_dropped, + log); + + indices_and_condition->total_granules.fetch_add(total_granules, std::memory_order_relaxed); + indices_and_condition->granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed); + + if (ranges.ranges.empty()) + indices_and_condition->parts_dropped.fetch_add(1, std::memory_order_relaxed); + } + if (!ranges.ranges.empty()) { if (limits.max_rows || leaf_limits.max_rows) @@ -811,6 +872,23 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( .num_granules_after = index_and_condition.total_granules - index_and_condition.granules_dropped}); } + for (const auto & [granularity, index_and_condition] : merged_indices) + { + const auto & index_name = "Merged"; + LOG_DEBUG(log, "Index {} has dropped {}/{} granules.", + backQuote(index_name), + index_and_condition->granules_dropped, index_and_condition->total_granules); + + std::string description = "MERGED GRANULARITY " + std::to_string(granularity); + + index_stats->emplace_back(ReadFromMergeTree::IndexStat{ + .type = ReadFromMergeTree::IndexType::Skip, + .name = index_name, + .description = std::move(description), + .num_parts_after = index_and_condition->total_parts - index_and_condition->parts_dropped, + .num_granules_after = index_and_condition->total_granules - index_and_condition->granules_dropped}); + } + LOG_DEBUG(log, "Selected {}/{} parts by partition key, {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges", parts.size(), total_parts, parts_with_ranges.size(), sum_marks_pk.load(std::memory_order_relaxed), @@ -1865,6 +1943,96 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( return res; } +MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex( + MergeTreeIndices indices, + MergeTreeIndexMergedConditionPtr condition, + MergeTreeData::DataPartPtr part, + const MarkRanges & ranges, + const Settings & settings, + const MergeTreeReaderSettings & reader_settings, + size_t & total_granules, + size_t & granules_dropped, + Poco::Logger * log) +{ + for (const auto & index_helper : indices) + { + if (!part->volume->getDisk()->exists(part->getFullRelativePath() + index_helper->getFileName() + ".idx")) + { + LOG_DEBUG(log, "File for index {} does not exist. Skipping it.", backQuote(index_helper->index.name)); + return ranges; + } + } + + auto index_granularity = indices.front()->index.granularity; + + const size_t min_marks_for_seek = roundRowsOrBytesToMarks( + settings.merge_tree_min_rows_for_seek, + settings.merge_tree_min_bytes_for_seek, + part->index_granularity_info.fixed_index_granularity, + part->index_granularity_info.index_granularity_bytes); + + size_t marks_count = part->getMarksCount(); + size_t final_mark = part->index_granularity.hasFinalMark(); + size_t index_marks_count = (marks_count - final_mark + index_granularity - 1) / index_granularity; + + std::vector readers; + for (const auto & index_helper : indices) + { + readers.emplace_back( + index_helper, + part, + index_marks_count, + ranges, + reader_settings); + } + + MarkRanges res; + + /// Some granules can cover two or more ranges, + /// this variable is stored to avoid reading the same granule twice. + MergeTreeIndexGranules granules(indices.size(), nullptr); + bool granules_filled = false; + size_t last_index_mark = 0; + for (const auto & range : ranges) + { + MarkRange index_range( + range.begin / index_granularity, + (range.end + index_granularity - 1) / index_granularity); + + if (last_index_mark != index_range.begin || !granules_filled) + for (auto & reader : readers) + reader.seek(index_range.begin); + + total_granules += index_range.end - index_range.begin; + + for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark) + { + if (index_mark != index_range.begin || !granules_filled || last_index_mark != index_range.begin) + for (size_t i = 0; i < readers.size(); ++i) + granules[i] = readers[i].read(); + + MarkRange data_range( + std::max(range.begin, index_mark * index_granularity), + std::min(range.end, (index_mark + 1) * index_granularity)); + + if (!condition->mayBeTrueOnGranule(granules)) + { + ++granules_dropped; + continue; + } + + if (res.empty() || res.back().end - data_range.begin > min_marks_for_seek) + res.push_back(data_range); + else + res.back().end = data_range.end; + } + + last_index_mark = index_range.end - 1; + } + + return res; +} + void MergeTreeDataSelectExecutor::selectPartsToRead( MergeTreeData::DataPartsVector & parts, const std::unordered_set & part_values, diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index d7193fbfbfa..8ad885e4b29 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -125,6 +126,17 @@ private: size_t & granules_dropped, Poco::Logger * log); + static MarkRanges filterMarksUsingMergedIndex( + MergeTreeIndices index_helper, + MergeTreeIndexMergedConditionPtr condition, + MergeTreeData::DataPartPtr part, + const MarkRanges & ranges, + const Settings & settings, + const MergeTreeReaderSettings & reader_settings, + size_t & total_granules, + size_t & granules_dropped, + Poco::Logger * log); + struct PartFilterCounters { size_t num_initial_selected_parts = 0; diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp index a34af89f063..263d9057159 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp @@ -70,66 +70,6 @@ void MergeTreeIndexAggregatorHypothesis::update(const Block & block, size_t * po *pos += rows_read; } -MergeTreeIndexConditionHypothesis::MergeTreeIndexConditionHypothesis( - const String & index_name_, - const String & column_name_, - const SelectQueryInfo & query_, - ContextPtr) - : index_name(index_name_) - , column_name(column_name_) -{ - const auto & select = query_.query->as(); - - if (select.where() && select.prewhere()) - expression_ast = makeASTFunction( - "and", - select.where()->clone(), - select.prewhere()->clone()); - else if (select.where()) - expression_ast = select.where()->clone(); - else if (select.prewhere()) - expression_ast = select.prewhere()->clone(); -} - -std::pair MergeTreeIndexConditionHypothesis::mayBeTrue(const ASTPtr & ast, const bool value) const -{ - if (ast->getColumnName() == column_name) - return {value, !value}; - - auto * func = ast->as(); - if (!func) - return {true, true}; - auto & args = func->arguments->children; - if (func->name == "not") - { - const auto res = mayBeTrue(args[0], value); - return {res.second, res.first}; - } - /*else if (func->name == "or") - { - - } - else if (func->name == "and") - { - - }*/ - else - { - return {true, true}; - } -} - -bool MergeTreeIndexConditionHypothesis::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const -{ - if (idx_granule->empty()) - return true; - auto granule = std::dynamic_pointer_cast(idx_granule); - if (!granule) - throw Exception( - "Set index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); - return mayBeTrue(expression_ast, granule->met).first; -} - MergeTreeIndexGranulePtr MergeTreeIndexHypothesis::createIndexGranule() const { return std::make_shared(index.name); @@ -141,9 +81,9 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexHypothesis::createIndexAggregator() co } MergeTreeIndexConditionPtr MergeTreeIndexHypothesis::createIndexCondition( - const SelectQueryInfo & query, ContextPtr context) const + const SelectQueryInfo &, ContextPtr) const { - return std::make_shared(index.name, index.sample_block.getNames().front(), query, context); + return nullptr; } bool MergeTreeIndexHypothesis::mayBenefitFromIndexForIn(const ASTPtr &) const @@ -156,8 +96,10 @@ MergeTreeIndexPtr hypothesisIndexCreator(const IndexDescription & index) return std::make_shared(index); } -void hypothesisIndexValidator(const IndexDescription &, bool /*attach*/) +void hypothesisIndexValidator(const IndexDescription & index, bool /*attach*/) { + if (index.expression_list_ast->children.size() != 1) + throw Exception("Hypothesis index needs exactly one expression", ErrorCodes::LOGICAL_ERROR); } diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h index ae11b710aa0..1989cab6a8e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h @@ -2,12 +2,7 @@ #include #include - -#include - #include -#include - namespace DB { @@ -30,7 +25,7 @@ struct MergeTreeIndexGranuleHypothesis : public IMergeTreeIndexGranule ~MergeTreeIndexGranuleHypothesis() override = default; - String index_name; + const String & index_name; bool is_empty = true; bool met = true; }; @@ -50,39 +45,13 @@ struct MergeTreeIndexAggregatorHypothesis : IMergeTreeIndexAggregator void update(const Block & block, size_t * pos, size_t limit) override; private: - String index_name; + const String & index_name; String column_name; bool met = true; bool is_empty = true; }; - -class MergeTreeIndexConditionHypothesis : public IMergeTreeIndexCondition -{ -public: - MergeTreeIndexConditionHypothesis( - const String & index_name_, - const String & column_name_, - const SelectQueryInfo & query, - ContextPtr context); - - bool alwaysUnknownOrTrue() const override { return false; } - - bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; - - ~MergeTreeIndexConditionHypothesis() override = default; - -private: - std::pair mayBeTrue(const ASTPtr & ast, const bool value) const; - - String index_name; - - String column_name; - ASTPtr expression_ast; -}; - - class MergeTreeIndexHypothesis : public IMergeTreeIndex { public: @@ -93,6 +62,8 @@ public: ~MergeTreeIndexHypothesis() override = default; + bool isMergeable() const override { return true; } + MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; diff --git a/src/Storages/MergeTree/MergeTreeIndexMergedCondition.cpp b/src/Storages/MergeTree/MergeTreeIndexMergedCondition.cpp new file mode 100644 index 00000000000..fd4640000aa --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexMergedCondition.cpp @@ -0,0 +1,186 @@ +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_QUERY; +} + +MergeTreeIndexMergedCondition::MergeTreeIndexMergedCondition( + const SelectQueryInfo & query_, + ContextPtr /*context_*/, + const size_t granularity_) + : granularity(granularity_) +{ + const auto & select = query_.query->as(); + + if (select.where() && select.prewhere()) + expression_ast = makeASTFunction( + "and", + select.where()->clone(), + select.prewhere()->clone()); + else if (select.where()) + expression_ast = select.where()->clone(); + else if (select.prewhere()) + expression_ast = select.prewhere()->clone(); + + expression_cnf = std::make_unique(TreeCNFConverter::toCNF(expression_ast)); +} + +void MergeTreeIndexMergedCondition::addIndex(const MergeTreeIndexPtr & index) +{ + if (!index->isMergeable() || index->getGranularity() != granularity) + throw Exception("Index can not be merged", + ErrorCodes::LOGICAL_ERROR); + + const auto hypothesis_index = std::dynamic_pointer_cast(index); + if (!hypothesis_index) + throw Exception( + "Only hypothesis index is supported here.", ErrorCodes::LOGICAL_ERROR); + + static const std::set relations = { + "equals", "less", "lessOrEquals", "greaterOrEquals", "greater"}; + + // TODO: move to index hypothesis + std::vector compare_hypotheses_data; + std::vector hypotheses_data; + const auto cnf = TreeCNFConverter::toCNF(hypothesis_index->index.expression_list_ast->children.front()).pullNotOutFunctions(); + for (const auto & group : cnf.getStatements()) { + hypotheses_data.push_back(group); + if (group.size() == 1) + { + CNFQuery::AtomicFormula atom = *group.begin(); + pushNotIn(atom); + if (atom.negative) + throw Exception("negative atom", ErrorCodes::LOGICAL_ERROR); + + auto * func = atom.ast->as(); + if (func && relations.count(func->name)) + compare_hypotheses_data.push_back(atom.ast); + } + } + index_to_compare_atomic_hypotheses.push_back(compare_hypotheses_data); + index_to_atomic_hypotheses.push_back(hypotheses_data); +} + +void MergeTreeIndexMergedCondition::addConstraints(const ConstraintsDescription & constraints_description) +{ + auto atomic_constraints_data = constraints_description.getAtomicConstraintData(); + for (auto & atom : atomic_constraints_data) + { + pushNotIn(atom); + atomic_constraints.push_back(atom.ast); + } +} + +namespace +{ + +ComparisonGraph::CompareResult getExpectedCompare(const CNFQuery::AtomicFormula & atom) +{ + static const std::map inverse_relations = { + {"equals", "notEquals"}, + {"less", "greaterOrEquals"}, + {"lessOrEquals", "greater"}, + {"notEquals", "equals"}, + {"greaterOrEquals", "less"}, + {"greater", "lessOrEquals"}, + }; + + static const std::map relation_to_compare = { + {"equals", ComparisonGraph::CompareResult::EQUAL}, + {"less", ComparisonGraph::CompareResult::LESS}, + {"lessOrEquals", ComparisonGraph::CompareResult::LESS_OR_EQUAL}, + {"notEquals", ComparisonGraph::CompareResult::UNKNOWN}, + {"greaterOrEquals", ComparisonGraph::CompareResult::GREATER_OR_EQUAL}, + {"greater", ComparisonGraph::CompareResult::GREATER}, + }; + + + const auto * func = atom.ast->as(); + if (func && inverse_relations.count(func->name)) + { + std::string function_name = func->name; + if (atom.negative) + function_name = inverse_relations.at(func->name); + return relation_to_compare.at(function_name); + } + return ComparisonGraph::CompareResult::UNKNOWN; +} + +} + +bool MergeTreeIndexMergedCondition::mayBeTrueOnGranule(const MergeTreeIndexGranules & granules) const +{ + std::vector values; + for (const auto & index_granule : granules) + { + const auto granule = std::dynamic_pointer_cast(index_granule); + if (!granule) + throw Exception("Only hypothesis index is supported here.", ErrorCodes::LOGICAL_ERROR); + values.push_back(granule->met); + } + const auto & graph = getGraph(values); + + bool always_false = false; + expression_cnf->iterateGroups( + [&](const CNFQuery::OrGroup & or_group) + { + if (always_false) + return; + + for (auto atom : or_group) + { + pushNotIn(atom); + Poco::Logger::get("KEK").information(atom.ast->dumpTree()); + const auto * func = atom.ast->as(); + if (func && func->arguments->children.size() == 2) + { + const auto expected = getExpectedCompare(atom); + if (graph.isPossibleCompare( + expected, + func->arguments->children[0], + func->arguments->children[1])) + { + return; + } + } + } + always_false = true; + }); + return !always_false; +} + +std::unique_ptr MergeTreeIndexMergedCondition::buildGraph(const std::vector & values) const +{ + Poco::Logger::get("MergeTreeIndexMergedCondition").information("New graph"); + std::vector active_atomic_formulas(atomic_constraints); + for (size_t i = 0; i < values.size(); ++i) + { + if (values[i]) + active_atomic_formulas.insert( + std::end(active_atomic_formulas), + std::begin(index_to_compare_atomic_hypotheses[i]), + std::end(index_to_compare_atomic_hypotheses[i])); + } + return std::make_unique(active_atomic_formulas); +} + +const ComparisonGraph & MergeTreeIndexMergedCondition::getGraph(const std::vector & values) const +{ + if (!graphCache.contains(values)) + graphCache[values] = buildGraph(values); + return *graphCache.at(values); +} + +} diff --git a/src/Storages/MergeTree/MergeTreeIndexMergedCondition.h b/src/Storages/MergeTree/MergeTreeIndexMergedCondition.h new file mode 100644 index 00000000000..3048a9d6bdc --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexMergedCondition.h @@ -0,0 +1,47 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +/* + * IndexCondition checking several indexes at the same time. + * Works only for hypotheses. (will also support minmax soon). + */ +class MergeTreeIndexMergedCondition +{ +public: + MergeTreeIndexMergedCondition( + const SelectQueryInfo & query, + ContextPtr context, + const size_t granularity); + + void addIndex(const MergeTreeIndexPtr & index); + void addConstraints(const ConstraintsDescription & constraints_description); + + bool alwaysUnknownOrTrue() const { return false; } // TODO: replace < -> <=, > -> >= and assume all hypotheses are true + check path exists + bool mayBeTrueOnGranule(const MergeTreeIndexGranules & granules) const; + + //TODO: add constraints +private: + std::unique_ptr buildGraph(const std::vector & values) const; + const ComparisonGraph & getGraph(const std::vector & values) const; + + const size_t granularity; + ASTPtr expression_ast; + std::unique_ptr expression_cnf; + + mutable std::unordered_map, std::unique_ptr> graphCache; + + std::vector> index_to_compare_atomic_hypotheses; + std::vector> index_to_atomic_hypotheses; + std::vector atomic_constraints; +}; + +using MergeTreeIndexMergedConditionPtr = std::shared_ptr; +using MergeTreeIndexMergedConditions = std::vector; + +} diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 7e5cb4156cb..319be92960d 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -62,6 +62,7 @@ public: }; using MergeTreeIndexConditionPtr = std::shared_ptr; +using MergeTreeIndexConditions = std::vector; struct IMergeTreeIndex @@ -75,6 +76,9 @@ struct IMergeTreeIndex /// gets filename without extension String getFileName() const { return INDEX_FILE_PREFIX + index.name; } + size_t getGranularity() const { return index.granularity; } + + virtual bool isMergeable() const { return false; } /// Checks whether the column is in data skipping index. virtual bool mayBenefitFromIndexForIn(const ASTPtr & node) const = 0;