mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-29 21:20:49 +00:00
fix
This commit is contained in:
parent
e565bc47a8
commit
564a484642
@ -69,7 +69,7 @@ add_subdirectory (Coordination)
|
||||
|
||||
|
||||
set(dbms_headers)
|
||||
set(dbms_sources Interpreters/ConstraintMatcherVisitor.cpp Interpreters/ConstraintMatcherVisitor.h Interpreters/WhereConstraintsOptimizer.cpp Interpreters/WhereConstraintsOptimizer.h Interpreters/TreeCNFConverter.cpp Interpreters/TreeCNFConverter.h Interpreters/ComparisonGraph.cpp Interpreters/ComparisonGraph.h Storages/MergeTree/SubstituteColumnOptimizer.cpp Storages/MergeTree/SubstituteColumnOptimizer.h Storages/MergeTree/MergeTreeIndexHypothesis.cpp Storages/MergeTree/MergeTreeIndexHypothesis.h Interpreters/AddIndexConstraintsOptimizer.cpp Interpreters/AddIndexConstraintsOptimizer.h)
|
||||
set(dbms_sources Interpreters/ConstraintMatcherVisitor.cpp Interpreters/ConstraintMatcherVisitor.h Interpreters/WhereConstraintsOptimizer.cpp Interpreters/WhereConstraintsOptimizer.h Interpreters/TreeCNFConverter.cpp Interpreters/TreeCNFConverter.h Interpreters/ComparisonGraph.cpp Interpreters/ComparisonGraph.h Storages/MergeTree/SubstituteColumnOptimizer.cpp Storages/MergeTree/SubstituteColumnOptimizer.h Storages/MergeTree/MergeTreeIndexHypothesis.cpp Storages/MergeTree/MergeTreeIndexHypothesis.h Interpreters/AddIndexConstraintsOptimizer.cpp Interpreters/AddIndexConstraintsOptimizer.h Storages/MergeTree/MergeTreeIndexMergedCondition.cpp Storages/MergeTree/MergeTreeIndexMergedCondition.h)
|
||||
|
||||
add_headers_and_sources(clickhouse_common_io Common)
|
||||
add_headers_and_sources(clickhouse_common_io Common/HashTable)
|
||||
|
@ -202,6 +202,54 @@ ComparisonGraph::CompareResult ComparisonGraph::compare(const ASTPtr & left, con
|
||||
return CompareResult::UNKNOWN;
|
||||
}
|
||||
|
||||
bool ComparisonGraph::isPossibleCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const
|
||||
{
|
||||
const auto result = compare(left, right);
|
||||
|
||||
if (expected == CompareResult::UNKNOWN || result == CompareResult::UNKNOWN)
|
||||
{
|
||||
Poco::Logger::get("isPossibleCompare").information("unknonw");
|
||||
return true;
|
||||
}
|
||||
if (expected == result)
|
||||
return true;
|
||||
|
||||
static const std::set<std::pair<CompareResult, CompareResult>> possible_pairs = {
|
||||
{CompareResult::EQUAL, CompareResult::LESS_OR_EQUAL},
|
||||
{CompareResult::EQUAL, CompareResult::GREATER_OR_EQUAL},
|
||||
{CompareResult::LESS_OR_EQUAL, CompareResult::LESS},
|
||||
{CompareResult::LESS_OR_EQUAL, CompareResult::EQUAL},
|
||||
{CompareResult::GREATER_OR_EQUAL, CompareResult::GREATER},
|
||||
{CompareResult::GREATER_OR_EQUAL, CompareResult::EQUAL},
|
||||
{CompareResult::LESS, CompareResult::LESS},
|
||||
{CompareResult::LESS, CompareResult::LESS_OR_EQUAL},
|
||||
{CompareResult::GREATER, CompareResult::GREATER},
|
||||
{CompareResult::GREATER, CompareResult::GREATER_OR_EQUAL},
|
||||
};
|
||||
|
||||
return possible_pairs.contains({expected, result});
|
||||
}
|
||||
|
||||
bool ComparisonGraph::isAlwaysCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const
|
||||
{
|
||||
const auto result = compare(left, right);
|
||||
|
||||
if (expected == CompareResult::UNKNOWN || result == CompareResult::UNKNOWN)
|
||||
return false;
|
||||
if (expected == result)
|
||||
return true;
|
||||
|
||||
static const std::set<std::pair<CompareResult, CompareResult>> possible_pairs = {
|
||||
{CompareResult::LESS_OR_EQUAL, CompareResult::LESS},
|
||||
{CompareResult::LESS_OR_EQUAL, CompareResult::EQUAL},
|
||||
{CompareResult::GREATER_OR_EQUAL, CompareResult::GREATER},
|
||||
{CompareResult::GREATER_OR_EQUAL, CompareResult::EQUAL},
|
||||
};
|
||||
|
||||
return possible_pairs.contains({expected, result});
|
||||
}
|
||||
|
||||
|
||||
std::vector<ASTPtr> ComparisonGraph::getEqual(const ASTPtr & ast) const
|
||||
{
|
||||
const auto res = getComponentId(ast);
|
||||
|
@ -18,8 +18,6 @@ class ComparisonGraph
|
||||
public:
|
||||
ComparisonGraph(const std::vector<ASTPtr> & atomic_formulas);
|
||||
|
||||
/// Works for string and num.
|
||||
/// For other -- only eq.
|
||||
enum class CompareResult
|
||||
{
|
||||
LESS,
|
||||
@ -32,6 +30,12 @@ public:
|
||||
|
||||
CompareResult compare(const ASTPtr & left, const ASTPtr & right) const;
|
||||
|
||||
/// It's possible that left <expected> right
|
||||
bool isPossibleCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const;
|
||||
|
||||
/// It's always true that left <expected> right
|
||||
bool isAlwaysCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const;
|
||||
|
||||
std::vector<ASTPtr> getEqual(const ASTPtr & ast) const;
|
||||
std::optional<ASTPtr> getEqualConst(const ASTPtr & ast) const;
|
||||
|
||||
|
@ -81,7 +81,7 @@ public:
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
CNFQuery & iterateGroups(F func)
|
||||
const CNFQuery & iterateGroups(F func) const
|
||||
{
|
||||
for (const auto & group : statements)
|
||||
func(group);
|
||||
|
@ -128,23 +128,7 @@ bool checkIfGroupAlwaysTrueGraph(const CNFQuery::OrGroup & group, const Comparis
|
||||
if (func && func->arguments->children.size() == 2)
|
||||
{
|
||||
const auto expected = getExpectedCompare(atom);
|
||||
const auto result = graph.compare(func->arguments->children[0], func->arguments->children[1]);
|
||||
Poco::Logger::get("GRAPH REASON").information("neg: " + std::to_string(atom.negative));
|
||||
Poco::Logger::get("GRAPH REASON").information(atom.ast->dumpTree());
|
||||
Poco::Logger::get("GRAPH REASON").information(std::to_string(static_cast<int>(expected)) + " " + std::to_string(static_cast<int>(result)));
|
||||
|
||||
if (expected == ComparisonGraph::CompareResult::UNKNOWN || result == ComparisonGraph::CompareResult::UNKNOWN)
|
||||
return false;
|
||||
|
||||
if (expected == result)
|
||||
return true;
|
||||
if (result == ComparisonGraph::CompareResult::EQUAL &&
|
||||
(expected == ComparisonGraph::CompareResult::LESS_OR_EQUAL || expected == ComparisonGraph::CompareResult::GREATER_OR_EQUAL))
|
||||
return true;
|
||||
if (result == ComparisonGraph::CompareResult::LESS && expected == ComparisonGraph::CompareResult::LESS_OR_EQUAL)
|
||||
return true;
|
||||
if (result == ComparisonGraph::CompareResult::GREATER && expected == ComparisonGraph::CompareResult::GREATER_OR_EQUAL)
|
||||
return true;
|
||||
return graph.isAlwaysCompare(expected, func->arguments->children[0], func->arguments->children[1]);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
@ -177,25 +161,7 @@ bool checkIfAtomAlwaysFalseGraph(const CNFQuery::AtomicFormula & atom, const Com
|
||||
{
|
||||
/// TODO: special support for !=
|
||||
const auto expected = getExpectedCompare(atom);
|
||||
const auto result = graph.compare(func->arguments->children[0], func->arguments->children[1]);
|
||||
Poco::Logger::get("GRAPH REASON F").information("neg: " + std::to_string(atom.negative));
|
||||
Poco::Logger::get("GRAPH REASON F").information(atom.ast->dumpTree());
|
||||
Poco::Logger::get("GRAPH REASON F").information(std::to_string(static_cast<int>(expected)) + " " + std::to_string(static_cast<int>(result)));
|
||||
|
||||
if (expected == ComparisonGraph::CompareResult::UNKNOWN || result == ComparisonGraph::CompareResult::UNKNOWN)
|
||||
return false;
|
||||
|
||||
if (expected == result)
|
||||
return false;
|
||||
else if (result == ComparisonGraph::CompareResult::EQUAL &&
|
||||
(expected == ComparisonGraph::CompareResult::LESS_OR_EQUAL || expected == ComparisonGraph::CompareResult::GREATER_OR_EQUAL))
|
||||
return false;
|
||||
else if (result == ComparisonGraph::CompareResult::LESS && expected == ComparisonGraph::CompareResult::LESS_OR_EQUAL)
|
||||
return false;
|
||||
else if (result == ComparisonGraph::CompareResult::GREATER && expected == ComparisonGraph::CompareResult::GREATER_OR_EQUAL)
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
return !graph.isPossibleCompare(expected, func->arguments->children[0], func->arguments->children[1]);
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -611,14 +611,52 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
struct MergedDataSkippingIndexAndCondition
|
||||
{
|
||||
std::vector<MergeTreeIndexPtr> indices;
|
||||
MergeTreeIndexMergedConditionPtr condition;
|
||||
std::atomic<size_t> total_granules{0};
|
||||
std::atomic<size_t> granules_dropped{0};
|
||||
std::atomic<size_t> total_parts{0};
|
||||
std::atomic<size_t> parts_dropped{0};
|
||||
|
||||
MergedDataSkippingIndexAndCondition(MergeTreeIndexMergedConditionPtr condition_)
|
||||
: condition(condition_)
|
||||
{
|
||||
}
|
||||
|
||||
void addIndex(const MergeTreeIndexPtr & index)
|
||||
{
|
||||
indices.push_back(index);
|
||||
condition->addIndex(indices.back());
|
||||
}
|
||||
};
|
||||
|
||||
std::list<DataSkippingIndexAndCondition> useful_indices;
|
||||
std::unordered_map<size_t, std::shared_ptr<MergedDataSkippingIndexAndCondition>> merged_indices;
|
||||
|
||||
for (const auto & index : metadata_snapshot->getSecondaryIndices())
|
||||
{
|
||||
auto index_helper = MergeTreeIndexFactory::instance().get(index);
|
||||
auto condition = index_helper->createIndexCondition(query_info, context);
|
||||
if (!condition->alwaysUnknownOrTrue())
|
||||
useful_indices.emplace_back(index_helper, condition);
|
||||
if (index_helper->isMergeable())
|
||||
{
|
||||
if (!merged_indices.contains(index_helper->getGranularity()))
|
||||
{
|
||||
merged_indices.emplace(
|
||||
index_helper->getGranularity(),
|
||||
std::make_shared<MergedDataSkippingIndexAndCondition>(
|
||||
std::make_shared<MergeTreeIndexMergedCondition>(query_info, context, index_helper->getGranularity())));
|
||||
merged_indices.at(index_helper->getGranularity())->condition->addConstraints(metadata_snapshot->getConstraints());
|
||||
}
|
||||
merged_indices.at(index_helper->getGranularity())->addIndex(index_helper);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto condition = index_helper->createIndexCondition(query_info, context);
|
||||
if (!condition->alwaysUnknownOrTrue())
|
||||
useful_indices.emplace_back(index_helper, condition);
|
||||
}
|
||||
}
|
||||
|
||||
if (settings.force_data_skipping_indices.changed)
|
||||
@ -718,6 +756,29 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
||||
index_and_condition.parts_dropped.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
/// Filter the remaining mark ranges of this part with every merged index group.
for (auto & [merged_granularity, merged] : merged_indices)
{
    /// Nothing left to filter for this part.
    if (ranges.ranges.empty())
        break;

    merged->total_parts.fetch_add(1, std::memory_order_relaxed);

    size_t granules_seen = 0;
    size_t granules_skipped = 0;
    ranges.ranges = filterMarksUsingMergedIndex(
        merged->indices,
        merged->condition,
        part,
        ranges.ranges,
        settings,
        reader_settings,
        granules_seen,
        granules_skipped,
        log);

    merged->total_granules.fetch_add(granules_seen, std::memory_order_relaxed);
    merged->granules_dropped.fetch_add(granules_skipped, std::memory_order_relaxed);

    /// The merged index has eliminated the whole part.
    if (ranges.ranges.empty())
        merged->parts_dropped.fetch_add(1, std::memory_order_relaxed);
}
|
||||
|
||||
if (!ranges.ranges.empty())
|
||||
{
|
||||
if (limits.max_rows || leaf_limits.max_rows)
|
||||
@ -811,6 +872,23 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
||||
.num_granules_after = index_and_condition.total_granules - index_and_condition.granules_dropped});
|
||||
}
|
||||
|
||||
/// Report what every merged index group has dropped and record it in the index statistics.
for (const auto & [granularity, merged] : merged_indices)
{
    const auto & merged_name = "Merged";

    LOG_DEBUG(log, "Index {} has dropped {}/{} granules.",
        backQuote(merged_name),
        merged->granules_dropped, merged->total_granules);

    std::string stat_description = "MERGED GRANULARITY " + std::to_string(granularity);

    index_stats->emplace_back(ReadFromMergeTree::IndexStat{
        .type = ReadFromMergeTree::IndexType::Skip,
        .name = merged_name,
        .description = std::move(stat_description),
        .num_parts_after = merged->total_parts - merged->parts_dropped,
        .num_granules_after = merged->total_granules - merged->granules_dropped});
}
|
||||
|
||||
LOG_DEBUG(log, "Selected {}/{} parts by partition key, {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges",
|
||||
parts.size(), total_parts, parts_with_ranges.size(),
|
||||
sum_marks_pk.load(std::memory_order_relaxed),
|
||||
@ -1865,6 +1943,96 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Filters the mark ranges of a part using several skipping indices evaluated together.
///
/// @param indices           indices to read; all are read with the granularity of the first one.
/// @param condition         merged condition that decides per granule set whether data may match.
/// @param part              data part whose index files are read.
/// @param ranges            mark ranges to filter.
/// @param settings          provides merge_tree_min_rows_for_seek / merge_tree_min_bytes_for_seek.
/// @param reader_settings   passed to the index readers.
/// @param total_granules    incremented by the number of index granules examined.
/// @param granules_dropped  incremented by the number of index granules rejected by the condition.
/// @param log               logger used when an index file is missing.
/// @return the mark ranges that may still contain matching rows. `ranges` is returned unchanged
///         when there are no indices or when the file of any index is missing in the part.
MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
    MergeTreeIndices indices,
    MergeTreeIndexMergedConditionPtr condition,
    MergeTreeData::DataPartPtr part,
    const MarkRanges & ranges,
    const Settings & settings,
    const MergeTreeReaderSettings & reader_settings,
    size_t & total_granules,
    size_t & granules_dropped,
    Poco::Logger * log)
{
    /// Without indices there is nothing to filter with (and `indices.front()` below would be invalid).
    if (indices.empty())
        return ranges;

    for (const auto & index_helper : indices)
    {
        if (!part->volume->getDisk()->exists(part->getFullRelativePath() + index_helper->getFileName() + ".idx"))
        {
            LOG_DEBUG(log, "File for index {} does not exist. Skipping it.", backQuote(index_helper->index.name));
            return ranges;
        }
    }

    auto index_granularity = indices.front()->index.granularity;

    const size_t min_marks_for_seek = roundRowsOrBytesToMarks(
        settings.merge_tree_min_rows_for_seek,
        settings.merge_tree_min_bytes_for_seek,
        part->index_granularity_info.fixed_index_granularity,
        part->index_granularity_info.index_granularity_bytes);

    size_t marks_count = part->getMarksCount();
    size_t final_mark = part->index_granularity.hasFinalMark();
    size_t index_marks_count = (marks_count - final_mark + index_granularity - 1) / index_granularity;

    std::vector<MergeTreeIndexReader> readers;
    for (const auto & index_helper : indices)
    {
        readers.emplace_back(
            index_helper,
            part,
            index_marks_count,
            ranges,
            reader_settings);
    }

    MarkRanges res;

    /// Some granules can cover two or more ranges,
    /// this variable is stored to avoid reading the same granule twice.
    MergeTreeIndexGranules granules(indices.size(), nullptr);
    bool granules_filled = false;
    size_t last_index_mark = 0;
    for (const auto & range : ranges)
    {
        MarkRange index_range(
            range.begin / index_granularity,
            (range.end + index_granularity - 1) / index_granularity);

        /// Re-position the readers unless this range starts in the index granule read last.
        if (last_index_mark != index_range.begin || !granules_filled)
            for (auto & reader : readers)
                reader.seek(index_range.begin);

        total_granules += index_range.end - index_range.begin;

        for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark)
        {
            if (index_mark != index_range.begin || !granules_filled || last_index_mark != index_range.begin)
            {
                for (size_t i = 0; i < readers.size(); ++i)
                    granules[i] = readers[i].read();

                /// Mark the granules as loaded so that the next range can reuse them
                /// when it starts in the same index granule.
                granules_filled = true;
            }

            MarkRange data_range(
                std::max(range.begin, index_mark * index_granularity),
                std::min(range.end, (index_mark + 1) * index_granularity));

            if (!condition->mayBeTrueOnGranule(granules))
            {
                ++granules_dropped;
                continue;
            }

            /// Start a new range only when the gap to the previous one is large enough to be worth a seek;
            /// otherwise extend the previous range over the gap.
            /// NOTE(review): assumes `ranges` is sorted and non-overlapping, so the gap is not negative - confirm.
            if (res.empty() || data_range.begin - res.back().end > min_marks_for_seek)
                res.push_back(data_range);
            else
                res.back().end = data_range.end;
        }

        last_index_mark = index_range.end - 1;
    }

    return res;
}
|
||||
|
||||
void MergeTreeDataSelectExecutor::selectPartsToRead(
|
||||
MergeTreeData::DataPartsVector & parts,
|
||||
const std::unordered_set<String> & part_values,
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Storages/MergeTree/RangesInDataPart.h>
|
||||
#include <Storages/MergeTree/PartitionPruner.h>
|
||||
#include <Processors/QueryPlan/ReadFromMergeTree.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexMergedCondition.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -125,6 +126,17 @@ private:
|
||||
size_t & granules_dropped,
|
||||
Poco::Logger * log);
|
||||
|
||||
static MarkRanges filterMarksUsingMergedIndex(
|
||||
MergeTreeIndices index_helper,
|
||||
MergeTreeIndexMergedConditionPtr condition,
|
||||
MergeTreeData::DataPartPtr part,
|
||||
const MarkRanges & ranges,
|
||||
const Settings & settings,
|
||||
const MergeTreeReaderSettings & reader_settings,
|
||||
size_t & total_granules,
|
||||
size_t & granules_dropped,
|
||||
Poco::Logger * log);
|
||||
|
||||
struct PartFilterCounters
|
||||
{
|
||||
size_t num_initial_selected_parts = 0;
|
||||
|
@ -70,66 +70,6 @@ void MergeTreeIndexAggregatorHypothesis::update(const Block & block, size_t * po
|
||||
*pos += rows_read;
|
||||
}
|
||||
|
||||
MergeTreeIndexConditionHypothesis::MergeTreeIndexConditionHypothesis(
|
||||
const String & index_name_,
|
||||
const String & column_name_,
|
||||
const SelectQueryInfo & query_,
|
||||
ContextPtr)
|
||||
: index_name(index_name_)
|
||||
, column_name(column_name_)
|
||||
{
|
||||
const auto & select = query_.query->as<ASTSelectQuery &>();
|
||||
|
||||
if (select.where() && select.prewhere())
|
||||
expression_ast = makeASTFunction(
|
||||
"and",
|
||||
select.where()->clone(),
|
||||
select.prewhere()->clone());
|
||||
else if (select.where())
|
||||
expression_ast = select.where()->clone();
|
||||
else if (select.prewhere())
|
||||
expression_ast = select.prewhere()->clone();
|
||||
}
|
||||
|
||||
std::pair<bool, bool> MergeTreeIndexConditionHypothesis::mayBeTrue(const ASTPtr & ast, const bool value) const
|
||||
{
|
||||
if (ast->getColumnName() == column_name)
|
||||
return {value, !value};
|
||||
|
||||
auto * func = ast->as<ASTFunction>();
|
||||
if (!func)
|
||||
return {true, true};
|
||||
auto & args = func->arguments->children;
|
||||
if (func->name == "not")
|
||||
{
|
||||
const auto res = mayBeTrue(args[0], value);
|
||||
return {res.second, res.first};
|
||||
}
|
||||
/*else if (func->name == "or")
|
||||
{
|
||||
|
||||
}
|
||||
else if (func->name == "and")
|
||||
{
|
||||
|
||||
}*/
|
||||
else
|
||||
{
|
||||
return {true, true};
|
||||
}
|
||||
}
|
||||
|
||||
bool MergeTreeIndexConditionHypothesis::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
|
||||
{
|
||||
if (idx_granule->empty())
|
||||
return true;
|
||||
auto granule = std::dynamic_pointer_cast<MergeTreeIndexGranuleHypothesis>(idx_granule);
|
||||
if (!granule)
|
||||
throw Exception(
|
||||
"Set index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR);
|
||||
return mayBeTrue(expression_ast, granule->met).first;
|
||||
}
|
||||
|
||||
MergeTreeIndexGranulePtr MergeTreeIndexHypothesis::createIndexGranule() const
|
||||
{
|
||||
return std::make_shared<MergeTreeIndexGranuleHypothesis>(index.name);
|
||||
@ -141,9 +81,9 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexHypothesis::createIndexAggregator() co
|
||||
}
|
||||
|
||||
MergeTreeIndexConditionPtr MergeTreeIndexHypothesis::createIndexCondition(
|
||||
const SelectQueryInfo & query, ContextPtr context) const
|
||||
const SelectQueryInfo &, ContextPtr) const
|
||||
{
|
||||
return std::make_shared<MergeTreeIndexConditionHypothesis>(index.name, index.sample_block.getNames().front(), query, context);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool MergeTreeIndexHypothesis::mayBenefitFromIndexForIn(const ASTPtr &) const
|
||||
@ -156,8 +96,10 @@ MergeTreeIndexPtr hypothesisIndexCreator(const IndexDescription & index)
|
||||
return std::make_shared<MergeTreeIndexHypothesis>(index);
|
||||
}
|
||||
|
||||
void hypothesisIndexValidator(const IndexDescription &, bool /*attach*/)
|
||||
void hypothesisIndexValidator(const IndexDescription & index, bool /*attach*/)
|
||||
{
|
||||
if (index.expression_list_ast->children.size() != 1)
|
||||
throw Exception("Hypothesis index needs exactly one expression", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
|
||||
|
@ -2,12 +2,7 @@
|
||||
|
||||
#include <Storages/MergeTree/MergeTreeIndices.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
|
||||
#include <Interpreters/SetVariants.h>
|
||||
|
||||
#include <memory>
|
||||
#include <set>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -30,7 +25,7 @@ struct MergeTreeIndexGranuleHypothesis : public IMergeTreeIndexGranule
|
||||
|
||||
~MergeTreeIndexGranuleHypothesis() override = default;
|
||||
|
||||
String index_name;
|
||||
const String & index_name;
|
||||
bool is_empty = true;
|
||||
bool met = true;
|
||||
};
|
||||
@ -50,39 +45,13 @@ struct MergeTreeIndexAggregatorHypothesis : IMergeTreeIndexAggregator
|
||||
void update(const Block & block, size_t * pos, size_t limit) override;
|
||||
|
||||
private:
|
||||
String index_name;
|
||||
const String & index_name;
|
||||
String column_name;
|
||||
|
||||
bool met = true;
|
||||
bool is_empty = true;
|
||||
};
|
||||
|
||||
|
||||
class MergeTreeIndexConditionHypothesis : public IMergeTreeIndexCondition
|
||||
{
|
||||
public:
|
||||
MergeTreeIndexConditionHypothesis(
|
||||
const String & index_name_,
|
||||
const String & column_name_,
|
||||
const SelectQueryInfo & query,
|
||||
ContextPtr context);
|
||||
|
||||
bool alwaysUnknownOrTrue() const override { return false; }
|
||||
|
||||
bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override;
|
||||
|
||||
~MergeTreeIndexConditionHypothesis() override = default;
|
||||
|
||||
private:
|
||||
std::pair<bool, bool> mayBeTrue(const ASTPtr & ast, const bool value) const;
|
||||
|
||||
String index_name;
|
||||
|
||||
String column_name;
|
||||
ASTPtr expression_ast;
|
||||
};
|
||||
|
||||
|
||||
class MergeTreeIndexHypothesis : public IMergeTreeIndex
|
||||
{
|
||||
public:
|
||||
@ -93,6 +62,8 @@ public:
|
||||
|
||||
~MergeTreeIndexHypothesis() override = default;
|
||||
|
||||
bool isMergeable() const override { return true; }
|
||||
|
||||
MergeTreeIndexGranulePtr createIndexGranule() const override;
|
||||
MergeTreeIndexAggregatorPtr createIndexAggregator() const override;
|
||||
|
||||
|
186
src/Storages/MergeTree/MergeTreeIndexMergedCondition.cpp
Normal file
186
src/Storages/MergeTree/MergeTreeIndexMergedCondition.cpp
Normal file
@ -0,0 +1,186 @@
|
||||
#include <Storages/MergeTree/MergeTreeIndexMergedCondition.h>
|
||||
|
||||
#include <Storages/MergeTree/MergeTreeIndexHypothesis.h>
|
||||
#include <Interpreters/TreeCNFConverter.h>
|
||||
#include <Interpreters/ComparisonGraph.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int INCORRECT_QUERY;
|
||||
}
|
||||
|
||||
/// Builds the merged condition for a query.
///
/// The filtering expression is the conjunction of the WHERE and PREWHERE clauses of the
/// SELECT (whichever of them are present), converted to CNF.
///
/// @param query_        query whose SELECT AST is inspected.
/// @param granularity_  granularity shared by all indices that will be added.
MergeTreeIndexMergedCondition::MergeTreeIndexMergedCondition(
    const SelectQueryInfo & query_,
    ContextPtr /*context_*/,
    const size_t granularity_)
    : granularity(granularity_)
{
    const auto & select = query_.query->as<ASTSelectQuery &>();

    if (select.where() && select.prewhere())
        expression_ast = makeASTFunction(
            "and",
            select.where()->clone(),
            select.prewhere()->clone());
    else if (select.where())
        expression_ast = select.where()->clone();
    else if (select.prewhere())
        expression_ast = select.prewhere()->clone();

    /// A query with neither WHERE nor PREWHERE leaves expression_ast null; do not hand a null AST
    /// to the converter. An empty conjunction has no groups, so nothing can be proven false.
    if (expression_ast)
        expression_cnf = std::make_unique<CNFQuery>(TreeCNFConverter::toCNF(expression_ast));
    else
        expression_cnf = std::make_unique<CNFQuery>(CNFQuery::AndGroup{});
}
|
||||
|
||||
void MergeTreeIndexMergedCondition::addIndex(const MergeTreeIndexPtr & index)
|
||||
{
|
||||
if (!index->isMergeable() || index->getGranularity() != granularity)
|
||||
throw Exception("Index can not be merged",
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
const auto hypothesis_index = std::dynamic_pointer_cast<const MergeTreeIndexHypothesis>(index);
|
||||
if (!hypothesis_index)
|
||||
throw Exception(
|
||||
"Only hypothesis index is supported here.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
static const std::set<std::string> relations = {
|
||||
"equals", "less", "lessOrEquals", "greaterOrEquals", "greater"};
|
||||
|
||||
// TODO: move to index hypothesis
|
||||
std::vector<ASTPtr> compare_hypotheses_data;
|
||||
std::vector<CNFQuery::OrGroup> hypotheses_data;
|
||||
const auto cnf = TreeCNFConverter::toCNF(hypothesis_index->index.expression_list_ast->children.front()).pullNotOutFunctions();
|
||||
for (const auto & group : cnf.getStatements()) {
|
||||
hypotheses_data.push_back(group);
|
||||
if (group.size() == 1)
|
||||
{
|
||||
CNFQuery::AtomicFormula atom = *group.begin();
|
||||
pushNotIn(atom);
|
||||
if (atom.negative)
|
||||
throw Exception("negative atom", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
auto * func = atom.ast->as<ASTFunction>();
|
||||
if (func && relations.count(func->name))
|
||||
compare_hypotheses_data.push_back(atom.ast);
|
||||
}
|
||||
}
|
||||
index_to_compare_atomic_hypotheses.push_back(compare_hypotheses_data);
|
||||
index_to_atomic_hypotheses.push_back(hypotheses_data);
|
||||
}
|
||||
|
||||
void MergeTreeIndexMergedCondition::addConstraints(const ConstraintsDescription & constraints_description)
|
||||
{
|
||||
auto atomic_constraints_data = constraints_description.getAtomicConstraintData();
|
||||
for (auto & atom : atomic_constraints_data)
|
||||
{
|
||||
pushNotIn(atom);
|
||||
atomic_constraints.push_back(atom.ast);
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
ComparisonGraph::CompareResult getExpectedCompare(const CNFQuery::AtomicFormula & atom)
|
||||
{
|
||||
static const std::map<std::string, std::string> inverse_relations = {
|
||||
{"equals", "notEquals"},
|
||||
{"less", "greaterOrEquals"},
|
||||
{"lessOrEquals", "greater"},
|
||||
{"notEquals", "equals"},
|
||||
{"greaterOrEquals", "less"},
|
||||
{"greater", "lessOrEquals"},
|
||||
};
|
||||
|
||||
static const std::map<std::string, ComparisonGraph::CompareResult> relation_to_compare = {
|
||||
{"equals", ComparisonGraph::CompareResult::EQUAL},
|
||||
{"less", ComparisonGraph::CompareResult::LESS},
|
||||
{"lessOrEquals", ComparisonGraph::CompareResult::LESS_OR_EQUAL},
|
||||
{"notEquals", ComparisonGraph::CompareResult::UNKNOWN},
|
||||
{"greaterOrEquals", ComparisonGraph::CompareResult::GREATER_OR_EQUAL},
|
||||
{"greater", ComparisonGraph::CompareResult::GREATER},
|
||||
};
|
||||
|
||||
|
||||
const auto * func = atom.ast->as<ASTFunction>();
|
||||
if (func && inverse_relations.count(func->name))
|
||||
{
|
||||
std::string function_name = func->name;
|
||||
if (atom.negative)
|
||||
function_name = inverse_relations.at(func->name);
|
||||
return relation_to_compare.at(function_name);
|
||||
}
|
||||
return ComparisonGraph::CompareResult::UNKNOWN;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
bool MergeTreeIndexMergedCondition::mayBeTrueOnGranule(const MergeTreeIndexGranules & granules) const
|
||||
{
|
||||
std::vector<bool> values;
|
||||
for (const auto & index_granule : granules)
|
||||
{
|
||||
const auto granule = std::dynamic_pointer_cast<const MergeTreeIndexGranuleHypothesis>(index_granule);
|
||||
if (!granule)
|
||||
throw Exception("Only hypothesis index is supported here.", ErrorCodes::LOGICAL_ERROR);
|
||||
values.push_back(granule->met);
|
||||
}
|
||||
const auto & graph = getGraph(values);
|
||||
|
||||
bool always_false = false;
|
||||
expression_cnf->iterateGroups(
|
||||
[&](const CNFQuery::OrGroup & or_group)
|
||||
{
|
||||
if (always_false)
|
||||
return;
|
||||
|
||||
for (auto atom : or_group)
|
||||
{
|
||||
pushNotIn(atom);
|
||||
Poco::Logger::get("KEK").information(atom.ast->dumpTree());
|
||||
const auto * func = atom.ast->as<ASTFunction>();
|
||||
if (func && func->arguments->children.size() == 2)
|
||||
{
|
||||
const auto expected = getExpectedCompare(atom);
|
||||
if (graph.isPossibleCompare(
|
||||
expected,
|
||||
func->arguments->children[0],
|
||||
func->arguments->children[1]))
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
always_false = true;
|
||||
});
|
||||
return !always_false;
|
||||
}
|
||||
|
||||
std::unique_ptr<ComparisonGraph> MergeTreeIndexMergedCondition::buildGraph(const std::vector<bool> & values) const
|
||||
{
|
||||
Poco::Logger::get("MergeTreeIndexMergedCondition").information("New graph");
|
||||
std::vector<ASTPtr> active_atomic_formulas(atomic_constraints);
|
||||
for (size_t i = 0; i < values.size(); ++i)
|
||||
{
|
||||
if (values[i])
|
||||
active_atomic_formulas.insert(
|
||||
std::end(active_atomic_formulas),
|
||||
std::begin(index_to_compare_atomic_hypotheses[i]),
|
||||
std::end(index_to_compare_atomic_hypotheses[i]));
|
||||
}
|
||||
return std::make_unique<ComparisonGraph>(active_atomic_formulas);
|
||||
}
|
||||
|
||||
/// Returns the comparison graph for the given combination of hypothesis flags,
/// building it and storing it in graphCache on first use.
const ComparisonGraph & MergeTreeIndexMergedCondition::getGraph(const std::vector<bool> & values) const
{
    auto cached = graphCache.find(values);
    if (cached == graphCache.end())
        cached = graphCache.emplace(values, buildGraph(values)).first;

    return *cached->second;
}
|
||||
|
||||
}
|
47
src/Storages/MergeTree/MergeTreeIndexMergedCondition.h
Normal file
47
src/Storages/MergeTree/MergeTreeIndexMergedCondition.h
Normal file
@ -0,0 +1,47 @@
|
||||
#pragma once
|
||||
|
||||
#include <Storages/MergeTree/MergeTreeIndices.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Interpreters/ComparisonGraph.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/*
|
||||
* IndexCondition checking several indexes at the same time.
|
||||
* Works only for hypotheses. (will also support minmax soon).
|
||||
*/
|
||||
class MergeTreeIndexMergedCondition
|
||||
{
|
||||
public:
|
||||
MergeTreeIndexMergedCondition(
|
||||
const SelectQueryInfo & query,
|
||||
ContextPtr context,
|
||||
const size_t granularity);
|
||||
|
||||
void addIndex(const MergeTreeIndexPtr & index);
|
||||
void addConstraints(const ConstraintsDescription & constraints_description);
|
||||
|
||||
bool alwaysUnknownOrTrue() const { return false; } // TODO: replace < -> <=, > -> >= and assume all hypotheses are true + check path exists
|
||||
bool mayBeTrueOnGranule(const MergeTreeIndexGranules & granules) const;
|
||||
|
||||
//TODO: add constraints
|
||||
private:
|
||||
std::unique_ptr<ComparisonGraph> buildGraph(const std::vector<bool> & values) const;
|
||||
const ComparisonGraph & getGraph(const std::vector<bool> & values) const;
|
||||
|
||||
const size_t granularity;
|
||||
ASTPtr expression_ast;
|
||||
std::unique_ptr<CNFQuery> expression_cnf;
|
||||
|
||||
mutable std::unordered_map<std::vector<bool>, std::unique_ptr<ComparisonGraph>> graphCache;
|
||||
|
||||
std::vector<std::vector<ASTPtr>> index_to_compare_atomic_hypotheses;
|
||||
std::vector<std::vector<CNFQuery::OrGroup>> index_to_atomic_hypotheses;
|
||||
std::vector<ASTPtr> atomic_constraints;
|
||||
};
|
||||
|
||||
using MergeTreeIndexMergedConditionPtr = std::shared_ptr<MergeTreeIndexMergedCondition>;
|
||||
using MergeTreeIndexMergedConditions = std::vector<MergeTreeIndexMergedConditionPtr>;
|
||||
|
||||
}
|
@ -62,6 +62,7 @@ public:
|
||||
};
|
||||
|
||||
using MergeTreeIndexConditionPtr = std::shared_ptr<IMergeTreeIndexCondition>;
|
||||
using MergeTreeIndexConditions = std::vector<MergeTreeIndexConditionPtr>;
|
||||
|
||||
|
||||
struct IMergeTreeIndex
|
||||
@ -75,6 +76,9 @@ struct IMergeTreeIndex
|
||||
|
||||
/// gets filename without extension
|
||||
String getFileName() const { return INDEX_FILE_PREFIX + index.name; }
|
||||
size_t getGranularity() const { return index.granularity; }
|
||||
|
||||
virtual bool isMergeable() const { return false; }
|
||||
|
||||
/// Checks whether the column is in data skipping index.
|
||||
virtual bool mayBenefitFromIndexForIn(const ASTPtr & node) const = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user