improvements

This commit is contained in:
Nikita Vasilev 2021-05-04 21:43:58 +03:00
parent 125498c97d
commit 496d8ff46c
19 changed files with 190 additions and 176 deletions

View File

@ -12,10 +12,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
AddIndexConstraintsOptimizer::AddIndexConstraintsOptimizer(
const StorageMetadataPtr & metadata_snapshot_)
@ -124,8 +120,10 @@ namespace
{
Poco::Logger::get("INDEX_HINT_CREATE").information("CHECK");
const auto * func = atom.ast->as<ASTFunction>();
if (func && func->arguments->children.size() == 2 && getRelationMap().contains(func->name)) {
auto check_and_insert = [&](const size_t index, const ComparisonGraph::CompareResult need_result) -> bool {
if (func && func->arguments->children.size() == 2 && getRelationMap().contains(func->name))
{
auto check_and_insert = [&](const size_t index, const ComparisonGraph::CompareResult need_result) -> bool
{
if (!onlyConstants(func->arguments->children[1 - index]))
return false;
@ -167,13 +165,14 @@ void AddIndexConstraintsOptimizer::perform(CNFQuery & cnf_query)
const std::unordered_set<std::string_view> primary_key_set(std::begin(primary_key), std::end(primary_key));
ASTs primary_key_only_asts;
for (const auto & vertex : graph.getVertexes())
for (const auto & vertex : graph.getVertices())
for (const auto & ast : vertex)
if (hasIndexColumns(ast, primary_key_set) && onlyIndexColumns(ast, primary_key_set))
primary_key_only_asts.push_back(ast);
CNFQuery::AndGroup and_group;
cnf_query.iterateGroups([&and_group, &graph, &primary_key_only_asts](const auto & or_group) {
cnf_query.iterateGroups([&and_group, &graph, &primary_key_only_asts](const auto & or_group)
{
auto add_group = createIndexHintGroup(or_group, graph, primary_key_only_asts);
if (!add_group.empty())
and_group.emplace(std::move(add_group));

View File

@ -36,25 +36,29 @@ ASTPtr ComparisonGraph::normalizeAtom(const ASTPtr & atom) const
ComparisonGraph::ComparisonGraph(const std::vector<ASTPtr> & atomic_formulas)
{
static const std::map<std::string, Edge::Type> relation_to_enum = {
static const std::unordered_map<std::string, Edge::Type> relation_to_enum =
{
{"equals", Edge::Type::EQUAL},
{"less", Edge::Type::LESS},
{"lessOrEquals", Edge::Type::LESS_OR_EQUAL},
};
Graph g;
for (const auto & atom_raw : atomic_formulas) {
for (const auto & atom_raw : atomic_formulas)
{
const auto atom = normalizeAtom(atom_raw);
const auto bad_term = std::numeric_limits<std::size_t>::max();
auto get_index = [](const ASTPtr & ast, Graph & asts_graph) -> std::size_t {
auto get_index = [](const ASTPtr & ast, Graph & asts_graph) -> std::size_t
{
const auto it = asts_graph.ast_hash_to_component.find(ast->getTreeHash());
if (it != std::end(asts_graph.ast_hash_to_component))
{
if (!std::any_of(
std::cbegin(asts_graph.vertexes[it->second].asts),
std::cend(asts_graph.vertexes[it->second].asts),
[ast](const ASTPtr & constraint_ast) {
std::cbegin(asts_graph.vertices[it->second].asts),
std::cend(asts_graph.vertices[it->second].asts),
[ast](const ASTPtr & constraint_ast)
{
return constraint_ast->getTreeHash() == ast->getTreeHash()
&& constraint_ast->getColumnName() == ast->getColumnName();
}))
@ -66,10 +70,10 @@ ComparisonGraph::ComparisonGraph(const std::vector<ASTPtr> & atomic_formulas)
}
else
{
asts_graph.ast_hash_to_component[ast->getTreeHash()] = asts_graph.vertexes.size();
asts_graph.vertexes.push_back(EqualComponent{{ast}});
asts_graph.ast_hash_to_component[ast->getTreeHash()] = asts_graph.vertices.size();
asts_graph.vertices.push_back(EqualComponent{{ast}});
asts_graph.edges.emplace_back();
return asts_graph.vertexes.size() - 1;
return asts_graph.vertices.size() - 1;
}
};
@ -263,15 +267,18 @@ std::optional<std::size_t> ComparisonGraph::getComponentId(const ASTPtr & ast) c
return {};
const size_t index = hash_it->second;
if (std::any_of(
std::cbegin(graph.vertexes[index].asts),
std::cend(graph.vertexes[index].asts),
std::cbegin(graph.vertices[index].asts),
std::cend(graph.vertices[index].asts),
[ast](const ASTPtr & constraint_ast)
{
return constraint_ast->getTreeHash() == ast->getTreeHash() &&
constraint_ast->getColumnName() == ast->getColumnName();
})) {
}))
{
return index;
} else {
}
else
{
return {};
}
}
@ -283,18 +290,21 @@ bool ComparisonGraph::hasPath(const size_t left, const size_t right) const
std::vector<ASTPtr> ComparisonGraph::getComponent(const std::size_t id) const
{
return graph.vertexes[id].asts;
return graph.vertices[id].asts;
}
bool ComparisonGraph::EqualComponent::hasConstant() const {
bool ComparisonGraph::EqualComponent::hasConstant() const
{
/// buildConstants() resets constant_index to -1 before scanning asts,
/// so -1 is the sentinel for "no constant recorded for this component".
return constant_index != -1;
}
ASTPtr ComparisonGraph::EqualComponent::getConstant() const {
ASTPtr ComparisonGraph::EqualComponent::getConstant() const
{
/// Returns the AST stored at constant_index.
/// NOTE(review): the index is not validated here; with constant_index == -1
/// this indexes asts out of range, so the caller presumably has to check
/// hasConstant() first - confirm for every call site.
return asts[constant_index];
}
void ComparisonGraph::EqualComponent::buildConstants() {
void ComparisonGraph::EqualComponent::buildConstants()
{
constant_index = -1;
for (size_t i = 0; i < asts.size(); ++i)
{
@ -308,7 +318,8 @@ void ComparisonGraph::EqualComponent::buildConstants() {
ComparisonGraph::CompareResult ComparisonGraph::getCompareResult(const std::string & name)
{
static const std::unordered_map<std::string, CompareResult> relation_to_compare = {
static const std::unordered_map<std::string, CompareResult> relation_to_compare =
{
{"equals", CompareResult::EQUAL},
{"notEquals", CompareResult::NOT_EQUAL},
{"less", CompareResult::LESS},
@ -323,7 +334,8 @@ ComparisonGraph::CompareResult ComparisonGraph::getCompareResult(const std::stri
ComparisonGraph::CompareResult ComparisonGraph::inverseCompareResult(const CompareResult result)
{
static const std::unordered_map<CompareResult, CompareResult> inverse_relations = {
static const std::unordered_map<CompareResult, CompareResult> inverse_relations =
{
{CompareResult::NOT_EQUAL, CompareResult::EQUAL},
{CompareResult::EQUAL, CompareResult::NOT_EQUAL},
{CompareResult::GREATER_OR_EQUAL, CompareResult::LESS},
@ -341,8 +353,8 @@ std::optional<ASTPtr> ComparisonGraph::getEqualConst(const ASTPtr & ast) const
if (hash_it == std::end(graph.ast_hash_to_component))
return std::nullopt;
const size_t index = hash_it->second;
return graph.vertexes[index].hasConstant()
? std::optional<ASTPtr>{graph.vertexes[index].getConstant()}
return graph.vertices[index].hasConstant()
? std::optional<ASTPtr>{graph.vertices[index].getConstant()}
: std::nullopt;
}
@ -360,7 +372,7 @@ std::optional<std::pair<Field, bool>> ComparisonGraph::getConstUpperBound(const
const ssize_t from = ast_const_upper_bound[to];
if (from == -1)
return std::nullopt;
return std::make_pair(graph.vertexes[from].getConstant()->as<ASTLiteral>()->value, dists.at({from, to}) == Path::LESS);
return std::make_pair(graph.vertices[from].getConstant()->as<ASTLiteral>()->value, dists.at({from, to}) == Path::LESS);
}
std::optional<std::pair<Field, bool>> ComparisonGraph::getConstLowerBound(const ASTPtr & ast) const
@ -377,7 +389,7 @@ std::optional<std::pair<Field, bool>> ComparisonGraph::getConstLowerBound(const
const ssize_t to = ast_const_lower_bound[from];
if (to == -1)
return std::nullopt;
return std::make_pair(graph.vertexes[to].getConstant()->as<ASTLiteral>()->value, dists.at({from, to}) == Path::LESS);
return std::make_pair(graph.vertices[to].getConstant()->as<ASTLiteral>()->value, dists.at({from, to}) == Path::LESS);
}
void ComparisonGraph::dfsOrder(const Graph & asts_graph, size_t v, std::vector<bool> & visited, std::vector<size_t> & order) const
@ -397,9 +409,9 @@ ComparisonGraph::Graph ComparisonGraph::reverseGraph(const Graph & asts_graph) c
{
Graph g;
g.ast_hash_to_component = asts_graph.ast_hash_to_component;
g.vertexes = asts_graph.vertexes;
g.edges.resize(g.vertexes.size());
for (size_t v = 0; v < asts_graph.vertexes.size(); ++v)
g.vertices = asts_graph.vertices;
g.edges.resize(g.vertices.size());
for (size_t v = 0; v < asts_graph.vertices.size(); ++v)
{
for (const auto & edge : asts_graph.edges[v])
{
@ -409,10 +421,10 @@ ComparisonGraph::Graph ComparisonGraph::reverseGraph(const Graph & asts_graph) c
return asts_graph;
}
std::vector<ASTs> ComparisonGraph::getVertexes() const
std::vector<ASTs> ComparisonGraph::getVertices() const
{
std::vector<ASTs> result;
for (const auto & vertex : graph.vertexes)
for (const auto & vertex : graph.vertices)
{
result.emplace_back();
for (const auto & ast : vertex.asts)
@ -438,7 +450,7 @@ ComparisonGraph::Graph ComparisonGraph::BuildGraphFromAstsGraph(const Graph & as
{
Poco::Logger::get("Graph").information("building");
/// Find strongly connected component
const auto n = asts_graph.vertexes.size();
const auto n = asts_graph.vertices.size();
std::vector<size_t> order;
{
@ -466,19 +478,19 @@ ComparisonGraph::Graph ComparisonGraph::BuildGraphFromAstsGraph(const Graph & as
}
Graph result;
result.vertexes.resize(component);
result.vertices.resize(component);
result.edges.resize(component);
for (const auto & [hash, index] : asts_graph.ast_hash_to_component)
{
result.ast_hash_to_component[hash] = components[index];
result.vertexes[components[index]].asts.insert(
std::end(result.vertexes[components[index]].asts),
std::begin(asts_graph.vertexes[index].asts),
std::end(asts_graph.vertexes[index].asts)); // asts_graph has only one ast per vertex
result.vertices[components[index]].asts.insert(
std::end(result.vertices[components[index]].asts),
std::begin(asts_graph.vertices[index].asts),
std::end(asts_graph.vertices[index].asts)); // asts_graph has only one ast per vertex
}
/// Calculate constants
for (auto & vertex : result.vertexes)
for (auto & vertex : result.vertices)
{
vertex.buildConstants();
}
@ -494,16 +506,16 @@ ComparisonGraph::Graph ComparisonGraph::BuildGraphFromAstsGraph(const Graph & as
// TODO: make edges unique (left most strict)
}
for (size_t v = 0; v < result.vertexes.size(); ++v)
for (size_t v = 0; v < result.vertices.size(); ++v)
{
for (size_t u = 0; u < result.vertexes.size(); ++u)
for (size_t u = 0; u < result.vertices.size(); ++u)
{
if (v == u)
continue;
if (result.vertexes[v].hasConstant() && result.vertexes[u].hasConstant())
if (result.vertices[v].hasConstant() && result.vertices[u].hasConstant())
{
const auto * left = result.vertexes[v].getConstant()->as<ASTLiteral>();
const auto * right = result.vertexes[u].getConstant()->as<ASTLiteral>();
const auto * left = result.vertices[v].getConstant()->as<ASTLiteral>();
const auto * right = result.vertices[u].getConstant()->as<ASTLiteral>();
/// Only less. Equal constant fields = equal literals so it was already considered above.
if (left->value > right->value)
@ -521,7 +533,7 @@ std::map<std::pair<size_t, size_t>, ComparisonGraph::Path> ComparisonGraph::Buil
{
// min path : < = -1, =< = 0
const auto inf = std::numeric_limits<int8_t>::max();
const size_t n = graph.vertexes.size();
const size_t n = graph.vertices.size();
std::vector<std::vector<int8_t>> results(n, std::vector<int8_t>(n, inf));
for (size_t v = 0; v < n; ++v)
{
@ -547,24 +559,27 @@ std::map<std::pair<size_t, size_t>, ComparisonGraph::Path> ComparisonGraph::Buil
std::pair<std::vector<ssize_t>, std::vector<ssize_t>> ComparisonGraph::buildConstBounds() const
{
const size_t n = graph.vertexes.size();
const size_t n = graph.vertices.size();
std::vector<ssize_t> lower(n, -1);
std::vector<ssize_t> upper(n, -1);
auto get_value = [this](const size_t vertex) -> Field {
return graph.vertexes[vertex].getConstant()->as<ASTLiteral>()->value;
auto get_value = [this](const size_t vertex) -> Field
{
return graph.vertices[vertex].getConstant()->as<ASTLiteral>()->value;
};
for (const auto & [edge, path] : dists)
{
const auto [from, to] = edge;
if (graph.vertexes[to].hasConstant()) {
if (graph.vertices[to].hasConstant())
{
if (lower[from] == -1
|| get_value(lower[from]) > get_value(to)
|| (get_value(lower[from]) >= get_value(to) && dists.at({from, to}) == Path::LESS))
lower[from] = to;
}
if (graph.vertexes[from].hasConstant()) {
if (graph.vertices[from].hasConstant())
{
if (upper[to] == -1
|| get_value(upper[to]) < get_value(from)
|| (get_value(upper[to]) <= get_value(from) && dists.at({from, to}) == Path::LESS))

View File

@ -53,7 +53,7 @@ public:
std::optional<std::pair<Field, bool>> getConstUpperBound(const ASTPtr & ast) const;
std::optional<std::pair<Field, bool>> getConstLowerBound(const ASTPtr & ast) const;
std::vector<ASTs> getVertexes() const;
std::vector<ASTs> getVertices() const;
private:
/// strongly connected component
@ -86,14 +86,16 @@ private:
struct Graph
{
struct ASTHash {
size_t operator() (const IAST::Hash & hash) const {
struct ASTHash
{
size_t operator() (const IAST::Hash & hash) const
{
return hash.first;
}
};
std::unordered_map<IAST::Hash, size_t, ASTHash> ast_hash_to_component;
std::vector<EqualComponent> vertexes;
std::vector<EqualComponent> vertices;
std::vector<std::vector<Edge>> edges;
};

View File

@ -1 +1,38 @@
#include "ConstraintMatcherVisitor.h"
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/IAST.h>
#include <Poco/Logger.h>
namespace DB
{
bool ConstraintMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
    /// Returns true only when the node is an ASTFunction or an ASTExpressionList;
    /// the second argument is ignored.
    if (node->as<ASTFunction>())
        return true;

    return node->as<ASTExpressionList>() != nullptr;
}
std::optional<bool> ConstraintMatcher::getASTValue(const ASTPtr & node, Data & data)
{
    /// Looks the node up in data.constraints by the second component of its tree hash.
    /// The hash only selects a bucket of candidate ASTs; a match additionally requires
    /// an identical column name.
    /// Returns true on a match and std::nullopt otherwise; false is never returned.
    const auto bucket = data.constraints.find(node->getTreeHash().second);
    if (bucket == std::end(data.constraints))
        return std::nullopt;

    for (const auto & candidate : bucket->second)
        if (node->getColumnName() == candidate->getColumnName())
            return true;

    return std::nullopt;
}
void ConstraintMatcher::visit(ASTPtr & ast, Data & data)
{
    /// When getASTValue() yields a value for this node, the node is replaced in place
    /// by an ASTLiteral holding that value cast to UInt8; otherwise ast is left untouched.
    const auto known_value = getASTValue(ast, data);
    if (!known_value)
        return;

    ast = std::make_shared<ASTLiteral>(static_cast<UInt8>(*known_value));
}
}

View File

@ -1,13 +1,6 @@
#pragma once
#include <Interpreters/InDepthNodeVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/IAST.h>
#include <Poco/Logger.h>
namespace DB
{
@ -21,25 +14,9 @@ struct ConstraintMatcher
using Visitor = InDepthNodeVisitor<ConstraintMatcher, true>;
static bool needChildVisit(const ASTPtr & node, const ASTPtr &) { return node->as<ASTFunction>() || node->as<ASTExpressionList>(); }
static std::optional<bool> getASTValue(const ASTPtr & node, Data & data) {
const auto it = data.constraints.find(node->getTreeHash().second);
if (it != std::end(data.constraints)) {
for (const auto & ast : it->second) {
if (node->getColumnName() == ast->getColumnName()) {
return true;
}
}
}
return std::nullopt;
}
static void visit(ASTPtr & ast, Data & data)
{
if (const auto always_value = getASTValue(ast, data); always_value)
ast = std::make_shared<ASTLiteral>(static_cast<UInt8>(*always_value));
}
static bool needChildVisit(const ASTPtr & node, const ASTPtr &);
static std::optional<bool> getASTValue(const ASTPtr & node, Data & data);
static void visit(ASTPtr & ast, Data & data);
};
using ConstraintMatcherVisitor = InDepthNodeVisitor<ConstraintMatcher, true>;

View File

@ -1,7 +1,6 @@
#include <Interpreters/TreeCNFConverter.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTFunction.h>
#include <Poco/Logger.h>
namespace DB
@ -37,7 +36,8 @@ void traversePushNot(ASTPtr & node, bool add_negation)
if (func && (func->name == "and" || func->name == "or"))
{
if (add_negation) {
if (add_negation)
{
/// apply De Morgan's Law
node = makeASTFunction(
(func->name == "and" ? "or" : "and"),
@ -302,7 +302,7 @@ CNFQuery & CNFQuery::pushNotInFuntions()
std::string CNFQuery::dump() const
{
std::stringstream res;
WriteBufferFromOwnString res;
bool first = true;
for (const auto & group : statements)
{

View File

@ -511,23 +511,23 @@ void optimizeLimitBy(const ASTSelectQuery * select_query)
}
/// Use constraints to get rid of useless parts of query
void optimizeWithConstraints(ASTSelectQuery * select_query, Aliases & aliases, const NameSet & source_columns_set,
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns,
void optimizeWithConstraints(ASTSelectQuery * select_query, Aliases & /*aliases*/, const NameSet & /*source_columns_set*/,
const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
const StorageMetadataPtr & metadata_snapshot)
{
WhereConstraintsOptimizer(select_query, aliases, source_columns_set, tables_with_columns, metadata_snapshot).perform();
WhereConstraintsOptimizer(select_query, metadata_snapshot).perform();
if (select_query->where())
Poco::Logger::get("CNF").information(select_query->where()->dumpTree());
else
Poco::Logger::get("CNF").information("NO WHERE");
}
void optimizeSubstituteColumn(ASTSelectQuery * select_query, Aliases & aliases, const NameSet & source_columns_set,
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns,
void optimizeSubstituteColumn(ASTSelectQuery * select_query, Aliases & /*aliases*/, const NameSet & /*source_columns_set*/,
const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
const StorageMetadataPtr & metadata_snapshot,
const ConstStoragePtr & storage)
{
SubstituteColumnOptimizer(select_query, aliases, source_columns_set, tables_with_columns, metadata_snapshot, storage).perform();
SubstituteColumnOptimizer(select_query, metadata_snapshot, storage).perform();
}
/// transform where to CNF for more convenient optimization

View File

@ -4,7 +4,6 @@
#include <Interpreters/ComparisonGraph.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTConstraintDeclaration.h>
#include <Storages/StorageInMemoryMetadata.h>
#include <Interpreters/AddIndexConstraintsOptimizer.h>
#include <Parsers/ASTSelectQuery.h>
@ -13,21 +12,10 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
WhereConstraintsOptimizer::WhereConstraintsOptimizer(
ASTSelectQuery * select_query_,
Aliases & /*aliases_*/,
const NameSet & /*source_columns_set_*/,
const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns_*/,
const StorageMetadataPtr & metadata_snapshot_)
: select_query(select_query_)
/* , aliases(aliases_)
, source_columns_set(source_columns_set_)
, tables_with_columns(tables_with_columns_)*/
, metadata_snapshot(metadata_snapshot_)
{
}
@ -181,13 +169,18 @@ void WhereConstraintsOptimizer::perform()
auto cnf = TreeCNFConverter::toCNF(select_query->where());
Poco::Logger::get("BEFORE OPT").information(cnf.dump());
cnf.pullNotOutFunctions()
.filterAlwaysTrueGroups([&constraint_data, &compare_graph](const auto & group) { /// remove always true groups from CNF
.filterAlwaysTrueGroups([&constraint_data, &compare_graph](const auto & group)
{
/// remove always true groups from CNF
return !checkIfGroupAlwaysTrueFullMatch(group, constraint_data) && !checkIfGroupAlwaysTrueGraph(group, compare_graph);
})
.filterAlwaysFalseAtoms([&constraint_data, &compare_graph](const auto & atom) { /// remove always false atoms from CNF
.filterAlwaysFalseAtoms([&constraint_data, &compare_graph](const auto & atom)
{
/// remove always false atoms from CNF
return !checkIfAtomAlwaysFalseFullMatch(atom, constraint_data) && !checkIfAtomAlwaysFalseGraph(atom, compare_graph);
})
.transformAtoms([&compare_graph](const auto & atom) {
.transformAtoms([&compare_graph](const auto & atom)
{
return replaceTermsToConstants(atom, compare_graph);
})
.pushNotInFuntions();

View File

@ -17,17 +17,14 @@ using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
class WhereConstraintsOptimizer final
{
public:
WhereConstraintsOptimizer(ASTSelectQuery * select_query, Aliases & /* aliases */, const NameSet & /* source_columns_set */,
const std::vector<TableWithColumnNamesAndTypes> & /* tables_with_columns */,
WhereConstraintsOptimizer(
ASTSelectQuery * select_query,
const StorageMetadataPtr & metadata_snapshot);
void perform();
private:
ASTSelectQuery * select_query;
/*Aliases & aliases;
const NameSet & source_columns_set;
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns;*/
const StorageMetadataPtr & metadata_snapshot;
};

View File

@ -10,7 +10,8 @@ namespace DB
class ASTConstraintDeclaration : public IAST
{
public:
enum class Type {
enum class Type
{
CHECK,
ASSUME,
};

View File

@ -53,13 +53,15 @@ ASTs ConstraintsDescription::filterConstraints(ConstraintType selection) const
case ASTConstraintDeclaration::Type::ASSUME:
return static_cast<UInt32>(ConstraintType::ASSUME);
}
throw Exception("Unknown constraint type.", ErrorCodes::LOGICAL_ERROR);
};
ASTs res;
res.reserve(constraints.size());
for (const auto & constraint : constraints)
{
if ((ast_to_decr_constraint_type(constraint->as<ASTConstraintDeclaration>()->type) & static_cast<UInt32>(selection)) != 0) {
if ((ast_to_decr_constraint_type(constraint->as<ASTConstraintDeclaration>()->type) & static_cast<UInt32>(selection)) != 0)
{
res.push_back(constraint);
}
}
@ -88,7 +90,8 @@ std::vector<CNFQuery::AtomicFormula> ConstraintsDescription::getAtomicConstraint
Poco::Logger::get("atomic_formula: initial:").information(constraint->as<ASTConstraintDeclaration>()->expr->ptr()->dumpTree());
const auto cnf = TreeCNFConverter::toCNF(constraint->as<ASTConstraintDeclaration>()->expr->ptr())
.pullNotOutFunctions();
for (const auto & group : cnf.getStatements()) {
for (const auto & group : cnf.getStatements())
{
if (group.size() == 1)
constraint_data.push_back(*group.begin());
}

View File

@ -20,7 +20,8 @@ public:
static ConstraintsDescription parse(const String & str);
enum class ConstraintType {
enum class ConstraintType
{
CHECK = 1,
ASSUME = 2,
ALWAYS_TRUE = CHECK | ASSUME,

View File

@ -2008,8 +2008,13 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark)
{
if (index_mark != index_range.begin || !granules_filled || last_index_mark != index_range.begin)
{
for (size_t i = 0; i < readers.size(); ++i)
{
granules[i] = readers[i].read();
granules_filled = true;
}
}
MarkRange data_range(
std::max(range.begin, index_mark * index_granularity),

View File

@ -83,7 +83,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexHypothesis::createIndexAggregator() co
MergeTreeIndexConditionPtr MergeTreeIndexHypothesis::createIndexCondition(
const SelectQueryInfo &, ContextPtr) const
{
return nullptr;
throw Exception("Not supported", ErrorCodes::LOGICAL_ERROR);
}
bool MergeTreeIndexHypothesis::mayBenefitFromIndexForIn(const ASTPtr &) const

View File

@ -13,7 +13,6 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int INCORRECT_QUERY;
}
MergeTreeIndexMergedCondition::MergeTreeIndexMergedCondition(
@ -103,7 +102,8 @@ ComparisonGraph::CompareResult getExpectedCompare(const CNFQuery::AtomicFormula
}
/// Replaces < -> <=, > -> >= and assumes that all hypotheses are true then checks if path exists
bool MergeTreeIndexMergedCondition::alwaysUnknownOrTrue() const {
bool MergeTreeIndexMergedCondition::alwaysUnknownOrTrue() const
{
std::vector<ASTPtr> active_atomic_formulas(atomic_constraints);
for (size_t i = 0; i < index_to_compare_atomic_hypotheses.size(); ++i)
{

View File

@ -189,8 +189,8 @@ void MergeTreeWhereOptimizer::optimize(ASTSelectQuery & select) const
if (select.where())
{
Poco::Logger::get("MTPRWHERE WHE").information(select.where()->getColumnName());
Poco::Logger::get("MTPRWHERE WHE").information(select.where()->dumpTree());
Poco::Logger::get("MTPRWHERE WHERE").information(select.where()->getColumnName());
Poco::Logger::get("MTPRWHERE WHERE").information(select.where()->dumpTree());
}
if(select.prewhere())
{

View File

@ -13,10 +13,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
@ -224,15 +220,9 @@ void bruteforce(
SubstituteColumnOptimizer::SubstituteColumnOptimizer(
ASTSelectQuery * select_query_,
Aliases & /*aliases_*/,
const NameSet & /*source_columns_set_*/,
const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns_*/,
const StorageMetadataPtr & metadata_snapshot_,
const ConstStoragePtr & storage_)
: select_query(select_query_)
/* , aliases(aliases_)
, source_columns_set(source_columns_set_)
, tables_with_columns(tables_with_columns_)*/
, metadata_snapshot(metadata_snapshot_)
, storage(storage_)
{
@ -261,7 +251,8 @@ void SubstituteColumnOptimizer::perform()
ast->setAlias(ast->getAliasOrColumnName());
}
auto run_for_all = [&](const auto func) {
auto run_for_all = [&](const auto func)
{
if (select_query->where())
func(select_query->refWhere(), false);
if (select_query->prewhere())
@ -279,7 +270,8 @@ void SubstituteColumnOptimizer::perform()
ComponentVisitor::Data component_data(
compare_graph, components, old_name, name_to_component, counter_id);
std::unordered_set<String> identifiers;
auto preprocess = [&](ASTPtr & ast, bool) {
auto preprocess = [&](ASTPtr & ast, bool)
{
ComponentVisitor(component_data).visit(ast);
collectIdentifiers(ast, identifiers);
};
@ -320,7 +312,8 @@ void SubstituteColumnOptimizer::perform()
for (size_t i = 0; i < min_expressions.size(); ++i)
id_to_expression_map[components_list[i]] = min_expressions[i];
auto process = [&](ASTPtr & ast, bool is_select) {
auto process = [&](ASTPtr & ast, bool is_select)
{
SubstituteColumnVisitor::Data substitute_data{id_to_expression_map, name_to_component, old_name, is_select};
SubstituteColumnVisitor(substitute_data).visit(ast);
};

View File

@ -19,18 +19,13 @@ class SubstituteColumnOptimizer
public:
SubstituteColumnOptimizer(
ASTSelectQuery * select_query,
Aliases & /* aliases */, const NameSet & /* source_columns_set */,
const std::vector<TableWithColumnNamesAndTypes> & /* tables_with_columns */,
const StorageMetadataPtr & /* metadata_snapshot */,
const StorageMetadataPtr & metadata_snapshot,
const ConstStoragePtr & storage);
void perform();
private:
ASTSelectQuery * select_query;
/*Aliases & aliases;
const NameSet & source_columns_set;
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns;*/
const StorageMetadataPtr & metadata_snapshot;
ConstStoragePtr storage;
};

View File

@ -6,9 +6,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
SETTINGS="SET convert_query_to_cnf = 1; SET optimize_using_constraints = 1; SET optimize_move_to_prewhere = 1;"
SETTINGS="SET convert_query_to_cnf = 1; SET optimize_using_constraints = 1; SET optimize_move_to_prewhere = 1"
$CLICKHOUSE_CLIENT -n --query="
$SETTINGS;
DROP DATABASE IF EXISTS hypothesis_test;
DROP TABLE IF EXISTS hypothesis_test.test;
DROP TABLE IF EXISTS hypothesis_test.test2;
@ -25,30 +26,28 @@ CREATE TABLE hypothesis_test.test (
) ENGINE = MergeTree() ORDER BY i SETTINGS index_granularity=1;
"
$CLICKHOUSE_CLIENT --query="INSERT INTO hypothesis_test.test VALUES
(0, 1, 2, 2),
(1, 2, 1, 2),
(2, 2, 2, 1),
(3, 1, 2, 3)"
$CLICKHOUSE_CLIENT -n --query="$SETTINGS;
INSERT INTO hypothesis_test.test VALUES (0, 1, 2, 2), (1, 2, 1, 2), (2, 2, 2, 1), (3, 1, 2, 3)"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE b > a FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE b > a FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE b <= a FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE b <= a FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE b >= a FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE b >= a FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE b = a FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE b = a FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE c < a FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE c < a FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE c = a FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE c = a FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE c > a FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE c > a FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE c < a FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE c < a FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT -n --query="
$SETTINGS;
CREATE TABLE hypothesis_test.test2 (
i UInt64,
a UInt64,
@ -57,22 +56,20 @@ CREATE TABLE hypothesis_test.test2 (
) ENGINE = MergeTree() ORDER BY i SETTINGS index_granularity=1;
"
$CLICKHOUSE_CLIENT --query="INSERT INTO hypothesis_test.test2 VALUES
(0, 1, 2),
(1, 2, 1),
(2, 2, 2),
(3, 1, 0)"
$CLICKHOUSE_CLIENT -n --query="$SETTINGS;
INSERT INTO hypothesis_test.test2 VALUES (0, 1, 2), (1, 2, 1), (2, 2, 2), (3, 1, 0)"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test2 WHERE a < b FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test2 WHERE a < b FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test2 WHERE a <= b FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test2 WHERE a <= b FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test2 WHERE a = b FORMAT JSON" | grep "rows_read" # 1
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test2 WHERE a = b FORMAT JSON" | grep "rows_read" # 1
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test2 WHERE a != b FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test2 WHERE a != b FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT -n --query="
$SETTINGS;
CREATE TABLE hypothesis_test.test3 (
i UInt64,
a UInt64,
@ -81,22 +78,21 @@ CREATE TABLE hypothesis_test.test3 (
) ENGINE = MergeTree() ORDER BY i SETTINGS index_granularity=1;
"
$CLICKHOUSE_CLIENT --query="INSERT INTO hypothesis_test.test3 VALUES
(0, 1, 2),
(1, 2, 1),
(2, 2, 2),
(3, 1, 0)"
$CLICKHOUSE_CLIENT -n --query="
$SETTINGS;
INSERT INTO hypothesis_test.test3 VALUES (0, 1, 2), (1, 2, 1), (2, 2, 2), (3, 1, 0)"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test3 WHERE a < b FORMAT JSON" | grep "rows_read" # 3
$CLICKHOUSE_CLIENT -n --query="$SETTINGS;SELECT count() FROM hypothesis_test.test3 WHERE a < b FORMAT JSON" | grep "rows_read" # 3
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test3 WHERE a <= b FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT -n --query="$SETTINGS;SELECT count() FROM hypothesis_test.test3 WHERE a <= b FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test3 WHERE a = b FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT -n --query="$SETTINGS;SELECT count() FROM hypothesis_test.test3 WHERE a = b FORMAT JSON" | grep "rows_read" # 4
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test3 WHERE a != b FORMAT JSON" | grep "rows_read" # 3
$CLICKHOUSE_CLIENT -n --query="$SETTINGS;SELECT count() FROM hypothesis_test.test3 WHERE a != b FORMAT JSON" | grep "rows_read" # 3
$CLICKHOUSE_CLIENT -n --query="
$SETTINGS;
DROP TABLE hypothesis_test.test;
DROP TABLE hypothesis_test.test2;
DROP TABLE hypothesis_test.test3;