mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 16:50:48 +00:00
improvements
This commit is contained in:
parent
125498c97d
commit
496d8ff46c
@ -12,10 +12,6 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
AddIndexConstraintsOptimizer::AddIndexConstraintsOptimizer(
|
||||
const StorageMetadataPtr & metadata_snapshot_)
|
||||
@ -124,8 +120,10 @@ namespace
|
||||
{
|
||||
Poco::Logger::get("INDEX_HINT_CREATE").information("CHECK");
|
||||
const auto * func = atom.ast->as<ASTFunction>();
|
||||
if (func && func->arguments->children.size() == 2 && getRelationMap().contains(func->name)) {
|
||||
auto check_and_insert = [&](const size_t index, const ComparisonGraph::CompareResult need_result) -> bool {
|
||||
if (func && func->arguments->children.size() == 2 && getRelationMap().contains(func->name))
|
||||
{
|
||||
auto check_and_insert = [&](const size_t index, const ComparisonGraph::CompareResult need_result) -> bool
|
||||
{
|
||||
if (!onlyConstants(func->arguments->children[1 - index]))
|
||||
return false;
|
||||
|
||||
@ -167,13 +165,14 @@ void AddIndexConstraintsOptimizer::perform(CNFQuery & cnf_query)
|
||||
const std::unordered_set<std::string_view> primary_key_set(std::begin(primary_key), std::end(primary_key));
|
||||
|
||||
ASTs primary_key_only_asts;
|
||||
for (const auto & vertex : graph.getVertexes())
|
||||
for (const auto & vertex : graph.getVertices())
|
||||
for (const auto & ast : vertex)
|
||||
if (hasIndexColumns(ast, primary_key_set) && onlyIndexColumns(ast, primary_key_set))
|
||||
primary_key_only_asts.push_back(ast);
|
||||
|
||||
CNFQuery::AndGroup and_group;
|
||||
cnf_query.iterateGroups([&and_group, &graph, &primary_key_only_asts](const auto & or_group) {
|
||||
cnf_query.iterateGroups([&and_group, &graph, &primary_key_only_asts](const auto & or_group)
|
||||
{
|
||||
auto add_group = createIndexHintGroup(or_group, graph, primary_key_only_asts);
|
||||
if (!add_group.empty())
|
||||
and_group.emplace(std::move(add_group));
|
||||
|
@ -36,25 +36,29 @@ ASTPtr ComparisonGraph::normalizeAtom(const ASTPtr & atom) const
|
||||
|
||||
ComparisonGraph::ComparisonGraph(const std::vector<ASTPtr> & atomic_formulas)
|
||||
{
|
||||
static const std::map<std::string, Edge::Type> relation_to_enum = {
|
||||
static const std::unordered_map<std::string, Edge::Type> relation_to_enum =
|
||||
{
|
||||
{"equals", Edge::Type::EQUAL},
|
||||
{"less", Edge::Type::LESS},
|
||||
{"lessOrEquals", Edge::Type::LESS_OR_EQUAL},
|
||||
};
|
||||
|
||||
Graph g;
|
||||
for (const auto & atom_raw : atomic_formulas) {
|
||||
for (const auto & atom_raw : atomic_formulas)
|
||||
{
|
||||
const auto atom = normalizeAtom(atom_raw);
|
||||
|
||||
const auto bad_term = std::numeric_limits<std::size_t>::max();
|
||||
auto get_index = [](const ASTPtr & ast, Graph & asts_graph) -> std::size_t {
|
||||
auto get_index = [](const ASTPtr & ast, Graph & asts_graph) -> std::size_t
|
||||
{
|
||||
const auto it = asts_graph.ast_hash_to_component.find(ast->getTreeHash());
|
||||
if (it != std::end(asts_graph.ast_hash_to_component))
|
||||
{
|
||||
if (!std::any_of(
|
||||
std::cbegin(asts_graph.vertexes[it->second].asts),
|
||||
std::cend(asts_graph.vertexes[it->second].asts),
|
||||
[ast](const ASTPtr & constraint_ast) {
|
||||
std::cbegin(asts_graph.vertices[it->second].asts),
|
||||
std::cend(asts_graph.vertices[it->second].asts),
|
||||
[ast](const ASTPtr & constraint_ast)
|
||||
{
|
||||
return constraint_ast->getTreeHash() == ast->getTreeHash()
|
||||
&& constraint_ast->getColumnName() == ast->getColumnName();
|
||||
}))
|
||||
@ -66,10 +70,10 @@ ComparisonGraph::ComparisonGraph(const std::vector<ASTPtr> & atomic_formulas)
|
||||
}
|
||||
else
|
||||
{
|
||||
asts_graph.ast_hash_to_component[ast->getTreeHash()] = asts_graph.vertexes.size();
|
||||
asts_graph.vertexes.push_back(EqualComponent{{ast}});
|
||||
asts_graph.ast_hash_to_component[ast->getTreeHash()] = asts_graph.vertices.size();
|
||||
asts_graph.vertices.push_back(EqualComponent{{ast}});
|
||||
asts_graph.edges.emplace_back();
|
||||
return asts_graph.vertexes.size() - 1;
|
||||
return asts_graph.vertices.size() - 1;
|
||||
}
|
||||
};
|
||||
|
||||
@ -263,15 +267,18 @@ std::optional<std::size_t> ComparisonGraph::getComponentId(const ASTPtr & ast) c
|
||||
return {};
|
||||
const size_t index = hash_it->second;
|
||||
if (std::any_of(
|
||||
std::cbegin(graph.vertexes[index].asts),
|
||||
std::cend(graph.vertexes[index].asts),
|
||||
std::cbegin(graph.vertices[index].asts),
|
||||
std::cend(graph.vertices[index].asts),
|
||||
[ast](const ASTPtr & constraint_ast)
|
||||
{
|
||||
return constraint_ast->getTreeHash() == ast->getTreeHash() &&
|
||||
constraint_ast->getColumnName() == ast->getColumnName();
|
||||
})) {
|
||||
}))
|
||||
{
|
||||
return index;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
return {};
|
||||
}
|
||||
}
|
||||
@ -283,18 +290,21 @@ bool ComparisonGraph::hasPath(const size_t left, const size_t right) const
|
||||
|
||||
std::vector<ASTPtr> ComparisonGraph::getComponent(const std::size_t id) const
|
||||
{
|
||||
return graph.vertexes[id].asts;
|
||||
return graph.vertices[id].asts;
|
||||
}
|
||||
|
||||
bool ComparisonGraph::EqualComponent::hasConstant() const {
|
||||
bool ComparisonGraph::EqualComponent::hasConstant() const
|
||||
{
|
||||
return constant_index != -1;
|
||||
}
|
||||
|
||||
/// Returns the AST stored at constant_index.
/// No check is made here; callers such as getEqualConst() test hasConstant() first.
/// NOTE(review): with constant_index == -1 this would index outside `asts` - confirm every caller guards it.
ASTPtr ComparisonGraph::EqualComponent::getConstant() const
{
    return asts[constant_index];
}
|
||||
|
||||
void ComparisonGraph::EqualComponent::buildConstants() {
|
||||
void ComparisonGraph::EqualComponent::buildConstants()
|
||||
{
|
||||
constant_index = -1;
|
||||
for (size_t i = 0; i < asts.size(); ++i)
|
||||
{
|
||||
@ -308,7 +318,8 @@ void ComparisonGraph::EqualComponent::buildConstants() {
|
||||
|
||||
ComparisonGraph::CompareResult ComparisonGraph::getCompareResult(const std::string & name)
|
||||
{
|
||||
static const std::unordered_map<std::string, CompareResult> relation_to_compare = {
|
||||
static const std::unordered_map<std::string, CompareResult> relation_to_compare =
|
||||
{
|
||||
{"equals", CompareResult::EQUAL},
|
||||
{"notEquals", CompareResult::NOT_EQUAL},
|
||||
{"less", CompareResult::LESS},
|
||||
@ -323,7 +334,8 @@ ComparisonGraph::CompareResult ComparisonGraph::getCompareResult(const std::stri
|
||||
|
||||
ComparisonGraph::CompareResult ComparisonGraph::inverseCompareResult(const CompareResult result)
|
||||
{
|
||||
static const std::unordered_map<CompareResult, CompareResult> inverse_relations = {
|
||||
static const std::unordered_map<CompareResult, CompareResult> inverse_relations =
|
||||
{
|
||||
{CompareResult::NOT_EQUAL, CompareResult::EQUAL},
|
||||
{CompareResult::EQUAL, CompareResult::NOT_EQUAL},
|
||||
{CompareResult::GREATER_OR_EQUAL, CompareResult::LESS},
|
||||
@ -341,8 +353,8 @@ std::optional<ASTPtr> ComparisonGraph::getEqualConst(const ASTPtr & ast) const
|
||||
if (hash_it == std::end(graph.ast_hash_to_component))
|
||||
return std::nullopt;
|
||||
const size_t index = hash_it->second;
|
||||
return graph.vertexes[index].hasConstant()
|
||||
? std::optional<ASTPtr>{graph.vertexes[index].getConstant()}
|
||||
return graph.vertices[index].hasConstant()
|
||||
? std::optional<ASTPtr>{graph.vertices[index].getConstant()}
|
||||
: std::nullopt;
|
||||
}
|
||||
|
||||
@ -360,7 +372,7 @@ std::optional<std::pair<Field, bool>> ComparisonGraph::getConstUpperBound(const
|
||||
const ssize_t from = ast_const_upper_bound[to];
|
||||
if (from == -1)
|
||||
return std::nullopt;
|
||||
return std::make_pair(graph.vertexes[from].getConstant()->as<ASTLiteral>()->value, dists.at({from, to}) == Path::LESS);
|
||||
return std::make_pair(graph.vertices[from].getConstant()->as<ASTLiteral>()->value, dists.at({from, to}) == Path::LESS);
|
||||
}
|
||||
|
||||
std::optional<std::pair<Field, bool>> ComparisonGraph::getConstLowerBound(const ASTPtr & ast) const
|
||||
@ -377,7 +389,7 @@ std::optional<std::pair<Field, bool>> ComparisonGraph::getConstLowerBound(const
|
||||
const ssize_t to = ast_const_lower_bound[from];
|
||||
if (to == -1)
|
||||
return std::nullopt;
|
||||
return std::make_pair(graph.vertexes[to].getConstant()->as<ASTLiteral>()->value, dists.at({from, to}) == Path::LESS);
|
||||
return std::make_pair(graph.vertices[to].getConstant()->as<ASTLiteral>()->value, dists.at({from, to}) == Path::LESS);
|
||||
}
|
||||
|
||||
void ComparisonGraph::dfsOrder(const Graph & asts_graph, size_t v, std::vector<bool> & visited, std::vector<size_t> & order) const
|
||||
@ -397,9 +409,9 @@ ComparisonGraph::Graph ComparisonGraph::reverseGraph(const Graph & asts_graph) c
|
||||
{
|
||||
Graph g;
|
||||
g.ast_hash_to_component = asts_graph.ast_hash_to_component;
|
||||
g.vertexes = asts_graph.vertexes;
|
||||
g.edges.resize(g.vertexes.size());
|
||||
for (size_t v = 0; v < asts_graph.vertexes.size(); ++v)
|
||||
g.vertices = asts_graph.vertices;
|
||||
g.edges.resize(g.vertices.size());
|
||||
for (size_t v = 0; v < asts_graph.vertices.size(); ++v)
|
||||
{
|
||||
for (const auto & edge : asts_graph.edges[v])
|
||||
{
|
||||
@ -409,10 +421,10 @@ ComparisonGraph::Graph ComparisonGraph::reverseGraph(const Graph & asts_graph) c
|
||||
return asts_graph;
|
||||
}
|
||||
|
||||
std::vector<ASTs> ComparisonGraph::getVertexes() const
|
||||
std::vector<ASTs> ComparisonGraph::getVertices() const
|
||||
{
|
||||
std::vector<ASTs> result;
|
||||
for (const auto & vertex : graph.vertexes)
|
||||
for (const auto & vertex : graph.vertices)
|
||||
{
|
||||
result.emplace_back();
|
||||
for (const auto & ast : vertex.asts)
|
||||
@ -438,7 +450,7 @@ ComparisonGraph::Graph ComparisonGraph::BuildGraphFromAstsGraph(const Graph & as
|
||||
{
|
||||
Poco::Logger::get("Graph").information("building");
|
||||
/// Find strongly connected component
|
||||
const auto n = asts_graph.vertexes.size();
|
||||
const auto n = asts_graph.vertices.size();
|
||||
|
||||
std::vector<size_t> order;
|
||||
{
|
||||
@ -466,19 +478,19 @@ ComparisonGraph::Graph ComparisonGraph::BuildGraphFromAstsGraph(const Graph & as
|
||||
}
|
||||
|
||||
Graph result;
|
||||
result.vertexes.resize(component);
|
||||
result.vertices.resize(component);
|
||||
result.edges.resize(component);
|
||||
for (const auto & [hash, index] : asts_graph.ast_hash_to_component)
|
||||
{
|
||||
result.ast_hash_to_component[hash] = components[index];
|
||||
result.vertexes[components[index]].asts.insert(
|
||||
std::end(result.vertexes[components[index]].asts),
|
||||
std::begin(asts_graph.vertexes[index].asts),
|
||||
std::end(asts_graph.vertexes[index].asts)); // asts_graph has only one ast per vertex
|
||||
result.vertices[components[index]].asts.insert(
|
||||
std::end(result.vertices[components[index]].asts),
|
||||
std::begin(asts_graph.vertices[index].asts),
|
||||
std::end(asts_graph.vertices[index].asts)); // asts_graph has only one ast per vertex
|
||||
}
|
||||
|
||||
/// Calculate constants
|
||||
for (auto & vertex : result.vertexes)
|
||||
for (auto & vertex : result.vertices)
|
||||
{
|
||||
vertex.buildConstants();
|
||||
}
|
||||
@ -494,16 +506,16 @@ ComparisonGraph::Graph ComparisonGraph::BuildGraphFromAstsGraph(const Graph & as
|
||||
// TODO: make edges unique (left most strict)
|
||||
}
|
||||
|
||||
for (size_t v = 0; v < result.vertexes.size(); ++v)
|
||||
for (size_t v = 0; v < result.vertices.size(); ++v)
|
||||
{
|
||||
for (size_t u = 0; u < result.vertexes.size(); ++u)
|
||||
for (size_t u = 0; u < result.vertices.size(); ++u)
|
||||
{
|
||||
if (v == u)
|
||||
continue;
|
||||
if (result.vertexes[v].hasConstant() && result.vertexes[u].hasConstant())
|
||||
if (result.vertices[v].hasConstant() && result.vertices[u].hasConstant())
|
||||
{
|
||||
const auto * left = result.vertexes[v].getConstant()->as<ASTLiteral>();
|
||||
const auto * right = result.vertexes[u].getConstant()->as<ASTLiteral>();
|
||||
const auto * left = result.vertices[v].getConstant()->as<ASTLiteral>();
|
||||
const auto * right = result.vertices[u].getConstant()->as<ASTLiteral>();
|
||||
|
||||
/// Only less. Equal constant fields = equal literals so it was already considered above.
|
||||
if (left->value > right->value)
|
||||
@ -521,7 +533,7 @@ std::map<std::pair<size_t, size_t>, ComparisonGraph::Path> ComparisonGraph::Buil
|
||||
{
|
||||
// min path : < = -1, =< = 0
|
||||
const auto inf = std::numeric_limits<int8_t>::max();
|
||||
const size_t n = graph.vertexes.size();
|
||||
const size_t n = graph.vertices.size();
|
||||
std::vector<std::vector<int8_t>> results(n, std::vector<int8_t>(n, inf));
|
||||
for (size_t v = 0; v < n; ++v)
|
||||
{
|
||||
@ -547,24 +559,27 @@ std::map<std::pair<size_t, size_t>, ComparisonGraph::Path> ComparisonGraph::Buil
|
||||
|
||||
std::pair<std::vector<ssize_t>, std::vector<ssize_t>> ComparisonGraph::buildConstBounds() const
|
||||
{
|
||||
const size_t n = graph.vertexes.size();
|
||||
const size_t n = graph.vertices.size();
|
||||
std::vector<ssize_t> lower(n, -1);
|
||||
std::vector<ssize_t> upper(n, -1);
|
||||
|
||||
auto get_value = [this](const size_t vertex) -> Field {
|
||||
return graph.vertexes[vertex].getConstant()->as<ASTLiteral>()->value;
|
||||
auto get_value = [this](const size_t vertex) -> Field
|
||||
{
|
||||
return graph.vertices[vertex].getConstant()->as<ASTLiteral>()->value;
|
||||
};
|
||||
|
||||
for (const auto & [edge, path] : dists)
|
||||
{
|
||||
const auto [from, to] = edge;
|
||||
if (graph.vertexes[to].hasConstant()) {
|
||||
if (graph.vertices[to].hasConstant())
|
||||
{
|
||||
if (lower[from] == -1
|
||||
|| get_value(lower[from]) > get_value(to)
|
||||
|| (get_value(lower[from]) >= get_value(to) && dists.at({from, to}) == Path::LESS))
|
||||
lower[from] = to;
|
||||
}
|
||||
if (graph.vertexes[from].hasConstant()) {
|
||||
if (graph.vertices[from].hasConstant())
|
||||
{
|
||||
if (upper[to] == -1
|
||||
|| get_value(upper[to]) < get_value(from)
|
||||
|| (get_value(upper[to]) <= get_value(from) && dists.at({from, to}) == Path::LESS))
|
||||
|
@ -53,7 +53,7 @@ public:
|
||||
std::optional<std::pair<Field, bool>> getConstUpperBound(const ASTPtr & ast) const;
|
||||
std::optional<std::pair<Field, bool>> getConstLowerBound(const ASTPtr & ast) const;
|
||||
|
||||
std::vector<ASTs> getVertexes() const;
|
||||
std::vector<ASTs> getVertices() const;
|
||||
|
||||
private:
|
||||
/// strongly connected component
|
||||
@ -86,14 +86,16 @@ private:
|
||||
|
||||
struct Graph
|
||||
{
|
||||
struct ASTHash {
|
||||
size_t operator() (const IAST::Hash & hash) const {
|
||||
struct ASTHash
|
||||
{
|
||||
size_t operator() (const IAST::Hash & hash) const
|
||||
{
|
||||
return hash.first;
|
||||
}
|
||||
};
|
||||
|
||||
std::unordered_map<IAST::Hash, size_t, ASTHash> ast_hash_to_component;
|
||||
std::vector<EqualComponent> vertexes;
|
||||
std::vector<EqualComponent> vertices;
|
||||
std::vector<std::vector<Edge>> edges;
|
||||
};
|
||||
|
||||
|
@ -1 +1,38 @@
|
||||
#include "ConstraintMatcherVisitor.h"
|
||||
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Poco/Logger.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
bool ConstraintMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
|
||||
{
|
||||
return node->as<ASTFunction>() || node->as<ASTExpressionList>();
|
||||
}
|
||||
|
||||
std::optional<bool> ConstraintMatcher::getASTValue(const ASTPtr & node, Data & data)
|
||||
{
|
||||
const auto it = data.constraints.find(node->getTreeHash().second);
|
||||
if (it != std::end(data.constraints))
|
||||
{
|
||||
for (const auto & ast : it->second)
|
||||
{
|
||||
if (node->getColumnName() == ast->getColumnName())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/// Replaces `ast` with a UInt8 literal when getASTValue() yields a value for it;
/// leaves `ast` untouched otherwise.
void ConstraintMatcher::visit(ASTPtr & ast, Data & data)
{
    const auto always_value = getASTValue(ast, data);
    if (!always_value)
        return;

    ast = std::make_shared<ASTLiteral>(static_cast<UInt8>(*always_value));
}
|
||||
|
||||
}
|
||||
|
@ -1,13 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTSubquery.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Poco/Logger.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -21,25 +14,9 @@ struct ConstraintMatcher
|
||||
|
||||
using Visitor = InDepthNodeVisitor<ConstraintMatcher, true>;
|
||||
|
||||
static bool needChildVisit(const ASTPtr & node, const ASTPtr &) { return node->as<ASTFunction>() || node->as<ASTExpressionList>(); }
|
||||
|
||||
static std::optional<bool> getASTValue(const ASTPtr & node, Data & data) {
|
||||
const auto it = data.constraints.find(node->getTreeHash().second);
|
||||
if (it != std::end(data.constraints)) {
|
||||
for (const auto & ast : it->second) {
|
||||
if (node->getColumnName() == ast->getColumnName()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static void visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (const auto always_value = getASTValue(ast, data); always_value)
|
||||
ast = std::make_shared<ASTLiteral>(static_cast<UInt8>(*always_value));
|
||||
}
|
||||
static bool needChildVisit(const ASTPtr & node, const ASTPtr &);
|
||||
static std::optional<bool> getASTValue(const ASTPtr & node, Data & data);
|
||||
static void visit(ASTPtr & ast, Data & data);
|
||||
};
|
||||
|
||||
using ConstraintMatcherVisitor = InDepthNodeVisitor<ConstraintMatcher, true>;
|
||||
|
@ -1,7 +1,6 @@
|
||||
#include <Interpreters/TreeCNFConverter.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
|
||||
#include <Poco/Logger.h>
|
||||
|
||||
namespace DB
|
||||
@ -37,7 +36,8 @@ void traversePushNot(ASTPtr & node, bool add_negation)
|
||||
|
||||
if (func && (func->name == "and" || func->name == "or"))
|
||||
{
|
||||
if (add_negation) {
|
||||
if (add_negation)
|
||||
{
|
||||
/// apply De Morgan's Law
|
||||
node = makeASTFunction(
|
||||
(func->name == "and" ? "or" : "and"),
|
||||
@ -302,7 +302,7 @@ CNFQuery & CNFQuery::pushNotInFuntions()
|
||||
|
||||
std::string CNFQuery::dump() const
|
||||
{
|
||||
std::stringstream res;
|
||||
WriteBufferFromOwnString res;
|
||||
bool first = true;
|
||||
for (const auto & group : statements)
|
||||
{
|
||||
|
@ -511,23 +511,23 @@ void optimizeLimitBy(const ASTSelectQuery * select_query)
|
||||
}
|
||||
|
||||
/// Use constraints to get rid of useless parts of query
|
||||
void optimizeWithConstraints(ASTSelectQuery * select_query, Aliases & aliases, const NameSet & source_columns_set,
|
||||
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns,
|
||||
void optimizeWithConstraints(ASTSelectQuery * select_query, Aliases & /*aliases*/, const NameSet & /*source_columns_set*/,
|
||||
const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
|
||||
const StorageMetadataPtr & metadata_snapshot)
|
||||
{
|
||||
WhereConstraintsOptimizer(select_query, aliases, source_columns_set, tables_with_columns, metadata_snapshot).perform();
|
||||
WhereConstraintsOptimizer(select_query, metadata_snapshot).perform();
|
||||
if (select_query->where())
|
||||
Poco::Logger::get("CNF").information(select_query->where()->dumpTree());
|
||||
else
|
||||
Poco::Logger::get("CNF").information("NO WHERE");
|
||||
}
|
||||
|
||||
void optimizeSubstituteColumn(ASTSelectQuery * select_query, Aliases & aliases, const NameSet & source_columns_set,
|
||||
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns,
|
||||
void optimizeSubstituteColumn(ASTSelectQuery * select_query, Aliases & /*aliases*/, const NameSet & /*source_columns_set*/,
|
||||
const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const ConstStoragePtr & storage)
|
||||
{
|
||||
SubstituteColumnOptimizer(select_query, aliases, source_columns_set, tables_with_columns, metadata_snapshot, storage).perform();
|
||||
SubstituteColumnOptimizer(select_query, metadata_snapshot, storage).perform();
|
||||
}
|
||||
|
||||
/// transform where to CNF for more convenient optimization
|
||||
|
@ -4,7 +4,6 @@
|
||||
#include <Interpreters/ComparisonGraph.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTConstraintDeclaration.h>
|
||||
#include <Storages/StorageInMemoryMetadata.h>
|
||||
#include <Interpreters/AddIndexConstraintsOptimizer.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
@ -13,21 +12,10 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/// Stores the query to optimize and the metadata snapshot; no other work is done here.
/// The previously ignored aliases / source columns / tables parameters (and their
/// commented-out initializers) are dropped.
WhereConstraintsOptimizer::WhereConstraintsOptimizer(
    ASTSelectQuery * select_query_,
    const StorageMetadataPtr & metadata_snapshot_)
    : select_query(select_query_)
    , metadata_snapshot(metadata_snapshot_)
{
}
|
||||
@ -181,13 +169,18 @@ void WhereConstraintsOptimizer::perform()
|
||||
auto cnf = TreeCNFConverter::toCNF(select_query->where());
|
||||
Poco::Logger::get("BEFORE OPT").information(cnf.dump());
|
||||
cnf.pullNotOutFunctions()
|
||||
.filterAlwaysTrueGroups([&constraint_data, &compare_graph](const auto & group) { /// remove always true groups from CNF
|
||||
.filterAlwaysTrueGroups([&constraint_data, &compare_graph](const auto & group)
|
||||
{
|
||||
/// remove always true groups from CNF
|
||||
return !checkIfGroupAlwaysTrueFullMatch(group, constraint_data) && !checkIfGroupAlwaysTrueGraph(group, compare_graph);
|
||||
})
|
||||
.filterAlwaysFalseAtoms([&constraint_data, &compare_graph](const auto & atom) { /// remove always false atoms from CNF
|
||||
.filterAlwaysFalseAtoms([&constraint_data, &compare_graph](const auto & atom)
|
||||
{
|
||||
/// remove always false atoms from CNF
|
||||
return !checkIfAtomAlwaysFalseFullMatch(atom, constraint_data) && !checkIfAtomAlwaysFalseGraph(atom, compare_graph);
|
||||
})
|
||||
.transformAtoms([&compare_graph](const auto & atom) {
|
||||
.transformAtoms([&compare_graph](const auto & atom)
|
||||
{
|
||||
return replaceTermsToConstants(atom, compare_graph);
|
||||
})
|
||||
.pushNotInFuntions();
|
||||
|
@ -17,17 +17,14 @@ using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
|
||||
class WhereConstraintsOptimizer final
|
||||
{
|
||||
public:
|
||||
WhereConstraintsOptimizer(ASTSelectQuery * select_query, Aliases & /* aliases */, const NameSet & /* source_columns_set */,
|
||||
const std::vector<TableWithColumnNamesAndTypes> & /* tables_with_columns */,
|
||||
WhereConstraintsOptimizer(
|
||||
ASTSelectQuery * select_query,
|
||||
const StorageMetadataPtr & metadata_snapshot);
|
||||
|
||||
void perform();
|
||||
|
||||
private:
|
||||
ASTSelectQuery * select_query;
|
||||
/*Aliases & aliases;
|
||||
const NameSet & source_columns_set;
|
||||
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns;*/
|
||||
const StorageMetadataPtr & metadata_snapshot;
|
||||
};
|
||||
|
||||
|
@ -10,7 +10,8 @@ namespace DB
|
||||
class ASTConstraintDeclaration : public IAST
|
||||
{
|
||||
public:
|
||||
/// Kind of a constraint declaration.
/// ConstraintsDescription::filterConstraints maps ASSUME to ConstraintType::ASSUME.
enum class Type
{
    CHECK,
    ASSUME,
};
|
||||
|
@ -53,13 +53,15 @@ ASTs ConstraintsDescription::filterConstraints(ConstraintType selection) const
|
||||
case ASTConstraintDeclaration::Type::ASSUME:
|
||||
return static_cast<UInt32>(ConstraintType::ASSUME);
|
||||
}
|
||||
throw Exception("Unknown constraint type.", ErrorCodes::LOGICAL_ERROR);
|
||||
};
|
||||
|
||||
ASTs res;
|
||||
res.reserve(constraints.size());
|
||||
for (const auto & constraint : constraints)
|
||||
{
|
||||
if ((ast_to_decr_constraint_type(constraint->as<ASTConstraintDeclaration>()->type) & static_cast<UInt32>(selection)) != 0) {
|
||||
if ((ast_to_decr_constraint_type(constraint->as<ASTConstraintDeclaration>()->type) & static_cast<UInt32>(selection)) != 0)
|
||||
{
|
||||
res.push_back(constraint);
|
||||
}
|
||||
}
|
||||
@ -88,7 +90,8 @@ std::vector<CNFQuery::AtomicFormula> ConstraintsDescription::getAtomicConstraint
|
||||
Poco::Logger::get("atomic_formula: initial:").information(constraint->as<ASTConstraintDeclaration>()->expr->ptr()->dumpTree());
|
||||
const auto cnf = TreeCNFConverter::toCNF(constraint->as<ASTConstraintDeclaration>()->expr->ptr())
|
||||
.pullNotOutFunctions();
|
||||
for (const auto & group : cnf.getStatements()) {
|
||||
for (const auto & group : cnf.getStatements())
|
||||
{
|
||||
if (group.size() == 1)
|
||||
constraint_data.push_back(*group.begin());
|
||||
}
|
||||
|
@ -20,7 +20,8 @@ public:
|
||||
|
||||
static ConstraintsDescription parse(const String & str);
|
||||
|
||||
enum class ConstraintType {
|
||||
enum class ConstraintType
|
||||
{
|
||||
CHECK = 1,
|
||||
ASSUME = 2,
|
||||
ALWAYS_TRUE = CHECK | ASSUME,
|
||||
|
@ -2008,8 +2008,13 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
|
||||
for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark)
|
||||
{
|
||||
if (index_mark != index_range.begin || !granules_filled || last_index_mark != index_range.begin)
|
||||
{
|
||||
for (size_t i = 0; i < readers.size(); ++i)
|
||||
{
|
||||
granules[i] = readers[i].read();
|
||||
granules_filled = true;
|
||||
}
|
||||
}
|
||||
|
||||
MarkRange data_range(
|
||||
std::max(range.begin, index_mark * index_granularity),
|
||||
|
@ -83,7 +83,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexHypothesis::createIndexAggregator() co
|
||||
/// This index type does not provide a standalone index condition.
/// Always throws LOGICAL_ERROR ("Not supported"); a null condition is no longer returned.
MergeTreeIndexConditionPtr MergeTreeIndexHypothesis::createIndexCondition(
    const SelectQueryInfo &, ContextPtr) const
{
    throw Exception("Not supported", ErrorCodes::LOGICAL_ERROR);
}
|
||||
|
||||
bool MergeTreeIndexHypothesis::mayBenefitFromIndexForIn(const ASTPtr &) const
|
||||
|
@ -13,7 +13,6 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int INCORRECT_QUERY;
|
||||
}
|
||||
|
||||
MergeTreeIndexMergedCondition::MergeTreeIndexMergedCondition(
|
||||
@ -103,7 +102,8 @@ ComparisonGraph::CompareResult getExpectedCompare(const CNFQuery::AtomicFormula
|
||||
}
|
||||
|
||||
/// Replaces < -> <=, > -> >= and assumes that all hypotheses are true then checks if path exists
|
||||
bool MergeTreeIndexMergedCondition::alwaysUnknownOrTrue() const {
|
||||
bool MergeTreeIndexMergedCondition::alwaysUnknownOrTrue() const
|
||||
{
|
||||
std::vector<ASTPtr> active_atomic_formulas(atomic_constraints);
|
||||
for (size_t i = 0; i < index_to_compare_atomic_hypotheses.size(); ++i)
|
||||
{
|
||||
|
@ -189,8 +189,8 @@ void MergeTreeWhereOptimizer::optimize(ASTSelectQuery & select) const
|
||||
|
||||
if (select.where())
|
||||
{
|
||||
Poco::Logger::get("MTPRWHERE WHE").information(select.where()->getColumnName());
|
||||
Poco::Logger::get("MTPRWHERE WHE").information(select.where()->dumpTree());
|
||||
Poco::Logger::get("MTPRWHERE WHERE").information(select.where()->getColumnName());
|
||||
Poco::Logger::get("MTPRWHERE WHERE").information(select.where()->dumpTree());
|
||||
}
|
||||
if(select.prewhere())
|
||||
{
|
||||
|
@ -13,10 +13,6 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
@ -224,15 +220,9 @@ void bruteforce(
|
||||
|
||||
SubstituteColumnOptimizer::SubstituteColumnOptimizer(
|
||||
ASTSelectQuery * select_query_,
|
||||
Aliases & /*aliases_*/,
|
||||
const NameSet & /*source_columns_set_*/,
|
||||
const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns_*/,
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
const ConstStoragePtr & storage_)
|
||||
: select_query(select_query_)
|
||||
/* , aliases(aliases_)
|
||||
, source_columns_set(source_columns_set_)
|
||||
, tables_with_columns(tables_with_columns_)*/
|
||||
, metadata_snapshot(metadata_snapshot_)
|
||||
, storage(storage_)
|
||||
{
|
||||
@ -261,7 +251,8 @@ void SubstituteColumnOptimizer::perform()
|
||||
ast->setAlias(ast->getAliasOrColumnName());
|
||||
}
|
||||
|
||||
auto run_for_all = [&](const auto func) {
|
||||
auto run_for_all = [&](const auto func)
|
||||
{
|
||||
if (select_query->where())
|
||||
func(select_query->refWhere(), false);
|
||||
if (select_query->prewhere())
|
||||
@ -279,7 +270,8 @@ void SubstituteColumnOptimizer::perform()
|
||||
ComponentVisitor::Data component_data(
|
||||
compare_graph, components, old_name, name_to_component, counter_id);
|
||||
std::unordered_set<String> identifiers;
|
||||
auto preprocess = [&](ASTPtr & ast, bool) {
|
||||
auto preprocess = [&](ASTPtr & ast, bool)
|
||||
{
|
||||
ComponentVisitor(component_data).visit(ast);
|
||||
collectIdentifiers(ast, identifiers);
|
||||
};
|
||||
@ -320,7 +312,8 @@ void SubstituteColumnOptimizer::perform()
|
||||
for (size_t i = 0; i < min_expressions.size(); ++i)
|
||||
id_to_expression_map[components_list[i]] = min_expressions[i];
|
||||
|
||||
auto process = [&](ASTPtr & ast, bool is_select) {
|
||||
auto process = [&](ASTPtr & ast, bool is_select)
|
||||
{
|
||||
SubstituteColumnVisitor::Data substitute_data{id_to_expression_map, name_to_component, old_name, is_select};
|
||||
SubstituteColumnVisitor(substitute_data).visit(ast);
|
||||
};
|
||||
|
@ -19,18 +19,13 @@ class SubstituteColumnOptimizer
|
||||
public:
|
||||
SubstituteColumnOptimizer(
|
||||
ASTSelectQuery * select_query,
|
||||
Aliases & /* aliases */, const NameSet & /* source_columns_set */,
|
||||
const std::vector<TableWithColumnNamesAndTypes> & /* tables_with_columns */,
|
||||
const StorageMetadataPtr & /* metadata_snapshot */,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const ConstStoragePtr & storage);
|
||||
|
||||
void perform();
|
||||
|
||||
private:
|
||||
ASTSelectQuery * select_query;
|
||||
/*Aliases & aliases;
|
||||
const NameSet & source_columns_set;
|
||||
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns;*/
|
||||
const StorageMetadataPtr & metadata_snapshot;
|
||||
ConstStoragePtr storage;
|
||||
};
|
||||
|
@ -6,9 +6,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
SETTINGS="SET convert_query_to_cnf = 1; SET optimize_using_constraints = 1; SET optimize_move_to_prewhere = 1;"
|
||||
SETTINGS="SET convert_query_to_cnf = 1; SET optimize_using_constraints = 1; SET optimize_move_to_prewhere = 1"
|
||||
|
||||
$CLICKHOUSE_CLIENT -n --query="
|
||||
$SETTINGS;
|
||||
DROP DATABASE IF EXISTS hypothesis_test;
|
||||
DROP TABLE IF EXISTS hypothesis_test.test;
|
||||
DROP TABLE IF EXISTS hypothesis_test.test2;
|
||||
@ -25,30 +26,28 @@ CREATE TABLE hypothesis_test.test (
|
||||
) ENGINE = MergeTree() ORDER BY i SETTINGS index_granularity=1;
|
||||
"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="INSERT INTO hypothesis_test.test VALUES
|
||||
(0, 1, 2, 2),
|
||||
(1, 2, 1, 2),
|
||||
(2, 2, 2, 1),
|
||||
(3, 1, 2, 3)"
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS;
|
||||
INSERT INTO hypothesis_test.test VALUES (0, 1, 2, 2), (1, 2, 1, 2), (2, 2, 2, 1), (3, 1, 2, 3)"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE b > a FORMAT JSON" | grep "rows_read" # 4
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE b > a FORMAT JSON" | grep "rows_read" # 4
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE b <= a FORMAT JSON" | grep "rows_read"
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE b <= a FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE b >= a FORMAT JSON" | grep "rows_read" # 4
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE b >= a FORMAT JSON" | grep "rows_read" # 4
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE b = a FORMAT JSON" | grep "rows_read"
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE b = a FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE c < a FORMAT JSON" | grep "rows_read"
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE c < a FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE c = a FORMAT JSON" | grep "rows_read"
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE c = a FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE c > a FORMAT JSON" | grep "rows_read" # 4
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE c > a FORMAT JSON" | grep "rows_read" # 4
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test WHERE c < a FORMAT JSON" | grep "rows_read"
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test WHERE c < a FORMAT JSON" | grep "rows_read"
|
||||
|
||||
|
||||
$CLICKHOUSE_CLIENT -n --query="
|
||||
$SETTINGS;
|
||||
CREATE TABLE hypothesis_test.test2 (
|
||||
i UInt64,
|
||||
a UInt64,
|
||||
@ -57,22 +56,20 @@ CREATE TABLE hypothesis_test.test2 (
|
||||
) ENGINE = MergeTree() ORDER BY i SETTINGS index_granularity=1;
|
||||
"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="INSERT INTO hypothesis_test.test2 VALUES
|
||||
(0, 1, 2),
|
||||
(1, 2, 1),
|
||||
(2, 2, 2),
|
||||
(3, 1, 0)"
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS;
|
||||
INSERT INTO hypothesis_test.test2 VALUES (0, 1, 2), (1, 2, 1), (2, 2, 2), (3, 1, 0)"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test2 WHERE a < b FORMAT JSON" | grep "rows_read" # 4
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test2 WHERE a < b FORMAT JSON" | grep "rows_read" # 4
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test2 WHERE a <= b FORMAT JSON" | grep "rows_read" # 4
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test2 WHERE a <= b FORMAT JSON" | grep "rows_read" # 4
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test2 WHERE a = b FORMAT JSON" | grep "rows_read" # 1
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test2 WHERE a = b FORMAT JSON" | grep "rows_read" # 1
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test2 WHERE a != b FORMAT JSON" | grep "rows_read" # 4
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS; SELECT count() FROM hypothesis_test.test2 WHERE a != b FORMAT JSON" | grep "rows_read" # 4
|
||||
|
||||
|
||||
$CLICKHOUSE_CLIENT -n --query="
|
||||
$SETTINGS;
|
||||
CREATE TABLE hypothesis_test.test3 (
|
||||
i UInt64,
|
||||
a UInt64,
|
||||
@ -81,22 +78,21 @@ CREATE TABLE hypothesis_test.test3 (
|
||||
) ENGINE = MergeTree() ORDER BY i SETTINGS index_granularity=1;
|
||||
"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="INSERT INTO hypothesis_test.test3 VALUES
|
||||
(0, 1, 2),
|
||||
(1, 2, 1),
|
||||
(2, 2, 2),
|
||||
(3, 1, 0)"
|
||||
$CLICKHOUSE_CLIENT -n --query="
|
||||
$SETTINGS;
|
||||
INSERT INTO hypothesis_test.test3 VALUES (0, 1, 2), (1, 2, 1), (2, 2, 2), (3, 1, 0)"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test3 WHERE a < b FORMAT JSON" | grep "rows_read" # 3
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS;SELECT count() FROM hypothesis_test.test3 WHERE a < b FORMAT JSON" | grep "rows_read" # 3
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test3 WHERE a <= b FORMAT JSON" | grep "rows_read" # 4
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS;SELECT count() FROM hypothesis_test.test3 WHERE a <= b FORMAT JSON" | grep "rows_read" # 4
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test3 WHERE a = b FORMAT JSON" | grep "rows_read" # 4
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS;SELECT count() FROM hypothesis_test.test3 WHERE a = b FORMAT JSON" | grep "rows_read" # 4
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT count() FROM hypothesis_test.test3 WHERE a != b FORMAT JSON" | grep "rows_read" # 3
|
||||
$CLICKHOUSE_CLIENT -n --query="$SETTINGS;SELECT count() FROM hypothesis_test.test3 WHERE a != b FORMAT JSON" | grep "rows_read" # 3
|
||||
|
||||
|
||||
$CLICKHOUSE_CLIENT -n --query="
|
||||
$SETTINGS;
|
||||
DROP TABLE hypothesis_test.test;
|
||||
DROP TABLE hypothesis_test.test2;
|
||||
DROP TABLE hypothesis_test.test3;
|
||||
|
Loading…
Reference in New Issue
Block a user