mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 10:02:01 +00:00
more optimization
This commit is contained in:
parent
92e4f1370e
commit
10fecb9183
@ -30,7 +30,7 @@ void splitMultiLogic(ASTPtr & node)
|
||||
}
|
||||
}
|
||||
|
||||
/// Push NOT to leafs
|
||||
/// Push NOT to leaves, remove NOT NOT ...
|
||||
void traversePushNot(ASTPtr & node, bool add_negation)
|
||||
{
|
||||
auto * func = node->as<ASTFunction>();
|
||||
@ -196,7 +196,7 @@ ASTPtr TreeCNFConverter::fromCNF(const CNFQuery & cnf)
|
||||
or_groups.push_back(makeASTFunction("or"));
|
||||
auto * func = or_groups.back()->as<ASTFunction>();
|
||||
for (const auto & ast : group)
|
||||
func->arguments->children.push_back(ast);
|
||||
func->arguments->children.push_back(ast->clone());
|
||||
}
|
||||
}
|
||||
|
||||
@ -211,6 +211,90 @@ ASTPtr TreeCNFConverter::fromCNF(const CNFQuery & cnf)
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Rewrites a "negative" relation as NOT applied to its positive counterpart,
/// e.g. `notEquals(a, b)` becomes `not(equals(a, b))` and
/// `greaterOrEquals(a, b)` becomes `not(less(a, b))`.
/// A node that is not a function, or whose name is not in the table below, is left as is.
void pullNotOut(ASTPtr & node)
{
    /// relation -> relation it is the negation of
    static const std::map<std::string, std::string> inverse_relations = {
        {"notEquals", "equals"},
        {"greaterOrEquals", "less"},
        {"greater", "lessOrEquals"},
        {"notIn", "in"},
        {"notLike", "like"},
        {"notEmpty", "empty"},
    };

    const auto * original = node->as<ASTFunction>();
    if (!original)
        return;

    const auto found = inverse_relations.find(original->name);
    if (found == inverse_relations.end())
        return;

    /// The rename is done on a clone, not on the node that was passed in.
    node = node->clone();
    node->as<ASTFunction>()->name = found->second;

    /// Wrap the renamed relation into NOT.
    node = makeASTFunction("not", node);
}
|
||||
|
||||
/// Rewrites a "positive" relation as NOT applied to its negative counterpart,
/// e.g. `equals(a, b)` becomes `not(notEquals(a, b))` and
/// `less(a, b)` becomes `not(greaterOrEquals(a, b))`.
/// A node that is not a function, or whose name is not in the table below, is left as is.
void pushNotIn(ASTPtr & node)
{
    /// relation -> its negation
    static const std::map<std::string, std::string> inverse_relations = {
        {"equals", "notEquals"},
        {"less", "greaterOrEquals"},
        {"lessOrEquals", "greater"},
        {"in", "notIn"},
        {"like", "notLike"},
        {"empty", "notEmpty"},
    };

    const auto * relation = node->as<ASTFunction>();
    if (relation == nullptr)
        return;

    const auto inverse = inverse_relations.find(relation->name);
    if (inverse != inverse_relations.end())
    {
        /// Rename a clone of the relation ...
        ASTPtr negated = node->clone();
        auto * negated_func = negated->as<ASTFunction>();
        negated_func->name = inverse->second;

        /// ... and put NOT on top of it.
        node = makeASTFunction("not", negated);
    }
}
|
||||
|
||||
/// Replaces every function atom of the CNF by a normalized copy in which negative
/// relations are expressed through NOT over the positive one (see pullNotOut), after
/// which traversePushNot is run over the copy. Atoms that are not functions are kept as is.
/// Returns *this so that calls can be chained.
CNFQuery & CNFQuery::pullNotOutFunctions()
{
    transformAtoms([](const ASTPtr & node) -> ASTPtr
    {
        if (!node->as<ASTFunction>())
            return node;

        /// Every rewrite below is applied to the copy that is returned;
        /// the atom that was passed in is never modified.
        ASTPtr result = node->clone();
        auto * func = result->as<ASTFunction>();

        if (func->name == "not")
            pullNotOut(func->arguments->children.front()); /// rewrite the expression under NOT
        else
            pullNotOut(result);

        /// `func` may be stale here (pullNotOut can replace `result`), it is not used anymore.
        traversePushNot(result, false);
        return result;
    });
    return *this;
}
|
||||
|
||||
/// Replaces every function atom of the CNF by a copy in which the relation under a
/// top-level NOT is rewritten by pushNotIn, after which traversePushNot is run over the copy.
/// Atoms that are not functions are kept as is.
/// Returns *this so that calls can be chained.
CNFQuery & CNFQuery::pushNotInFuntions()
{
    transformAtoms([](const ASTPtr & node) -> ASTPtr
    {
        if (!node->as<ASTFunction>())
            return node;

        /// Every rewrite below is applied to the copy that is returned;
        /// the atom that was passed in is never modified.
        ASTPtr result = node->clone();
        auto * func = result->as<ASTFunction>();

        if (func->name == "not")
            pushNotIn(func->arguments->children.front()); /// rewrite the expression under NOT

        traversePushNot(result, false);
        return result;
    });
    return *this;
}
|
||||
|
||||
std::string CNFQuery::dump() const
|
||||
{
|
||||
std::stringstream res;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
@ -14,11 +15,11 @@ public:
|
||||
using OrGroup = std::set<ASTPtr>; // Add NOT container???
|
||||
using AndGroup = std::set<OrGroup>;
|
||||
|
||||
CNFQuery(AndGroup && statements_)
|
||||
: statements(std::move(statements_)) {}
|
||||
CNFQuery(AndGroup && statements_) : statements(std::move(statements_)) { }
|
||||
|
||||
template <typename P>
|
||||
void filterGroups(P predicate) {
|
||||
CNFQuery & filterGroups(P predicate) /// delete always true groups
|
||||
{
|
||||
AndGroup filtered;
|
||||
for (const auto & or_group : statements)
|
||||
{
|
||||
@ -26,28 +27,78 @@ public:
|
||||
filtered.insert(or_group);
|
||||
}
|
||||
std::swap(statements, filtered);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
void filterAtoms(P predicate) {
|
||||
CNFQuery & filterAtoms(P predicate) /// delete always false atoms
|
||||
{
|
||||
AndGroup filtered;
|
||||
for (const auto & or_group : statements)
|
||||
{
|
||||
OrGroup filtered_group;
|
||||
for (auto ast : or_group) {
|
||||
for (auto ast : or_group)
|
||||
{
|
||||
if (predicate(ast))
|
||||
filtered_group.insert(ast);
|
||||
}
|
||||
if (!filtered_group.empty())
|
||||
filtered.insert(filtered_group);
|
||||
else
|
||||
{
|
||||
/// all atoms false -> group false -> CNF false
|
||||
filtered.clear();
|
||||
filtered_group.clear();
|
||||
filtered_group.insert(std::make_shared<ASTLiteral>(static_cast<UInt8>(0)));
|
||||
filtered.insert(filtered_group);
|
||||
std::swap(statements, filtered);
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
std::swap(statements, filtered);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
CNFQuery & transformGroups(F func)
|
||||
{
|
||||
AndGroup result;
|
||||
for (const auto & group : statements)
|
||||
{
|
||||
auto new_group = func(group);
|
||||
if (!new_group.empty())
|
||||
result.insert(std::move(new_group));
|
||||
}
|
||||
std::swap(statements, result);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
CNFQuery & transformAtoms(F func)
|
||||
{
|
||||
transformGroups([func](const OrGroup & group) -> OrGroup
|
||||
{
|
||||
OrGroup result;
|
||||
for (const auto & ast : group)
|
||||
{
|
||||
auto new_ast = func(ast);
|
||||
if (new_ast)
|
||||
result.insert(std::move(new_ast));
|
||||
}
|
||||
return result;
|
||||
});
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Read-only access to the stored statements (the AND-group of OR-groups).
const AndGroup & getStatements() const { return statements; }
|
||||
|
||||
std::string dump() const;
|
||||
|
||||
/// Converts != -> NOT =; <,>= -> (NOT) <; >,<= -> (NOT) <= for simpler matching
|
||||
CNFQuery & pullNotOutFunctions();
|
||||
/// Revert pullNotOutFunctions actions
|
||||
CNFQuery & pushNotInFuntions();
|
||||
|
||||
private:
|
||||
AndGroup statements;
|
||||
};
|
||||
|
@ -514,6 +514,10 @@ void optimizeWithConstraints(ASTSelectQuery * select_query, Aliases & aliases, c
|
||||
const StorageMetadataPtr & metadata_snapshot)
|
||||
{
|
||||
WhereConstraintsOptimizer(select_query, aliases, source_columns_set, tables_with_columns, metadata_snapshot).perform();
|
||||
if (select_query->where())
|
||||
Poco::Logger::get("KEK").information(select_query->where()->dumpTree());
|
||||
else
|
||||
Poco::Logger::get("KEK").information("NO WHERE");
|
||||
}
|
||||
|
||||
/// transform where to CNF for more convenient optimization
|
||||
@ -524,6 +528,8 @@ void convertQueryToCNF(ASTSelectQuery * select_query)
|
||||
auto cnf_form = TreeCNFConverter::toCNF(select_query->where());
|
||||
select_query->refWhere() = TreeCNFConverter::fromCNF(cnf_form);
|
||||
}
|
||||
if (select_query->where())
|
||||
Poco::Logger::get("KEK").information(select_query->where()->dumpTree());
|
||||
}
|
||||
|
||||
/// Remove duplicated columns from USING(...).
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Interpreters/WhereConstraintsOptimizer.h>
|
||||
|
||||
#include <Interpreters/TreeCNFConverter.h>
|
||||
#include <Interpreters/ConstraintMatcherVisitor.h>
|
||||
#include <Parsers/ASTConstraintDeclaration.h>
|
||||
#include <Storages/StorageInMemoryMetadata.h>
|
||||
@ -7,16 +8,17 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ConstraintMatcherVisitor::Data getConstraintData(const StorageMetadataPtr & metadata_snapshot)
|
||||
std::vector<std::vector<ASTPtr>> getConstraintData(const StorageMetadataPtr & metadata_snapshot)
|
||||
{
|
||||
ConstraintMatcherVisitor::Data constraint_data;
|
||||
std::vector<std::vector<ASTPtr>> constraint_data;
|
||||
|
||||
for (const auto & constraint : metadata_snapshot->getConstraints().filterConstraints(ConstraintsDescription::ConstraintType::ALWAYS_TRUE))
|
||||
for (const auto & constraint :
|
||||
metadata_snapshot->getConstraints().filterConstraints(ConstraintsDescription::ConstraintType::ALWAYS_TRUE))
|
||||
{
|
||||
const auto expr = constraint->as<ASTConstraintDeclaration>()->expr->clone();
|
||||
|
||||
constraint_data.constraints[expr->getTreeHash().second].push_back(expr);
|
||||
const auto cnf = TreeCNFConverter::toCNF(constraint->as<ASTConstraintDeclaration>()->expr->ptr())
|
||||
.pullNotOutFunctions();
|
||||
for (const auto & group : cnf.getStatements())
|
||||
constraint_data.emplace_back(std::begin(group), std::end(group));
|
||||
}
|
||||
|
||||
return constraint_data;
|
||||
@ -29,21 +31,134 @@ WhereConstraintsOptimizer::WhereConstraintsOptimizer(
|
||||
const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns_*/,
|
||||
const StorageMetadataPtr & metadata_snapshot_)
|
||||
: select_query(select_query_)
|
||||
/* , aliases(aliases_)
|
||||
/* , aliases(aliases_)
|
||||
, source_columns_set(source_columns_set_)
|
||||
, tables_with_columns(tables_with_columns_)*/
|
||||
, metadata_snapshot(metadata_snapshot_)
|
||||
{
|
||||
}
|
||||
|
||||
/// Checks whether an OR-group of the query is known to be true because of a constraint.
///
/// `constraints` holds one vector of atoms per constraint OR-group. The group is reported
/// as always true when, for at least one constraint, every atom of that constraint occurs
/// in `group` with the same polarity (both under NOT or both without it).
/// Two atoms are considered the same expression when, after stripping one leading NOT,
/// both their tree hashes and their column names are equal.
bool checkIfGroupAlwaysTrue(const CNFQuery::OrGroup & group, const std::vector<std::vector<ASTPtr>> & constraints)
{
    /// TODO: this is temporary; need to write more effective search
    /// TODO: go deeper into asts (a < b, a = b,...) with z3 or some visitor

    /// Replaces `ast` by the argument of its top-level NOT (if any); returns true if NOT was stripped.
    const auto strip_not = [](ASTPtr & ast) -> bool
    {
        const auto * func = ast->as<ASTFunction>();
        if (!func || func->name != "not")
            return false;
        ast = func->arguments->children.front();
        return true;
    };

    const auto same_expression = [](const ASTPtr & lhs, const ASTPtr & rhs) -> bool
    {
        return lhs->getTreeHash() == rhs->getTreeHash() && lhs->getColumnName() == rhs->getColumnName();
    };

    for (const auto & constraint : constraints) /// one constraint in group is enough,
                                                /// otherwise it's difficult to make judgements without using constraint solving (z3..)
    {
        bool group_always_true = true;
        for (const auto & constraint_ast : constraint)
        {
            /// Does not depend on the group atom, so it is computed once per constraint atom.
            ASTPtr constraint_expr = constraint_ast;
            const bool constraint_negated = strip_not(constraint_expr);

            bool found_match = false;
            for (const auto & group_ast : group)
            {
                ASTPtr group_expr = group_ast;
                const bool group_negated = strip_not(group_expr);

                /// An atom of opposite polarity is not a match; keep looking,
                /// the group may still contain the same expression with the right polarity.
                if (group_negated == constraint_negated && same_expression(constraint_expr, group_expr))
                {
                    found_match = true;
                    break;
                }
            }

            if (!found_match)
            {
                group_always_true = false;
                break;
            }
        }

        if (group_always_true)
            return true;
    }
    return false;
}
|
||||
|
||||
bool checkIfAtomAlwaysFalse(const ASTPtr & atom, const std::vector<std::vector<ASTPtr>> & constraints)
|
||||
{
|
||||
/// TODO: more efficient matching
|
||||
|
||||
for (const auto & constraint : constraints)
|
||||
{
|
||||
if (constraint.size() > 1)
|
||||
continue; /// TMP; Too hard to do something at current time (without more powerful instruments)
|
||||
|
||||
for (const auto & constraint_ast : constraint)
|
||||
{
|
||||
bool match_means_ok = true;
|
||||
ASTPtr a = constraint_ast;
|
||||
ASTPtr b = atom;
|
||||
|
||||
{
|
||||
auto * func_a = a->as<ASTFunction>();
|
||||
if (func_a && func_a->name == "not")
|
||||
{
|
||||
a = func_a->arguments->children.front();
|
||||
match_means_ok ^= true;
|
||||
}
|
||||
}
|
||||
{
|
||||
auto * func_b = b->as<ASTFunction>();
|
||||
if (func_b && func_b->name == "not")
|
||||
{
|
||||
b = func_b->arguments->children.front();
|
||||
match_means_ok ^= true;
|
||||
}
|
||||
}
|
||||
|
||||
Poco::Logger::get("MATCHER a").information(a->dumpTree());
|
||||
Poco::Logger::get("MATCHER b").information(b->dumpTree());
|
||||
Poco::Logger::get("MATCHER a>>").information(a->getColumnName());
|
||||
Poco::Logger::get("MATCHER b>>" ).information(b->getColumnName());
|
||||
if (a->getTreeHash() == b->getTreeHash() &&
|
||||
a->getColumnName() == b->getColumnName())
|
||||
{
|
||||
Poco::Logger::get("MATCH").information(std::to_string(static_cast<int>(match_means_ok)));
|
||||
return !match_means_ok;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void WhereConstraintsOptimizer::perform()
|
||||
{
|
||||
auto constraint_data = getConstraintData(metadata_snapshot);
|
||||
|
||||
/// TODO: split != -> NOT =, >= -> NOT <, ...
|
||||
|
||||
if (select_query->where())
|
||||
ConstraintMatcherVisitor(constraint_data).visit(select_query->refWhere());
|
||||
{
|
||||
auto cnf = TreeCNFConverter::toCNF(select_query->where());
|
||||
|
||||
cnf.pullNotOutFunctions()
|
||||
.filterGroups([&constraint_data](const auto & group)
|
||||
{ return !checkIfGroupAlwaysTrue(group, constraint_data); }) /// remove always true functions in CNF
|
||||
.filterAtoms([&constraint_data](const auto & ast)
|
||||
{ return !checkIfAtomAlwaysFalse(ast, constraint_data); }) /// TODO: remove always false atoms in CNF
|
||||
.pushNotInFuntions();
|
||||
|
||||
//ConstraintMatcherVisitor(constraint_data).visit(select_query->refWhere());
|
||||
Poco::Logger::get("AFTER OPT").information(cnf.dump());
|
||||
select_query->setExpression(ASTSelectQuery::Expression::WHERE, TreeCNFConverter::fromCNF(cnf));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,4 +1,7 @@
|
||||
4
|
||||
0
|
||||
3
|
||||
0
|
||||
0
|
||||
4
|
||||
4
|
||||
0
|
||||
|
@ -4,18 +4,22 @@ DROP TABLE IF EXISTS constraint_test.constrained;
|
||||
SET optimize_using_constraints = 1;
|
||||
|
||||
CREATE DATABASE constraint_test;
|
||||
CREATE TABLE constraint_test.assumption (URL String, CONSTRAINT is_yandex ASSUME domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = TinyLog;
|
||||
CREATE TABLE constraint_test.assumption (URL String, a Int32, CONSTRAINT c1 ASSUME domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT c2 ASSUME URL > 'zzz', CONSTRAINT c3 CHECK isValidUTF8(URL)) ENGINE = TinyLog;
|
||||
|
||||
--- Add wrong rows in order to check optimization
|
||||
INSERT INTO constraint_test.assumption (URL) VALUES ('1');
|
||||
INSERT INTO constraint_test.assumption (URL) VALUES ('2');
|
||||
INSERT INTO constraint_test.assumption (URL) VALUES ('yandex.ru');
|
||||
INSERT INTO constraint_test.assumption (URL) VALUES ('3');
|
||||
INSERT INTO constraint_test.assumption (URL, a) VALUES ('1', 1);
|
||||
INSERT INTO constraint_test.assumption (URL, a) VALUES ('2', 2);
|
||||
INSERT INTO constraint_test.assumption (URL, a) VALUES ('yandex.ru', 3);
|
||||
INSERT INTO constraint_test.assumption (URL, a) VALUES ('3', 4);
|
||||
|
||||
SELECT count() FROM constraint_test.assumption WHERE domainWithoutWWW(URL) = 'yandex.ru'; --- assumption -> 4
|
||||
SELECT count() FROM constraint_test.assumption WHERE NOT (domainWithoutWWW(URL) = 'yandex.ru'); --- assumption -> 0
|
||||
SELECT count() FROM constraint_test.assumption WHERE domainWithoutWWW(URL) != 'yandex.ru'; --- not optimized -> 3
|
||||
SELECT count() FROM constraint_test.assumption WHERE domainWithoutWWW(URL) != 'yandex.ru'; --- assumption -> 0
|
||||
SELECT count() FROM constraint_test.assumption WHERE domainWithoutWWW(URL) = 'nothing'; --- not optimized -> 0
|
||||
|
||||
SELECT count() FROM constraint_test.assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND URL > 'zzz'); ---> assumption -> 4
|
||||
SELECT count() FROM constraint_test.assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND URL > 'zzz') OR (a = 10 AND a + 5 < 100); ---> assumption -> 4
|
||||
SELECT count() FROM constraint_test.assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND URL = '111'); ---> assumption & no assumption -> 0
|
||||
|
||||
DROP TABLE constraint_test.assumption;
|
||||
DROP DATABASE constraint_test;
|
Loading…
Reference in New Issue
Block a user