This commit is contained in:
Nikita Vasilev 2021-04-26 14:26:54 +03:00
parent 2896f9aa75
commit 5663db33b1
3 changed files with 152 additions and 48 deletions

View File

@ -203,25 +203,39 @@ ComparisonGraph::CompareResult ComparisonGraph::compare(const ASTPtr & left, con
}
std::vector<ASTPtr> ComparisonGraph::getEqual(const ASTPtr & ast) const
{
const auto res = getComponentId(ast);
if (!res)
return {};
else
return getComponent(res.value());
}
std::optional<std::size_t> ComparisonGraph::getComponentId(const ASTPtr & ast) const
{
const auto hash_it = graph.ast_hash_to_component.find(ast->getTreeHash());
if (hash_it == std::end(graph.ast_hash_to_component))
return {};
const size_t index = hash_it->second;
if (std::any_of(
std::cbegin(graph.vertexes[index].asts),
std::cend(graph.vertexes[index].asts),
[ast](const ASTPtr & constraint_ast)
{
return constraint_ast->getTreeHash() == ast->getTreeHash() &&
constraint_ast->getColumnName() == ast->getColumnName();
})) {
return graph.vertexes[index].asts;
std::cbegin(graph.vertexes[index].asts),
std::cend(graph.vertexes[index].asts),
[ast](const ASTPtr & constraint_ast)
{
return constraint_ast->getTreeHash() == ast->getTreeHash() &&
constraint_ast->getColumnName() == ast->getColumnName();
})) {
return index;
} else {
return {};
}
}
/// Returns a copy of the ASTs stored in the component at position `id` of
/// `graph.vertexes`. The index is used as is; no range check is done here.
std::vector<ASTPtr> ComparisonGraph::getComponent(const std::size_t id) const
{
    const auto & component = graph.vertexes[id];
    return component.asts;
}
/// Reports whether `constant_index` holds something other than -1,
/// i.e. -1 is the value that stands for "no constant".
bool ComparisonGraph::EqualComponent::hasConstant() const
{
    const bool is_unset = (constant_index == -1);
    return !is_unset;
}

View File

@ -35,6 +35,9 @@ public:
/// Returns all ASTs of the component that contains `ast`,
/// or an empty vector if `ast` is not found in any component.
std::vector<ASTPtr> getEqual(const ASTPtr & ast) const;
/// NOTE(review): implementation is not visible here; presumably returns the constant
/// that is equal to `ast`, if the graph knows one. Confirm against the definition.
std::optional<ASTPtr> getEqualConst(const ASTPtr & ast) const;
/// Returns the index of the component that contains `ast` (matched by tree hash and
/// column name), or an empty optional if there is none.
std::optional<std::size_t> getComponentId(const ASTPtr & ast) const;
/// Returns a copy of the ASTs of the component with index `id`.
/// The index is not range-checked; pass only ids obtained from getComponentId.
std::vector<ASTPtr> getComponent(const std::size_t id) const;
/// Find constants lessOrEqual and greaterOrEqual.
/// For int and double linear programming can be applied here.
/// Returns: {constant, is strict less/greater}

View File

@ -1,10 +1,9 @@
#include <Storages/MergeTree/SubstituteColumnOptimizer.h>
#include <Parsers/ASTConstraintDeclaration.h>
#include <Interpreters/TreeCNFConverter.h>
#include <Interpreters/ComparisonGraph.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTConstraintDeclaration.h>
#include <Parsers/ASTIdentifier.h>
#include <Storages/StorageInMemoryMetadata.h>
#include <Parsers/ASTSelectQuery.h>
#include <Poco/Logger.h>
@ -21,52 +20,118 @@ namespace ErrorCodes
namespace
{
class SubstituteColumnMatcher
const String COMPONENT = "__constraint_component_";
/// Matcher that rewrites an AST in terms of constraint-graph components.
///
/// Every visited node for which `ComparisonGraph::getComponentId` yields an id is
/// replaced by an `ASTIdentifier` whose name is `COMPONENT` followed by that id in
/// decimal.
class ComponentMatcher
{
public:
    using Visitor = InDepthNodeVisitor<ComponentMatcher, true>;

    struct Data
    {
        /// Graph used to look up the component of each visited node.
        const ComparisonGraph & graph;

        Data(const ComparisonGraph & graph_)
            : graph(graph_)
        {
        }
    };

    /// Replaces `ast` with the placeholder identifier of its component, if it has one.
    /// Nodes that belong to no component are left untouched.
    static void visit(ASTPtr & ast, Data & data)
    {
        const auto id = data.graph.getComponentId(ast);
        if (id)
            ast = std::make_shared<ASTIdentifier>(COMPONENT + std::to_string(id.value()));
    }

    /// Children are always visited.
    static bool needChildVisit(const ASTPtr &, const ASTPtr &)
    {
        return true;
    }
};
using ComponentVisitor = ComponentMatcher::Visitor;
class IdentifierSetMatcher
{
public:
using Visitor = InDepthNodeVisitor<IdentifierSetMatcher, true>;
struct Data
{
std::unordered_set<String> identifiers;
};
static void visit(ASTPtr & ast, Data & data)
{
const auto * identifier = ast->as<ASTIdentifier>();
if (identifier)
data.identifiers.insert(identifier->name());
}
static bool needChildVisit(const ASTPtr &, const ASTPtr &)
{
return true;
}
};
using IdentifierSetVisitor = IdentifierSetMatcher::Visitor;
class SubstituteColumnMatcher
{
public:
using Visitor = InDepthNodeVisitor<SubstituteColumnMatcher, false>;
/// State shared by all `visit` calls of the matcher.
struct Data
{
    /// Constraint graph whose components are substituted.
    const ComparisonGraph & graph;
    /// Identifier names collected beforehand; held by reference, so the set must
    /// outlive this object.
    const std::unordered_set<String> & identifiers;
    /// Storage that is asked for column sizes.
    ConstStoragePtr storage;

    Data(const ComparisonGraph & graph_,
         const std::unordered_set<String> & identifiers_,
         const ConstStoragePtr & storage_)
        : graph(graph_)
        , identifiers(identifiers_)
        , storage(storage_)
    {
    }
};
if (!columns.empty())
ast = std::min_element(std::begin(columns), std::end(columns))->ast->clone();
/// Replaces a component placeholder identifier (`COMPONENT` followed by a decimal
/// component id) with a clone of the smallest stored column of that component.
///
/// Candidates are the ASTs of the component whose column name is present in
/// `storage->getColumnSizes()`. They are ordered by compressed size, then by
/// uncompressed size, and the first minimum is chosen.
///
/// Nodes that are not identifiers, or whose name is not `COMPONENT` followed only by
/// decimal digits, are left untouched.
///
/// NOTE(review): when no AST of the component is a stored column, the placeholder
/// identifier stays in the tree; confirm that a later step restores it.
/// NOTE(review): the parsed id is passed to `getComponent` without a range check.
static void visit(ASTPtr & ast, Data & data)
{
    const auto * identifier = ast->as<ASTIdentifier>();
    if (!identifier)
        return;

    const auto & name = identifier->name();
    if (!name.starts_with(COMPONENT) || name.size() == COMPONENT.size())
        return;

    /// Parse the suffix by hand instead of std::stoll: an identifier that merely
    /// shares the prefix (non-digit suffix, or a number that does not fit) must be
    /// skipped rather than throw or wrap around.
    constexpr std::size_t max_id = static_cast<std::size_t>(-1);
    std::size_t id = 0;
    for (std::size_t pos = COMPONENT.size(); pos < name.size(); ++pos)
    {
        const char symbol = name[pos];
        if (symbol < '0' || symbol > '9')
            return;

        const std::size_t digit = static_cast<std::size_t>(symbol - '0');
        if (id > (max_id - digit) / 10)
            return;
        id = id * 10 + digit;
    }

    // like TreeRewriter
    struct ColumnSizeTuple
    {
        size_t compressed_size;
        size_t uncompressed_size;
        /// Held by value: a reference would point into the vector returned by
        /// getComponent(), which may be gone by the time the minimum is read.
        ASTPtr ast;

        bool operator<(const ColumnSizeTuple & that) const
        {
            return std::tie(compressed_size, uncompressed_size) < std::tie(that.compressed_size, that.uncompressed_size);
        }
    };

    const auto column_sizes = data.storage->getColumnSizes();
    const auto component = data.graph.getComponent(id);

    std::vector<ColumnSizeTuple> columns;
    for (const auto & equal_ast : component)
    {
        if (const auto it = column_sizes.find(equal_ast->getColumnName()); it != std::end(column_sizes))
            columns.push_back({it->second.data_compressed, it->second.data_uncompressed, equal_ast});
    }

    if (!columns.empty())
        ast = std::min_element(std::begin(columns), std::end(columns))->ast->clone();
}
static bool needChildVisit(const ASTPtr &, const ASTPtr &)
@ -78,6 +143,7 @@ public:
using SubstituteColumnVisitor = SubstituteColumnMatcher::Visitor;
}
SubstituteColumnOptimizer::SubstituteColumnOptimizer(
ASTSelectQuery * select_query_,
Aliases & /*aliases_*/,
@ -99,15 +165,36 @@ void SubstituteColumnOptimizer::perform()
if (!storage)
return;
const auto compare_graph = metadata_snapshot->getConstraints().getGraph();
SubstituteColumnVisitor::Data data(compare_graph, storage);
if (select_query->where())
SubstituteColumnVisitor(data).visit(select_query->refWhere());
if (select_query->prewhere())
SubstituteColumnVisitor(data).visit(select_query->refPrewhere());
if (select_query->select())
SubstituteColumnVisitor(data).visit(select_query->refSelect());
if (select_query->having())
SubstituteColumnVisitor(data).visit(select_query->refHaving());
auto run_for_all = [&](const auto func) {
if (select_query->where())
func(select_query->refWhere());
if (select_query->prewhere())
func(select_query->refPrewhere());
if (select_query->select())
func(select_query->refSelect());
if (select_query->having())
func(select_query->refHaving());
};
ComponentVisitor::Data component_data(compare_graph);
IdentifierSetVisitor::Data identifier_data;
auto preprocess = [&](ASTPtr & ast) {
ComponentVisitor(component_data).visit(ast);
IdentifierSetVisitor(identifier_data).visit(ast);
};
auto process = [&](ASTPtr & ast) {
SubstituteColumnVisitor::Data substitute_data(compare_graph, identifier_data.identifiers, storage);
SubstituteColumnVisitor(substitute_data).visit(ast);
};
ASTPtr old_query = select_query->clone();
run_for_all(preprocess);
run_for_all(process);
}
}