From c95bec1cf48230a26d6e4a8554aa9f5602a1bfe8 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 11 Feb 2015 19:09:41 +0300 Subject: [PATCH] dbms: Server: feature development [#METR-14875] --- .../DB/Interpreters/ExpressionAnalyzer.h | 5 + dbms/src/Interpreters/ExpressionAnalyzer.cpp | 203 ++++++++++++++++++ 2 files changed, 208 insertions(+) diff --git a/dbms/include/DB/Interpreters/ExpressionAnalyzer.h b/dbms/include/DB/Interpreters/ExpressionAnalyzer.h index c4b01ffe0ba..cabe2513a68 100644 --- a/dbms/include/DB/Interpreters/ExpressionAnalyzer.h +++ b/dbms/include/DB/Interpreters/ExpressionAnalyzer.h @@ -215,6 +215,11 @@ private: void normalizeTree(); void normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias); + /** Заменить все цепочки вида c = x1 OR c = x2 OR ... OR c = xN на выражения c IN (x1, x2, ..., xN), + * где c - произвольное выражение; x1, x2, ... xN - литералы одного типа. + */ + void optimizeOrChains(); + /// Eliminates injective function calls and constant expressions from group by statement void optimizeGroupBy(); diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 5c51269dfcc..34aa8bc8c2c 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -77,6 +77,8 @@ void ExpressionAnalyzer::init() /// Common subexpression elimination. Rewrite rules. normalizeTree(); + optimizeOrChains(); + /// GROUP BY injective function elimination. optimizeGroupBy(); @@ -462,6 +464,207 @@ void ExpressionAnalyzer::normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_as finished_asts[initial_ast] = ast; } +// XXX Temporary design during development phase. + +struct OrWithIdentifier +{ + OrWithIdentifier(ASTFunction * or_function_, IAST * parent_, ASTIdentifier * identifier_) + : or_function(or_function_), parent(parent_), identifier(identifier_) + { + } + + ASTFunction * or_function; + IAST * parent; + ASTIdentifier * identifier; +}; + +bool operator<(const OrWithIdentifier & lhs, const OrWithIdentifier & rhs) +{ + if (lhs.or_function < rhs.or_function) + return true; + if (lhs.or_function > rhs.or_function) + return false; + + if (lhs.parent < rhs.parent) + return true; + if (lhs.parent > rhs.parent) + return false; + + if (lhs.identifier < rhs.identifier) + return true; + if (lhs.identifier > rhs.identifier) + return false; + + return false; +} + +using EqualFunctionList = std::vector; +using EqualFunctionMap = std::map; +using ASTFunctionPtr = Poco::SharedPtr; + +/// Создать новое выражение IN на основе цепочки из выражения OR. +ASTFunctionPtr createInExpression(const OrWithIdentifier & or_with_identifier, const EqualFunctionList & equal_function_list) +{ + ASTPtr value_list = new ASTExpressionList; + for (auto function : equal_function_list) + { + auto literal = static_cast(&*(function->children[1])); + value_list->children.push_back(literal->clone()); + } + + ASTFunctionPtr tuple_function = new ASTFunction; + tuple_function->name = "tuple"; + tuple_function->arguments = value_list; + tuple_function->children.push_back(tuple_function->arguments); + + ASTPtr identifier = or_with_identifier.identifier->clone(); + + ASTPtr in_expr = new ASTExpressionList; + in_expr->children.push_back(identifier); + in_expr->children.push_back(tuple_function); + + ASTFunctionPtr in_function = new ASTFunction; + in_function->name = "in"; + in_function->arguments = in_expr; + in_function->children.push_back(in_function->arguments); + + return in_function; +} + +void ExpressionAnalyzer::optimizeOrChains() +{ + EqualFunctionMap equal_function_map; + + /// XXX Temporary hack during development phase. + UInt64 mutation_threshold = 3; + + /** 1. Поиск кандидатов + */ + + /// (node, parent node) + //// XXX Лучше бы IAST имел атрибут parent. + std::deque > to_visit; + to_visit.push_back(std::make_pair(ast, ASTPtr())); + + while (!to_visit.empty()) + { + auto node_with_parent = to_visit.front(); + auto & node = node_with_parent.first; + auto & parent = node_with_parent.second; + to_visit.pop_front(); + + bool found = false; + + ASTFunction * function = typeid_cast(&*node); + if ((function == nullptr) || (function->name != "or") || (function->children.size() != 1)) + continue; + + ASTExpressionList * expression_list = typeid_cast(&*(function->children[0])); + if (expression_list == nullptr) + continue; + + /// Цепочка элементов выражения OR. + for (auto child : expression_list->children) + { + ASTFunction * equals = typeid_cast(&*child); + if ((equals == nullptr) || (equals->name != "equals") || (equals->children.size() != 1)) + continue; + + ASTExpressionList * equals_expression_list = typeid_cast(&*(equals->children[0])); + if ((equals_expression_list == nullptr) || (equals_expression_list->children.size() != 2)) + continue; + + // Равенство c = xk + ASTIdentifier * identifier = typeid_cast(&*(equals_expression_list->children[0])); + if (identifier == nullptr) + continue; + + ASTLiteral * literal = typeid_cast(&*(equals_expression_list->children[1])); + if (literal == nullptr) + continue; + + OrWithIdentifier pp(function, parent.get(), identifier); + equal_function_map[pp].push_back(equals); + found = true; + } + + if (!found) + for (auto & child : node->children) + if (typeid_cast(&*child) == nullptr) + to_visit.push_back(std::make_pair(child, node)); + } + + for (auto & e : equal_function_map) + { + EqualFunctionList & equal_function_list = e.second; + std::sort(equal_function_list.begin(), equal_function_list.end()); + } + + /** 2. Заменяем длинные цепочки на выражения IN. + */ + for (const auto & e : equal_function_map) + { + const OrWithIdentifier & pp = e.first; + const EqualFunctionList & equal_function_list = e.second; + + /** Пропускать цепочку, если она слишком коротка или содержит данные разних типов. + */ + + if (equal_function_list.size() < mutation_threshold) + continue; + + bool check = true; + auto first_literal = static_cast(&*(equal_function_list[0]->children[1])); + for (size_t i = 1; i < equal_function_list.size(); ++i) + { + auto literal = static_cast(&*(equal_function_list[i]->children[1])); + if (literal->type != first_literal->type) + check = false; + } + if (!check) + continue; + + /** Создать новое выражение IN. + */ + + auto in_expr = createInExpression(pp, equal_function_list); + + /** Вставить это выражение в запрос. + */ + + ASTFunction * or_function = pp.or_function; + ASTExpressionList * expression_list = static_cast(&*(or_function->children[0])); + auto & children = expression_list->children; + + children.push_back(in_expr); + + auto it = std::remove_if(children.begin(), children.end(), [&](const ASTPtr & node) + { + return std::binary_search(equal_function_list.begin(), equal_function_list.end(), node.get()); + }); + children.erase(it, children.end()); + } + + /** 3. Удалить узлы OR, которые имеют только один узел типа Function. + */ + for (const auto & e : equal_function_map) + { + const OrWithIdentifier & pp = e.first; + const EqualFunctionList & equal_function_list = e.second; + + ASTFunction * or_function = pp.or_function; + IAST * parent = pp.parent; + ASTExpressionList * expression_list = static_cast(&*(or_function->children[0])); + auto & children = expression_list->children; + + if ((parent != nullptr) && (children.size() == 1)) + { + parent->children.push_back(children[0]); + auto it = std::remove(parent->children.begin(), parent->children.end(), or_function); + parent->children.erase(it, parent->children.end()); + } + } +} void ExpressionAnalyzer::optimizeGroupBy() {