Reworking ExpressionAnalyzer (preparations) [#METR-20307].

This commit is contained in:
Alexey Milovidov 2017-01-07 01:54:05 +03:00
parent 2f83193390
commit 2436d43637
6 changed files with 230 additions and 3 deletions

View File

@ -17,7 +17,8 @@ class TypeAndConstantInference;
* Remove constant expressions (like ORDER BY concat('hello', 'world')).
* For GROUP BY, unwrap injective functions (like GROUP BY toString(x) -> GROUP BY x).
* For GROUP BY, remove deterministic functions of other keys (like GROUP BY x + 1, x -> GROUP BY x).
* For ORDER BY, remove deterministic functions of previous keys (like ORDER BY num, toString(num) -> ORDER BY num)
* TODO For ORDER BY, remove deterministic functions of previous keys (like ORDER BY num, toString(num) -> ORDER BY num),
* but only if no collation has been specified.
* As a special case, remove duplicate keys.
* For LIMIT BY, apply all the same as for GROUP BY.
*
@ -25,6 +26,7 @@ class TypeAndConstantInference;
* but keys for DISTINCT are specified implicitly (as whole SELECT expression list).
*
* This should be run after CollectAliases, because some aliases will be lost from AST during this transformation.
* This should be run after TranslatePositionalArguments for positional arguments like ORDER BY 1, 2 not to be confused with constants.
*/
struct OptimizeGroupOrderLimitBy
{

View File

@ -2,6 +2,8 @@
#include <DB/Analyzers/TypeAndConstantInference.h>
#include <DB/Interpreters/Context.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/Parsers/ASTFunction.h>
#include <DB/Functions/IFunction.h>
namespace DB
@ -14,7 +16,140 @@ namespace ErrorCodes
}
//void OptimizeGroupOrderLimitBy::process(ASTPtr & ast, TypeAndConstantInference & expression_info)
/** Asks the function implementation whether this particular call is injective.
  *
  * The implementation is given a block with one entry per argument:
  *  constant arguments carry a single-row constant column with their value,
  *  non-constant arguments carry only their type and name (the column is null).
  *
  * Returns false if no function object is attached to `function_info`.
  * Every argument is looked up in `all_info` by its column name with `at`,
  *  so inference info is expected to exist for each of them.
  */
static bool isInjectiveFunction(
    const ASTFunction * ast_function,
    const TypeAndConstantInference::ExpressionInfo & function_info,
    const TypeAndConstantInference::Info & all_info)
{
    if (!function_info.function)
        return false;

    Block sample_arguments;

    for (const auto & argument : ast_function->arguments->children)
    {
        const String argument_name = argument->getColumnName();
        const auto & argument_info = all_info.at(argument_name);

        /// Only constants get a real column; for everything else the column stays null.
        auto constant_column = argument_info.is_constant_expression
            ? argument_info.data_type->createConstColumn(1, argument_info.value)
            : nullptr;

        sample_arguments.insert(ColumnWithTypeAndName(constant_column, argument_info.data_type, argument_name));
    }

    return function_info.function->isInjective(sample_arguments);
}
/** Checks that the value of a function call is fully determined by the given keys.
  *
  * This holds when the function is deterministic in scope of query and every argument is
  *  - a constant expression, or
  *  - equal (by column name) to one of `keys`, or
  *  - itself a function call that satisfies the same condition (checked recursively).
  *
  * Returns false if no function object is attached to `function_info`.
  * Every argument is looked up in `all_info` by its column name with `at`.
  */
static bool isDeterministicFunctionOfKeys(
    const ASTFunction * ast_function,
    const TypeAndConstantInference::ExpressionInfo & function_info,
    const TypeAndConstantInference::Info & all_info,
    const ASTs & keys)
{
    if (!function_info.function)
        return false;

    if (!function_info.function->isDeterministicInScopeOfQuery())
        return false;

    for (const auto & argument : ast_function->arguments->children)
    {
        const String argument_name = argument->getColumnName();
        const auto & argument_info = all_info.at(argument_name);

        /// Constants do not add any dependency.
        if (argument_info.is_constant_expression)
            continue;

        /// The argument may be one of the keys itself.
        bool is_key = false;
        for (const auto & key : keys)
        {
            if (key->getColumnName() == argument_name)
            {
                is_key = true;
                break;
            }
        }

        if (is_key)
            continue;

        /// Otherwise the argument must be a function that deterministically depends on the keys.
        const ASTFunction * nested_function = typeid_cast<const ASTFunction *>(argument.get());
        if (!nested_function || !isDeterministicFunctionOfKeys(nested_function, argument_info, all_info, keys))
            return false;
    }

    return true;
}
/** Simplifies a list of keys with GROUP BY semantics, in place (children of `ast`).
  *
  * Applied to each key, while more than one key remains:
  *  - constant expressions and duplicate keys are removed;
  *  - injective functions are replaced by their arguments, after which the scan starts over;
  *  - deterministic functions of the other keys are removed.
  *
  * Does nothing if `ast` is null.
  * Throws LOGICAL_ERROR if a key has no inference info,
  *  or if the info says "function" but the AST node is not an ASTFunction.
  */
static void processGroupByLikeList(ASTPtr & ast, TypeAndConstantInference & expression_info)
{
    if (!ast)
        return;

    ASTs & keys = ast->children;
    std::unordered_set<std::string> seen_names;

    size_t pos = 0;

    /// Always leave last element in GROUP BY, even if it is constant.
    while (keys.size() > 1 && pos < keys.size())
    {
        ASTPtr & key = keys[pos];
        const String key_name = key->getColumnName();    /// TODO canonicalization of names

        const auto found = expression_info.info.find(key_name);
        if (expression_info.info.end() == found)
            throw Exception("Type inference was not done for " + key_name, ErrorCodes::LOGICAL_ERROR);

        const TypeAndConstantInference::ExpressionInfo & key_info = found->second;

        /// Drop constant expressions and keys that were already seen.
        /// The name is registered as seen only for non-constant keys.
        const bool is_constant = key_info.is_constant_expression;
        const bool is_duplicate = !is_constant && !seen_names.emplace(key_name).second;

        if (is_constant || is_duplicate)
        {
            keys.erase(keys.begin() + pos);
            continue;
        }

        /// Nothing more to do for keys that are not function calls with children.
        if (!key_info.function || key->children.empty())
        {
            ++pos;
            continue;
        }

        const ASTFunction * function_node = typeid_cast<const ASTFunction *>(key.get());
        if (!function_node)
            throw Exception("Column is marked as function during type inference, but corresponding AST node "
                + key_name + " is not a function", ErrorCodes::LOGICAL_ERROR);

        /// Unwrap injective functions: the key is replaced by the function arguments.
        if (isInjectiveFunction(function_node, key_info, expression_info.info))
        {
            /// Hold the arguments before erasing the key, which may destroy the function node.
            auto arguments = function_node->arguments;
            keys.erase(keys.begin() + pos);
            keys.insert(keys.begin() + pos, arguments->children.begin(), arguments->children.end());

            /// Start over: previous keys may become deterministic functions of newly added keys.
            pos = 0;
            seen_names.clear();
            continue;
        }

        /// Remove deterministic functions of other keys.
        ASTs remaining_keys;
        remaining_keys.reserve(keys.size() - 1);
        for (size_t j = 0; j < keys.size(); ++j)
            if (j != pos)
                remaining_keys.emplace_back(keys[j]);

        if (isDeterministicFunctionOfKeys(function_node, key_info, expression_info.info, remaining_keys))
        {
            keys.erase(keys.begin() + pos);
            continue;
        }

        ++pos;
    }
}
void OptimizeGroupOrderLimitBy::process(ASTPtr & ast, TypeAndConstantInference & expression_info)
@ -25,7 +160,8 @@ void OptimizeGroupOrderLimitBy::process(ASTPtr & ast, TypeAndConstantInference &
if (!select->select_expression_list)
throw Exception("SELECT query doesn't have select_expression_list", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
processGroupByLikeList(select->group_expression_list, expression_info);
processGroupByLikeList(select->limit_by_expression_list, expression_info);
}

View File

@ -15,3 +15,6 @@ target_link_libraries(analyze_result_of_query dbms)
add_executable(translate_positional_arguments translate_positional_arguments.cpp)
target_link_libraries(translate_positional_arguments dbms)
add_executable(optimize_group_order_limit_by optimize_group_order_limit_by.cpp)
target_link_libraries(optimize_group_order_limit_by dbms)

View File

@ -0,0 +1,72 @@
#include <DB/Analyzers/CollectAliases.h>
#include <DB/Analyzers/CollectTables.h>
#include <DB/Analyzers/AnalyzeColumns.h>
#include <DB/Analyzers/TypeAndConstantInference.h>
#include <DB/Analyzers/TranslatePositionalArguments.h>
#include <DB/Analyzers/OptimizeGroupOrderLimitBy.h>
#include <DB/Parsers/parseQuery.h>
#include <DB/Parsers/ParserSelectQuery.h>
#include <DB/Parsers/formatAST.h>
#include <DB/IO/WriteBufferFromFileDescriptor.h>
#include <DB/IO/ReadBufferFromFileDescriptor.h>
#include <DB/IO/ReadHelpers.h>
#include <DB/Common/Exception.h>
#include <DB/Interpreters/Context.h>
#include <DB/Storages/System/StorageSystemOne.h>
#include <DB/Storages/System/StorageSystemNumbers.h>
#include <DB/Databases/DatabaseMemory.h>
/// Parses a query from stdin and prints the same query with optimized GROUP BY, ORDER BY, LIMIT BY.
/// Returns 0 on success; on any exception prints its message to stderr and returns 1.
int main(int argc, char ** argv)
try
{
    using namespace DB;

    /// Read the whole query text from standard input.
    ReadBufferFromFileDescriptor in(STDIN_FILENO);

    String query;
    readStringUntilEOF(query, in);

    ParserSelectQuery parser;
    ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "query");

    /// Minimal context: an in-memory "system" database with tables "one" and "numbers", used as current database.
    Context context;

    auto system_database = std::make_shared<DatabaseMemory>("system");
    context.addDatabase("system", system_database);
    system_database->attachTable("one", StorageSystemOne::create("one"));
    system_database->attachTable("numbers", StorageSystemNumbers::create("numbers"));
    context.setCurrentDatabase("system");

    /// Analysis passes; each one receives the results of the passes it depends on.
    CollectAliases collect_aliases;
    collect_aliases.process(ast);

    CollectTables collect_tables;
    collect_tables.process(ast, context, collect_aliases);

    AnalyzeColumns analyze_columns;
    analyze_columns.process(ast, collect_aliases, collect_tables);

    TypeAndConstantInference inference;
    inference.process(ast, context, collect_aliases, analyze_columns);

    TranslatePositionalArguments translation;
    translation.process(ast);

    /// The transformation under test: rewrites the AST in place.
    OptimizeGroupOrderLimitBy optimizer;
    optimizer.process(ast, inference);

    formatAST(*ast, std::cout, 0, false);
    std::cout << "\n";

    return 0;
}
catch (...)
{
    std::cerr << DB::getCurrentExceptionMessage(true) << "\n";
    return 1;
}

View File

@ -0,0 +1,11 @@
SELECT
number,
materialize('abc')
FROM
(
SELECT
number,
10 AS b
FROM system.numbers
)
GROUP BY number

View File

@ -0,0 +1,3 @@
#!/bin/sh
# Pipes a SELECT query into the optimize_group_order_limit_by binary from the current directory.
# The GROUP BY list deliberately mixes a plain column (number), functions of that column,
# literal constants and `b`, which the subquery defines as the constant 10.
# NOTE(review): the expected output presumably keeps only `GROUP BY number` - confirm against the reference file.
echo "SELECT number, materialize('abc') FROM (SELECT number, 10 AS b FROM system.numbers) GROUP BY number, toString(number + 1), number + number, 1, 2, 'Hello', b" | ./optimize_group_order_limit_by