mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Reworking ExpressionAnalyzer (preparations) [#METR-20307].
This commit is contained in:
parent
2f83193390
commit
2436d43637
@ -17,7 +17,8 @@ class TypeAndConstantInference;
|
||||
* Remove constant expressions (like ORDER BY concat('hello', 'world')).
|
||||
* For GROUP BY, unwrap injective functions (like GROUP BY toString(x) -> GROUP BY x).
|
||||
* For GROUP BY, remove deterministic functions of other keys (like GROUP BY x + 1, x -> GROUP BY x).
|
||||
* For ORDER BY, remove deterministic functions of previous keys (like ORDER BY num, toString(num) -> ORDER BY num)
|
||||
* TODO For ORDER BY, remove deterministic functions of previous keys (like ORDER BY num, toString(num) -> ORDER BY num),
|
||||
* but only if no collation has been specified.
|
||||
* As a special case, remove duplicate keys.
|
||||
* For LIMIT BY, apply all the same as for GROUP BY.
|
||||
*
|
||||
@ -25,6 +26,7 @@ class TypeAndConstantInference;
|
||||
* but keys for DISTINCT are specified implicitly (as whole SELECT expression list).
|
||||
*
|
||||
* This should be run after CollectAliases, because some aliases will be lost from AST during this transformation.
|
||||
* This should be run after TranslatePositionalArguments for positional arguments like ORDER BY 1, 2 not to be confused with constants.
|
||||
*/
|
||||
struct OptimizeGroupOrderLimitBy
|
||||
{
|
||||
|
@ -2,6 +2,8 @@
|
||||
#include <DB/Analyzers/TypeAndConstantInference.h>
|
||||
#include <DB/Interpreters/Context.h>
|
||||
#include <DB/Parsers/ASTSelectQuery.h>
|
||||
#include <DB/Parsers/ASTFunction.h>
|
||||
#include <DB/Functions/IFunction.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -14,7 +16,140 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
//void OptimizeGroupOrderLimitBy::process(ASTPtr & ast, TypeAndConstantInference & expression_info)
|
||||
static bool isInjectiveFunction(
|
||||
const ASTFunction * ast_function,
|
||||
const TypeAndConstantInference::ExpressionInfo & function_info,
|
||||
const TypeAndConstantInference::Info & all_info)
|
||||
{
|
||||
if (!function_info.function)
|
||||
return false;
|
||||
|
||||
Block block_with_constants;
|
||||
|
||||
const ASTs & children = ast_function->arguments->children;
|
||||
for (const auto & child : children)
|
||||
{
|
||||
String child_name = child->getColumnName();
|
||||
const TypeAndConstantInference::ExpressionInfo & child_info = all_info.at(child_name);
|
||||
|
||||
block_with_constants.insert(ColumnWithTypeAndName(
|
||||
child_info.is_constant_expression ? child_info.data_type->createConstColumn(1, child_info.value) : nullptr,
|
||||
child_info.data_type,
|
||||
child_name));
|
||||
}
|
||||
|
||||
return function_info.function->isInjective(block_with_constants);
|
||||
}
|
||||
|
||||
|
||||
static bool isDeterministicFunctionOfKeys(
|
||||
const ASTFunction * ast_function,
|
||||
const TypeAndConstantInference::ExpressionInfo & function_info,
|
||||
const TypeAndConstantInference::Info & all_info,
|
||||
const ASTs & keys)
|
||||
{
|
||||
if (!function_info.function || !function_info.function->isDeterministicInScopeOfQuery())
|
||||
return false;
|
||||
|
||||
for (const auto & child : ast_function->arguments->children)
|
||||
{
|
||||
String child_name = child->getColumnName();
|
||||
const TypeAndConstantInference::ExpressionInfo & child_info = all_info.at(child_name);
|
||||
|
||||
/// Function argument is constant.
|
||||
if (child_info.is_constant_expression)
|
||||
continue;
|
||||
|
||||
/// Function argument is one of keys.
|
||||
if (keys.end() != std::find_if(keys.begin(), keys.end(),
|
||||
[&child_name](const auto & key) { return key->getColumnName() == child_name; }))
|
||||
continue;
|
||||
|
||||
/// Function argument is a function, that deterministically depend on keys.
|
||||
if (const ASTFunction * child_function = typeid_cast<const ASTFunction *>(child.get()))
|
||||
{
|
||||
if (isDeterministicFunctionOfKeys(child_function, child_info, all_info, keys))
|
||||
continue;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/** Simplifies list of keys (children of `ast`) in place. Does nothing if `ast` is null.
  *
  * While more than one key remains, for each key in order:
  *  - constant expressions and keys with already seen column name are removed;
  *  - injective function call is replaced by its arguments, and the scan is started over;
  *  - function call, that is deterministic function of all other keys, is removed.
  *
  * Throws LOGICAL_ERROR if there is no inferred info for a key,
  *  or if a key is marked as function in inferred info, but its AST node is not ASTFunction.
  */
static void processGroupByLikeList(ASTPtr & ast, TypeAndConstantInference & expression_info)
{
    if (!ast)
        return;

    ASTs & keys = ast->children;
    const auto & inferred = expression_info.info;

    std::unordered_set<std::string> seen_names;
    size_t pos = 0;

    const auto drop_current_key = [&] { keys.erase(keys.begin() + pos); };

    /// The loop stops as soon as single key remains: the last key is always kept, even if it is constant.
    while (keys.size() > 1 && pos < keys.size())
    {
        const ASTPtr & key = keys[pos];

        const String column_name = key->getColumnName();    /// TODO canonicalization of names

        const auto found = inferred.find(column_name);
        if (found == inferred.end())
            throw Exception("Type inference was not done for " + column_name, ErrorCodes::LOGICAL_ERROR);

        const TypeAndConstantInference::ExpressionInfo & info = found->second;

        /// Constant expression is not a key at all.
        if (info.is_constant_expression)
        {
            drop_current_key();
            continue;
        }

        /// Duplicate of already seen key. (Name of a constant is never remembered.)
        if (!seen_names.emplace(column_name).second)
        {
            drop_current_key();
            continue;
        }

        const bool is_function_call = info.function && !key->children.empty();
        if (!is_function_call)
        {
            ++pos;
            continue;
        }

        const ASTFunction * function_node = typeid_cast<const ASTFunction *>(key.get());
        if (!function_node)
            throw Exception("Column is marked as function during type inference, but corresponding AST node "
                + column_name + " is not a function", ErrorCodes::LOGICAL_ERROR);

        /// Injective function is replaced with list of its arguments at the same position.
        if (isInjectiveFunction(function_node, info, inferred))
        {
            /// Hold the arguments: erasing the key may destroy the function node itself.
            auto arguments = function_node->arguments;

            drop_current_key();
            keys.insert(keys.begin() + pos, arguments->children.begin(), arguments->children.end());

            /// Start over: previous keys may become deterministic function of newly added keys.
            pos = 0;
            seen_names.clear();
            continue;
        }

        /// All keys except the current one.
        ASTs remaining_keys;
        remaining_keys.reserve(keys.size() - 1);
        remaining_keys.insert(remaining_keys.end(), keys.begin(), keys.begin() + pos);
        remaining_keys.insert(remaining_keys.end(), keys.begin() + pos + 1, keys.end());

        /// Key, that is deterministic function of other keys, adds nothing to them.
        if (isDeterministicFunctionOfKeys(function_node, info, inferred, remaining_keys))
        {
            drop_current_key();
            continue;
        }

        ++pos;
    }
}
|
||||
|
||||
|
||||
void OptimizeGroupOrderLimitBy::process(ASTPtr & ast, TypeAndConstantInference & expression_info)
|
||||
@ -25,7 +160,8 @@ void OptimizeGroupOrderLimitBy::process(ASTPtr & ast, TypeAndConstantInference &
|
||||
if (!select->select_expression_list)
|
||||
throw Exception("SELECT query doesn't have select_expression_list", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
|
||||
|
||||
|
||||
processGroupByLikeList(select->group_expression_list, expression_info);
|
||||
processGroupByLikeList(select->limit_by_expression_list, expression_info);
|
||||
}
|
||||
|
||||
|
||||
|
@ -15,3 +15,6 @@ target_link_libraries(analyze_result_of_query dbms)
|
||||
|
||||
add_executable(translate_positional_arguments translate_positional_arguments.cpp)
|
||||
target_link_libraries(translate_positional_arguments dbms)
|
||||
|
||||
add_executable(optimize_group_order_limit_by optimize_group_order_limit_by.cpp)
|
||||
target_link_libraries(optimize_group_order_limit_by dbms)
|
||||
|
72
dbms/src/Analyzers/tests/optimize_group_order_limit_by.cpp
Normal file
72
dbms/src/Analyzers/tests/optimize_group_order_limit_by.cpp
Normal file
@ -0,0 +1,72 @@
|
||||
#include <DB/Analyzers/CollectAliases.h>
|
||||
#include <DB/Analyzers/CollectTables.h>
|
||||
#include <DB/Analyzers/AnalyzeColumns.h>
|
||||
#include <DB/Analyzers/TypeAndConstantInference.h>
|
||||
#include <DB/Analyzers/TranslatePositionalArguments.h>
|
||||
#include <DB/Analyzers/OptimizeGroupOrderLimitBy.h>
|
||||
#include <DB/Parsers/parseQuery.h>
|
||||
#include <DB/Parsers/ParserSelectQuery.h>
|
||||
#include <DB/Parsers/formatAST.h>
|
||||
#include <DB/IO/WriteBufferFromFileDescriptor.h>
|
||||
#include <DB/IO/ReadBufferFromFileDescriptor.h>
|
||||
#include <DB/IO/ReadHelpers.h>
|
||||
#include <DB/Common/Exception.h>
|
||||
#include <DB/Interpreters/Context.h>
|
||||
#include <DB/Storages/System/StorageSystemOne.h>
|
||||
#include <DB/Storages/System/StorageSystemNumbers.h>
|
||||
#include <DB/Databases/DatabaseMemory.h>
|
||||
|
||||
|
||||
/// Parses a query from stdin and prints the same query with optimized GROUP BY, ORDER BY, LIMIT BY.
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
try
|
||||
{
|
||||
using namespace DB;
|
||||
|
||||
ReadBufferFromFileDescriptor in(STDIN_FILENO);
|
||||
WriteBufferFromFileDescriptor out(STDOUT_FILENO);
|
||||
|
||||
String query;
|
||||
readStringUntilEOF(query, in);
|
||||
|
||||
ParserSelectQuery parser;
|
||||
ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "query");
|
||||
|
||||
Context context;
|
||||
|
||||
auto system_database = std::make_shared<DatabaseMemory>("system");
|
||||
context.addDatabase("system", system_database);
|
||||
system_database->attachTable("one", StorageSystemOne::create("one"));
|
||||
system_database->attachTable("numbers", StorageSystemNumbers::create("numbers"));
|
||||
context.setCurrentDatabase("system");
|
||||
|
||||
CollectAliases collect_aliases;
|
||||
collect_aliases.process(ast);
|
||||
|
||||
CollectTables collect_tables;
|
||||
collect_tables.process(ast, context, collect_aliases);
|
||||
|
||||
AnalyzeColumns analyze_columns;
|
||||
analyze_columns.process(ast, collect_aliases, collect_tables);
|
||||
|
||||
TypeAndConstantInference inference;
|
||||
inference.process(ast, context, collect_aliases, analyze_columns);
|
||||
|
||||
TranslatePositionalArguments translation;
|
||||
translation.process(ast);
|
||||
|
||||
OptimizeGroupOrderLimitBy optimizer;
|
||||
optimizer.process(ast, inference);
|
||||
|
||||
formatAST(*ast, std::cout, 0, false);
|
||||
std::cout << "\n";
|
||||
return 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::cerr << DB::getCurrentExceptionMessage(true) << "\n";
|
||||
return 1;
|
||||
}
|
@ -0,0 +1,11 @@
|
||||
SELECT
|
||||
number,
|
||||
materialize('abc')
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
number,
|
||||
10 AS b
|
||||
FROM system.numbers
|
||||
)
|
||||
GROUP BY number
|
3
dbms/src/Analyzers/tests/optimize_group_order_limit_by.sh
Executable file
3
dbms/src/Analyzers/tests/optimize_group_order_limit_by.sh
Executable file
@ -0,0 +1,3 @@
|
||||
#!/bin/sh
|
||||
|
||||
echo "SELECT number, materialize('abc') FROM (SELECT number, 10 AS b FROM system.numbers) GROUP BY number, toString(number + 1), number + number, 1, 2, 'Hello', b" | ./optimize_group_order_limit_by
|
Loading…
Reference in New Issue
Block a user