mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-23 10:10:50 +00:00
Merge pull request #3428 from 4ertus2/joins
More ExpressionAnalyser refactoring, add unit-test for it
This commit is contained in:
commit
72b3ec1ffa
@ -2,6 +2,8 @@
|
||||
|
||||
#include <Parsers/StringRange.h>
|
||||
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -10,9 +12,6 @@ class Context;
|
||||
class ASTFunction;
|
||||
struct ProjectionManipulatorBase;
|
||||
|
||||
class ExpressionActions;
|
||||
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
|
||||
|
||||
|
||||
class Set;
|
||||
using SetPtr = std::shared_ptr<Set>;
|
||||
|
@ -1,6 +1,8 @@
|
||||
#include <Poco/Util/Application.h>
|
||||
#include <Poco/String.h>
|
||||
|
||||
#include <Core/Block.h>
|
||||
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
@ -161,12 +163,10 @@ ExpressionAnalyzer::ExpressionAnalyzer(
|
||||
const Names & required_result_columns_,
|
||||
size_t subquery_depth_,
|
||||
bool do_global_,
|
||||
const SubqueriesForSets & subqueries_for_set_)
|
||||
: query(query_), context(context_), settings(context.getSettings()),
|
||||
subquery_depth(subquery_depth_),
|
||||
source_columns(source_columns_), required_result_columns(required_result_columns_),
|
||||
storage(storage_),
|
||||
do_global(do_global_), subqueries_for_sets(subqueries_for_set_)
|
||||
const SubqueriesForSets & subqueries_for_sets_)
|
||||
: ExpressionAnalyzerData(source_columns_, required_result_columns_, subqueries_for_sets_),
|
||||
query(query_), context(context_), settings(context.getSettings()), storage(storage_),
|
||||
subquery_depth(subquery_depth_), do_global(do_global_)
|
||||
{
|
||||
select_query = typeid_cast<ASTSelectQuery *>(query.get());
|
||||
|
||||
@ -210,7 +210,7 @@ ExpressionAnalyzer::ExpressionAnalyzer(
|
||||
InJoinSubqueriesPreprocessor(context).process(select_query);
|
||||
|
||||
/// Optimizes logical expressions.
|
||||
LogicalExpressionsOptimizer(select_query, settings).perform();
|
||||
LogicalExpressionsOptimizer(select_query, settings.min_equality_disjunction_chain_length).perform();
|
||||
|
||||
/// Creates a dictionary `aliases`: alias -> ASTPtr
|
||||
{
|
||||
@ -868,7 +868,7 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_
|
||||
{
|
||||
BlockIO res = interpretSubquery(subquery_or_table_name, context, subquery_depth + 1, {})->execute();
|
||||
|
||||
SetPtr set = std::make_shared<Set>(getSetSizeLimits(settings), true);
|
||||
SetPtr set = std::make_shared<Set>(settings.size_limits_for_set, true);
|
||||
|
||||
set->setHeader(res.in->getHeader());
|
||||
while (Block block = res.in->read())
|
||||
@ -925,7 +925,8 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block &
|
||||
|
||||
Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
|
||||
if (sample_block_with_calculated_columns.has(args.children.at(0)->getColumnName()))
|
||||
makeExplicitSet(func, sample_block_with_calculated_columns, true, context, getSetSizeLimits(settings), prepared_sets);
|
||||
makeExplicitSet(func, sample_block_with_calculated_columns, true, context,
|
||||
settings.size_limits_for_set, prepared_sets);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1048,7 +1049,7 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries,
|
||||
bool is_conditional_tree = !isThereArrayJoin(ast) && settings.enable_conditional_computation && !only_consts;
|
||||
|
||||
LogAST log;
|
||||
ActionsVisitor actions_visitor(context, getSetSizeLimits(settings), is_conditional_tree, subquery_depth,
|
||||
ActionsVisitor actions_visitor(context, settings.size_limits_for_set, is_conditional_tree, subquery_depth,
|
||||
source_columns, actions, prepared_sets, subqueries_for_sets,
|
||||
no_subqueries, only_consts, !isRemoteStorage(), log.stream());
|
||||
actions_visitor.visit(ast);
|
||||
@ -1062,7 +1063,7 @@ void ExpressionAnalyzer::getActionsFromJoinKeys(const ASTTableJoin & table_join,
|
||||
bool is_conditional_tree = !isThereArrayJoin(query) && settings.enable_conditional_computation && !only_consts;
|
||||
|
||||
LogAST log;
|
||||
ActionsVisitor actions_visitor(context, getSetSizeLimits(settings), is_conditional_tree, subquery_depth,
|
||||
ActionsVisitor actions_visitor(context, settings.size_limits_for_set, is_conditional_tree, subquery_depth,
|
||||
source_columns, actions, prepared_sets, subqueries_for_sets,
|
||||
no_subqueries, only_consts, !isRemoteStorage(), log.stream());
|
||||
|
||||
@ -1320,9 +1321,9 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
|
||||
|
||||
if (join_params.strictness == ASTTableJoin::Strictness::Unspecified && join_params.kind != ASTTableJoin::Kind::Cross)
|
||||
{
|
||||
if (settings.join_default_strictness.toString() == "ANY")
|
||||
if (settings.join_default_strictness == "ANY")
|
||||
join_params.strictness = ASTTableJoin::Strictness::Any;
|
||||
else if (settings.join_default_strictness.toString() == "ALL")
|
||||
else if (settings.join_default_strictness == "ALL")
|
||||
join_params.strictness = ASTTableJoin::Strictness::All;
|
||||
else
|
||||
throw Exception("Expected ANY or ALL in JOIN section, because setting (join_default_strictness) is empty", DB::ErrorCodes::EXPECTED_ALL_OR_ANY);
|
||||
@ -1364,7 +1365,7 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
|
||||
{
|
||||
JoinPtr join = std::make_shared<Join>(
|
||||
analyzed_join.key_names_left, analyzed_join.key_names_right, analyzed_join.columns_added_by_join_from_right_keys,
|
||||
settings.join_use_nulls, SizeLimits(settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode),
|
||||
settings.join_use_nulls, settings.size_limits_for_join,
|
||||
join_params.kind, join_params.strictness);
|
||||
|
||||
/** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs
|
||||
|
@ -2,22 +2,22 @@
|
||||
|
||||
#include <Interpreters/AggregateDescription.h>
|
||||
#include <Interpreters/Settings.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <Interpreters/ActionsVisitor.h>
|
||||
|
||||
#include <Core/Block.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Block;
|
||||
class Context;
|
||||
|
||||
class ExpressionActions;
|
||||
struct ExpressionActionsChain;
|
||||
class ExpressionActions;
|
||||
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
|
||||
|
||||
class IAST;
|
||||
using ASTPtr = std::shared_ptr<IAST>;
|
||||
using ASTs = std::vector<ASTPtr>;
|
||||
struct ASTTableJoin;
|
||||
|
||||
class IBlockInputStream;
|
||||
using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
|
||||
@ -31,20 +31,104 @@ class ASTExpressionList;
|
||||
class ASTSelectQuery;
|
||||
|
||||
|
||||
inline SizeLimits getSetSizeLimits(const Settings & settings)
|
||||
/// ExpressionAnalyzer's sources, intermediates and results. Splitting data from logic allows testing them separately.
|
||||
/// If you are not writing a test you probably don't need it. Use ExpressionAnalyzer itself.
|
||||
struct ExpressionAnalyzerData
|
||||
{
|
||||
return SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode);
|
||||
}
|
||||
/// Original columns.
|
||||
/// First, all available columns of the table are placed here. Then (when analyzing the query), unused columns are deleted.
|
||||
NamesAndTypesList source_columns;
|
||||
|
||||
/// If non-empty, ignore all expressions that are not from this list.
|
||||
Names required_result_columns;
|
||||
|
||||
SubqueriesForSets subqueries_for_sets;
|
||||
PreparedSets prepared_sets;
|
||||
|
||||
/// Columns after ARRAY JOIN, JOIN, and/or aggregation.
|
||||
NamesAndTypesList aggregated_columns;
|
||||
NamesAndTypesList array_join_columns;
|
||||
|
||||
bool has_aggregation = false;
|
||||
NamesAndTypesList aggregation_keys;
|
||||
AggregateDescriptions aggregate_descriptions;
|
||||
|
||||
bool has_global_subqueries = false;
|
||||
|
||||
using Aliases = std::unordered_map<String, ASTPtr>;
|
||||
Aliases aliases;
|
||||
|
||||
/// Which column is needed to be ARRAY-JOIN'ed to get the specified.
|
||||
/// For example, for `SELECT s.v ... ARRAY JOIN a AS s` will get "s.v" -> "a.v".
|
||||
NameToNameMap array_join_result_to_source;
|
||||
|
||||
/// For the ARRAY JOIN section, mapping from the alias to the full column name.
|
||||
/// For example, for `ARRAY JOIN [1,2] AS b` "b" -> "array(1,2)" will enter here.
|
||||
NameToNameMap array_join_alias_to_name;
|
||||
|
||||
/// The backward mapping for array_join_alias_to_name.
|
||||
NameToNameMap array_join_name_to_alias;
|
||||
|
||||
/// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries.
|
||||
Tables external_tables;
|
||||
|
||||
/// Predicate optimizer overrides the sub queries
|
||||
bool rewrite_subqueries = false;
|
||||
|
||||
protected:
|
||||
ExpressionAnalyzerData(const NamesAndTypesList & source_columns_,
|
||||
const Names & required_result_columns_,
|
||||
const SubqueriesForSets & subqueries_for_sets_)
|
||||
: source_columns(source_columns_),
|
||||
required_result_columns(required_result_columns_),
|
||||
subqueries_for_sets(subqueries_for_sets_)
|
||||
{}
|
||||
};
|
||||
|
||||
|
||||
/** Transforms an expression from a syntax tree into a sequence of actions to execute it.
|
||||
*
|
||||
* NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer.
|
||||
*/
|
||||
class ExpressionAnalyzer : private boost::noncopyable
|
||||
class ExpressionAnalyzer : private ExpressionAnalyzerData, private boost::noncopyable
|
||||
{
|
||||
private:
|
||||
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
|
||||
/// Extracts settings to make explicit which ones are used (and to avoid copying the others).
|
||||
struct ExtractedSettings
|
||||
{
|
||||
/// for QueryNormalizer
|
||||
const UInt64 max_ast_depth;
|
||||
const UInt64 max_expanded_ast_elements;
|
||||
const String count_distinct_implementation;
|
||||
|
||||
/// for PredicateExpressionsOptimizer
|
||||
const bool enable_optimize_predicate_expression;
|
||||
|
||||
/// for ExpressionAnalyzer
|
||||
const bool asterisk_left_columns_only;
|
||||
const bool use_index_for_in_with_subqueries;
|
||||
const bool enable_conditional_computation;
|
||||
const bool join_use_nulls;
|
||||
const SizeLimits size_limits_for_set;
|
||||
const SizeLimits size_limits_for_join;
|
||||
const String join_default_strictness;
|
||||
const UInt64 min_equality_disjunction_chain_length;
|
||||
|
||||
ExtractedSettings(const Settings & settings)
|
||||
: max_ast_depth(settings.max_ast_depth),
|
||||
max_expanded_ast_elements(settings.max_expanded_ast_elements),
|
||||
count_distinct_implementation(settings.count_distinct_implementation),
|
||||
enable_optimize_predicate_expression(settings.enable_optimize_predicate_expression),
|
||||
asterisk_left_columns_only(settings.asterisk_left_columns_only),
|
||||
use_index_for_in_with_subqueries(settings.use_index_for_in_with_subqueries),
|
||||
enable_conditional_computation(settings.enable_conditional_computation),
|
||||
join_use_nulls(settings.join_use_nulls),
|
||||
size_limits_for_set(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode),
|
||||
size_limits_for_join(settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode),
|
||||
join_default_strictness(settings.join_default_strictness.toString()),
|
||||
min_equality_disjunction_chain_length(settings.optimize_min_equality_disjunction_chain_length)
|
||||
{}
|
||||
};
|
||||
|
||||
public:
|
||||
ExpressionAnalyzer(
|
||||
@ -126,6 +210,9 @@ public:
|
||||
*/
|
||||
const Tables & getExternalTables() const { return external_tables; }
|
||||
|
||||
/// Get intermediates for tests
|
||||
const ExpressionAnalyzerData & getAnalyzedData() const { return *this; }
|
||||
|
||||
/// Create Set-s that we can from IN section to use the index on them.
|
||||
void makeSetsForIndex();
|
||||
|
||||
@ -137,37 +224,10 @@ private:
|
||||
ASTPtr query;
|
||||
ASTSelectQuery * select_query;
|
||||
const Context & context;
|
||||
const Settings settings;
|
||||
const ExtractedSettings settings;
|
||||
StoragePtr storage; /// The main table in FROM clause, if exists.
|
||||
size_t subquery_depth;
|
||||
|
||||
/** Original columns.
|
||||
* First, all available columns of the table are placed here. Then (when analyzing the query), unused columns are deleted.
|
||||
*/
|
||||
NamesAndTypesList source_columns;
|
||||
|
||||
/** If non-empty, ignore all expressions in not from this list.
|
||||
*/
|
||||
Names required_result_columns;
|
||||
|
||||
/// Columns after ARRAY JOIN, JOIN, and/or aggregation.
|
||||
NamesAndTypesList aggregated_columns;
|
||||
|
||||
NamesAndTypesList array_join_columns;
|
||||
|
||||
/// The main table in FROM clause, if exists.
|
||||
StoragePtr storage;
|
||||
|
||||
bool has_aggregation = false;
|
||||
NamesAndTypesList aggregation_keys;
|
||||
AggregateDescriptions aggregate_descriptions;
|
||||
|
||||
/// Do I need to prepare for execution global subqueries when analyzing the query.
|
||||
bool do_global;
|
||||
bool has_global_subqueries = false;
|
||||
|
||||
SubqueriesForSets subqueries_for_sets;
|
||||
|
||||
PreparedSets prepared_sets;
|
||||
bool do_global; /// Do I need to prepare for execution global subqueries when analyzing the query.
|
||||
|
||||
struct AnalyzedJoin
|
||||
{
|
||||
@ -228,29 +288,6 @@ private:
|
||||
|
||||
AnalyzedJoin analyzed_join;
|
||||
|
||||
using Aliases = std::unordered_map<String, ASTPtr>;
|
||||
Aliases aliases;
|
||||
|
||||
using SetOfASTs = std::set<const IAST *>;
|
||||
using MapOfASTs = std::map<ASTPtr, ASTPtr>;
|
||||
|
||||
/// Which column is needed to be ARRAY-JOIN'ed to get the specified.
|
||||
/// For example, for `SELECT s.v ... ARRAY JOIN a AS s` will get "s.v" -> "a.v".
|
||||
NameToNameMap array_join_result_to_source;
|
||||
|
||||
/// For the ARRAY JOIN section, mapping from the alias to the full column name.
|
||||
/// For example, for `ARRAY JOIN [1,2] AS b` "b" -> "array(1,2)" will enter here.
|
||||
NameToNameMap array_join_alias_to_name;
|
||||
|
||||
/// The backward mapping for array_join_alias_to_name.
|
||||
NameToNameMap array_join_name_to_alias;
|
||||
|
||||
|
||||
/// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries.
|
||||
Tables external_tables;
|
||||
|
||||
/// Predicate optimizer overrides the sub queries
|
||||
bool rewrite_subqueries = false;
|
||||
|
||||
/** Remove all unnecessary columns from the list of all available columns of the table (`columns`).
|
||||
* At the same time, form a set of unknown columns (`unknown_required_source_columns`),
|
||||
|
@ -443,7 +443,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
|
||||
{
|
||||
if (create.storage)
|
||||
{
|
||||
if (create.is_temporary && create.storage->engine->name != "Memory")
|
||||
if (create.temporary && create.storage->engine->name != "Memory")
|
||||
throw Exception(
|
||||
"Temporary tables can only be created with ENGINE = Memory, not " + create.storage->engine->name,
|
||||
ErrorCodes::INCORRECT_QUERY);
|
||||
@ -451,7 +451,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
|
||||
return;
|
||||
}
|
||||
|
||||
if (create.is_temporary)
|
||||
if (create.temporary)
|
||||
{
|
||||
auto engine_ast = std::make_shared<ASTFunction>();
|
||||
engine_ast->name = "Memory";
|
||||
@ -546,7 +546,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
|
||||
String data_path;
|
||||
DatabasePtr database;
|
||||
|
||||
if (!create.is_temporary)
|
||||
if (!create.temporary)
|
||||
{
|
||||
database = context.getDatabase(database_name);
|
||||
data_path = database->getDataPath();
|
||||
@ -578,7 +578,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
|
||||
create.attach,
|
||||
false);
|
||||
|
||||
if (create.is_temporary)
|
||||
if (create.temporary)
|
||||
context.getSessionContext().addExternalTable(table_name, res, query_ptr);
|
||||
else
|
||||
database->createTable(context, table_name, res, query_ptr);
|
||||
@ -601,17 +601,17 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
|
||||
{
|
||||
auto insert = std::make_shared<ASTInsertQuery>();
|
||||
|
||||
if (!create.is_temporary)
|
||||
if (!create.temporary)
|
||||
insert->database = database_name;
|
||||
|
||||
insert->table = table_name;
|
||||
insert->select = create.select->clone();
|
||||
|
||||
if (create.is_temporary && !context.getSessionContext().hasQueryContext())
|
||||
if (create.temporary && !context.getSessionContext().hasQueryContext())
|
||||
context.getSessionContext().setQueryContext(context.getSessionContext());
|
||||
|
||||
return InterpreterInsertQuery(insert,
|
||||
create.is_temporary ? context.getSessionContext() : context,
|
||||
create.temporary ? context.getSessionContext() : context,
|
||||
context.getSettingsRef().insert_allow_materialized_columns).execute();
|
||||
}
|
||||
|
||||
@ -657,7 +657,7 @@ void InterpreterCreateQuery::checkAccess(const ASTCreateQuery & create)
|
||||
throw Exception("Cannot create database. DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED);
|
||||
}
|
||||
|
||||
if (create.is_temporary && readonly >= 2)
|
||||
if (create.temporary && readonly >= 2)
|
||||
return;
|
||||
|
||||
if (readonly)
|
||||
|
@ -30,7 +30,7 @@ bool LogicalExpressionsOptimizer::OrWithExpression::operator<(const OrWithExpres
|
||||
return std::tie(this->or_function, this->expression) < std::tie(rhs.or_function, rhs.expression);
|
||||
}
|
||||
|
||||
LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, const Settings & settings_)
|
||||
LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, ExtractedSettings && settings_)
|
||||
: select_query(select_query_), settings(settings_)
|
||||
{
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ struct Settings;
|
||||
class ASTFunction;
|
||||
class ASTSelectQuery;
|
||||
|
||||
|
||||
/** This class provides functions for optimizing boolean expressions within queries.
|
||||
*
|
||||
* For simplicity, we call a homogeneous OR-chain any expression having the following structure:
|
||||
@ -24,9 +25,18 @@ class ASTSelectQuery;
|
||||
*/
|
||||
class LogicalExpressionsOptimizer final
|
||||
{
|
||||
struct ExtractedSettings
|
||||
{
|
||||
const UInt64 optimize_min_equality_disjunction_chain_length;
|
||||
|
||||
ExtractedSettings(UInt64 optimize_min_equality_disjunction_chain_length_)
|
||||
: optimize_min_equality_disjunction_chain_length(optimize_min_equality_disjunction_chain_length_)
|
||||
{}
|
||||
};
|
||||
|
||||
public:
|
||||
/// Constructor. Accepts the root of the query DAG.
|
||||
LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, const Settings & settings_);
|
||||
LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, ExtractedSettings && settings_);
|
||||
|
||||
/** Replace all rather long homogeneous OR-chains expr = x1 OR ... OR expr = xN
|
||||
* on the expressions `expr` IN (x1, ..., xN).
|
||||
@ -90,7 +100,7 @@ private:
|
||||
|
||||
private:
|
||||
ASTSelectQuery * select_query;
|
||||
const Settings & settings;
|
||||
const ExtractedSettings settings;
|
||||
/// Information about the OR-chains inside the query.
|
||||
DisjunctiveEqualityChainsMap disjunctive_equality_chains_map;
|
||||
/// Number of processed OR-chains.
|
||||
|
@ -16,7 +16,7 @@ namespace DB
|
||||
static constexpr auto and_function_name = "and";
|
||||
|
||||
PredicateExpressionsOptimizer::PredicateExpressionsOptimizer(
|
||||
ASTSelectQuery * ast_select_, const Settings & settings_, const Context & context_)
|
||||
ASTSelectQuery * ast_select_, ExtractedSettings && settings_, const Context & context_)
|
||||
: ast_select(ast_select_), settings(settings_), context(context_)
|
||||
{
|
||||
}
|
||||
|
@ -41,14 +41,34 @@ using IdentifiersWithQualifiedNameSet = std::vector<IdentifierWithQualifiedName>
|
||||
*/
|
||||
class PredicateExpressionsOptimizer
|
||||
{
|
||||
/// Extracts settings, mostly to show which are used and which are not.
|
||||
struct ExtractedSettings
|
||||
{
|
||||
/// QueryNormalizer settings
|
||||
const UInt64 max_ast_depth;
|
||||
const UInt64 max_expanded_ast_elements;
|
||||
const String count_distinct_implementation;
|
||||
|
||||
/// for PredicateExpressionsOptimizer
|
||||
const bool enable_optimize_predicate_expression;
|
||||
|
||||
template<typename T>
|
||||
ExtractedSettings(const T & settings)
|
||||
: max_ast_depth(settings.max_ast_depth),
|
||||
max_expanded_ast_elements(settings.max_expanded_ast_elements),
|
||||
count_distinct_implementation(settings.count_distinct_implementation),
|
||||
enable_optimize_predicate_expression(settings.enable_optimize_predicate_expression)
|
||||
{}
|
||||
};
|
||||
|
||||
public:
|
||||
PredicateExpressionsOptimizer(ASTSelectQuery * ast_select_, const Settings & settings_, const Context & context_);
|
||||
PredicateExpressionsOptimizer(ASTSelectQuery * ast_select_, ExtractedSettings && settings_, const Context & context_);
|
||||
|
||||
bool optimize();
|
||||
|
||||
private:
|
||||
ASTSelectQuery * ast_select;
|
||||
const Settings & settings;
|
||||
const ExtractedSettings settings;
|
||||
const Context & context;
|
||||
|
||||
enum OptimizeKind
|
||||
|
@ -9,7 +9,8 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Poco/String.h>
|
||||
#include <Parsers/ASTQualifiedAsterisk.h>
|
||||
#include <iostream>
|
||||
//#include <iostream>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -22,9 +23,9 @@ namespace ErrorCodes
|
||||
|
||||
|
||||
QueryNormalizer::QueryNormalizer(ASTPtr & query, const QueryNormalizer::Aliases & aliases,
|
||||
const Settings & settings, const Names & all_column_names,
|
||||
ExtractedSettings && settings_, const Names & all_column_names,
|
||||
const TableNamesAndColumnNames & table_names_and_column_names)
|
||||
: query(query), aliases(aliases), settings(settings), all_column_names(all_column_names),
|
||||
: query(query), aliases(aliases), settings(settings_), all_column_names(all_column_names),
|
||||
table_names_and_column_names(table_names_and_column_names)
|
||||
{
|
||||
}
|
||||
@ -52,7 +53,7 @@ void QueryNormalizer::perform()
|
||||
void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level)
|
||||
{
|
||||
if (level > settings.max_ast_depth)
|
||||
throw Exception("Normalized AST is too deep. Maximum: " + settings.max_ast_depth.toString(), ErrorCodes::TOO_DEEP_AST);
|
||||
throw Exception("Normalized AST is too deep. Maximum: " + toString(settings.max_ast_depth), ErrorCodes::TOO_DEEP_AST);
|
||||
|
||||
if (finished_asts.count(ast))
|
||||
{
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
#include <Core/Names.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Interpreters/Settings.h>
|
||||
#include <Interpreters/evaluateQualified.h>
|
||||
|
||||
namespace DB
|
||||
@ -22,12 +21,28 @@ inline bool functionIsInOrGlobalInOperator(const String & name)
|
||||
using TableNameAndColumnNames = std::pair<DatabaseAndTableWithAlias, Names>;
|
||||
using TableNamesAndColumnNames = std::vector<TableNameAndColumnNames>;
|
||||
|
||||
|
||||
class QueryNormalizer
|
||||
{
|
||||
/// Extracts settings, mostly to show which are used and which are not.
|
||||
struct ExtractedSettings
|
||||
{
|
||||
const UInt64 max_ast_depth;
|
||||
const UInt64 max_expanded_ast_elements;
|
||||
const String count_distinct_implementation;
|
||||
|
||||
template <typename T>
|
||||
ExtractedSettings(const T & settings)
|
||||
: max_ast_depth(settings.max_ast_depth),
|
||||
max_expanded_ast_elements(settings.max_expanded_ast_elements),
|
||||
count_distinct_implementation(settings.count_distinct_implementation)
|
||||
{}
|
||||
};
|
||||
|
||||
public:
|
||||
using Aliases = std::unordered_map<String, ASTPtr>;
|
||||
|
||||
QueryNormalizer(ASTPtr & query, const Aliases & aliases, const Settings & settings, const Names & all_columns_name,
|
||||
QueryNormalizer(ASTPtr & query, const Aliases & aliases, ExtractedSettings && settings, const Names & all_columns_name,
|
||||
const TableNamesAndColumnNames & table_names_and_column_names);
|
||||
|
||||
void perform();
|
||||
@ -38,7 +53,7 @@ private:
|
||||
|
||||
ASTPtr & query;
|
||||
const Aliases & aliases;
|
||||
const Settings & settings;
|
||||
const ExtractedSettings settings;
|
||||
const Names & all_column_names;
|
||||
const TableNamesAndColumnNames & table_names_and_column_names;
|
||||
|
||||
|
@ -45,6 +45,10 @@ add_executable (in_join_subqueries_preprocessor in_join_subqueries_preprocessor.
|
||||
target_link_libraries (in_join_subqueries_preprocessor dbms)
|
||||
add_check(in_join_subqueries_preprocessor)
|
||||
|
||||
add_executable (expression_analyzer expression_analyzer.cpp)
|
||||
target_link_libraries (expression_analyzer dbms clickhouse_storages_system)
|
||||
add_check(expression_analyzer)
|
||||
|
||||
add_executable (users users.cpp)
|
||||
target_link_libraries (users dbms ${Boost_FILESYSTEM_LIBRARY})
|
||||
|
||||
|
148
dbms/src/Interpreters/tests/expression_analyzer.cpp
Normal file
148
dbms/src/Interpreters/tests/expression_analyzer.cpp
Normal file
@ -0,0 +1,148 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <Storages/System/StorageSystemOne.h>
|
||||
#include <Storages/System/StorageSystemNumbers.h>
|
||||
#include <Databases/DatabaseMemory.h>
|
||||
|
||||
#include <Parsers/ParserSelectQuery.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
|
||||
#include <Analyzers/CollectAliases.h>
|
||||
#include <Analyzers/ExecuteTableFunctions.h>
|
||||
#include <Analyzers/CollectTables.h>
|
||||
|
||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||
#include <IO/ReadBufferFromFileDescriptor.h>
|
||||
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
using namespace DB;
|
||||
|
||||
struct TestEntry
|
||||
{
|
||||
String query;
|
||||
std::unordered_map<String, String> expected_aliases; /// alias -> AST.getID()
|
||||
NamesAndTypesList source_columns = {};
|
||||
Names required_result_columns = {};
|
||||
|
||||
bool check(const Context & context)
|
||||
{
|
||||
ASTPtr ast = parse(query);
|
||||
|
||||
ExpressionAnalyzer analyzer(ast, context, {}, source_columns, required_result_columns);
|
||||
|
||||
const ExpressionAnalyzerData & data = analyzer.getAnalyzedData();
|
||||
|
||||
if (!checkAliases(data))
|
||||
{
|
||||
collectWithAnalysers(context, ast);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
bool checkAliases(const ExpressionAnalyzerData & data)
|
||||
{
|
||||
for (const auto & alias : data.aliases)
|
||||
{
|
||||
const String & alias_name = alias.first;
|
||||
if (expected_aliases.count(alias_name) == 0 ||
|
||||
expected_aliases[alias_name] != alias.second->getID())
|
||||
{
|
||||
std::cout << "unexpected alias: " << alias_name << ' ' << alias.second->getID() << std::endl;
|
||||
return false;
|
||||
}
|
||||
else
|
||||
expected_aliases.erase(alias_name);
|
||||
}
|
||||
|
||||
if (!expected_aliases.empty())
|
||||
{
|
||||
std::cout << "missing aliases: " << expected_aliases.size() << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static ASTPtr parse(const std::string & query)
|
||||
{
|
||||
ParserSelectQuery parser;
|
||||
std::string message;
|
||||
auto text = query.data();
|
||||
if (ASTPtr ast = tryParseQuery(parser, text, text + query.size(), message, false, "", false, 0))
|
||||
return ast;
|
||||
throw Exception(message);
|
||||
}
|
||||
|
||||
void collectWithAnalysers(const Context & context, ASTPtr ast) const
|
||||
{
|
||||
ReadBufferFromFileDescriptor in(STDIN_FILENO);
|
||||
WriteBufferFromFileDescriptor out(STDOUT_FILENO);
|
||||
|
||||
CollectAliases collect_aliases;
|
||||
collect_aliases.process(ast);
|
||||
|
||||
ExecuteTableFunctions execute_table_functions;
|
||||
execute_table_functions.process(ast, context);
|
||||
|
||||
CollectTables collect_tables;
|
||||
collect_tables.process(ast, context, collect_aliases, execute_table_functions);
|
||||
collect_tables.dump(out);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
std::vector<TestEntry> queries =
|
||||
{
|
||||
{
|
||||
"SELECT number AS n FROM system.numbers LIMIT 0",
|
||||
{{"n", "Identifier_number"}},
|
||||
{ NameAndTypePair("number", std::make_shared<DataTypeUInt64>()) }
|
||||
},
|
||||
|
||||
{
|
||||
"SELECT number AS n FROM system.numbers LIMIT 0",
|
||||
{{"n", "Identifier_number"}}
|
||||
}
|
||||
};
|
||||
|
||||
Context context = Context::createGlobal();
|
||||
|
||||
auto system_database = std::make_shared<DatabaseMemory>("system");
|
||||
context.addDatabase("system", system_database);
|
||||
//context.setCurrentDatabase("system");
|
||||
system_database->attachTable("one", StorageSystemOne::create("one"));
|
||||
system_database->attachTable("numbers", StorageSystemNumbers::create("numbers", false));
|
||||
|
||||
size_t success = 0;
|
||||
for (auto & entry : queries)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (entry.check(context))
|
||||
{
|
||||
++success;
|
||||
std::cout << "[OK] " << entry.query << std::endl;
|
||||
}
|
||||
else
|
||||
std::cout << "[Failed] " << entry.query << std::endl;
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
std::cout << "[Error] " << entry.query << std::endl << e.displayText() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return success != queries.size();
|
||||
}
|
@ -213,10 +213,7 @@ TestResult check(const TestEntry & entry)
|
||||
|
||||
auto select_query = typeid_cast<DB::ASTSelectQuery *>(&*ast_input);
|
||||
|
||||
DB::Settings settings;
|
||||
settings.optimize_min_equality_disjunction_chain_length = entry.limit;
|
||||
|
||||
DB::LogicalExpressionsOptimizer optimizer(select_query, settings);
|
||||
DB::LogicalExpressionsOptimizer optimizer(select_query, entry.limit);
|
||||
optimizer.perform();
|
||||
|
||||
/// Parse the expected result.
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTQueryWithOutput.h>
|
||||
#include <Parsers/ASTQueryWithTableAndOutput.h>
|
||||
#include <Parsers/ASTQueryWithOnCluster.h>
|
||||
|
||||
|
||||
@ -113,12 +113,9 @@ protected:
|
||||
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
|
||||
};
|
||||
|
||||
class ASTAlterQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster
|
||||
class ASTAlterQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
|
||||
{
|
||||
public:
|
||||
String database;
|
||||
String table;
|
||||
|
||||
ASTAlterCommandList * command_list = nullptr;
|
||||
|
||||
String getID() const override;
|
||||
|
@ -1,11 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/ASTQueryWithOutput.h>
|
||||
#include <Parsers/ASTQueryWithTableAndOutput.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct ASTCheckQuery : public ASTQueryWithOutput
|
||||
struct ASTCheckQuery : public ASTQueryWithTableAndOutput
|
||||
{
|
||||
/** Get the text that identifies this element. */
|
||||
String getID() const override { return ("CheckQuery_" + database + "_" + table); }
|
||||
@ -18,9 +18,6 @@ struct ASTCheckQuery : public ASTQueryWithOutput
|
||||
return res;
|
||||
}
|
||||
|
||||
std::string database;
|
||||
std::string table;
|
||||
|
||||
protected:
|
||||
void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked frame) const override
|
||||
{
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/ASTSelectWithUnionQuery.h>
|
||||
#include <Parsers/ASTQueryWithOutput.h>
|
||||
#include <Parsers/ASTQueryWithTableAndOutput.h>
|
||||
#include <Parsers/ASTQueryWithOnCluster.h>
|
||||
|
||||
|
||||
@ -74,7 +74,7 @@ public:
|
||||
|
||||
|
||||
/// CREATE TABLE or ATTACH TABLE query
|
||||
class ASTCreateQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster
|
||||
class ASTCreateQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
|
||||
{
|
||||
public:
|
||||
bool attach{false}; /// Query ATTACH TABLE, not CREATE TABLE.
|
||||
@ -82,9 +82,6 @@ public:
|
||||
bool is_view{false};
|
||||
bool is_materialized_view{false};
|
||||
bool is_populate{false};
|
||||
bool is_temporary{false};
|
||||
String database;
|
||||
String table;
|
||||
ASTExpressionList * columns = nullptr;
|
||||
String to_database; /// For CREATE MATERIALIZED VIEW mv TO table.
|
||||
String to_table;
|
||||
@ -155,7 +152,7 @@ protected:
|
||||
settings.ostr
|
||||
<< (settings.hilite ? hilite_keyword : "")
|
||||
<< (attach ? "ATTACH " : "CREATE ")
|
||||
<< (is_temporary ? "TEMPORARY " : "")
|
||||
<< (temporary ? "TEMPORARY " : "")
|
||||
<< what << " "
|
||||
<< (if_not_exists ? "IF NOT EXISTS " : "")
|
||||
<< (settings.hilite ? hilite_none : "")
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/ASTQueryWithOutput.h>
|
||||
#include <Parsers/ASTQueryWithTableAndOutput.h>
|
||||
#include <Parsers/ASTQueryWithOnCluster.h>
|
||||
|
||||
|
||||
@ -9,7 +9,7 @@ namespace DB
|
||||
|
||||
/** DROP query
|
||||
*/
|
||||
class ASTDropQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster
|
||||
class ASTDropQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
|
||||
{
|
||||
public:
|
||||
enum Kind
|
||||
@ -21,9 +21,6 @@ public:
|
||||
|
||||
Kind kind;
|
||||
bool if_exists{false};
|
||||
bool temporary{false};
|
||||
String database;
|
||||
String table;
|
||||
|
||||
/** Get the text that identifies this element. */
|
||||
String getID() const override;
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTQueryWithOutput.h>
|
||||
#include <Parsers/ASTQueryWithTableAndOutput.h>
|
||||
#include <Parsers/ASTQueryWithOnCluster.h>
|
||||
|
||||
namespace DB
|
||||
@ -10,12 +10,9 @@ namespace DB
|
||||
|
||||
/** OPTIMIZE query
|
||||
*/
|
||||
class ASTOptimizeQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster
|
||||
class ASTOptimizeQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
|
||||
{
|
||||
public:
|
||||
String database;
|
||||
String table;
|
||||
|
||||
/// The partition to optimize can be specified.
|
||||
ASTPtr partition;
|
||||
/// A flag can be specified - perform optimization "to the end" instead of one step.
|
||||
@ -44,7 +41,6 @@ public:
|
||||
void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
|
||||
|
||||
ASTPtr getRewrittenASTWithoutOnCluster(const std::string &new_database) const override;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -391,7 +391,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
query->is_view = is_view;
|
||||
query->is_materialized_view = is_materialized_view;
|
||||
query->is_populate = is_populate;
|
||||
query->is_temporary = is_temporary;
|
||||
query->temporary = is_temporary;
|
||||
|
||||
if (database)
|
||||
query->database = typeid_cast<ASTIdentifier &>(*database).name;
|
||||
|
Loading…
Reference in New Issue
Block a user