ClickHouse/dbms/src/Interpreters/ExpressionAnalyzer.h

263 lines
11 KiB
C++
Raw Normal View History

#pragma once
#include <Core/Settings.h>
#include <DataStreams/IBlockStream_fwd.h>
2019-03-11 14:01:45 +00:00
#include <Interpreters/ActionsVisitor.h>
#include <Interpreters/AggregateDescription.h>
2018-11-02 18:53:23 +00:00
#include <Interpreters/SyntaxAnalyzer.h>
2019-03-11 14:01:45 +00:00
#include <Parsers/IAST_fwd.h>
#include <Storages/IStorage_fwd.h>
2019-03-11 14:01:45 +00:00
namespace DB
{
class Block;
Squashed commit of the following: commit e712f469a55ff34ad34b482b15cc4153b7ad7233 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:59:13 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a002823084e3a79bffcc17d479620a68eb0644b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:58:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9e06f407c8ee781ed8ddf98bdfcc31846bf2a0fe Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:55:14 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9581620f1e839f456fa7894aa1f996d5162ac6cd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:54:22 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a8564c68cb6cc3649fafaf401256d43c9a2e777 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:47:34 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit cf60632d78ec656be3304ef4565e859bb6ce80ba Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:40:09 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit ee3d1dc6e0c4ca60e3ac1e0c30d4b3ed1e66eca0 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:22:49 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 65592ef7116a90104fcd524b53ef8b7cf22640f2 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:18:17 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 37972c257320d3b7e7b294e0fdeffff218647bfd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:17:06 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit dd909d149974ce5bed2456de1261aa5a368fd3ff Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:16:28 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 3cf43266ca7e30adf01212b1a739ba5fe43639fd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:15:42 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 6731a3df96d1609286e2536b6432916af7743f0f Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:13:35 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 1b5727e0d56415b7add4cb76110105358663602c Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:11:18 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit bbcf726a55685b8e72f5b40ba0bf1904bd1c0407 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:09:04 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit c03b477d5e2e65014e8906ecfa2efb67ee295af1 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:06:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2986e2fb0466bc18d73693dcdded28fccc0dc66b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:05:44 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 5d6cdef13d2e02bd5c4954983334e9162ab2635b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:04:53 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit f2b819b25ce8b2ccdcb201eefb03e1e6f5aab590 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:01:47 2017 +0300 Less dependencies [#CLICKHOUSE-2]
2017-01-14 09:00:19 +00:00
class Context;
2016-01-13 00:32:59 +00:00
struct ExpressionActionsChain;
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
2016-01-13 00:32:59 +00:00
struct ASTTableJoin;
Squashed commit of the following: commit e712f469a55ff34ad34b482b15cc4153b7ad7233 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:59:13 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a002823084e3a79bffcc17d479620a68eb0644b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:58:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9e06f407c8ee781ed8ddf98bdfcc31846bf2a0fe Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:55:14 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9581620f1e839f456fa7894aa1f996d5162ac6cd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:54:22 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a8564c68cb6cc3649fafaf401256d43c9a2e777 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:47:34 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit cf60632d78ec656be3304ef4565e859bb6ce80ba Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:40:09 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit ee3d1dc6e0c4ca60e3ac1e0c30d4b3ed1e66eca0 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:22:49 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 65592ef7116a90104fcd524b53ef8b7cf22640f2 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:18:17 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 37972c257320d3b7e7b294e0fdeffff218647bfd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:17:06 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit dd909d149974ce5bed2456de1261aa5a368fd3ff Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:16:28 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 3cf43266ca7e30adf01212b1a739ba5fe43639fd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:15:42 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 6731a3df96d1609286e2536b6432916af7743f0f Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:13:35 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 1b5727e0d56415b7add4cb76110105358663602c Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:11:18 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit bbcf726a55685b8e72f5b40ba0bf1904bd1c0407 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:09:04 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit c03b477d5e2e65014e8906ecfa2efb67ee295af1 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:06:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2986e2fb0466bc18d73693dcdded28fccc0dc66b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:05:44 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 5d6cdef13d2e02bd5c4954983334e9162ab2635b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:04:53 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit f2b819b25ce8b2ccdcb201eefb03e1e6f5aab590 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:01:47 2017 +0300 Less dependencies [#CLICKHOUSE-2]
2017-01-14 09:00:19 +00:00
class ASTFunction;
class ASTExpressionList;
class ASTSelectQuery;
struct SyntaxAnalyzerResult;
using SyntaxAnalyzerResultPtr = std::shared_ptr<const SyntaxAnalyzerResult>;
/// ExpressionAnalyzer sources, intermediates and results. It splits data and logic, allows to test them separately.
/// If you are not writing a test you probably don't need it. Use ExpressionAnalyzer itself.
struct ExpressionAnalyzerData
2018-04-24 07:16:39 +00:00
{
/// Original columns.
/// First, all available columns of the table are placed here. Then (when analyzing the query), unused columns are deleted.
NamesAndTypesList source_columns;
/// If non-empty, ignore all expressions in not from this list.
NameSet required_result_columns;
SubqueriesForSets subqueries_for_sets;
PreparedSets prepared_sets;
/// Columns after ARRAY JOIN, JOIN, and/or aggregation.
NamesAndTypesList aggregated_columns;
NamesAndTypesList array_join_columns;
bool has_aggregation = false;
NamesAndTypesList aggregation_keys;
AggregateDescriptions aggregate_descriptions;
bool has_global_subqueries = false;
/// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries.
Tables external_tables;
/// Predicate optimizer overrides the sub queries
bool rewrite_subqueries = false;
2018-11-08 09:00:25 +00:00
/// Columns will be added to block by join.
JoinedColumnsList columns_added_by_join; /// Subset of analyzed_join.available_joined_columns
protected:
ExpressionAnalyzerData(const NamesAndTypesList & source_columns_,
const NameSet & required_result_columns_,
const SubqueriesForSets & subqueries_for_sets_)
: source_columns(source_columns_),
required_result_columns(required_result_columns_),
subqueries_for_sets(subqueries_for_sets_)
{}
};
2018-04-24 07:16:39 +00:00
2017-06-02 21:37:28 +00:00
/** Transforms an expression from a syntax tree into a sequence of actions to execute it.
*
2017-06-02 21:37:28 +00:00
* NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer.
*/
class ExpressionAnalyzer : private ExpressionAnalyzerData, private boost::noncopyable
{
2016-01-13 00:32:59 +00:00
private:
/// Extracts settings to enlight which are used (and avoid copy of others).
struct ExtractedSettings
{
2018-10-19 15:42:47 +00:00
/// for QueryNormalizer
const UInt64 max_ast_depth;
const UInt64 max_expanded_ast_elements;
const String count_distinct_implementation;
/// for PredicateExpressionsOptimizer
const bool enable_optimize_predicate_expression;
/// for ExpressionAnalyzer
const bool asterisk_left_columns_only;
const bool use_index_for_in_with_subqueries;
const bool join_use_nulls;
const SizeLimits size_limits_for_set;
const SizeLimits size_limits_for_join;
const String join_default_strictness;
const UInt64 min_equality_disjunction_chain_length;
ExtractedSettings(const Settings & settings)
2018-10-19 15:42:47 +00:00
: max_ast_depth(settings.max_ast_depth),
max_expanded_ast_elements(settings.max_expanded_ast_elements),
count_distinct_implementation(settings.count_distinct_implementation),
enable_optimize_predicate_expression(settings.enable_optimize_predicate_expression),
asterisk_left_columns_only(settings.asterisk_left_columns_only),
use_index_for_in_with_subqueries(settings.use_index_for_in_with_subqueries),
join_use_nulls(settings.join_use_nulls),
size_limits_for_set(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode),
size_limits_for_join(settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode),
join_default_strictness(settings.join_default_strictness.toString()),
2018-10-19 15:42:47 +00:00
min_equality_disjunction_chain_length(settings.optimize_min_equality_disjunction_chain_length)
{}
};
2016-01-13 00:32:59 +00:00
public:
ExpressionAnalyzer(
2018-08-27 17:58:43 +00:00
const ASTPtr & query_,
const SyntaxAnalyzerResultPtr & syntax_analyzer_result_,
const Context & context_,
const NamesAndTypesList & additional_source_columns = {},
const NameSet & required_result_columns_ = {},
size_t subquery_depth_ = 0,
2018-02-08 17:10:35 +00:00
bool do_global_ = false,
const SubqueriesForSets & subqueries_for_set_ = {});
2017-06-02 21:37:28 +00:00
/// Does the expression have aggregate functions or a GROUP BY or HAVING section.
bool hasAggregation() const { return has_aggregation; }
2017-06-02 21:37:28 +00:00
/// Get a list of aggregation keys and descriptions of aggregate functions if the query contains GROUP BY.
void getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const;
2017-06-02 21:37:28 +00:00
/** Get a set of columns that are enough to read from the table to evaluate the expression.
* Columns added from another table by JOIN are not counted.
*/
2018-12-26 18:56:21 +00:00
Names getRequiredSourceColumns() const { return source_columns.getNames(); }
2017-06-02 21:37:28 +00:00
/** These methods allow you to build a chain of transformations over a block, that receives values in the desired sections of the query.
*
2017-06-02 21:37:28 +00:00
* Example usage:
* ExpressionActionsChain chain;
* analyzer.appendWhere(chain);
* chain.addStep();
* analyzer.appendSelect(chain);
* analyzer.appendOrderBy(chain);
* chain.finalize();
*
2017-06-02 21:37:28 +00:00
* If only_types = true set, does not execute subqueries in the relevant parts of the query. The actions got this way
* shouldn't be executed, they are only needed to get a list of columns with their types.
*/
2017-06-02 21:37:28 +00:00
/// Before aggregation:
bool appendArrayJoin(ExpressionActionsChain & chain, bool only_types);
bool appendJoin(ExpressionActionsChain & chain, bool only_types);
2018-04-23 19:05:46 +00:00
/// remove_filter is set in ExpressionActionsChain::finalize();
/// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier).
bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns);
bool appendWhere(ExpressionActionsChain & chain, bool only_types);
bool appendGroupBy(ExpressionActionsChain & chain, bool only_types);
void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);
2017-06-02 21:37:28 +00:00
/// After aggregation:
bool appendHaving(ExpressionActionsChain & chain, bool only_types);
void appendSelect(ExpressionActionsChain & chain, bool only_types);
bool appendOrderBy(ExpressionActionsChain & chain, bool only_types);
bool appendLimitBy(ExpressionActionsChain & chain, bool only_types);
2017-06-02 21:37:28 +00:00
/// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases.
2017-12-01 21:13:25 +00:00
void appendProjectResult(ExpressionActionsChain & chain) const;
void appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types);
2017-06-02 21:37:28 +00:00
/// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression.
/// If add_aliases, only the calculated values in the desired order and add aliases.
/// If also project_result, than only aliases remain in the output block.
2017-06-02 21:37:28 +00:00
/// Otherwise, only temporary columns will be deleted from the block.
ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true);
2017-06-02 21:37:28 +00:00
/// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants.
/// Does not execute subqueries.
ExpressionActionsPtr getConstActions();
2017-06-02 21:37:28 +00:00
/** Sets that require a subquery to be create.
* Only the sets needed to perform actions returned from already executed `append*` or `getActions`.
* That is, you need to call getSetsWithSubqueries after all calls of `append*` or `getActions`
* and create all the returned sets before performing the actions.
*/
const SubqueriesForSets & getSubqueriesForSets() const { return subqueries_for_sets; }
const PreparedSets & getPreparedSets() const { return prepared_sets; }
2017-06-02 21:37:28 +00:00
/** Tables that will need to be sent to remote servers for distributed query processing.
*/
const Tables & getExternalTables() const { return external_tables; }
/// Get intermediates for tests
const ExpressionAnalyzerData & getAnalyzedData() const { return *this; }
2017-06-02 21:37:28 +00:00
/// Create Set-s that we can from IN section to use the index on them.
void makeSetsForIndex();
2018-08-20 15:49:39 +00:00
bool isRewriteSubqueriesPredicate() { return rewrite_subqueries; }
2018-04-24 07:16:39 +00:00
bool hasGlobalSubqueries() { return has_global_subqueries; }
2014-06-12 21:12:47 +00:00
private:
2018-08-27 17:58:43 +00:00
ASTPtr query;
const Context & context;
const ExtractedSettings settings;
StoragePtr storage; /// The main table in FROM clause, if exists.
size_t subquery_depth;
bool do_global; /// Do I need to prepare for execution global subqueries when analyzing the query.
SyntaxAnalyzerResultPtr syntax;
2018-11-09 17:23:48 +00:00
const AnalyzedJoin & analyzedJoin() const { return syntax->analyzed_join; }
2017-06-02 21:37:28 +00:00
/** Remove all unnecessary columns from the list of all available columns of the table (`columns`).
* At the same time, form a set of columns added by JOIN (`columns_added_by_join`).
*/
void collectUsedColumns();
2017-06-02 21:37:28 +00:00
/// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
void initGlobalSubqueriesAndExternalTables();
void addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool is_left) const;
void addJoinAction(ExpressionActionsPtr & actions, bool only_types) const;
/// If ast is ASTSelectQuery with JOIN, add actions for JOIN key columns.
void getActionsFromJoinKeys(const ASTTableJoin & table_join, bool no_subqueries, ExpressionActionsPtr & actions);
void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false);
void getActionsBeforeAggregation(const ASTPtr & ast, ExpressionActionsPtr & actions, bool no_subqueries);
2017-06-02 21:37:28 +00:00
/** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
* Create a set of columns aggregated_columns resulting after the aggregation, if any,
* or after all the actions that are normally performed before aggregation.
* Set has_aggregation = true if there is GROUP BY or at least one aggregate function.
*/
void analyzeAggregation();
void getAggregates(const ASTPtr & ast, ExpressionActionsPtr & actions);
void assertNoAggregates(const ASTPtr & ast, const char * description);
2017-06-02 21:37:28 +00:00
/// columns - the columns that are present before the transformations begin.
void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const;
void assertSelect() const;
void assertAggregation() const;
2018-01-23 08:18:12 +00:00
/**
2018-12-19 03:43:37 +00:00
* Create Set from a subuquery or a table expression in the query. The created set is suitable for using the index.
2018-01-23 08:18:12 +00:00
* The set will not be created if its size hits the limit.
*/
void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name);
void makeSetsForIndexImpl(const ASTPtr & node);
2018-10-17 11:06:46 +00:00
bool isRemoteStorage() const;
};
}