2013-05-24 10:49:19 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/AggregateDescription.h>
|
|
|
|
#include <Interpreters/Settings.h>
|
2018-10-12 15:41:28 +00:00
|
|
|
#include <Interpreters/ActionsVisitor.h>
|
2018-11-02 18:53:23 +00:00
|
|
|
#include <Interpreters/SyntaxAnalyzer.h>
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2018-10-18 15:03:14 +00:00
|
|
|
class Block;
|
2017-01-14 09:00:19 +00:00
|
|
|
class Context;
|
2014-07-06 19:48:39 +00:00
|
|
|
|
2016-01-13 00:32:59 +00:00
|
|
|
struct ExpressionActionsChain;
|
2018-10-18 15:03:14 +00:00
|
|
|
class ExpressionActions;
|
|
|
|
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
|
2016-01-13 00:32:59 +00:00
|
|
|
|
2017-01-14 09:00:19 +00:00
|
|
|
class IAST;
|
|
|
|
using ASTPtr = std::shared_ptr<IAST>;
|
2018-10-18 15:03:14 +00:00
|
|
|
using ASTs = std::vector<ASTPtr>;
|
|
|
|
struct ASTTableJoin;
|
2017-01-14 09:00:19 +00:00
|
|
|
|
|
|
|
class IBlockInputStream;
|
|
|
|
using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
|
|
|
|
|
|
|
|
class IStorage;
|
|
|
|
using StoragePtr = std::shared_ptr<IStorage>;
|
|
|
|
using Tables = std::map<String, StoragePtr>;
|
|
|
|
|
|
|
|
class ASTFunction;
|
|
|
|
class ASTExpressionList;
|
|
|
|
class ASTSelectQuery;
|
|
|
|
|
2014-07-06 19:48:39 +00:00
|
|
|
|
2018-10-18 15:03:14 +00:00
|
|
|
/// ExpressionAnalyzers sources, intermediates and results. It splits data and logic, allows to test them separately.
|
|
|
|
/// If you are not writing a test you probably don't need it. Use ExpressionAnalyzer itself.
|
|
|
|
struct ExpressionAnalyzerData
|
2018-04-24 07:16:39 +00:00
|
|
|
{
|
2018-10-18 15:03:14 +00:00
|
|
|
/// Original columns.
|
|
|
|
/// First, all available columns of the table are placed here. Then (when analyzing the query), unused columns are deleted.
|
|
|
|
NamesAndTypesList source_columns;
|
|
|
|
|
|
|
|
/// If non-empty, ignore all expressions in not from this list.
|
|
|
|
Names required_result_columns;
|
|
|
|
|
|
|
|
SubqueriesForSets subqueries_for_sets;
|
|
|
|
PreparedSets prepared_sets;
|
|
|
|
|
|
|
|
/// Columns after ARRAY JOIN, JOIN, and/or aggregation.
|
|
|
|
NamesAndTypesList aggregated_columns;
|
|
|
|
NamesAndTypesList array_join_columns;
|
|
|
|
|
|
|
|
bool has_aggregation = false;
|
|
|
|
NamesAndTypesList aggregation_keys;
|
|
|
|
AggregateDescriptions aggregate_descriptions;
|
|
|
|
|
|
|
|
bool has_global_subqueries = false;
|
|
|
|
|
|
|
|
/// Which column is needed to be ARRAY-JOIN'ed to get the specified.
|
|
|
|
/// For example, for `SELECT s.v ... ARRAY JOIN a AS s` will get "s.v" -> "a.v".
|
|
|
|
NameToNameMap array_join_result_to_source;
|
|
|
|
|
|
|
|
/// For the ARRAY JOIN section, mapping from the alias to the full column name.
|
|
|
|
/// For example, for `ARRAY JOIN [1,2] AS b` "b" -> "array(1,2)" will enter here.
|
|
|
|
NameToNameMap array_join_alias_to_name;
|
|
|
|
|
|
|
|
/// The backward mapping for array_join_alias_to_name.
|
|
|
|
NameToNameMap array_join_name_to_alias;
|
|
|
|
|
|
|
|
/// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries.
|
|
|
|
Tables external_tables;
|
|
|
|
|
|
|
|
/// Predicate optimizer overrides the sub queries
|
|
|
|
bool rewrite_subqueries = false;
|
|
|
|
|
2018-11-08 09:00:25 +00:00
|
|
|
/// Columns will be added to block by join.
|
|
|
|
JoinedColumnsList columns_added_by_join; /// Subset of analyzed_join.available_joined_columns
|
|
|
|
|
|
|
|
/// Actions which need to be calculated on joined block.
|
|
|
|
ExpressionActionsPtr joined_block_actions;
|
|
|
|
|
|
|
|
/// Columns which will be used in query from joined table. Duplicate names are qualified.
|
|
|
|
NameSet required_columns_from_joined_table;
|
|
|
|
|
|
|
|
/// Such columns will be copied from left join keys during join.
|
|
|
|
/// Example: select right from tab1 join tab2 on left + 1 = right
|
|
|
|
NameSet columns_added_by_join_from_right_keys;
|
|
|
|
|
2018-10-18 15:03:14 +00:00
|
|
|
protected:
|
|
|
|
ExpressionAnalyzerData(const NamesAndTypesList & source_columns_,
|
|
|
|
const Names & required_result_columns_,
|
|
|
|
const SubqueriesForSets & subqueries_for_sets_)
|
|
|
|
: source_columns(source_columns_),
|
|
|
|
required_result_columns(required_result_columns_),
|
|
|
|
subqueries_for_sets(subqueries_for_sets_)
|
|
|
|
{}
|
|
|
|
};
|
2018-04-24 07:16:39 +00:00
|
|
|
|
2014-07-06 19:48:39 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/** Transforms an expression from a syntax tree into a sequence of actions to execute it.
|
2014-03-19 11:44:41 +00:00
|
|
|
*
|
2017-06-02 21:37:28 +00:00
|
|
|
* NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer.
|
2013-05-24 10:49:19 +00:00
|
|
|
*/
|
2018-10-18 15:03:14 +00:00
|
|
|
class ExpressionAnalyzer : private ExpressionAnalyzerData, private boost::noncopyable
|
2013-05-24 10:49:19 +00:00
|
|
|
{
|
2016-01-13 00:32:59 +00:00
|
|
|
private:
|
2018-10-18 15:03:14 +00:00
|
|
|
/// Extracts settings to enlight which are used (and avoid copy of others).
|
|
|
|
struct ExtractedSettings
|
|
|
|
{
|
2018-10-19 15:42:47 +00:00
|
|
|
/// for QueryNormalizer
|
|
|
|
const UInt64 max_ast_depth;
|
|
|
|
const UInt64 max_expanded_ast_elements;
|
|
|
|
const String count_distinct_implementation;
|
|
|
|
|
|
|
|
/// for PredicateExpressionsOptimizer
|
|
|
|
const bool enable_optimize_predicate_expression;
|
|
|
|
|
|
|
|
/// for ExpressionAnalyzer
|
2018-10-18 15:03:14 +00:00
|
|
|
const bool asterisk_left_columns_only;
|
|
|
|
const bool use_index_for_in_with_subqueries;
|
|
|
|
const bool enable_conditional_computation;
|
|
|
|
const bool join_use_nulls;
|
|
|
|
const SizeLimits size_limits_for_set;
|
|
|
|
const SizeLimits size_limits_for_join;
|
|
|
|
const String join_default_strictness;
|
|
|
|
const UInt64 min_equality_disjunction_chain_length;
|
|
|
|
|
|
|
|
ExtractedSettings(const Settings & settings)
|
2018-10-19 15:42:47 +00:00
|
|
|
: max_ast_depth(settings.max_ast_depth),
|
|
|
|
max_expanded_ast_elements(settings.max_expanded_ast_elements),
|
|
|
|
count_distinct_implementation(settings.count_distinct_implementation),
|
|
|
|
enable_optimize_predicate_expression(settings.enable_optimize_predicate_expression),
|
|
|
|
asterisk_left_columns_only(settings.asterisk_left_columns_only),
|
2018-10-18 15:03:14 +00:00
|
|
|
use_index_for_in_with_subqueries(settings.use_index_for_in_with_subqueries),
|
|
|
|
enable_conditional_computation(settings.enable_conditional_computation),
|
|
|
|
join_use_nulls(settings.join_use_nulls),
|
|
|
|
size_limits_for_set(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode),
|
|
|
|
size_limits_for_join(settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode),
|
|
|
|
join_default_strictness(settings.join_default_strictness.toString()),
|
2018-10-19 15:42:47 +00:00
|
|
|
min_equality_disjunction_chain_length(settings.optimize_min_equality_disjunction_chain_length)
|
2018-10-18 15:03:14 +00:00
|
|
|
{}
|
|
|
|
};
|
2016-01-13 00:32:59 +00:00
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
public:
|
2017-04-01 07:20:54 +00:00
|
|
|
ExpressionAnalyzer(
|
2018-08-27 17:58:43 +00:00
|
|
|
const ASTPtr & query_,
|
2017-04-01 07:20:54 +00:00
|
|
|
const Context & context_,
|
2017-09-08 03:47:27 +00:00
|
|
|
const StoragePtr & storage_,
|
2018-02-28 01:29:55 +00:00
|
|
|
const NamesAndTypesList & source_columns_ = {},
|
2018-02-26 09:05:06 +00:00
|
|
|
const Names & required_result_columns_ = {},
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t subquery_depth_ = 0,
|
2018-02-08 17:10:35 +00:00
|
|
|
bool do_global_ = false,
|
|
|
|
const SubqueriesForSets & subqueries_for_set_ = {});
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Does the expression have aggregate functions or a GROUP BY or HAVING section.
|
2017-04-01 07:20:54 +00:00
|
|
|
bool hasAggregation() const { return has_aggregation; }
|
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Get a list of aggregation keys and descriptions of aggregate functions if the query contains GROUP BY.
|
2017-04-01 07:20:54 +00:00
|
|
|
void getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const;
|
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/** Get a set of columns that are enough to read from the table to evaluate the expression.
|
|
|
|
* Columns added from another table by JOIN are not counted.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
2018-02-26 09:05:06 +00:00
|
|
|
Names getRequiredSourceColumns() const;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/** These methods allow you to build a chain of transformations over a block, that receives values in the desired sections of the query.
|
2017-04-01 07:20:54 +00:00
|
|
|
*
|
2017-06-02 21:37:28 +00:00
|
|
|
* Example usage:
|
2017-04-01 07:20:54 +00:00
|
|
|
* ExpressionActionsChain chain;
|
|
|
|
* analyzer.appendWhere(chain);
|
|
|
|
* chain.addStep();
|
|
|
|
* analyzer.appendSelect(chain);
|
|
|
|
* analyzer.appendOrderBy(chain);
|
|
|
|
* chain.finalize();
|
|
|
|
*
|
2017-06-02 21:37:28 +00:00
|
|
|
* If only_types = true set, does not execute subqueries in the relevant parts of the query. The actions got this way
|
|
|
|
* shouldn't be executed, they are only needed to get a list of columns with their types.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Before aggregation:
|
2017-04-01 07:20:54 +00:00
|
|
|
bool appendArrayJoin(ExpressionActionsChain & chain, bool only_types);
|
|
|
|
bool appendJoin(ExpressionActionsChain & chain, bool only_types);
|
2018-04-23 19:05:46 +00:00
|
|
|
/// remove_filter is set in ExpressionActionsChain::finalize();
|
2018-10-04 08:58:19 +00:00
|
|
|
/// sampling_expression and primary_expression are needed in order to not remove columns are used in it.
|
|
|
|
bool appendPrewhere(ExpressionActionsChain & chain, bool only_types,
|
|
|
|
const ASTPtr & sampling_expression, const ASTPtr & primary_expression);
|
2017-04-01 07:20:54 +00:00
|
|
|
bool appendWhere(ExpressionActionsChain & chain, bool only_types);
|
|
|
|
bool appendGroupBy(ExpressionActionsChain & chain, bool only_types);
|
|
|
|
void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);
|
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// After aggregation:
|
2017-04-01 07:20:54 +00:00
|
|
|
bool appendHaving(ExpressionActionsChain & chain, bool only_types);
|
|
|
|
void appendSelect(ExpressionActionsChain & chain, bool only_types);
|
|
|
|
bool appendOrderBy(ExpressionActionsChain & chain, bool only_types);
|
2018-03-01 05:24:56 +00:00
|
|
|
bool appendLimitBy(ExpressionActionsChain & chain, bool only_types);
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases.
|
2017-12-01 21:13:25 +00:00
|
|
|
void appendProjectResult(ExpressionActionsChain & chain) const;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-09-04 13:45:39 +00:00
|
|
|
void appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types);
|
2018-09-03 13:36:58 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression.
|
2018-09-03 17:24:46 +00:00
|
|
|
/// If add_aliases, only the calculated values in the desired order and add aliases.
|
|
|
|
/// If also project_result, than only aliases remain in the output block.
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Otherwise, only temporary columns will be deleted from the block.
|
2018-09-03 17:24:46 +00:00
|
|
|
ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants.
|
|
|
|
/// Does not execute subqueries.
|
2017-04-01 07:20:54 +00:00
|
|
|
ExpressionActionsPtr getConstActions();
|
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/** Sets that require a subquery to be create.
|
|
|
|
* Only the sets needed to perform actions returned from already executed `append*` or `getActions`.
|
|
|
|
* That is, you need to call getSetsWithSubqueries after all calls of `append*` or `getActions`
|
|
|
|
* and create all the returned sets before performing the actions.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
2018-09-04 11:38:41 +00:00
|
|
|
const SubqueriesForSets & getSubqueriesForSets() const { return subqueries_for_sets; }
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-09-04 11:38:41 +00:00
|
|
|
const PreparedSets & getPreparedSets() const { return prepared_sets; }
|
2017-07-14 00:33:37 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/** Tables that will need to be sent to remote servers for distributed query processing.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
const Tables & getExternalTables() const { return external_tables; }
|
|
|
|
|
2018-10-19 15:33:40 +00:00
|
|
|
/// Get intermediates for tests
|
|
|
|
const ExpressionAnalyzerData & getAnalyzedData() const { return *this; }
|
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Create Set-s that we can from IN section to use the index on them.
|
2017-04-01 07:20:54 +00:00
|
|
|
void makeSetsForIndex();
|
2014-06-12 18:41:09 +00:00
|
|
|
|
2018-08-20 15:49:39 +00:00
|
|
|
bool isRewriteSubqueriesPredicate() { return rewrite_subqueries; }
|
2018-04-24 07:16:39 +00:00
|
|
|
|
2018-10-10 17:07:21 +00:00
|
|
|
bool hasGlobalSubqueries() { return has_global_subqueries; }
|
|
|
|
|
2014-06-12 21:12:47 +00:00
|
|
|
private:
|
2018-08-27 17:58:43 +00:00
|
|
|
ASTPtr query;
|
2017-04-01 07:20:54 +00:00
|
|
|
ASTSelectQuery * select_query;
|
|
|
|
const Context & context;
|
2018-10-18 15:03:14 +00:00
|
|
|
const ExtractedSettings settings;
|
|
|
|
StoragePtr storage; /// The main table in FROM clause, if exists.
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t subquery_depth;
|
2018-10-18 15:03:14 +00:00
|
|
|
bool do_global; /// Do I need to prepare for execution global subqueries when analyzing the query.
|
2017-07-14 00:33:37 +00:00
|
|
|
|
2018-07-30 13:57:50 +00:00
|
|
|
AnalyzedJoin analyzed_join;
|
2014-06-12 04:04:47 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/** Remove all unnecessary columns from the list of all available columns of the table (`columns`).
|
2018-02-26 09:05:06 +00:00
|
|
|
* At the same time, form a set of unknown columns (`unknown_required_source_columns`),
|
2017-06-02 21:37:28 +00:00
|
|
|
* as well as the columns added by JOIN (`columns_added_by_join`).
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
void collectUsedColumns();
|
2014-06-12 18:41:09 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
|
2017-04-01 07:20:54 +00:00
|
|
|
void initGlobalSubqueriesAndExternalTables();
|
2014-07-04 20:30:06 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void addMultipleArrayJoinAction(ExpressionActionsPtr & actions) const;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void addJoinAction(ExpressionActionsPtr & actions, bool only_types) const;
|
2014-06-13 02:05:05 +00:00
|
|
|
|
2018-04-24 07:16:39 +00:00
|
|
|
bool isThereArrayJoin(const ASTPtr & ast);
|
|
|
|
|
2018-07-24 12:41:35 +00:00
|
|
|
/// If ast is ASTSelectQuery with JOIN, add actions for JOIN key columns.
|
2018-10-16 12:34:20 +00:00
|
|
|
void getActionsFromJoinKeys(const ASTTableJoin & table_join, bool no_subqueries, ExpressionActionsPtr & actions);
|
2018-07-24 12:41:35 +00:00
|
|
|
|
2018-10-16 12:34:20 +00:00
|
|
|
void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false);
|
2014-06-12 18:41:09 +00:00
|
|
|
|
2017-09-08 03:47:27 +00:00
|
|
|
void getActionsBeforeAggregation(const ASTPtr & ast, ExpressionActionsPtr & actions, bool no_subqueries);
|
2014-06-12 18:41:09 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
|
|
|
|
* Create a set of columns aggregated_columns resulting after the aggregation, if any,
|
|
|
|
* or after all the actions that are normally performed before aggregation.
|
|
|
|
* Set has_aggregation = true if there is GROUP BY or at least one aggregate function.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
void analyzeAggregation();
|
|
|
|
void getAggregates(const ASTPtr & ast, ExpressionActionsPtr & actions);
|
|
|
|
void assertNoAggregates(const ASTPtr & ast, const char * description);
|
2013-10-17 13:32:32 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// columns - the columns that are present before the transformations begin.
|
2017-12-25 21:57:29 +00:00
|
|
|
void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const;
|
2014-06-12 18:41:09 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void assertSelect() const;
|
|
|
|
void assertAggregation() const;
|
2014-03-31 14:49:43 +00:00
|
|
|
|
2018-01-23 08:18:12 +00:00
|
|
|
/**
|
|
|
|
* Create Set from a subuqery or a table expression in the query. The created set is suitable for using the index.
|
|
|
|
* The set will not be created if its size hits the limit.
|
|
|
|
*/
|
2018-07-02 19:37:04 +00:00
|
|
|
void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name);
|
2018-01-21 07:30:07 +00:00
|
|
|
|
2017-07-14 00:33:37 +00:00
|
|
|
void makeSetsForIndexImpl(const ASTPtr & node, const Block & sample_block);
|
2017-06-19 02:14:18 +00:00
|
|
|
|
2018-10-17 11:06:46 +00:00
|
|
|
bool isRemoteStorage() const;
|
2013-05-24 10:49:19 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|