2015-09-04 21:39:10 +00:00
|
|
|
#include <Poco/Util/Application.h>
|
2016-11-20 12:43:20 +00:00
|
|
|
#include <Poco/String.h>
|
2015-09-04 21:39:10 +00:00
|
|
|
|
2018-10-18 15:03:14 +00:00
|
|
|
#include <Core/Block.h>
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Parsers/ASTFunction.h>
|
|
|
|
#include <Parsers/ASTIdentifier.h>
|
|
|
|
#include <Parsers/ASTLiteral.h>
|
2017-06-19 03:05:05 +00:00
|
|
|
#include <Parsers/ASTQualifiedAsterisk.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Parsers/ASTExpressionList.h>
|
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
2018-02-26 09:05:06 +00:00
|
|
|
#include <Parsers/ASTSelectWithUnionQuery.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Parsers/ASTSubquery.h>
|
|
|
|
#include <Parsers/ASTOrderByElement.h>
|
2018-02-05 14:03:01 +00:00
|
|
|
#include <Parsers/formatAST.h>
|
2018-10-09 14:32:11 +00:00
|
|
|
#include <Parsers/DumpASTNode.h>
|
2013-05-24 10:49:19 +00:00
|
|
|
|
2017-12-25 00:38:45 +00:00
|
|
|
#include <DataTypes/DataTypeNullable.h>
|
2017-12-25 18:58:39 +00:00
|
|
|
#include <DataTypes/NestedUtils.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
2019-04-05 16:45:41 +00:00
|
|
|
#include <DataTypes/DataTypeLowCardinality.h>
|
2018-08-28 14:40:07 +00:00
|
|
|
|
2018-10-12 15:41:28 +00:00
|
|
|
#include <Columns/IColumn.h>
|
2013-05-24 10:49:19 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/ExpressionAnalyzer.h>
|
|
|
|
#include <Interpreters/ExpressionActions.h>
|
|
|
|
#include <Interpreters/LogicalExpressionsOptimizer.h>
|
2019-09-26 10:41:33 +00:00
|
|
|
#include <Interpreters/ExternalDictionariesLoader.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/Set.h>
|
2020-04-07 09:48:47 +00:00
|
|
|
#include <Interpreters/TableJoin.h>
|
2020-02-11 18:27:52 +00:00
|
|
|
#include <Interpreters/JoinSwitcher.h>
|
2020-04-07 09:48:47 +00:00
|
|
|
#include <Interpreters/HashJoin.h>
|
2020-02-17 17:21:03 +00:00
|
|
|
#include <Interpreters/MergeJoin.h>
|
2020-04-08 18:59:52 +00:00
|
|
|
#include <Interpreters/DictionaryReader.h>
|
2013-05-24 10:49:19 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
2017-07-10 23:30:17 +00:00
|
|
|
#include <AggregateFunctions/parseAggregateFunctionParameters.h>
|
2015-04-16 06:12:35 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/StorageDistributed.h>
|
2020-04-08 18:59:52 +00:00
|
|
|
#include <Storages/StorageDictionary.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/StorageJoin.h>
|
2014-03-14 14:52:48 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <DataStreams/copyData.h>
|
2019-05-17 14:34:25 +00:00
|
|
|
#include <DataStreams/IBlockInputStream.h>
|
2014-03-14 14:52:48 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Dictionaries/IDictionary.h>
|
2020-04-08 19:58:27 +00:00
|
|
|
#include <Dictionaries/DictionaryStructure.h>
|
2015-05-13 16:11:07 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
2018-01-15 19:07:47 +00:00
|
|
|
#include <Common/StringUtils/StringUtils.h>
|
2014-06-26 00:58:14 +00:00
|
|
|
|
2017-06-06 17:18:32 +00:00
|
|
|
#include <ext/range.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <DataTypes/DataTypeFactory.h>
|
2018-02-06 19:34:53 +00:00
|
|
|
#include <Functions/FunctionsMiscellaneous.h>
|
2018-07-26 12:58:23 +00:00
|
|
|
#include <Parsers/ExpressionListParsers.h>
|
|
|
|
#include <Parsers/parseQuery.h>
|
2018-07-27 16:21:43 +00:00
|
|
|
#include <Parsers/queryToString.h>
|
2018-10-12 15:41:28 +00:00
|
|
|
#include <Interpreters/interpretSubquery.h>
|
2018-10-30 16:31:21 +00:00
|
|
|
#include <Interpreters/DatabaseAndTableWithAlias.h>
|
2019-10-23 13:59:03 +00:00
|
|
|
#include <Interpreters/misc.h>
|
2018-10-16 19:00:05 +00:00
|
|
|
|
2018-10-12 15:41:28 +00:00
|
|
|
#include <Interpreters/ActionsVisitor.h>
|
2018-10-16 19:00:05 +00:00
|
|
|
|
|
|
|
#include <Interpreters/GlobalSubqueriesVisitor.h>
|
2019-08-12 19:27:09 +00:00
|
|
|
#include <Interpreters/GetAggregatesVisitor.h>
|
2013-05-24 10:49:19 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2018-10-10 16:23:27 +00:00
|
|
|
using LogAST = DebugASTLog<false>; /// set to true to enable logs
|
2018-10-09 14:32:11 +00:00
|
|
|
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2020-02-25 18:02:41 +00:00
|
|
|
extern const int UNKNOWN_TYPE_OF_AST_NODE;
|
2017-04-01 07:20:54 +00:00
|
|
|
extern const int UNKNOWN_IDENTIFIER;
|
2020-02-10 15:50:12 +00:00
|
|
|
extern const int ILLEGAL_PREWHERE;
|
2019-08-16 16:50:30 +00:00
|
|
|
extern const int LOGICAL_ERROR;
|
2016-01-11 21:46:36 +00:00
|
|
|
}
|
|
|
|
|
2020-02-10 15:50:12 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Check if there is an ignore function. It's used for disabling constant folding in query
|
|
|
|
/// predicates because some performance tests use ignore function as a non-optimize guard.
|
2020-02-10 19:55:13 +00:00
|
|
|
bool allowEarlyConstantFolding(const ExpressionActions & actions, const Settings & settings)
|
2020-02-10 15:50:12 +00:00
|
|
|
{
|
2020-02-10 19:55:13 +00:00
|
|
|
if (!settings.enable_early_constant_folding)
|
2020-02-10 15:50:12 +00:00
|
|
|
return false;
|
|
|
|
|
2020-04-22 06:01:33 +00:00
|
|
|
for (const auto & action : actions.getActions())
|
2020-02-10 15:50:12 +00:00
|
|
|
{
|
|
|
|
if (action.type == action.APPLY_FUNCTION && action.function_base)
|
|
|
|
{
|
|
|
|
auto name = action.function_base->getName();
|
|
|
|
if (name == "ignore")
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Makes sure every entry of `block` carries an empty, non-null column.
///  - Entries without a column get a freshly created one of their type.
///  - Non-empty constant columns are replaced with an empty clone.
/// Returns false as soon as an entry without a column has a type that cannot be created;
/// entries processed before that point stay modified.
bool sanitizeBlock(Block & block)
{
    for (auto & entry : block)
    {
        if (entry.column)
        {
            if (isColumnConst(*entry.column) && !entry.column->empty())
                entry.column = entry.column->cloneEmpty();
            continue;
        }

        if (isNotCreatable(entry.type->getTypeId()))
            return false;

        entry.column = entry.type->createColumn();
    }

    return true;
}
|
|
|
|
|
|
|
|
|
2017-01-14 09:00:19 +00:00
|
|
|
ExpressionAnalyzer::ExpressionAnalyzer(
    const ASTPtr & query_,
    const SyntaxAnalyzerResultPtr & syntax_analyzer_result_,
    const Context & context_,
    size_t subquery_depth_,
    bool do_global)
    : query(query_)
    , context(context_)
    , settings(context.getSettings())
    , subquery_depth(subquery_depth_)
    , syntax(syntax_analyzer_result_)
{
    /// Fills external_tables and subqueries_for_sets for global subqueries.
    /// Global subqueries are replaced with the generated names of temporary tables
    /// that will be sent to remote servers.
    initGlobalSubqueriesAndExternalTables(do_global);

    /// Fills has_aggregation, aggregation_keys, aggregate_descriptions, aggregated_columns.
    ///
    /// The order of these two calls matters. If aggregation were analyzed first and an
    /// aggregate function contained a global subquery, `analyzeAggregation` would store
    /// the parameters of that function (including the subquery) in `aggregate_descriptions`.
    /// `initGlobalSubqueriesAndExternalTables` would then replace the subquery with a
    /// temporary table, leaving `aggregate_descriptions` with out-of-date information
    /// and causing an error when the query is executed.
    analyzeAggregation();
}
|
|
|
|
|
2018-10-17 11:06:46 +00:00
|
|
|
bool ExpressionAnalyzer::isRemoteStorage() const
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2019-08-09 14:50:04 +00:00
|
|
|
return storage() && storage()->isRemote();
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-07-04 19:13:38 +00:00
|
|
|
void ExpressionAnalyzer::analyzeAggregation()
|
|
|
|
{
|
2017-04-02 17:37:49 +00:00
|
|
|
/** Find aggregation keys (aggregation_keys), information about aggregate functions (aggregate_descriptions),
|
|
|
|
* as well as a set of columns obtained after the aggregation, if any,
|
|
|
|
* or after all the actions that are usually performed before aggregation (aggregated_columns).
|
2017-04-01 07:20:54 +00:00
|
|
|
*
|
2017-04-02 17:37:49 +00:00
|
|
|
* Everything below (compiling temporary ExpressionActions) - only for the purpose of query analysis (type output).
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
|
2019-03-11 13:22:51 +00:00
|
|
|
auto * select_query = query->as<ASTSelectQuery>();
|
2019-03-11 12:49:39 +00:00
|
|
|
|
2019-08-09 14:50:04 +00:00
|
|
|
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
if (select_query)
|
|
|
|
{
|
2018-12-19 13:13:51 +00:00
|
|
|
bool is_array_join_left;
|
2020-03-23 02:12:31 +00:00
|
|
|
ASTPtr array_join_expression_list = select_query->arrayJoinExpressionList(is_array_join_left);
|
2018-12-19 13:13:51 +00:00
|
|
|
if (array_join_expression_list)
|
|
|
|
{
|
2020-01-27 21:57:44 +00:00
|
|
|
getRootActionsNoMakeSet(array_join_expression_list, true, temp_actions, false);
|
2018-12-19 13:13:51 +00:00
|
|
|
addMultipleArrayJoinAction(temp_actions, is_array_join_left);
|
2019-07-08 20:06:17 +00:00
|
|
|
|
|
|
|
array_join_columns.clear();
|
|
|
|
for (auto & column : temp_actions->getSampleBlock().getNamesAndTypesList())
|
|
|
|
if (syntax->array_join_result_to_source.count(column.name))
|
|
|
|
array_join_columns.emplace_back(column);
|
2018-12-19 13:13:51 +00:00
|
|
|
}
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
const ASTTablesInSelectQueryElement * join = select_query->join();
|
|
|
|
if (join)
|
|
|
|
{
|
2020-01-27 21:57:44 +00:00
|
|
|
getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), true, temp_actions, false);
|
2019-09-02 19:58:45 +00:00
|
|
|
addJoinAction(temp_actions);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-13 12:39:03 +00:00
|
|
|
has_aggregation = makeAggregateDescriptions(temp_actions);
|
|
|
|
if (select_query && (select_query->groupBy() || select_query->having()))
|
|
|
|
has_aggregation = true;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
if (has_aggregation)
|
|
|
|
{
|
2019-08-13 13:48:09 +00:00
|
|
|
getSelectQuery(); /// assertSelect()
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
/// Find out aggregation keys.
|
2019-04-09 14:22:35 +00:00
|
|
|
if (select_query->groupBy())
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
NameSet unique_keys;
|
2019-04-09 14:22:35 +00:00
|
|
|
ASTs & group_asts = select_query->groupBy()->children;
|
2017-04-01 07:20:54 +00:00
|
|
|
for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i)
|
|
|
|
{
|
|
|
|
ssize_t size = group_asts.size();
|
2020-01-27 21:57:44 +00:00
|
|
|
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
const auto & column_name = group_asts[i]->getColumnName();
|
|
|
|
const auto & block = temp_actions->getSampleBlock();
|
|
|
|
|
|
|
|
if (!block.has(column_name))
|
|
|
|
throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);
|
|
|
|
|
|
|
|
const auto & col = block.getByName(column_name);
|
|
|
|
|
|
|
|
/// Constant expressions have non-null column pointer at this stage.
|
2019-06-27 19:28:52 +00:00
|
|
|
if (col.column && isColumnConst(*col.column))
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
/// But don't remove last key column if no aggregate functions, otherwise aggregation will not work.
|
|
|
|
if (!aggregate_descriptions.empty() || size > 1)
|
|
|
|
{
|
|
|
|
if (i + 1 < static_cast<ssize_t>(size))
|
|
|
|
group_asts[i] = std::move(group_asts.back());
|
|
|
|
|
|
|
|
group_asts.pop_back();
|
|
|
|
|
|
|
|
--i;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-12-25 21:57:29 +00:00
|
|
|
NameAndTypePair key{column_name, col.type};
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
/// Aggregation keys are uniqued.
|
|
|
|
if (!unique_keys.count(key.name))
|
|
|
|
{
|
|
|
|
unique_keys.insert(key.name);
|
2018-04-04 18:56:30 +00:00
|
|
|
aggregation_keys.push_back(key);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
/// Key is no longer needed, therefore we can save a little by moving it.
|
2018-04-04 18:56:30 +00:00
|
|
|
aggregated_columns.push_back(std::move(key));
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (group_asts.empty())
|
|
|
|
{
|
2019-04-09 14:22:35 +00:00
|
|
|
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, {});
|
2020-03-08 23:48:08 +00:00
|
|
|
has_aggregation = select_query->having() || !aggregate_descriptions.empty();
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-08 23:48:08 +00:00
|
|
|
for (const auto & desc : aggregate_descriptions)
|
2018-04-04 18:56:30 +00:00
|
|
|
aggregated_columns.emplace_back(desc.column_name, desc.function->getReturnType());
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2018-04-04 18:56:30 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
aggregated_columns = temp_actions->getSampleBlock().getNamesAndTypesList();
|
|
|
|
}
|
2013-05-24 10:49:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-08-13 13:48:09 +00:00
|
|
|
void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global)
|
2014-07-04 20:30:06 +00:00
|
|
|
{
|
2018-10-17 10:59:05 +00:00
|
|
|
if (do_global)
|
|
|
|
{
|
2018-12-10 13:02:45 +00:00
|
|
|
GlobalSubqueriesVisitor::Data subqueries_data(context, subquery_depth, isRemoteStorage(),
|
2018-10-17 10:59:05 +00:00
|
|
|
external_tables, subqueries_for_sets, has_global_subqueries);
|
2018-12-07 15:36:54 +00:00
|
|
|
GlobalSubqueriesVisitor(subqueries_data).visit(query);
|
2018-10-17 10:59:05 +00:00
|
|
|
}
|
2015-11-04 22:02:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-12-20 13:15:17 +00:00
|
|
|
/// Returns the source columns followed by the ARRAY JOIN columns and then the columns added by JOIN.
NamesAndTypesList ExpressionAnalyzer::sourceWithJoinedColumns() const
{
    auto all_columns = sourceColumns();

    all_columns.insert(all_columns.end(), array_join_columns.begin(), array_join_columns.end());

    const auto & joined_columns = analyzedJoin().columnsAddedByJoin();
    all_columns.insert(all_columns.end(), joined_columns.begin(), joined_columns.end());

    return all_columns;
}
|
|
|
|
|
|
|
|
|
2019-08-14 19:30:30 +00:00
|
|
|
void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name)
|
2018-01-21 07:30:07 +00:00
|
|
|
{
|
2019-01-22 12:33:56 +00:00
|
|
|
auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
|
2019-11-22 15:45:03 +00:00
|
|
|
|
2019-01-22 12:33:56 +00:00
|
|
|
if (prepared_sets.count(set_key))
|
|
|
|
return; /// Already prepared.
|
2018-01-21 07:30:07 +00:00
|
|
|
|
2019-11-22 15:45:03 +00:00
|
|
|
if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name))
|
|
|
|
{
|
|
|
|
prepared_sets.insert({set_key, set_ptr_from_storage_set});
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-02-20 02:56:20 +00:00
|
|
|
auto interpreter_subquery = interpretSubquery(subquery_or_table_name, context, {}, query_options);
|
2019-01-22 12:33:56 +00:00
|
|
|
BlockIO res = interpreter_subquery->execute();
|
2018-01-21 07:30:07 +00:00
|
|
|
|
2020-04-09 08:27:55 +00:00
|
|
|
SetPtr set = std::make_shared<Set>(settings.size_limits_for_set, true, context.getSettingsRef().transform_null_in);
|
2018-04-19 21:34:04 +00:00
|
|
|
set->setHeader(res.in->getHeader());
|
2019-01-22 12:33:56 +00:00
|
|
|
|
2019-07-11 13:51:54 +00:00
|
|
|
res.in->readPrefix();
|
2018-01-21 07:30:07 +00:00
|
|
|
while (Block block = res.in->read())
|
|
|
|
{
|
|
|
|
/// If the limits have been exceeded, give up and let the default subquery processing actions take place.
|
2018-07-02 18:57:14 +00:00
|
|
|
if (!set->insertFromBlock(block))
|
2018-01-21 07:30:07 +00:00
|
|
|
return;
|
|
|
|
}
|
2019-11-01 10:58:29 +00:00
|
|
|
|
|
|
|
set->finishInsert();
|
2019-07-11 13:51:54 +00:00
|
|
|
res.in->readSuffix();
|
2018-01-21 07:30:07 +00:00
|
|
|
|
2019-01-22 12:33:56 +00:00
|
|
|
prepared_sets[set_key] = std::move(set);
|
2018-01-21 07:30:07 +00:00
|
|
|
}
|
|
|
|
|
2019-11-22 15:45:03 +00:00
|
|
|
/// If `subquery_or_table_name` is an identifier naming a table whose storage is a StorageSet,
/// returns the set held by that storage. Returns nullptr in all other cases
/// (not an identifier, storage name is not "Set", or the storage is not a StorageSet object).
SetPtr SelectQueryExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name)
{
    /// Only a bare table name can refer to a Set storage.
    if (!subquery_or_table_name->as<ASTIdentifier>())
        return nullptr;

    auto table_id = context.resolveStorageID(subquery_or_table_name);
    const auto storage = DatabaseCatalog::instance().getTable(table_id);
    if (storage->getName() != "Set")
        return nullptr;

    /// The name check alone does not prove the dynamic type, so verify the cast
    /// result instead of dereferencing a possibly empty pointer.
    const auto storage_set = std::dynamic_pointer_cast<StorageSet>(storage);
    if (!storage_set)
        return nullptr;

    return storage_set->getSet();
}
|
|
|
|
|
2018-01-21 07:30:07 +00:00
|
|
|
|
2020-01-11 09:50:41 +00:00
|
|
|
/// Performance optimisation for IN() if storage supports it.
|
2019-08-14 19:30:30 +00:00
|
|
|
void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
|
2014-03-31 14:49:43 +00:00
|
|
|
{
|
2019-08-12 19:27:09 +00:00
|
|
|
if (!node || !storage() || !storage()->supportsIndexForIn())
|
|
|
|
return;
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
for (auto & child : node->children)
|
2018-02-14 17:39:16 +00:00
|
|
|
{
|
2019-01-22 12:33:56 +00:00
|
|
|
/// Don't descend into subqueries.
|
2019-03-11 13:22:51 +00:00
|
|
|
if (child->as<ASTSubquery>())
|
2018-03-16 06:33:23 +00:00
|
|
|
continue;
|
|
|
|
|
2019-01-22 12:33:56 +00:00
|
|
|
/// Don't descend into lambda functions
|
2019-03-11 13:22:51 +00:00
|
|
|
const auto * func = child->as<ASTFunction>();
|
2018-03-16 06:33:23 +00:00
|
|
|
if (func && func->name == "lambda")
|
|
|
|
continue;
|
|
|
|
|
2019-08-12 19:27:09 +00:00
|
|
|
makeSetsForIndex(child);
|
2018-02-14 17:39:16 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-03-11 13:22:51 +00:00
|
|
|
const auto * func = node->as<ASTFunction>();
|
2018-02-24 01:31:42 +00:00
|
|
|
if (func && functionIsInOperator(func->name))
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2017-07-14 00:33:37 +00:00
|
|
|
const IAST & args = *func->arguments;
|
2019-08-12 19:27:09 +00:00
|
|
|
const ASTPtr & left_in_operand = args.children.at(0);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-08-12 19:27:09 +00:00
|
|
|
if (storage()->mayBenefitFromIndexForIn(left_in_operand, context))
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2018-03-16 06:39:32 +00:00
|
|
|
const ASTPtr & arg = args.children.at(1);
|
2019-03-11 13:22:51 +00:00
|
|
|
if (arg->as<ASTSubquery>() || arg->as<ASTIdentifier>())
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-11-22 15:45:03 +00:00
|
|
|
if (settings.use_index_for_in_with_subqueries)
|
2019-01-22 12:33:56 +00:00
|
|
|
tryMakeSetForIndexFromSubquery(arg);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2019-12-20 13:15:17 +00:00
|
|
|
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceWithJoinedColumns(), context);
|
2019-08-12 19:27:09 +00:00
|
|
|
getRootActions(left_in_operand, true, temp_actions);
|
2019-01-22 12:33:56 +00:00
|
|
|
|
|
|
|
Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
|
2019-08-12 19:27:09 +00:00
|
|
|
if (sample_block_with_calculated_columns.has(left_in_operand->getColumnName()))
|
2019-01-22 12:33:56 +00:00
|
|
|
makeExplicitSet(func, sample_block_with_calculated_columns, true, context,
|
|
|
|
settings.size_limits_for_set, prepared_sets);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-08-01 13:29:32 +00:00
|
|
|
}
|
|
|
|
|
2018-10-16 12:34:20 +00:00
|
|
|
|
|
|
|
void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts)
|
2018-07-24 12:41:35 +00:00
|
|
|
{
|
2018-10-16 12:34:20 +00:00
|
|
|
LogAST log;
|
2019-08-15 13:54:59 +00:00
|
|
|
ActionsVisitor::Data visitor_data(context, settings.size_limits_for_set, subquery_depth,
|
2019-08-09 14:50:04 +00:00
|
|
|
sourceColumns(), actions, prepared_sets, subqueries_for_sets,
|
2020-01-27 21:57:44 +00:00
|
|
|
no_subqueries, false, only_consts, !isRemoteStorage());
|
|
|
|
ActionsVisitor(visitor_data, log.stream()).visit(ast);
|
|
|
|
visitor_data.updateActions(actions);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts)
|
|
|
|
{
|
|
|
|
LogAST log;
|
|
|
|
ActionsVisitor::Data visitor_data(context, settings.size_limits_for_set, subquery_depth,
|
|
|
|
sourceColumns(), actions, prepared_sets, subqueries_for_sets,
|
|
|
|
no_subqueries, true, only_consts, !isRemoteStorage());
|
2019-08-15 13:54:59 +00:00
|
|
|
ActionsVisitor(visitor_data, log.stream()).visit(ast);
|
|
|
|
visitor_data.updateActions(actions);
|
2018-10-16 12:34:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-08-13 12:39:03 +00:00
|
|
|
/// Appends one AggregateDescription to `aggregate_descriptions` for every aggregate function
/// node returned by aggregates(). For each argument of a function, the actions that calculate
/// it are added to `actions`, and its type is taken from the resulting sample block.
/// Returns true if the query contains at least one aggregate function.
bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & actions)
{
    for (const ASTFunction * node : aggregates())
    {
        AggregateDescription aggregate;
        aggregate.column_name = node->getColumnName();

        const ASTs & arguments = node->arguments->children;
        aggregate.argument_names.resize(arguments.size());
        DataTypes types(arguments.size());

        for (size_t i = 0; i < arguments.size(); ++i)
        {
            getRootActions(arguments[i], true, actions);

            String name = arguments[i]->getColumnName();
            types[i] = actions->getSampleBlock().getByName(name).type;

            /// The name is not needed past this point, so move it instead of copying.
            aggregate.argument_names[i] = std::move(name);
        }

        aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters) : Array();
        aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters);

        /// The local description is discarded right after, so move it into the list
        /// rather than deep-copying its names, parameters and argument list.
        aggregate_descriptions.push_back(std::move(aggregate));
    }

    return !aggregates().empty();
}
|
|
|
|
|
|
|
|
|
2019-08-13 13:48:09 +00:00
|
|
|
/// Returns the analyzed query as ASTSelectQuery.
/// Throws LOGICAL_ERROR if the query is of a different kind.
const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const
{
    if (const auto * select = query->as<ASTSelectQuery>())
        return select;

    throw Exception("Not a select query", ErrorCodes::LOGICAL_ERROR);
}
|
2013-06-14 16:38:54 +00:00
|
|
|
|
2019-08-14 19:30:30 +00:00
|
|
|
/// Returns the SELECT query, but only if it has aggregation.
/// Throws LOGICAL_ERROR when there is no aggregation.
const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() const
{
    if (has_aggregation)
        return getSelectQuery();

    throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR);
}
|
2013-06-14 16:38:54 +00:00
|
|
|
|
2017-12-25 21:57:29 +00:00
|
|
|
/// Adds the first step, built over `columns`, to an empty chain. A non-empty chain is left untouched.
void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const
{
    if (!chain.steps.empty())
        return;

    chain.steps.emplace_back(std::make_shared<ExpressionActions>(columns, context));
}
|
2013-05-28 11:54:37 +00:00
|
|
|
|
2016-07-23 02:25:09 +00:00
|
|
|
/// "Big" ARRAY JOIN.
|
2018-12-19 13:13:51 +00:00
|
|
|
/// Adds to `actions` the column copies required by ARRAY JOIN aliases and then a single
/// ARRAY JOIN action over all result columns listed in `syntax->array_join_result_to_source`.
void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool array_join_is_left) const
{
    NameSet columns_to_join;

    for (const auto & mapping : syntax->array_join_result_to_source)
    {
        const auto & result_name = mapping.first;
        const auto & source_name = mapping.second;

        /// Assign new names to columns, if needed.
        if (result_name != source_name)
            actions->add(ExpressionAction::copyColumn(source_name, result_name));

        /// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names.
        columns_to_join.insert(result_name);
    }

    actions->add(ExpressionAction::arrayJoin(columns_to_join, array_join_is_left, context));
}
|
|
|
|
|
2019-08-14 19:30:30 +00:00
|
|
|
/// Appends the ARRAY JOIN actions to the last step of `chain` (creating the first step if the chain is empty).
/// Returns false, leaving the chain untouched, when the query has no ARRAY JOIN.
bool SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types)
{
    /// Filled in by arrayJoinExpressionList; only read when an expression list is returned.
    bool is_left = false;
    ASTPtr expression_list = getSelectQuery()->arrayJoinExpressionList(is_left);
    if (!expression_list)
        return false;

    initChain(chain, sourceColumns());
    auto & last_step = chain.steps.back();

    getRootActions(expression_list, only_types, last_step.actions);
    addMultipleArrayJoinAction(last_step.actions, is_left);

    return true;
}
|
|
|
|
|
2019-09-16 12:37:46 +00:00
|
|
|
/// Adds an ordinary JOIN action, built from the analyzed join description and `join`, to `actions`.
void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, JoinPtr join) const
{
    const auto join_action = ExpressionAction::ordinaryJoin(syntax->analyzed_join, join);
    actions->add(join_action);
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-08-16 16:50:30 +00:00
|
|
|
/// Appends the JOIN actions to the last step of `chain` (creating the first step if the chain is empty).
/// Returns false, leaving the chain untouched, when the query has no JOIN.
bool SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types)
{
    const auto * join_element = getSelectQuery()->join();
    if (!join_element)
        return false;

    /// The join object is created before the chain is touched.
    JoinPtr join = makeTableJoin(*join_element);

    initChain(chain, sourceColumns());
    auto & last_step = chain.steps.back();

    getRootActions(analyzedJoin().leftKeysList(), only_types, last_step.actions);
    addJoinAction(last_step.actions, join);

    return true;
}
|
2018-08-28 13:57:31 +00:00
|
|
|
|
2020-04-08 18:59:52 +00:00
|
|
|
/// Returns the join provided by the joined storage when that storage is a StorageJoin,
/// and an empty pointer otherwise (including when there is no joined storage).
static JoinPtr tryGetStorageJoin(std::shared_ptr<TableJoin> analyzed_join)
{
    /// dynamic_cast of a null pointer yields null, so a missing storage is covered as well.
    auto * storage_join = dynamic_cast<StorageJoin *>(analyzed_join->joined_storage.get());
    if (!storage_join)
        return {};

    return storage_join->getJoin(analyzed_join);
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// Builds the actions over the columns of the joined table that calculate the right-hand JOIN keys.
static ExpressionActionsPtr createJoinedBlockActions(const Context & context, const TableJoin & analyzed_join)
{
    ASTPtr right_keys = analyzed_join.rightKeysList();

    SyntaxAnalyzer syntax_analyzer(context);
    auto syntax_result = syntax_analyzer.analyze(right_keys, analyzed_join.columnsFromJoinedTable());

    ExpressionAnalyzer keys_analyzer(right_keys, syntax_result, context);
    return keys_analyzer.getActions(true, false);
}
|
|
|
|
|
2020-04-09 20:00:57 +00:00
|
|
|
/// Checks whether the joined storage is a StorageDictionary whose dictionary structure has an `id` key.
/// On success returns true and fills `dict_name` and `key_name`.
/// `dict_name` is also written when the storage is a dictionary but the check fails later.
static bool allowDictJoin(StoragePtr joined_storage, const Context & context, String & dict_name, String & key_name)
{
    const auto * dictionary_storage = dynamic_cast<const StorageDictionary *>(joined_storage.get());
    if (!dictionary_storage)
        return false;

    dict_name = dictionary_storage->dictionaryName();

    auto dictionary = context.getExternalDictionariesLoader().getDictionary(dict_name);
    if (!dictionary)
        return false;

    const DictionaryStructure & structure = dictionary->getStructure();
    if (!structure.id)
        return false;

    key_name = structure.id->name;
    return true;
}
|
|
|
|
|
2020-04-09 20:00:57 +00:00
|
|
|
static std::shared_ptr<IJoin> makeJoin(std::shared_ptr<TableJoin> analyzed_join, const Block & sample_block, const Context & context)
|
2020-02-17 17:08:31 +00:00
|
|
|
{
|
2020-02-20 11:26:00 +00:00
|
|
|
bool allow_merge_join = analyzed_join->allowMergeJoin();
|
|
|
|
|
2020-04-09 20:00:57 +00:00
|
|
|
/// HashJoin with Dictionary optimisation
|
2020-04-08 19:58:27 +00:00
|
|
|
String dict_name;
|
2020-04-09 20:00:57 +00:00
|
|
|
String key_name;
|
|
|
|
if (analyzed_join->joined_storage && allowDictJoin(analyzed_join->joined_storage, context, dict_name, key_name))
|
2020-04-08 18:59:52 +00:00
|
|
|
{
|
2020-04-09 20:00:57 +00:00
|
|
|
Names original_names;
|
|
|
|
NamesAndTypesList result_columns;
|
|
|
|
if (analyzed_join->allowDictJoin(key_name, sample_block, original_names, result_columns))
|
|
|
|
{
|
|
|
|
analyzed_join->dictionary_reader = std::make_shared<DictionaryReader>(dict_name, original_names, result_columns, context);
|
|
|
|
return std::make_shared<HashJoin>(analyzed_join, sample_block);
|
|
|
|
}
|
2020-04-08 18:59:52 +00:00
|
|
|
}
|
|
|
|
|
2020-02-20 11:26:00 +00:00
|
|
|
if (analyzed_join->forceHashJoin() || (analyzed_join->preferMergeJoin() && !allow_merge_join))
|
2020-04-07 09:48:47 +00:00
|
|
|
return std::make_shared<HashJoin>(analyzed_join, sample_block);
|
2020-02-20 11:26:00 +00:00
|
|
|
else if (analyzed_join->forceMergeJoin() || (analyzed_join->preferMergeJoin() && allow_merge_join))
|
2020-02-17 17:08:31 +00:00
|
|
|
return std::make_shared<MergeJoin>(analyzed_join, sample_block);
|
|
|
|
return std::make_shared<JoinSwitcher>(analyzed_join, sample_block);
|
|
|
|
}
|
|
|
|
|
2019-09-16 12:37:46 +00:00
|
|
|
/// Returns the join object for `join_element`, creating it on first use.
/// The object is cached in `subqueries_for_sets` under an id built from the tree hash of the element,
/// which is why two JOINs with the same subquery but different USINGs are not supported.
/// For a join over a dictionary the cache entry is erased again and only the join itself is returned.
JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQueryElement & join_element)
{
    const auto tree_hash = join_element.getTreeHash();
    const String join_subquery_id = toString(tree_hash.first) + "_" + toString(tree_hash.second);

    SubqueryForSet & subquery_for_join = subqueries_for_sets[join_subquery_id];
    if (subquery_for_join.join)
        return subquery_for_join.join;

    /// Use StorageJoin if any.
    subquery_for_join.join = tryGetStorageJoin(syntax->analyzed_join);
    if (subquery_for_join.join)
        return subquery_for_join.join;

    /// Actions which need to be calculated on joined block.
    ExpressionActionsPtr joined_block_actions = createJoinedBlockActions(context, analyzedJoin());

    if (!subquery_for_join.source)
    {
        NamesWithAliases required_columns_with_aliases = analyzedJoin().getRequiredColumns(
            joined_block_actions->getSampleBlock(), joined_block_actions->getRequiredColumns());

        /// The subquery is interpreted with the original (non-aliased) column names.
        Names original_right_columns;
        for (const auto & name_with_alias : required_columns_with_aliases)
            original_right_columns.push_back(name_with_alias.first);

        /** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs
          * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1,
          *   in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`.
          * - this function shows the expression JOIN _data1.
          */
        auto interpreter = interpretSubquery(join_element.table_expression, context, original_right_columns, query_options);

        subquery_for_join.makeSource(interpreter, std::move(required_columns_with_aliases));
    }

    /// TODO You do not need to set this up when JOIN is only needed on remote servers.
    subquery_for_join.setJoinActions(joined_block_actions); /// changes subquery_for_join.sample_block inside
    subquery_for_join.join = makeJoin(syntax->analyzed_join, subquery_for_join.sample_block, context);

    /// Do not make subquery for join over dictionary.
    if (syntax->analyzed_join->dictionary_reader)
    {
        /// Copy the pointer first: erasing the map entry invalidates `subquery_for_join`.
        JoinPtr dictionary_join = subquery_for_join.join;
        subqueries_for_sets.erase(join_subquery_id);
        return dictionary_join;
    }

    return subquery_for_join.join;
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-08-14 19:30:30 +00:00
|
|
|
bool SelectQueryExpressionAnalyzer::appendPrewhere(
|
2018-11-08 16:39:43 +00:00
|
|
|
ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns)
|
2018-04-06 13:58:06 +00:00
|
|
|
{
|
2019-08-13 13:48:09 +00:00
|
|
|
const auto * select_query = getSelectQuery();
|
2018-04-06 13:58:06 +00:00
|
|
|
|
2019-04-09 14:22:35 +00:00
|
|
|
if (!select_query->prewhere())
|
2018-04-06 13:58:06 +00:00
|
|
|
return false;
|
|
|
|
|
2019-08-09 14:50:04 +00:00
|
|
|
initChain(chain, sourceColumns());
|
2018-04-12 09:45:24 +00:00
|
|
|
auto & step = chain.getLastStep();
|
2019-04-09 14:22:35 +00:00
|
|
|
getRootActions(select_query->prewhere(), only_types, step.actions);
|
|
|
|
String prewhere_column_name = select_query->prewhere()->getColumnName();
|
2018-04-12 09:45:24 +00:00
|
|
|
step.required_output.push_back(prewhere_column_name);
|
2018-06-29 11:42:44 +00:00
|
|
|
step.can_remove_required_output.push_back(true);
|
2018-04-12 09:45:24 +00:00
|
|
|
|
|
|
|
{
|
|
|
|
/// Remove unused source_columns from prewhere actions.
|
2019-08-09 14:50:04 +00:00
|
|
|
auto tmp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context);
|
2019-04-09 14:22:35 +00:00
|
|
|
getRootActions(select_query->prewhere(), only_types, tmp_actions);
|
2018-04-12 09:45:24 +00:00
|
|
|
tmp_actions->finalize({prewhere_column_name});
|
|
|
|
auto required_columns = tmp_actions->getRequiredColumns();
|
|
|
|
NameSet required_source_columns(required_columns.begin(), required_columns.end());
|
|
|
|
|
2018-10-04 08:58:19 +00:00
|
|
|
/// Add required columns to required output in order not to remove them after prewhere execution.
|
|
|
|
/// TODO: add sampling and final execution to common chain.
|
2018-11-08 16:39:43 +00:00
|
|
|
for (const auto & column : additional_required_columns)
|
2018-09-10 10:06:12 +00:00
|
|
|
{
|
|
|
|
if (required_source_columns.count(column))
|
|
|
|
{
|
|
|
|
step.required_output.push_back(column);
|
|
|
|
step.can_remove_required_output.push_back(true);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-12 09:45:24 +00:00
|
|
|
auto names = step.actions->getSampleBlock().getNames();
|
|
|
|
NameSet name_set(names.begin(), names.end());
|
|
|
|
|
2019-08-09 14:50:04 +00:00
|
|
|
for (const auto & column : sourceColumns())
|
2018-04-12 09:45:24 +00:00
|
|
|
if (required_source_columns.count(column.name) == 0)
|
|
|
|
name_set.erase(column.name);
|
|
|
|
|
|
|
|
Names required_output(name_set.begin(), name_set.end());
|
|
|
|
step.actions->finalize(required_output);
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
/// Add empty action with input = {prewhere actions output} + {unused source columns}
|
|
|
|
/// Reasons:
|
|
|
|
/// 1. Remove remove source columns which are used only in prewhere actions during prewhere actions execution.
|
|
|
|
/// Example: select A prewhere B > 0. B can be removed at prewhere step.
|
|
|
|
/// 2. Store side columns which were calculated during prewhere actions execution if they are used.
|
|
|
|
/// Example: select F(A) prewhere F(A) > 0. F(A) can be saved from prewhere step.
|
2018-06-25 13:08:35 +00:00
|
|
|
/// 3. Check if we can remove filter column at prewhere step. If we can, action will store single REMOVE_COLUMN.
|
2018-04-12 09:45:24 +00:00
|
|
|
ColumnsWithTypeAndName columns = step.actions->getSampleBlock().getColumnsWithTypeAndName();
|
|
|
|
auto required_columns = step.actions->getRequiredColumns();
|
|
|
|
NameSet prewhere_input_names(required_columns.begin(), required_columns.end());
|
|
|
|
NameSet unused_source_columns;
|
|
|
|
|
2019-08-09 14:50:04 +00:00
|
|
|
for (const auto & column : sourceColumns())
|
2018-04-12 09:45:24 +00:00
|
|
|
{
|
|
|
|
if (prewhere_input_names.count(column.name) == 0)
|
|
|
|
{
|
|
|
|
columns.emplace_back(column.type, column.name);
|
|
|
|
unused_source_columns.emplace(column.name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-08-30 16:31:20 +00:00
|
|
|
chain.steps.emplace_back(std::make_shared<ExpressionActions>(std::move(columns), context));
|
2018-04-12 09:45:24 +00:00
|
|
|
chain.steps.back().additional_input = std::move(unused_source_columns);
|
|
|
|
}
|
2018-04-06 13:58:06 +00:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
2016-07-22 20:39:28 +00:00
|
|
|
|
2019-10-03 11:58:52 +00:00
|
|
|
/// Installs ready-made filter `actions` as the last step of the chain, marks `column_name`
/// as its (removable) required output and opens a new step after it.
void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name)
{
    initChain(chain, sourceColumns());
    auto & filter_step = chain.steps.back();

    // FIXME: assert(filter_info);
    /// The step's own actions are replaced by the supplied ones.
    filter_step.actions = std::move(actions);
    filter_step.required_output.push_back(std::move(column_name));
    filter_step.can_remove_required_output = {true};

    chain.addStep();
}
|
|
|
|
|
2019-08-14 19:30:30 +00:00
|
|
|
/// Adds the WHERE expression to the last step of the chain.
/// Returns false when the query has no WHERE; otherwise the filter column becomes
/// a required output of the step that is allowed to be removed.
bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types)
{
    const auto * select_query = getSelectQuery();

    const ASTPtr where_ast = select_query->where();
    if (!where_ast)
        return false;

    initChain(chain, sourceColumns());
    auto & where_step = chain.steps.back();

    where_step.required_output.push_back(where_ast->getColumnName());
    where_step.can_remove_required_output = {true};

    getRootActions(where_ast, only_types, where_step.actions);

    return true;
}
|
|
|
|
|
2019-08-14 19:30:30 +00:00
|
|
|
/// Adds every GROUP BY key to the last step of the chain and to its required output.
/// Returns false when the aggregating query has no GROUP BY.
bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types)
{
    const auto * select_query = getAggregatingQuery();

    if (!select_query->groupBy())
        return false;

    initChain(chain, sourceColumns());
    auto & group_by_step = chain.steps.back();

    /// The loop runs over a copy of the key list.
    const ASTs group_by_keys = select_query->groupBy()->children;
    for (const auto & key : group_by_keys)
    {
        group_by_step.required_output.emplace_back(key->getColumnName());
        getRootActions(key, only_types, group_by_step.actions);
    }

    return true;
}
|
|
|
|
|
2019-08-14 19:30:30 +00:00
|
|
|
/// Makes the arguments of all aggregate functions available at the last step of the chain:
/// their names are added to the required output, and actions computing them are appended.
void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types)
{
    const auto * select_query = getAggregatingQuery();

    initChain(chain, sourceColumns());
    auto & arguments_step = chain.steps.back();

    for (const auto & description : aggregate_descriptions)
    {
        for (const auto & argument_name : description.argument_names)
            arguments_step.required_output.emplace_back(argument_name);
    }

    /// Collect aggregates removing duplicates by node.getColumnName()
    /// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query)
    /// @note The original recollection logic didn't remove duplicates.
    GetAggregatesVisitor::Data collected;
    GetAggregatesVisitor(collected).visit(select_query->select());

    if (select_query->having())
        GetAggregatesVisitor(collected).visit(select_query->having());

    if (select_query->orderBy())
        GetAggregatesVisitor(collected).visit(select_query->orderBy());

    /// TODO: data.aggregates -> aggregates()
    for (const ASTFunction * aggregate : collected.aggregates)
    {
        for (const auto & argument : aggregate->arguments->children)
            getRootActions(argument, only_types, arguments_step.actions);
    }
}
|
|
|
|
|
2019-08-14 19:30:30 +00:00
|
|
|
/// Adds the HAVING expression to the last step of the chain (built over the aggregated columns).
/// Returns false when the aggregating query has no HAVING.
bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_types)
{
    const auto * select_query = getAggregatingQuery();

    const ASTPtr having_ast = select_query->having();
    if (!having_ast)
        return false;

    initChain(chain, aggregated_columns);
    auto & having_step = chain.steps.back();

    having_step.required_output.push_back(having_ast->getColumnName());
    getRootActions(having_ast, only_types, having_step.actions);

    return true;
}
|
|
|
|
|
2019-08-14 19:30:30 +00:00
|
|
|
/// Adds the SELECT list to the last step of the chain; every selected expression
/// becomes a required output of that step.
void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_types)
{
    const auto * select_query = getSelectQuery();

    initChain(chain, aggregated_columns);
    auto & select_step = chain.steps.back();

    const ASTPtr select_list = select_query->select();
    getRootActions(select_list, only_types, select_step.actions);

    for (const auto & selected : select_list->children)
        select_step.required_output.push_back(selected->getColumnName());
}
|
2013-05-24 10:49:19 +00:00
|
|
|
|
2020-02-10 19:55:13 +00:00
|
|
|
/// Adds the ORDER BY expressions to the last step of the chain and to its required output.
/// When `optimize_read_in_order` is set, separate actions are additionally built for every
/// ORDER BY element and appended to `order_by_elements_actions`.
/// Returns false when the query has no ORDER BY.
/// Throws UNKNOWN_TYPE_OF_AST_NODE if an ORDER BY child is not an ASTOrderByElement or has no children.
bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order,
                                                  ManyExpressionActions & order_by_elements_actions)
{
    const auto * select_query = getSelectQuery();

    const ASTPtr order_by_ast = select_query->orderBy();
    if (!order_by_ast)
        return false;

    initChain(chain, aggregated_columns);
    auto & order_by_step = chain.steps.back();

    getRootActions(order_by_ast, only_types, order_by_step.actions);

    for (const auto & element : order_by_ast->children)
    {
        const auto * order_by_element = element->as<ASTOrderByElement>();
        if (!order_by_element || order_by_element->children.empty())
            throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);

        /// The sort expression is the first child of the element.
        ASTPtr order_expression = order_by_element->children.front();
        order_by_step.required_output.push_back(order_expression->getColumnName());
    }

    if (!optimize_read_in_order)
        return true;

    auto all_columns = sourceWithJoinedColumns();
    for (const auto & element : order_by_ast->children)
    {
        order_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(all_columns, context));
        getRootActions(element, only_types, order_by_elements_actions.back());
    }

    return true;
}
|
|
|
|
|
2019-08-14 19:30:30 +00:00
|
|
|
/// Adds the LIMIT BY expressions to the last step of the chain.
/// All aggregated columns are required at the output of the step, plus those
/// LIMIT BY expressions that are not among them.
/// Returns false when the query has no LIMIT BY.
bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types)
{
    const auto * select_query = getSelectQuery();

    const ASTPtr limit_by_ast = select_query->limitBy();
    if (!limit_by_ast)
        return false;

    initChain(chain, aggregated_columns);
    auto & limit_by_step = chain.steps.back();

    getRootActions(limit_by_ast, only_types, limit_by_step.actions);

    NameSet aggregated_names;
    for (const auto & aggregated_column : aggregated_columns)
    {
        limit_by_step.required_output.push_back(aggregated_column.name);
        aggregated_names.insert(aggregated_column.name);
    }

    for (const auto & limit_by_expression : limit_by_ast->children)
    {
        auto expression_name = limit_by_expression->getColumnName();

        /// Skip names that were already added as aggregated columns.
        if (aggregated_names.find(expression_name) == aggregated_names.end())
            limit_by_step.required_output.push_back(std::move(expression_name));
    }

    return true;
}
|
|
|
|
|
2019-08-14 19:30:30 +00:00
|
|
|
/// Appends the final projection to the last step of the chain: every SELECT expression is mapped
/// from its source column name to its alias (or column name). If `required_result_columns`
/// is not empty, only the result names present in it are projected.
void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const
{
    const auto * select_query = getSelectQuery();

    initChain(chain, aggregated_columns);
    auto & projection_step = chain.steps.back();

    NamesWithAliases result_columns;

    /// The loop runs over a copy of the SELECT list.
    const ASTs select_expressions = select_query->select()->children;
    for (const auto & expression : select_expressions)
    {
        String result_name = expression->getAliasOrColumnName();

        const bool is_required = required_result_columns.empty() || required_result_columns.count(result_name);
        if (!is_required)
            continue;

        std::string source_name = expression->getColumnName();

        /*
         * For temporary columns created by ExpressionAnalyzer for literals,
         * use the correct source column. Using the default display name
         * returned by getColumnName is not enough, and we have to use the
         * column id set by EA. In principle, this logic applies to all kinds
         * of columns, not only literals. Literals are especially problematic
         * for two reasons:
         * 1) confusing different literal columns leads to weird side
         *    effects (see 01101_literal_columns_clash);
         * 2) the disambiguation mechanism in SyntaxAnalyzer, that, among
         *    other things, creates unique aliases for columns with same
         *    names from different tables, is applied before these temporary
         *    columns are created by ExpressionAnalyzer.
         * Similar problems should also manifest for function columns, which
         * are likewise created at a later stage by EA.
         * In general, we need to have explicit separation between display
         * names and identifiers for columns. This code is a workaround for
         * a particular subclass of problems, and not a proper solution.
         */
        if (const auto * literal = expression->as<ASTLiteral>())
        {
            source_name = literal->unique_column_name;
            assert(!source_name.empty());
        }

        result_columns.emplace_back(source_name, result_name);
        projection_step.required_output.push_back(result_name);
    }

    projection_step.actions->add(ExpressionAction::project(result_columns));
}
|
|
|
|
|
|
|
|
|
2018-09-04 13:45:39 +00:00
|
|
|
/// Adds the actions computing `expr` to the last step of the chain
/// and requires the resulting column at the output of that step.
void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types)
{
    initChain(chain, sourceColumns());
    auto & expression_step = chain.steps.back();

    getRootActions(expr, only_types, expression_step.actions);
    expression_step.required_output.push_back(expr->getColumnName());
}
|
|
|
|
|
|
|
|
|
2018-09-03 17:24:46 +00:00
|
|
|
/// Builds the actions computing the analyzed expression (or every element of an expression list).
///  add_aliases    - name the results by alias instead of by column name;
///  project_result - together with add_aliases: leave only the computed columns in the result.
/// Unless both flags are set, the source columns are kept in the result as well.
ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result)
{
    auto actions = std::make_shared<ExpressionActions>(sourceColumns(), context);

    NamesWithAliases result_columns;
    Names result_names;

    /// A single expression is handled as a list of one element.
    ASTs expressions;
    if (const auto * expression_list = query->as<ASTExpressionList>())
        expressions = expression_list->children;
    else
        expressions = ASTs(1, query);

    for (const auto & expression : expressions)
    {
        const std::string column_name = expression->getColumnName();
        const std::string result_name = add_aliases ? expression->getAliasOrColumnName() : column_name;

        result_columns.emplace_back(column_name, result_name);
        result_names.push_back(result_name);
        getRootActions(expression, false, actions);
    }

    if (add_aliases)
    {
        actions->add(project_result
            ? ExpressionAction::project(result_columns)
            : ExpressionAction::addAliases(result_columns));
    }

    if (!add_aliases || !project_result)
    {
        /// We will not delete the original columns.
        for (const auto & source_column : sourceColumns())
            result_names.push_back(source_column.name);
    }

    actions->finalize(result_names);

    return actions;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Builds actions for the analyzed expression over an empty list of input columns.
/// getRootActions is called with both of its boolean flags set to true.
ExpressionActionsPtr ExpressionAnalyzer::getConstActions()
{
    auto const_actions = std::make_shared<ExpressionActions>(NamesAndTypesList(), context);
    getRootActions(query, true, const_actions, true);
    return const_actions;
}
|
|
|
|
|
2020-02-10 15:50:12 +00:00
|
|
|
/// Returns the actions of a fresh chain that contains only the SELECT step.
ExpressionActionsPtr SelectQueryExpressionAnalyzer::simpleSelectActions()
{
    ExpressionActionsChain select_only_chain(context);
    appendSelect(select_only_chain, /* only_types = */ false);
    return select_only_chain.getLastActions();
}
|
|
|
|
|
2020-02-10 19:55:13 +00:00
|
|
|
/// Composes the whole chain of expression actions for a SELECT query and remembers the actions
/// of every step (PREWHERE, JOIN, WHERE, aggregation, HAVING, SELECT + ORDER BY, LIMIT BY, projection).
///  first_stage  - whether to perform the first part of the pipeline (running on remote servers during distributed processing);
///  second_stage - whether to perform the second part of the pipeline (running on the initiating server);
///  only_types   - passed on to the append* methods together with the stage flags;
///  filter_info_ - optional preliminary filter, used only when the analyzer has a storage;
///  source_header - sample block used to detect constant PREWHERE / WHERE conditions early.
ExpressionAnalysisResult::ExpressionAnalysisResult(
        SelectQueryExpressionAnalyzer & query_analyzer,
        bool first_stage_,
        bool second_stage_,
        bool only_types,
        const FilterInfoPtr & filter_info_,
        const Block & source_header)
    : first_stage(first_stage_)
    , second_stage(second_stage_)
    , need_aggregate(query_analyzer.hasAggregation())
{
    /** First we compose a chain of actions and remember the necessary steps from it.
      *  Regardless of from_stage and to_stage, we will compose a complete sequence of actions to perform optimization and
      *  throw out unnecessary columns based on the entire query. In unnecessary parts of the query, we will not execute subqueries.
      */

    const ASTSelectQuery & query = *query_analyzer.getSelectQuery();
    const Context & context = query_analyzer.context;
    const Settings & settings = context.getSettingsRef();
    const ConstStoragePtr & storage = query_analyzer.storage();

    /// The flag combinations passed to the append* methods for each part of the pipeline.
    const bool first_stage_only_types = only_types || !first_stage;
    const bool second_stage_only_types = only_types || !second_stage;
    /// If there is aggregation, SELECT and ORDER BY belong to the second stage, otherwise to the first one.
    const bool select_only_types = only_types || (need_aggregate ? !second_stage : !first_stage);

    bool finalized = false;
    size_t where_step_num = 0;

    /// Finalizes and clears the chain; the result-level finalize() runs only for the first chain finalized.
    auto finalize_chain = [&](ExpressionActionsChain & chain_to_finalize)
    {
        chain_to_finalize.finalize();

        if (!finalized)
        {
            finalize(chain_to_finalize, context, where_step_num);
            finalized = true;
        }

        chain_to_finalize.clear();
    };

    {
        ExpressionActionsChain chain(context);
        Names additional_required_columns_after_prewhere;

        if (storage && (query.sampleSize() || settings.parallel_replicas_count > 1))
        {
            Names sampling_columns = storage->getColumnsRequiredForSampling();
            additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(),
                sampling_columns.begin(), sampling_columns.end());
        }

        if (storage && query.final())
        {
            Names final_columns = storage->getColumnsRequiredForFinal();
            additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(),
                final_columns.begin(), final_columns.end());
        }

        if (storage && filter_info_)
        {
            filter_info = filter_info_;
            query_analyzer.appendPreliminaryFilter(chain, filter_info->actions, filter_info->column_name);
        }

        /// Note: PREWHERE gets `!first_stage` only, without `only_types`.
        if (query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere))
        {
            prewhere_info = std::make_shared<PrewhereInfo>(
                chain.steps.front().actions, query.prewhere()->getColumnName());

            if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings))
            {
                Block prewhere_sample = source_header;
                if (sanitizeBlock(prewhere_sample))
                {
                    prewhere_info->prewhere_actions->execute(prewhere_sample);
                    const auto & filter_column = prewhere_sample.getByName(query.prewhere()->getColumnName());
                    /// If the filter column is a constant, record it.
                    if (filter_column.column)
                        prewhere_constant_filter_description = ConstantFilterDescription(*filter_column.column);
                }
            }
            chain.addStep();
        }

        query_analyzer.appendArrayJoin(chain, first_stage_only_types);

        if (query_analyzer.appendJoin(chain, first_stage_only_types))
        {
            before_join = chain.getLastActions();
            if (!hasJoin())
                throw Exception("No expected JOIN", ErrorCodes::LOGICAL_ERROR);
            chain.addStep();
        }

        if (query_analyzer.appendWhere(chain, first_stage_only_types))
        {
            where_step_num = chain.steps.size() - 1;
            before_where = chain.getLastActions();
            if (allowEarlyConstantFolding(*before_where, settings))
            {
                /// The input of the WHERE step is the output of the previous step, or the source header if there is none.
                Block where_sample;
                if (chain.steps.size() > 1)
                    where_sample = chain.steps[chain.steps.size() - 2].actions->getSampleBlock();
                else
                    where_sample = source_header;

                if (sanitizeBlock(where_sample))
                {
                    before_where->execute(where_sample);
                    const auto & filter_column = where_sample.getByName(query.where()->getColumnName());
                    /// If the filter column is a constant, record it.
                    if (filter_column.column)
                        where_constant_filter_description = ConstantFilterDescription(*filter_column.column);
                }
            }
            chain.addStep();
        }

        if (need_aggregate)
        {
            query_analyzer.appendGroupBy(chain, first_stage_only_types);
            query_analyzer.appendAggregateFunctionsArguments(chain, first_stage_only_types);
            before_aggregation = chain.getLastActions();

            finalize_chain(chain);

            if (query_analyzer.appendHaving(chain, second_stage_only_types))
            {
                before_having = chain.getLastActions();
                chain.addStep();
            }
        }

        const bool has_stream_with_non_joined_rows
            = (before_join && before_join->getTableJoinAlgo()->hasStreamWithNonJoinedRows());

        optimize_read_in_order =
            settings.optimize_read_in_order
            && storage && query.orderBy()
            && !query_analyzer.hasAggregation()
            && !query.final()
            && !has_stream_with_non_joined_rows;

        /// If there is aggregation, we execute expressions in SELECT and ORDER BY on the initiating server, otherwise on the source servers.
        query_analyzer.appendSelect(chain, select_only_types);
        selected_columns = chain.getLastStep().required_output;
        has_order_by = query_analyzer.appendOrderBy(chain, select_only_types,
                                                    optimize_read_in_order, order_by_elements_actions);
        before_order_and_select = chain.getLastActions();
        chain.addStep();

        if (query_analyzer.appendLimitBy(chain, second_stage_only_types))
        {
            before_limit_by = chain.getLastActions();
            chain.addStep();
        }

        query_analyzer.appendProjectResult(chain);
        final_projection = chain.getLastActions();

        finalize_chain(chain);
    }

    /// Before executing WHERE and HAVING, remove the extra columns from the block (mostly the aggregation keys).
    removeExtraColumns();

    checkActions();
}
|
|
|
|
|
|
|
|
void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, const Context & context_, size_t where_step_num)
|
|
|
|
{
|
|
|
|
if (hasPrewhere())
|
|
|
|
{
|
|
|
|
const ExpressionActionsChain::Step & step = chain.steps.at(0);
|
|
|
|
prewhere_info->remove_prewhere_column = step.can_remove_required_output.at(0);
|
|
|
|
|
|
|
|
Names columns_to_remove;
|
|
|
|
for (size_t i = 1; i < step.required_output.size(); ++i)
|
|
|
|
{
|
|
|
|
if (step.can_remove_required_output[i])
|
|
|
|
columns_to_remove.push_back(step.required_output[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!columns_to_remove.empty())
|
|
|
|
{
|
|
|
|
auto columns = prewhere_info->prewhere_actions->getSampleBlock().getNamesAndTypesList();
|
|
|
|
ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(columns, context_);
|
|
|
|
for (const auto & column : columns_to_remove)
|
|
|
|
actions->add(ExpressionAction::removeColumn(column));
|
|
|
|
|
|
|
|
prewhere_info->remove_columns_actions = std::move(actions);
|
|
|
|
}
|
|
|
|
|
|
|
|
columns_to_remove_after_prewhere = std::move(columns_to_remove);
|
|
|
|
}
|
|
|
|
else if (hasFilter())
|
|
|
|
{
|
|
|
|
/// Can't have prewhere and filter set simultaneously
|
|
|
|
filter_info->do_remove_column = chain.steps.at(0).can_remove_required_output.at(0);
|
|
|
|
}
|
|
|
|
if (hasWhere())
|
|
|
|
remove_where_filter = chain.steps.at(where_step_num).can_remove_required_output.at(0);
|
|
|
|
}
|
|
|
|
|
2020-04-22 06:01:33 +00:00
|
|
|
void ExpressionAnalysisResult::removeExtraColumns() const
|
2020-02-10 15:50:12 +00:00
|
|
|
{
|
|
|
|
if (hasFilter())
|
|
|
|
filter_info->actions->prependProjectInput();
|
|
|
|
if (hasWhere())
|
|
|
|
before_where->prependProjectInput();
|
|
|
|
if (hasHaving())
|
|
|
|
before_having->prependProjectInput();
|
|
|
|
}
|
|
|
|
|
2020-04-22 06:01:33 +00:00
|
|
|
void ExpressionAnalysisResult::checkActions() const
|
2020-02-10 15:50:12 +00:00
|
|
|
{
|
|
|
|
/// Check that PREWHERE doesn't contain unusual actions. Unusual actions are that can change number of rows.
|
|
|
|
if (hasPrewhere())
|
|
|
|
{
|
|
|
|
auto check_actions = [](const ExpressionActionsPtr & actions)
|
|
|
|
{
|
|
|
|
if (actions)
|
|
|
|
for (const auto & action : actions->getActions())
|
|
|
|
if (action.type == ExpressionAction::Type::JOIN || action.type == ExpressionAction::Type::ARRAY_JOIN)
|
|
|
|
throw Exception("PREWHERE cannot contain ARRAY JOIN or JOIN action", ErrorCodes::ILLEGAL_PREWHERE);
|
|
|
|
};
|
|
|
|
|
|
|
|
check_actions(prewhere_info->prewhere_actions);
|
|
|
|
check_actions(prewhere_info->alias_actions);
|
|
|
|
check_actions(prewhere_info->remove_columns_actions);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-05-24 10:49:19 +00:00
|
|
|
}
|