ExpressionAnalyzer: track which SELECT expressions must be computed before the final projection.

  * Adds ExpressionAnalyzer::calculateRequiredColumnsBeforeProjection(), called from init().
    It fills required_columns_before_projection with every SELECT expression when
    required_result_columns is empty or the query is DISTINCT; otherwise only with
    expressions that contain arrayJoin or whose alias/column name is in required_result_columns.
  * Adds the file-local helper hasArrayJoin(), which searches an AST subtree for a function
    named "arrayJoin" and skips children that are ASTSelectQuery nodes.
  * appendSelect() and getRequiredSourceColumnsInSelectImpl() now filter SELECT expressions
    by membership in required_columns_before_projection.
  * InterpreterSelectQuery::executeImpl(): executeProjection() is moved from before the extremes
    calculation to after the optional second DISTINCT pass (before LIMIT BY / LIMIT); an added
    else branch calls it on the other path.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy. Indentation and the
position of blank context lines are best-effort; hunk line counts match the @@ headers.

diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index 36fff0bbdb0..ea9caa8bc10 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -215,6 +215,9 @@ void ExpressionAnalyzer::init()
     /// array_join_alias_to_name, array_join_result_to_source.
     getArrayJoinedColumns();
 
+    /// All selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
+    calculateRequiredColumnsBeforeProjection();
+
     /// Delete the unnecessary from `columns` list. Create `unknown_required_source_columns`. Form `columns_added_by_join`.
     collectUsedColumns();
 
@@ -2491,11 +2494,9 @@ void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_
 
     getRootActions(select_query->select_expression_list, only_types, false, step.actions);
 
-    ASTs asts = select_query->select_expression_list->children;
-    for (size_t i = 0; i < asts.size(); ++i)
-    {
-        step.required_output.push_back(asts[i]->getColumnName());
-    }
+    for (const auto & child : select_query->select_expression_list->children)
+        if (required_columns_before_projection.count(child->getColumnName()))
+            step.required_output.push_back(child->getColumnName());
 }
 
 bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types)
@@ -2797,9 +2798,8 @@ void ExpressionAnalyzer::getRequiredSourceColumnsInSelectImpl(
         return;
     }
 
-    /// TODO: DISTINCT, arrayJoin
     for (const auto & child : select_query->select_expression_list->children)
-        if (required_result_columns.empty() || required_result_columns.count(child->getAliasOrColumnName()))
+        if (required_columns_before_projection.count(child->getColumnName()))
             getRequiredSourceColumnsImpl(child, available_columns, required_source_columns, ignored_names,
                 available_joined_columns, required_joined_columns);
 
@@ -2896,4 +2896,32 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast,
     }
 }
 
+
+static bool hasArrayJoin(const ASTPtr & ast)
+{
+    if (const ASTFunction * function = typeid_cast<const ASTFunction *>(&*ast))
+        if (function->name == "arrayJoin")
+            return true;
+
+    for (const auto & child : ast->children)
+        if (!typeid_cast<ASTSelectQuery *>(child.get()) && hasArrayJoin(child))
+            return true;
+
+    return false;
+}
+
+
+void ExpressionAnalyzer::calculateRequiredColumnsBeforeProjection()
+{
+    if (!select_query)
+        return;
+
+    for (const auto & child : select_query->select_expression_list->children)
+        if (required_result_columns.empty()
+            || select_query->distinct
+            || hasArrayJoin(child)
+            || required_result_columns.count(child->getAliasOrColumnName()))
+            required_columns_before_projection.insert(child->getColumnName());
+}
+
 }
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h
index 15be363ac38..e01871ac141 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.h
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.h
@@ -335,6 +335,12 @@ private:
       */
     void translateQualifiedNames();
     void translateQualifiedNamesImpl(ASTPtr & node, const String & database_name, const String & table_name, const String & alias);
+
+    /** Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
+      * This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
+      */
+    NameSet required_columns_before_projection;
+    void calculateRequiredColumnsBeforeProjection();
 };
 
 }
diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
index 9e310ec745b..4361a17dedf 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@@ -449,8 +449,6 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
             executeOrder(pipeline);
         }
 
-        executeProjection(pipeline, expressions.final_projection);
-
         /// At this stage, we can calculate the minimums and maximums, if necessary.
         if (settings.extremes)
         {
@@ -487,9 +485,14 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
 
         if (need_second_distinct_pass)
             executeDistinct(pipeline, false, Names());
+        executeProjection(pipeline, expressions.final_projection);
         executeLimitBy(pipeline);
         executeLimit(pipeline);
     }
+    else
+    {
+        executeProjection(pipeline, expressions.final_projection);
+    }
 }
 
 }