diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index f0150fe663f..e886027683f 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -52,10 +52,6 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( if (!num_children) throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); - /// This is required for UNION to match headers correctly. - if (num_children > 1) - options.reorderColumns(); - /// Note that we pass 'required_result_column_names' to first SELECT. /// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT, /// because names could be different. @@ -163,7 +159,22 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( { Blocks headers(num_children); for (size_t query_num = 0; query_num < num_children; ++query_num) + { headers[query_num] = nested_interpreters[query_num]->getSampleBlock(); + const auto & current_required_result_column_names = required_result_column_names_for_other_selects[query_num]; + if (!current_required_result_column_names.empty()) + { + const auto & header_columns = headers[query_num].getNames(); + if (current_required_result_column_names != header_columns) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Different order of columns in UNION subquery: {} and {}", + fmt::join(current_required_result_column_names, ", "), + fmt::join(header_columns, ", ")); + } + + } + } result_header = getCommonHeaderForUnion(headers); } diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index 032befe1993..6b5a6a7f8eb 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -31,8 +31,6 @@ struct SelectQueryOptions bool only_analyze = false; bool modify_inplace = false; bool remove_duplicates = false; - /// This is required for UNION to match headers correctly. - bool reorder_columns_as_required_header = false; bool ignore_quota = false; bool ignore_limits = false; /// This flag is needed to analyze query ignoring table projections. @@ -99,12 +97,6 @@ struct SelectQueryOptions return *this; } - SelectQueryOptions & reorderColumns(bool value = true) - { - reorder_columns_as_required_header = value; - return *this; - } - SelectQueryOptions & noSubquery() { subquery_depth = 0; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 7fa1fe2f012..9a4a956f3dc 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -429,21 +429,28 @@ void renameDuplicatedColumns(const ASTSelectQuery * select_query) /// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. /// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are impossible. /// Also remove all INTERPOLATE columns which are not in SELECT anymore. -void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups, bool reorder_columns_as_required_header) +void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups) { ASTs & elements = select_query->select()->children; - std::map required_columns_with_duplicate_count; + std::unordered_map required_columns_with_duplicate_count; + /// Order of output columns should match order in required_result_columns, + /// otherwise UNION queries may have incorrect header when subselect has duplicated columns. + /// + /// NOTE: multimap is required since there can be duplicated column names. + std::unordered_multimap output_columns_positions; if (!required_result_columns.empty()) { /// Some columns may be queried multiple times, like SELECT x, y, y FROM table. - for (const auto & name : required_result_columns) + for (size_t i = 0; i < required_result_columns.size(); ++i) { + const auto & name = required_result_columns[i]; if (remove_dups) required_columns_with_duplicate_count[name] = 1; else ++required_columns_with_duplicate_count[name]; + output_columns_positions.emplace(name, i); } } else if (remove_dups) @@ -455,49 +462,44 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const else return; - ASTs new_elements; - new_elements.reserve(elements.size()); + ASTs new_elements(elements.size() + output_columns_positions.size()); + size_t new_elements_size = 0; NameSet remove_columns; - /// Resort columns according to required_result_columns. - if (reorder_columns_as_required_header && !required_result_columns.empty()) - { - std::unordered_map name_pos; - { - size_t pos = 0; - for (const auto & name : required_result_columns) - name_pos[name] = pos++; - } - ::sort(elements.begin(), elements.end(), [&](const auto & lhs, const auto & rhs) - { - String lhs_name = lhs->getAliasOrColumnName(); - String rhs_name = rhs->getAliasOrColumnName(); - size_t lhs_pos = name_pos.size(); - size_t rhs_pos = name_pos.size(); - if (auto it = name_pos.find(lhs_name); it != name_pos.end()) - lhs_pos = it->second; - if (auto it = name_pos.find(rhs_name); it != name_pos.end()) - rhs_pos = it->second; - return lhs_pos < rhs_pos; - }); - } - for (const auto & elem : elements) { String name = elem->getAliasOrColumnName(); + /// Columns that are presented in output_columns_positions should + /// appears in the same order in the new_elements, hence default + /// result_index goes after all elements of output_columns_positions + /// (it is for columns that are not located in + /// output_columns_positions, i.e. untuple()) + size_t result_index = output_columns_positions.size() + new_elements_size; + + /// Note, order of duplicated columns is not important here (since they + /// are the same), only order for unique columns is important, so it is + /// fine to use multimap here. + if (auto it = output_columns_positions.find(name); it != output_columns_positions.end()) + { + result_index = it->second; + output_columns_positions.erase(it); + } + auto it = required_columns_with_duplicate_count.find(name); if (required_columns_with_duplicate_count.end() != it && it->second) { - new_elements.push_back(elem); + new_elements[result_index] = elem; --it->second; + ++new_elements_size; } else if (select_query->distinct || hasArrayJoin(elem)) { /// ARRAY JOIN cannot be optimized out since it may change number of rows, /// so as DISTINCT. - new_elements.push_back(elem); + new_elements[result_index] = elem; + ++new_elements_size; } else { @@ -508,13 +510,20 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const /// Never remove untuple. It's result column may be in required columns. /// It is not easy to analyze untuple here, because types were not calculated yet. if (func && func->name == "untuple") - new_elements.push_back(elem); - + { + new_elements[result_index] = elem; + ++new_elements_size; + } /// removing aggregation can change number of rows, so `count()` result in outer sub-query would be wrong if (func && AggregateUtils::isAggregateFunction(*func) && !select_query->groupBy()) - new_elements.push_back(elem); + { + new_elements[result_index] = elem; + ++new_elements_size; + } } } + /// Remove empty nodes. + std::erase(new_elements, ASTPtr{}); if (select_query->interpolate()) { @@ -1193,7 +1202,6 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( size_t subquery_depth = select_options.subquery_depth; bool remove_duplicates = select_options.remove_duplicates; - bool reorder_columns_as_required_header = select_options.reorder_columns_as_required_header; const auto & settings = getContext()->getSettingsRef(); @@ -1245,7 +1253,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) /// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations. - removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates, reorder_columns_as_required_header); + removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates); /// Executing scalar subqueries - replacing them with constant values. executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, result.local_scalars, select_options.only_analyze); diff --git a/tests/queries/0_stateless/00597_push_down_predicate_long.reference b/tests/queries/0_stateless/00597_push_down_predicate_long.reference index f6f1320c2f8..af4e9b5496e 100644 --- a/tests/queries/0_stateless/00597_push_down_predicate_long.reference +++ b/tests/queries/0_stateless/00597_push_down_predicate_long.reference @@ -402,8 +402,8 @@ FROM ANY LEFT JOIN ( SELECT - date, id, + date, name, value FROM test_00597 @@ -472,8 +472,8 @@ FROM ANY LEFT JOIN ( SELECT - date, id, + date, name, value FROM test_00597 @@ -537,10 +537,10 @@ FROM ANY LEFT JOIN ( SELECT - date, - id, name, - value + value, + date, + id FROM test_00597 ) AS b ON id = b.id WHERE id = 1 @@ -567,8 +567,8 @@ FROM SEMI LEFT JOIN ( SELECT - date, id, + date, name, value FROM diff --git a/tests/queries/0_stateless/02340_union_header.reference b/tests/queries/0_stateless/02340_union_header.reference new file mode 100644 index 00000000000..b7d6846023d --- /dev/null +++ b/tests/queries/0_stateless/02340_union_header.reference @@ -0,0 +1,7 @@ +-- { echo } +SELECT a, b, c FROM (SELECT 3 AS a, 2147483647 AS b, 1048575 AS c UNION ALL SELECT -2, NULL, -2) AS js1 ORDER BY a; +-2 \N -2 +3 2147483647 1048575 +SELECT a, b, c, d FROM (SELECT 3 AS a, 2147483647 AS b, 1048575 AS c UNION ALL SELECT -2, NULL, -2) AS js1 ALL LEFT JOIN (SELECT 100 AS a, -9223372036854775808 AS b, NULL AS d UNION ALL SELECT 256, 256, NULL) AS js2 USING (a, b) ORDER BY a DESC NULLS FIRST, '-0.02' ASC, b ASC NULLS FIRST, c DESC NULLS FIRST, 1048575 ASC NULLS LAST, d DESC SETTINGS enable_positional_arguments=0; +3 2147483647 1048575 \N +-2 \N -2 \N diff --git a/tests/queries/0_stateless/02340_union_header.sql b/tests/queries/0_stateless/02340_union_header.sql new file mode 100644 index 00000000000..3481d51d669 --- /dev/null +++ b/tests/queries/0_stateless/02340_union_header.sql @@ -0,0 +1,3 @@ +-- { echo } +SELECT a, b, c FROM (SELECT 3 AS a, 2147483647 AS b, 1048575 AS c UNION ALL SELECT -2, NULL, -2) AS js1 ORDER BY a; +SELECT a, b, c, d FROM (SELECT 3 AS a, 2147483647 AS b, 1048575 AS c UNION ALL SELECT -2, NULL, -2) AS js1 ALL LEFT JOIN (SELECT 100 AS a, -9223372036854775808 AS b, NULL AS d UNION ALL SELECT 256, 256, NULL) AS js2 USING (a, b) ORDER BY a DESC NULLS FIRST, '-0.02' ASC, b ASC NULLS FIRST, c DESC NULLS FIRST, 1048575 ASC NULLS LAST, d DESC SETTINGS enable_positional_arguments=0;