mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge pull request #37887 from azat/union-fix
Fix incorrect columns order in subqueries of UNION
This commit is contained in:
commit
4f7967ab9e
@ -52,10 +52,6 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
|
||||
if (!num_children)
|
||||
throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
/// This is required for UNION to match headers correctly.
|
||||
if (num_children > 1)
|
||||
options.reorderColumns();
|
||||
|
||||
/// Note that we pass 'required_result_column_names' to first SELECT.
|
||||
/// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT,
|
||||
/// because names could be different.
|
||||
@ -163,7 +159,22 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
|
||||
{
|
||||
Blocks headers(num_children);
|
||||
for (size_t query_num = 0; query_num < num_children; ++query_num)
|
||||
{
|
||||
headers[query_num] = nested_interpreters[query_num]->getSampleBlock();
|
||||
const auto & current_required_result_column_names = required_result_column_names_for_other_selects[query_num];
|
||||
if (!current_required_result_column_names.empty())
|
||||
{
|
||||
const auto & header_columns = headers[query_num].getNames();
|
||||
if (current_required_result_column_names != header_columns)
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Different order of columns in UNION subquery: {} and {}",
|
||||
fmt::join(current_required_result_column_names, ", "),
|
||||
fmt::join(header_columns, ", "));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
result_header = getCommonHeaderForUnion(headers);
|
||||
}
|
||||
|
@ -31,8 +31,6 @@ struct SelectQueryOptions
|
||||
bool only_analyze = false;
|
||||
bool modify_inplace = false;
|
||||
bool remove_duplicates = false;
|
||||
/// This is required for UNION to match headers correctly.
|
||||
bool reorder_columns_as_required_header = false;
|
||||
bool ignore_quota = false;
|
||||
bool ignore_limits = false;
|
||||
/// This flag is needed to analyze query ignoring table projections.
|
||||
@ -99,12 +97,6 @@ struct SelectQueryOptions
|
||||
return *this;
|
||||
}
|
||||
|
||||
SelectQueryOptions & reorderColumns(bool value = true)
|
||||
{
|
||||
reorder_columns_as_required_header = value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
SelectQueryOptions & noSubquery()
|
||||
{
|
||||
subquery_depth = 0;
|
||||
|
@ -429,21 +429,28 @@ void renameDuplicatedColumns(const ASTSelectQuery * select_query)
|
||||
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
|
||||
/// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are impossible.
|
||||
/// Also remove all INTERPOLATE columns which are not in SELECT anymore.
|
||||
void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups, bool reorder_columns_as_required_header)
|
||||
void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups)
|
||||
{
|
||||
ASTs & elements = select_query->select()->children;
|
||||
|
||||
std::map<String, size_t> required_columns_with_duplicate_count;
|
||||
std::unordered_map<String, size_t> required_columns_with_duplicate_count;
|
||||
/// Order of output columns should match order in required_result_columns,
|
||||
/// otherwise UNION queries may have incorrect header when subselect has duplicated columns.
|
||||
///
|
||||
/// NOTE: multimap is required since there can be duplicated column names.
|
||||
std::unordered_multimap<String, size_t> output_columns_positions;
|
||||
|
||||
if (!required_result_columns.empty())
|
||||
{
|
||||
/// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
|
||||
for (const auto & name : required_result_columns)
|
||||
for (size_t i = 0; i < required_result_columns.size(); ++i)
|
||||
{
|
||||
const auto & name = required_result_columns[i];
|
||||
if (remove_dups)
|
||||
required_columns_with_duplicate_count[name] = 1;
|
||||
else
|
||||
++required_columns_with_duplicate_count[name];
|
||||
output_columns_positions.emplace(name, i);
|
||||
}
|
||||
}
|
||||
else if (remove_dups)
|
||||
@ -455,49 +462,44 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const
|
||||
else
|
||||
return;
|
||||
|
||||
ASTs new_elements;
|
||||
new_elements.reserve(elements.size());
|
||||
ASTs new_elements(elements.size() + output_columns_positions.size());
|
||||
size_t new_elements_size = 0;
|
||||
|
||||
NameSet remove_columns;
|
||||
|
||||
/// Resort columns according to required_result_columns.
|
||||
if (reorder_columns_as_required_header && !required_result_columns.empty())
|
||||
{
|
||||
std::unordered_map<String, size_t> name_pos;
|
||||
{
|
||||
size_t pos = 0;
|
||||
for (const auto & name : required_result_columns)
|
||||
name_pos[name] = pos++;
|
||||
}
|
||||
::sort(elements.begin(), elements.end(), [&](const auto & lhs, const auto & rhs)
|
||||
{
|
||||
String lhs_name = lhs->getAliasOrColumnName();
|
||||
String rhs_name = rhs->getAliasOrColumnName();
|
||||
size_t lhs_pos = name_pos.size();
|
||||
size_t rhs_pos = name_pos.size();
|
||||
if (auto it = name_pos.find(lhs_name); it != name_pos.end())
|
||||
lhs_pos = it->second;
|
||||
if (auto it = name_pos.find(rhs_name); it != name_pos.end())
|
||||
rhs_pos = it->second;
|
||||
return lhs_pos < rhs_pos;
|
||||
});
|
||||
}
|
||||
|
||||
for (const auto & elem : elements)
|
||||
{
|
||||
String name = elem->getAliasOrColumnName();
|
||||
|
||||
/// Columns that are presented in output_columns_positions should
|
||||
/// appears in the same order in the new_elements, hence default
|
||||
/// result_index goes after all elements of output_columns_positions
|
||||
/// (it is for columns that are not located in
|
||||
/// output_columns_positions, i.e. untuple())
|
||||
size_t result_index = output_columns_positions.size() + new_elements_size;
|
||||
|
||||
/// Note, order of duplicated columns is not important here (since they
|
||||
/// are the same), only order for unique columns is important, so it is
|
||||
/// fine to use multimap here.
|
||||
if (auto it = output_columns_positions.find(name); it != output_columns_positions.end())
|
||||
{
|
||||
result_index = it->second;
|
||||
output_columns_positions.erase(it);
|
||||
}
|
||||
|
||||
auto it = required_columns_with_duplicate_count.find(name);
|
||||
if (required_columns_with_duplicate_count.end() != it && it->second)
|
||||
{
|
||||
new_elements.push_back(elem);
|
||||
new_elements[result_index] = elem;
|
||||
--it->second;
|
||||
++new_elements_size;
|
||||
}
|
||||
else if (select_query->distinct || hasArrayJoin(elem))
|
||||
{
|
||||
/// ARRAY JOIN cannot be optimized out since it may change number of rows,
|
||||
/// so as DISTINCT.
|
||||
new_elements.push_back(elem);
|
||||
new_elements[result_index] = elem;
|
||||
++new_elements_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -508,13 +510,20 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const
|
||||
/// Never remove untuple. It's result column may be in required columns.
|
||||
/// It is not easy to analyze untuple here, because types were not calculated yet.
|
||||
if (func && func->name == "untuple")
|
||||
new_elements.push_back(elem);
|
||||
|
||||
{
|
||||
new_elements[result_index] = elem;
|
||||
++new_elements_size;
|
||||
}
|
||||
/// removing aggregation can change number of rows, so `count()` result in outer sub-query would be wrong
|
||||
if (func && AggregateUtils::isAggregateFunction(*func) && !select_query->groupBy())
|
||||
new_elements.push_back(elem);
|
||||
{
|
||||
new_elements[result_index] = elem;
|
||||
++new_elements_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Remove empty nodes.
|
||||
std::erase(new_elements, ASTPtr{});
|
||||
|
||||
if (select_query->interpolate())
|
||||
{
|
||||
@ -1193,7 +1202,6 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
||||
|
||||
size_t subquery_depth = select_options.subquery_depth;
|
||||
bool remove_duplicates = select_options.remove_duplicates;
|
||||
bool reorder_columns_as_required_header = select_options.reorder_columns_as_required_header;
|
||||
|
||||
const auto & settings = getContext()->getSettingsRef();
|
||||
|
||||
@ -1245,7 +1253,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
||||
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
|
||||
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)
|
||||
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
|
||||
removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates, reorder_columns_as_required_header);
|
||||
removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates);
|
||||
|
||||
/// Executing scalar subqueries - replacing them with constant values.
|
||||
executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, result.local_scalars, select_options.only_analyze);
|
||||
|
@ -402,8 +402,8 @@ FROM
|
||||
ANY LEFT JOIN
|
||||
(
|
||||
SELECT
|
||||
date,
|
||||
id,
|
||||
date,
|
||||
name,
|
||||
value
|
||||
FROM test_00597
|
||||
@ -472,8 +472,8 @@ FROM
|
||||
ANY LEFT JOIN
|
||||
(
|
||||
SELECT
|
||||
date,
|
||||
id,
|
||||
date,
|
||||
name,
|
||||
value
|
||||
FROM test_00597
|
||||
@ -537,10 +537,10 @@ FROM
|
||||
ANY LEFT JOIN
|
||||
(
|
||||
SELECT
|
||||
date,
|
||||
id,
|
||||
name,
|
||||
value
|
||||
value,
|
||||
date,
|
||||
id
|
||||
FROM test_00597
|
||||
) AS b ON id = b.id
|
||||
WHERE id = 1
|
||||
@ -567,8 +567,8 @@ FROM
|
||||
SEMI LEFT JOIN
|
||||
(
|
||||
SELECT
|
||||
date,
|
||||
id,
|
||||
date,
|
||||
name,
|
||||
value
|
||||
FROM
|
||||
|
7
tests/queries/0_stateless/02340_union_header.reference
Normal file
7
tests/queries/0_stateless/02340_union_header.reference
Normal file
@ -0,0 +1,7 @@
|
||||
-- { echo }
|
||||
SELECT a, b, c FROM (SELECT 3 AS a, 2147483647 AS b, 1048575 AS c UNION ALL SELECT -2, NULL, -2) AS js1 ORDER BY a;
|
||||
-2 \N -2
|
||||
3 2147483647 1048575
|
||||
SELECT a, b, c, d FROM (SELECT 3 AS a, 2147483647 AS b, 1048575 AS c UNION ALL SELECT -2, NULL, -2) AS js1 ALL LEFT JOIN (SELECT 100 AS a, -9223372036854775808 AS b, NULL AS d UNION ALL SELECT 256, 256, NULL) AS js2 USING (a, b) ORDER BY a DESC NULLS FIRST, '-0.02' ASC, b ASC NULLS FIRST, c DESC NULLS FIRST, 1048575 ASC NULLS LAST, d DESC SETTINGS enable_positional_arguments=0;
|
||||
3 2147483647 1048575 \N
|
||||
-2 \N -2 \N
|
3
tests/queries/0_stateless/02340_union_header.sql
Normal file
3
tests/queries/0_stateless/02340_union_header.sql
Normal file
@ -0,0 +1,3 @@
|
||||
-- { echo }
|
||||
SELECT a, b, c FROM (SELECT 3 AS a, 2147483647 AS b, 1048575 AS c UNION ALL SELECT -2, NULL, -2) AS js1 ORDER BY a;
|
||||
SELECT a, b, c, d FROM (SELECT 3 AS a, 2147483647 AS b, 1048575 AS c UNION ALL SELECT -2, NULL, -2) AS js1 ALL LEFT JOIN (SELECT 100 AS a, -9223372036854775808 AS b, NULL AS d UNION ALL SELECT 256, 256, NULL) AS js2 USING (a, b) ORDER BY a DESC NULLS FIRST, '-0.02' ASC, b ASC NULLS FIRST, c DESC NULLS FIRST, 1048575 ASC NULLS LAST, d DESC SETTINGS enable_positional_arguments=0;
|
Loading…
Reference in New Issue
Block a user