Fix converting types for UNION queries (may produce LOGICAL_ERROR)

CI founds [1]:

    2022.02.20 15:14:23.969247 [ 492 ] {} <Fatal> BaseDaemon: (version 22.3.1.1, build id: 6082C357CFA6FF99) (from thread 472) (query_id: a5187ff9-962a-4e7c-86f6-8d48850a47d6) (query: SELECT 0., round(avgWeighted(x, y)) FROM (SELECT toDate(toDate('214748364.8', '-922337203.6854775808', '-0.1', NULL) - NULL, 10.000100135803223, '-2147483647'), 255 AS x, -2147483647 AS y UNION ALL SELECT y, NULL AS x, 2147483646 AS y)) Received signal Aborted (6)

  [1]: https://s3.amazonaws.com/clickhouse-test-reports/0/26d0e5438c86e52a145aaaf4cb523c399989a878/fuzzer_astfuzzerdebug,actions//report.html

The problem is that subqueries returns different headers:
- first query  -- x, y
- second query -- y, x

v2: Make order of columns strict only for UNION
    https://s3.amazonaws.com/clickhouse-test-reports/34775/9cc8c01a463d18c471853568b2f0af659a4e643f/stateless_tests__address__actions__[2/2].html
    Fixes: 00597_push_down_predicate_long
v3: add no-backward-compatibility-check for the test
Fixes: #37569
Resubmit: #34775
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
(cherry picked from commit a813f5996e)
This commit is contained in:
Azat Khuzhin 2022-02-20 22:12:25 +03:00
parent 141334448e
commit 2613149f6b
5 changed files with 89 additions and 2 deletions

View File

@ -46,6 +46,10 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
if (!num_children)
throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);
/// This is required for UNION to match headers correctly.
if (num_children > 1)
options.reorderColumns();
/// Note that we pass 'required_result_column_names' to first SELECT.
/// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT,
/// because names could be different.

View File

@ -31,6 +31,8 @@ struct SelectQueryOptions
bool only_analyze = false;
bool modify_inplace = false;
bool remove_duplicates = false;
/// This is required for UNION to match headers correctly.
bool reorder_columns_as_required_header = false;
bool ignore_quota = false;
bool ignore_limits = false;
/// This flag is needed to analyze query ignoring table projections.
@ -97,6 +99,12 @@ struct SelectQueryOptions
return *this;
}
SelectQueryOptions & reorderColumns(bool value = true)
{
reorder_columns_as_required_header = value;
return *this;
}
SelectQueryOptions & noSubquery()
{
subquery_depth = 0;

View File

@ -422,7 +422,7 @@ void renameDuplicatedColumns(const ASTSelectQuery * select_query)
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
/// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are impossible.
/// Also remove all INTERPOLATE columns which are not in SELECT anymore.
void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups)
void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups, bool reorder_columns_as_required_header)
{
ASTs & elements = select_query->select()->children;
@ -453,6 +453,29 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const
NameSet remove_columns;
/// Resort columns according to required_result_columns.
if (reorder_columns_as_required_header && !required_result_columns.empty())
{
std::unordered_map<String, size_t> name_pos;
{
size_t pos = 0;
for (const auto & name : required_result_columns)
name_pos[name] = pos++;
}
std::sort(elements.begin(), elements.end(), [&](const auto & lhs, const auto & rhs)
{
String lhs_name = lhs->getAliasOrColumnName();
String rhs_name = rhs->getAliasOrColumnName();
size_t lhs_pos = name_pos.size();
size_t rhs_pos = name_pos.size();
if (auto it = name_pos.find(lhs_name); it != name_pos.end())
lhs_pos = it->second;
if (auto it = name_pos.find(rhs_name); it != name_pos.end())
rhs_pos = it->second;
return lhs_pos < rhs_pos;
});
}
for (const auto & elem : elements)
{
String name = elem->getAliasOrColumnName();
@ -465,6 +488,8 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const
}
else if (select_query->distinct || hasArrayJoin(elem))
{
/// ARRAY JOIN cannot be optimized out since it may change number of rows,
/// so as DISTINCT.
new_elements.push_back(elem);
}
else
@ -1135,6 +1160,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
size_t subquery_depth = select_options.subquery_depth;
bool remove_duplicates = select_options.remove_duplicates;
bool reorder_columns_as_required_header = select_options.reorder_columns_as_required_header;
const auto & settings = getContext()->getSettingsRef();
@ -1186,7 +1212,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates);
removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates, reorder_columns_as_required_header);
/// Executing scalar subqueries - replacing them with constant values.
executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, result.local_scalars, select_options.only_analyze);

View File

@ -0,0 +1,44 @@
-- { echo }
EXPLAIN header = 1, optimize = 0 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y);
Expression (Projection)
Header: avgWeighted(x, y) Nullable(Float64)
Expression (Before ORDER BY)
Header: avgWeighted(x, y) Nullable(Float64)
Aggregating
Header: avgWeighted(x, y) Nullable(Float64)
Expression (Before GROUP BY)
Header: x Nullable(UInt8)
y UInt8
Union
Header: x Nullable(UInt8)
y UInt8
Expression (Conversion before UNION)
Header: x Nullable(UInt8)
y UInt8
Expression (Projection)
Header: x UInt8
y UInt8
Expression (Before ORDER BY)
Header: 255 UInt8
1 UInt8
dummy UInt8
SettingQuotaAndLimits (Set limits and quota after reading from storage)
Header: dummy UInt8
ReadFromStorage (SystemOne)
Header: dummy UInt8
Expression (Conversion before UNION)
Header: x Nullable(UInt8)
y UInt8
Expression (Projection)
Header: x Nullable(Nothing)
y UInt8
Expression (Before ORDER BY)
Header: NULL Nullable(Nothing)
1 UInt8
dummy UInt8
SettingQuotaAndLimits (Set limits and quota after reading from storage)
Header: dummy UInt8
ReadFromStorage (SystemOne)
Header: dummy UInt8
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y);
255

View File

@ -0,0 +1,5 @@
-- Tags: no-backward-compatibility-check:22.5.1.2079
-- { echo }
EXPLAIN header = 1, optimize = 0 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y);
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y);