From f8aed1a2fa88ccb43bad57d7e231626d28b3299a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 12 Nov 2024 16:07:33 +0000 Subject: [PATCH] Backport #71432 to 24.8: Fix crash with optimize_rewrite_array_exists_to_has --- docker/test/stateless/attach_gdb.lib | 3 ++- src/Analyzer/JoinNode.cpp | 12 +++++++++--- .../ExecuteScalarSubqueriesVisitor.cpp | 12 ++++++++++++ src/Interpreters/QueryNormalizer.cpp | 6 ++++++ .../RewriteArrayExistsFunctionVisitor.cpp | 18 ++++++++++++------ .../PrometheusRemoteReadProtocol.cpp | 1 + ...rewrite_array_exists_to_has_crash.reference | 0 ...imize_rewrite_array_exists_to_has_crash.sql | 10 ++++++++++ 8 files changed, 52 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/03261_optimize_rewrite_array_exists_to_has_crash.reference create mode 100644 tests/queries/0_stateless/03261_optimize_rewrite_array_exists_to_has_crash.sql diff --git a/docker/test/stateless/attach_gdb.lib b/docker/test/stateless/attach_gdb.lib index 2f1375a2f0f..c46f072ed56 100644 --- a/docker/test/stateless/attach_gdb.lib +++ b/docker/test/stateless/attach_gdb.lib @@ -5,7 +5,8 @@ source /utils.lib function attach_gdb_to_clickhouse() { - IS_ASAN=$(clickhouse-client --query "SELECT count() FROM system.build_options WHERE name = 'CXX_FLAGS' AND position('sanitize=address' IN value)") + # Use retries to avoid failures due to fault injections + IS_ASAN=$(run_with_retry 5 clickhouse-client --query "SELECT count() FROM system.build_options WHERE name = 'CXX_FLAGS' AND position('sanitize=address' IN value)") if [[ "$IS_ASAN" = "1" ]]; then echo "ASAN build detected. Not using gdb since it disables LeakSanitizer detections" diff --git a/src/Analyzer/JoinNode.cpp b/src/Analyzer/JoinNode.cpp index 6cb33d80556..71f7cbf0760 100644 --- a/src/Analyzer/JoinNode.cpp +++ b/src/Analyzer/JoinNode.cpp @@ -45,10 +45,16 @@ ASTPtr JoinNode::toASTTableJoin() const { auto join_expression_ast = children[join_expression_child_index]->toAST(); - if (children[join_expression_child_index]->getNodeType() == QueryTreeNodeType::LIST) - join_ast->using_expression_list = std::move(join_expression_ast); + if (is_using_join_expression) + { + join_ast->using_expression_list = join_expression_ast; + join_ast->children.push_back(join_ast->using_expression_list); + } else - join_ast->on_expression = std::move(join_expression_ast); + { + join_ast->on_expression = join_expression_ast; + join_ast->children.push_back(join_ast->on_expression); + } } return join_ast; diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 1ca8c40460c..08be8758f61 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -60,6 +60,18 @@ bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr return false; } + if (auto * tables = node->as()) + { + /// Contrary to what's said in the code block above, ARRAY JOIN needs to resolve the subquery if possible + /// and assign an alias for 02367_optimize_trivial_count_with_array_join to pass. Otherwise it will fail in + /// ArrayJoinedColumnsVisitor (`No alias for non-trivial value in ARRAY JOIN: _a`) + /// This looks 100% as a incomplete code working on top of a bug, but this code has already been made obsolete + /// by the new analyzer, so it's an inconvenience we can live with until we deprecate it. + if (child == tables->array_join) + return true; + return false; + } + return true; } diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index a8639906aad..bba30fb5194 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -161,7 +161,13 @@ void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr & { auto & join = node.table_join->as(); if (join.on_expression) + { + ASTPtr original_on_expression = join.on_expression; visit(join.on_expression, data); + if (join.on_expression != original_on_expression) + join.children = { join.on_expression }; + } + } } diff --git a/src/Interpreters/RewriteArrayExistsFunctionVisitor.cpp b/src/Interpreters/RewriteArrayExistsFunctionVisitor.cpp index 64e03767c49..7d9a69c87b4 100644 --- a/src/Interpreters/RewriteArrayExistsFunctionVisitor.cpp +++ b/src/Interpreters/RewriteArrayExistsFunctionVisitor.cpp @@ -6,6 +6,12 @@ namespace DB { + +namespace ErrorCode +{ +extern const int LOGICAL_ERROR; +} + void RewriteArrayExistsFunctionMatcher::visit(ASTPtr & ast, Data & data) { if (auto * func = ast->as()) @@ -20,21 +26,21 @@ void RewriteArrayExistsFunctionMatcher::visit(ASTPtr & ast, Data & data) if (join->using_expression_list) { auto * it = std::find(join->children.begin(), join->children.end(), join->using_expression_list); + if (it == join->children.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not find join->using_expression_list in '{}'", join->formatForLogging()); visit(join->using_expression_list, data); - - if (it && *it != join->using_expression_list) - *it = join->using_expression_list; + *it = join->using_expression_list; } if (join->on_expression) { auto * it = std::find(join->children.begin(), join->children.end(), join->on_expression); + if (it == join->children.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not find join->on_expression in '{}'", join->formatForLogging()); visit(join->on_expression, data); - - if (it && *it != join->on_expression) - *it = join->on_expression; + *it = join->on_expression; } } } diff --git a/src/Storages/TimeSeries/PrometheusRemoteReadProtocol.cpp b/src/Storages/TimeSeries/PrometheusRemoteReadProtocol.cpp index d6d258f5ff6..e39f145f2dc 100644 --- a/src/Storages/TimeSeries/PrometheusRemoteReadProtocol.cpp +++ b/src/Storages/TimeSeries/PrometheusRemoteReadProtocol.cpp @@ -231,6 +231,7 @@ namespace table_join->strictness = JoinStrictness::Semi; table_join->on_expression = makeASTFunction("equals", makeASTColumn(data_table_id, TimeSeriesColumnNames::ID), makeASTColumn(tags_table_id, TimeSeriesColumnNames::ID)); + table_join->children.push_back(table_join->on_expression); table->table_join = table_join; auto table_exp = std::make_shared(); diff --git a/tests/queries/0_stateless/03261_optimize_rewrite_array_exists_to_has_crash.reference b/tests/queries/0_stateless/03261_optimize_rewrite_array_exists_to_has_crash.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03261_optimize_rewrite_array_exists_to_has_crash.sql b/tests/queries/0_stateless/03261_optimize_rewrite_array_exists_to_has_crash.sql new file mode 100644 index 00000000000..e0018632be4 --- /dev/null +++ b/tests/queries/0_stateless/03261_optimize_rewrite_array_exists_to_has_crash.sql @@ -0,0 +1,10 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/71382 +DROP TABLE IF EXISTS rewrite; +CREATE TABLE rewrite (c0 Int) ENGINE = Memory(); +SELECT 1 +FROM rewrite +INNER JOIN rewrite AS y ON ( + SELECT 1 +) +INNER JOIN rewrite AS z ON 1 +SETTINGS optimize_rewrite_array_exists_to_has=1;