From 2657e2b3ef2f98802fd2b8ebcd359fe756b709c6 Mon Sep 17 00:00:00 2001 From: Graham Campbell Date: Fri, 9 Aug 2024 11:08:41 +0100 Subject: [PATCH 1/5] Do not apply redundant sorting removal when there's an offset --- .../Optimizations/removeRedundantSorting.cpp | 8 ++-- .../02496_remove_redundant_sorting.reference | 37 +++++++++++++++++ .../02496_remove_redundant_sorting.sh | 22 ++++++++++ ...emove_redundant_sorting_analyzer.reference | 41 +++++++++++++++++++ 4 files changed, 105 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp index 7cac7bee6ec..f0094f0f8d2 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -59,9 +60,10 @@ public: if (typeid_cast(current_step) || typeid_cast(current_step) /// (1) if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable - || typeid_cast(current_step) /// (2) if ORDER BY is with FILL WITH, it is non-removable - || typeid_cast(current_step) /// (3) ORDER BY will change order of previous sorting - || typeid_cast(current_step)) /// (4) aggregation change order + || typeid_cast(current_step) /// (2) OFFSET on top of ORDER BY, the ORDER BY is non-removable + || typeid_cast(current_step) /// (3) if ORDER BY is with FILL WITH, it is non-removable + || typeid_cast(current_step) /// (4) ORDER BY will change order of previous sorting + || typeid_cast(current_step)) /// (5) aggregation change order { logStep("nodes_affect_order/push", current_node); nodes_affect_order.push_back(current_node); diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 77ef213b36d..a0a1fd60812 100644 --- 
a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -527,3 +527,40 @@ Expression (Projection) 2 4 1 3 0 2 +-- presence of an inner OFFSET retains the ORDER BY +-- query +WITH + t1 AS ( + SELECT SUM(a) AS a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + GROUP BY 2 + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression (Projection) + Expression (Before ORDER BY) + Aggregating + Expression (Before GROUP BY) + Offset + Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Expression (Projection) + Expression (Before ORDER BY) + Aggregating + Expression (Before GROUP BY) + ReadFromStorage (Values) +-- execute +0 diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh index 646e2501a99..d59b4387101 100755 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -329,3 +329,25 @@ FROM ORDER BY number DESC )" run_query "$query" + +echo "-- presence of an inner OFFSET retains the ORDER BY" +query="WITH + t1 AS ( + SELECT SUM(a) AS a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + GROUP BY 2 + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2" +run_query "$query" diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference index b6a2e3182df..58441de5f22 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference @@ -533,3 +533,44 @@ Expression (Project names) 2 4 1 3 0 2 +-- presence of an inner OFFSET retains the ORDER BY +-- query +WITH + 
t1 AS ( + SELECT SUM(a) AS a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + GROUP BY 2 + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression (Project names) + Expression (Projection) + Aggregating + Expression (Before GROUP BY) + Expression (Change column names to column identifiers) + Expression (Project names) + Offset + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Expression (Projection) + Expression (Change column names to column identifiers) + Expression (Project names) + Expression (Projection) + Aggregating + Expression (Before GROUP BY) + Expression (Change column names to column identifiers) + ReadFromStorage (Values) +-- execute +0 From 29afd2de785450f2e7f5faec1dc6b35e166cefb4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 11 Aug 2024 13:26:45 +0200 Subject: [PATCH 2/5] Remove "Processing configuration file" message from clickhouse-local Make the behaviour identical to the clickhouse-client Signed-off-by: Azat Khuzhin --- programs/local/LocalServer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 6b0b8fc5b50..200beea7b63 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -143,7 +143,7 @@ void LocalServer::initialize(Poco::Util::Application & self) if (fs::exists(config_path)) { - ConfigProcessor config_processor(config_path, false, true); + ConfigProcessor config_processor(config_path); ConfigProcessor::setConfigPath(fs::path(config_path).parent_path()); auto loaded_config = config_processor.loadConfig(); getClientConfiguration().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); From 4ef3fe416debecefcea4d7336aac7c679092cf0c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 11 Aug 2024 13:08:53 +0000 Subject: [PATCH 3/5] Fix and simplify test --- .../02496_remove_redundant_sorting.reference | 68 
++++++++---------- .../02496_remove_redundant_sorting.sh | 43 ++++++----- ...emove_redundant_sorting_analyzer.reference | 71 ++++++++----------- 3 files changed, 82 insertions(+), 100 deletions(-) diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index a0a1fd60812..4d004f2f78f 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -465,6 +465,37 @@ Expression ((Projection + Before ORDER BY)) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 +-- presence of an inner OFFSET retains the ORDER BY +-- query +WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression ((Projection + Before ORDER BY)) + Aggregating + Expression (Before GROUP BY) + Offset + Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromStorage (Values) +-- execute +0 -- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function -- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order -- query @@ -527,40 +558,3 @@ Expression (Projection) 2 4 1 3 0 2 --- presence of an inner OFFSET retains the ORDER BY --- query -WITH - t1 AS ( - SELECT SUM(a) AS a, b - FROM - VALUES ( - 'b UInt32, a Int32', - (1, 1), - (2, 0) - ) - GROUP BY 2 - ) -SELECT - SUM(a) -FROM ( - SELECT a, b - FROM t1 - ORDER BY 1 DESC, 2 - OFFSET 1 -) t2 --- explain -Expression (Projection) - Expression (Before ORDER BY) - Aggregating - Expression (Before GROUP BY) - Offset - Expression (Projection) - Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) - Expression 
(Projection) - Expression (Before ORDER BY) - Aggregating - Expression (Before GROUP BY) - ReadFromStorage (Values) --- execute -0 diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh index d59b4387101..c9bd242e429 100755 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -302,6 +302,27 @@ FROM )" run_query "$query" +echo "-- presence of an inner OFFSET retains the ORDER BY" +query="WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2" +run_query "$query" + echo "-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function" ENABLE_OPTIMIZATION="SET query_plan_enable_optimizations=0;$ENABLE_OPTIMIZATION" echo "-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order" @@ -329,25 +350,3 @@ FROM ORDER BY number DESC )" run_query "$query" - -echo "-- presence of an inner OFFSET retains the ORDER BY" -query="WITH - t1 AS ( - SELECT SUM(a) AS a, b - FROM - VALUES ( - 'b UInt32, a Int32', - (1, 1), - (2, 0) - ) - GROUP BY 2 - ) -SELECT - SUM(a) -FROM ( - SELECT a, b - FROM t1 - ORDER BY 1 DESC, 2 - OFFSET 1 -) t2" -run_query "$query" diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference index 58441de5f22..dd5ac7bf706 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference @@ -464,6 +464,36 @@ Expression ((Project names + Projection)) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 +-- presence of an inner OFFSET retains 
the ORDER BY +-- query +WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression ((Project names + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Offset + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))) + ReadFromStorage (Values) +-- execute +0 -- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function -- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order -- query @@ -533,44 +563,3 @@ Expression (Project names) 2 4 1 3 0 2 --- presence of an inner OFFSET retains the ORDER BY --- query -WITH - t1 AS ( - SELECT SUM(a) AS a, b - FROM - VALUES ( - 'b UInt32, a Int32', - (1, 1), - (2, 0) - ) - GROUP BY 2 - ) -SELECT - SUM(a) -FROM ( - SELECT a, b - FROM t1 - ORDER BY 1 DESC, 2 - OFFSET 1 -) t2 --- explain -Expression (Project names) - Expression (Projection) - Aggregating - Expression (Before GROUP BY) - Expression (Change column names to column identifiers) - Expression (Project names) - Offset - Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) - Expression (Projection) - Expression (Change column names to column identifiers) - Expression (Project names) - Expression (Projection) - Aggregating - Expression (Before GROUP BY) - Expression (Change column names to column identifiers) - ReadFromStorage (Values) --- execute -0 From d314e5aa45fb8ac91324721ab278185b09437a40 Mon Sep 17 00:00:00 2001 From: Vladimir Varankin Date: Sun, 11 Aug 2024 18:37:29 +0200 Subject: [PATCH 4/5] typos in prometheus.md --- docs/en/interfaces/prometheus.md | 8 ++++---- 1 file 
changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/interfaces/prometheus.md b/docs/en/interfaces/prometheus.md index bf541901b34..8e7023cc51f 100644 --- a/docs/en/interfaces/prometheus.md +++ b/docs/en/interfaces/prometheus.md @@ -75,7 +75,7 @@ Data are received by this protocol and written to a [TimeSeries](/en/engines/tab /write - remote_writeremote_write db_name time_series_table
@@ -105,7 +105,7 @@ Data are read from a [TimeSeries](/en/engines/table-engines/special/time_series) /read - remote_readremote_read db_name time_series_table
@@ -144,14 +144,14 @@ Multiple protocols can be specified together in one place: /write - remote_writeremote_write db_name.time_series_table
/read - remote_readremote_read db_name.time_series_table
From e384e2c38e405b1b4758adaa44cd321e6d7f41b3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 11 Aug 2024 18:34:33 +0200 Subject: [PATCH 5/5] tests: fix 02122_join_group_by_timeout flakiness CI found a failure of the test [1]: 2024-08-11 21:06:07 /usr/share/clickhouse-test/queries/0_stateless/02122_join_group_by_timeout.sh: line 51: 52614 Killed timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT -q "SELECT a.name as n And the problem is not the server, but the client, since the query executed for ~1 second: 2024.08.11 21:06:02.284318 [ 49232 ] {ba989ee2-f615-49ca-bcd8-31b3916aeb2c} executeQuery: (from [::1]:54144) (comment: 02122_join_group_by_timeout.sh) SELECT a.name as n FROM ( SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 ) AS a, ( SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 ) as b FORMAT Null SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' (stage: Complete) 2024.08.11 21:06:03.331249 [ 49232 ] {ba989ee2-f615-49ca-bcd8-31b3916aeb2c} executeQuery: Read 517104 rows, 3.95 MiB in 1.072023 sec., 482362.78512681165 rows/sec., 3.68 MiB/sec. [1]: https://s3.amazonaws.com/clickhouse-test-reports/67134/18da3f0ab63da1eef9396627d0dfd56cf5356f65/stateless_tests__msan__[1_4].html So instead of relying on the client-side timeout, let's check the query duration from system.query_log.
Signed-off-by: Azat Khuzhin --- .../02122_join_group_by_timeout.reference | 6 +- .../02122_join_group_by_timeout.sh | 70 ++++++++++--------- 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.reference b/tests/queries/0_stateless/02122_join_group_by_timeout.reference index f314e22e519..6500560e8fc 100644 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.reference +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.reference @@ -1,4 +1,6 @@ -Code: 159 -0 +Code: 159 +query_duration 1 +0 +query_duration 1 Code: 159 0 diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.sh b/tests/queries/0_stateless/02122_join_group_by_timeout.sh index 8380c5dbd0c..b4644878544 100755 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.sh +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.sh @@ -1,27 +1,23 @@ #!/usr/bin/env bash -# Tags: no-debug - -# no-debug: Query is canceled by timeout after max_execution_time, -# but sending an exception to the client may hang -# for more than MAX_PROCESS_WAIT seconds in a slow debug build, -# and test will fail. CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -MAX_PROCESS_WAIT=5 - -IS_SANITIZER=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.warnings WHERE message like '%built with sanitizer%'") -if [ "$IS_SANITIZER" -gt 0 ]; then - # Query may hang for more than 5 seconds, especially in tsan build - MAX_PROCESS_WAIT=15 +TIMEOUT=5 +IS_SANITIZER_OR_DEBUG=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.warnings WHERE message like '%built with sanitizer%' or message like '%built in debug mode%'") +if [ "$IS_SANITIZER_OR_DEBUG" -gt 0 ]; then + # Increase the timeout due to in debug/sanitizers build: + # - client is slow + # - stacktrace resolving is slow + TIMEOUT=15 fi # TCP CLIENT: As of today (02/12/21) uses PullingAsyncPipelineExecutor ### Should be cancelled after 1 second and return a 159 exception (timeout) -timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ - "SELECT * FROM +query_id=$(random_str 12) +$CLICKHOUSE_CLIENT --query_id "$query_id" --max_execution_time 1 -q " + SELECT * FROM ( SELECT a.name as n FROM @@ -34,28 +30,35 @@ timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ GROUP BY n ) LIMIT 20 - FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq + FORMAT Null +" 2>&1 | grep -m1 -o "Code: 159" +$CLICKHOUSE_CLIENT -q "system flush logs" +${CLICKHOUSE_CURL} -q -sS "$CLICKHOUSE_URL" -d "select 'query_duration', round(query_duration_ms/1000) from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and query_id = '$query_id' and type != 'QueryStart'" + ### Should stop pulling data and return what has been generated already (return code 0) -timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT -q \ - "SELECT a.name as n - FROM - ( - SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 - ) AS a, - ( - SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 - ) as b - FORMAT Null - SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' - " +query_id=$(random_str 12) 
+$CLICKHOUSE_CLIENT --query_id "$query_id" -q " + SELECT a.name as n + FROM + ( + SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 + ) AS a, + ( + SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 + ) as b + FORMAT Null + SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' +" echo $? +$CLICKHOUSE_CLIENT -q "system flush logs" +${CLICKHOUSE_CURL} -q -sS "$CLICKHOUSE_URL" -d "select 'query_duration', round(query_duration_ms/1000) from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and query_id = '$query_id' and type != 'QueryStart'" # HTTP CLIENT: As of today (02/12/21) uses PullingPipelineExecutor ### Should be cancelled after 1 second and return a 159 exception (timeout) -${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d \ - "SELECT * FROM +${CLICKHOUSE_CURL} -q --max-time $TIMEOUT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d " + SELECT * FROM ( SELECT a.name as n FROM @@ -68,12 +71,13 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_exec GROUP BY n ) LIMIT 20 - FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq + FORMAT Null +" 2>&1 | grep -o "Code: 159" | sort | uniq ### Should stop pulling data and return what has been generated already (return code 0) -${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \ - "SELECT a.name as n +${CLICKHOUSE_CURL} -q --max-time $TIMEOUT -sS "$CLICKHOUSE_URL" -d " + SELECT a.name as n FROM ( SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 @@ -83,5 +87,5 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \ ) as b FORMAT Null SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' - " +" echo $?