Merge pull request #27487 from ClickHouse/aku/kill-window

make it possible to cancel window functions on ctrl+c
This commit is contained in:
Alexander Kuzmenkov 2021-08-10 14:17:34 +03:00 committed by GitHub
commit 3f7b96e15b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 64 additions and 4 deletions

View File

@ -1166,6 +1166,23 @@ void WindowTransform::appendChunk(Chunk & chunk)
// Write out the aggregation results.
writeOutCurrentRow();
if (isCancelled())
{
// Good time to check if the query is cancelled. Checking once
// per block might not be enough in severe quadratic cases.
// Just leave the work halfway through and return, the 'prepare'
// method will figure out what to do. Note that this doesn't
// handle 'max_execution_time' and other limits, because these
// limits are only updated between blocks. Eventually we should
// start updating them in background and canceling the processor,
// like we do for Ctrl+C handling.
//
// This class is final, so the check should hopefully be
// devirtualized and become a single never-taken branch that is
// basically free.
return;
}
// Move to the next row. The frame will have to be recalculated.
// The peer group start is updated at the beginning of the loop,
// because current_row might now be past-the-end.
@ -1255,10 +1272,12 @@ IProcessor::Status WindowTransform::prepare()
// next_output_block_number, first_not_ready_row, first_block_number,
// blocks.size());
if (output.isFinished())
if (output.isFinished() || isCancelled())
{
// The consumer asked us not to continue (or we decided it ourselves),
// so we abort.
// so we abort. Not sure what the difference between the two conditions
// is, but it seemed that output.isFinished() is not enough to cancel on
// Ctrl+C. Test manually if you change it.
input.close();
return Status::Finished;
}

View File

@ -80,8 +80,10 @@ struct RowNumber
* the order of input data. This property also trivially holds for the ROWS and
* GROUPS frames. For the RANGE frame, the proof requires the additional fact
* that the ranges are specified in terms of (the single) ORDER BY column.
*
* `final` is so that the isCancelled() is devirtualized, we call it every row.
*/
class WindowTransform : public IProcessor /* public ISimpleTransform */
class WindowTransform final : public IProcessor
{
public:
WindowTransform(

View File

@ -647,7 +647,7 @@ def run_tests_array(all_tests_with_params):
failures_chain += 1
status += MSG_FAIL
status += print_test_time(total_time)
status += " - having exception:\n{}\n".format(
status += " - having exception in stdout:\n{}\n".format(
'\n'.join(stdout.split('\n')[:100]))
status += 'Database: ' + testcase_args.testcase_database
elif reference_file is None:

View File

@ -0,0 +1,3 @@
Started
Sent kill request
Exit 138

View File

@ -0,0 +1,36 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -e -o pipefail
# Run a test query that takes very long to run.
query_id="01572_kill_window_function-$CLICKHOUSE_DATABASE"
$CLICKHOUSE_CLIENT --query_id="$query_id" --query "SELECT count(1048575) OVER (PARTITION BY intDiv(NULL, number) ORDER BY number DESC NULLS FIRST ROWS BETWEEN CURRENT ROW AND 1048575 FOLLOWING) FROM numbers(255, 1048575)" >/dev/null 2>&1 &
client_pid=$!
echo Started
# Use one query to both kill the test query and verify that it has started,
# because if we try to kill it before it starts, the test will fail.
while [ -z "$($CLICKHOUSE_CLIENT --query "kill query where query_id = '$query_id' and current_database = currentDatabase()")" ]
do
# If we don't yet see the query in the process list, the client should still
# be running. The query is very long.
kill -0 -- $client_pid
sleep 1
done
echo Sent kill request
# Wait for the client to terminate.
client_exit_code=0
wait $client_pid || client_exit_code=$?
echo "Exit $client_exit_code"
# We have tested for Ctrl+C.
# The following client flags don't cancel, but should: --max_execution_time,
# --receive_timeout. Probably needs asynchonous calculation of query limits, as
# discussed with Nikolay on TG: https://t.me/c/1214350934/21492