ClickHouse/tests/queries/0_stateless/01814_distributed_push_down_limit.sh

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

152 lines
5.9 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
2021-09-12 12:35:27 +00:00
# Tags: distributed
# shellcheck disable=SC2206
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# -- NOTE: this test cannot use 'current_database = $CLICKHOUSE_DATABASE',
# -- because it does not propagated via remote queries,
# -- hence it uses query_id/initial_query_id.
function setup()
{
$CLICKHOUSE_CLIENT -nm -q "
drop table if exists data_01814;
drop table if exists dist_01814;
create table data_01814 (key Int) Engine=MergeTree() order by key settings index_granularity=10 as select * from numbers(100);
create table dist_01814 as data_01814 engine=Distributed('test_cluster_two_shards', $CLICKHOUSE_DATABASE, data_01814, key);
"
}
function cleanup()
{
$CLICKHOUSE_CLIENT -nm -q "
drop table data_01814;
drop table dist_01814;
"
}
function make_query_id()
{
echo "$(tr -cd '[:lower:]' < /dev/urandom | head -c10)-$CLICKHOUSE_DATABASE"
}
function test_distributed_push_down_limit_with_query_log()
{
local table=$1 && shift
local offset=$1 && shift
local query_id
query_id="$(make_query_id)"
# NOTES:
# - max_rows_to_read_leaf cannot be used since it does not know anything
# about optimize_aggregation_in_order,
# - limit push down can be checked only with optimize_aggregation_in_order,
# since otherwise the query will be canceled too early, and read_rows will be
# small.
local settings_and_opts=(
--query_id "$query_id"
--max_block_size 20
--optimize_aggregation_in_order 1
--log_queries 1
--log_queries_min_type 'QUERY_FINISH'
# disable hedged requests to avoid excessive log entries
--use_hedged_requests 0
"$@"
)
$CLICKHOUSE_CLIENT "${settings_and_opts[@]}" -q "select * from $table group by key limit $offset, 10"
$CLICKHOUSE_CLIENT -nm -q "
system flush logs;
select read_rows from system.query_log
where
tests: update tests with event_time/event_date = today() to >= yesterday() This will fix failures like in [1], from query_log from artifacts: SELECT query, event_time FROM system.query_log WHERE (NOT is_initial_query) AND (query NOT LIKE '%system%query_log%') AND (query LIKE concat('WITH%', 'test_84qkvq', '%AS `id_no` %')) AND (type = 'QueryFinish') Query id: c5d70aba-b0aa-4f92-bdb3-29547b9aabb1 ┌─query──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬──────────event_time─┐ │ WITH _CAST('test_84qkvq', 'Nullable(String)') AS `id_no` SELECT `one`.`dummy`, ignore(`id_no`) FROM `system`.`one` WHERE `dummy` IN (0, 2) │ 2021-12-25 23:59:59 │ │ WITH _CAST('test_84qkvq', 'Nullable(String)') AS `id_no` SELECT `one`.`dummy`, ignore(`id_no`) FROM `system`.`one` WHERE `dummy` IN (0, 2) │ 2021-12-25 23:59:59 │ └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────────┘ 2 rows in set. Elapsed: 0.032 sec. [1]: https://s3.amazonaws.com/clickhouse-test-reports/33175/465a9bf615e1b233606460f956c09f71931c99a2/stateless_tests__debug__actions__[2/3].html
2021-12-26 13:23:21 +00:00
event_date >= yesterday()
and query_kind = 'Select' /* exclude DESC TABLE */
Fix 01814_distributed_push_down_limit flakiness Before it was possible that the query was already cancelled, and so you will get ExceptionWhileProcessing and QueryFinish: ┌─query───────────────────────────────────────────────────────────────────┬─query_id─────────────────────────────┬─read_rows─┬─type─────────────────────┐ │ SELECT `key` FROM `test_qvcjdo`.`data_01814` GROUP BY `key` LIMIT 0, 10 │ 2d39bfd6-f0e7-404a-9990-7703f7a4ec3a │ 40 │ QueryFinish │ │ SELECT `key` FROM `test_qvcjdo`.`data_01814` GROUP BY `key` LIMIT 0, 10 │ d930bf54-e965-42fd-9d48-5e54a588187a │ 40 │ QueryFinish │ │ SELECT `key` FROM `test_qvcjdo`.`data_01814` GROUP BY `key` LIMIT 0, 10 │ d930bf54-e965-42fd-9d48-5e54a588187a │ 40 │ ExceptionWhileProcessing │ └─────────────────────────────────────────────────────────────────────────┴──────────────────────────────────────┴───────────┴──────────────────────────┘ And so you will got more then two rows. CI: https://s3.amazonaws.com/clickhouse-test-reports/33660/a4e8e61d57eab14116983a340c65e5e2d7039ed5/stateless_tests__release__wide_parts_enabled__actions_.html
2022-01-15 12:04:24 +00:00
and type = 'QueryFinish'
and initial_query_id = '$query_id' and initial_query_id != query_id;
" | xargs # convert new lines to spaces
}
function test_distributed_push_down_limit_0()
{
local args=(
"remote('127.{2,3}', $CLICKHOUSE_DATABASE, data_01814)"
0 # offset
--distributed_push_down_limit 0
)
test_distributed_push_down_limit_with_query_log "${args[@]}" "$@"
}
function test_distributed_push_down_limit_1()
{
local args=(
"remote('127.{2,3}', $CLICKHOUSE_DATABASE, data_01814, key)"
0 # offset
--distributed_push_down_limit 1
--optimize_skip_unused_shards 1
--optimize_distributed_group_by_sharding_key 1
)
test_distributed_push_down_limit_with_query_log "${args[@]}"
}
function test_distributed_push_down_limit_1_offset()
{
local settings_and_opts=(
--distributed_push_down_limit 1
--optimize_skip_unused_shards 1
--optimize_distributed_group_by_sharding_key 1
)
$CLICKHOUSE_CLIENT "${settings_and_opts[@]}" -q "select * from remote('127.{2,3}', $CLICKHOUSE_DATABASE, data_01814, key) group by key order by key desc limit 5, 10"
}
function main()
{
setup
trap cleanup EXIT
echo 'distributed_push_down_limit=0'
test_distributed_push_down_limit_0 --format Null
#
# The following tests (tests with distributed_push_down_limit=1) requires
# retries, since the query may be canceled earlier due to LIMIT, and so
# only one shard will be processed, and it will get not 40 but 20 rows:
#
# 1.160920 [ 291 ] {7ac5de70-c26c-4e3b-bdee-3873ad1b84f1} <Debug> executeQuery: (from [::ffff:127.0.0.1]:42778, initial_query_id: 66cf643c-b1b4-4f7e-942a-c4c3493029f6, using production parser) (comment: /usr/share/clickhouse-test/queries/0_stateless/01814_distributed_push_down_limit.sql) WITH CAST('test_31uut9', 'String') AS id_distributed_push_down_limit_1 SELECT key FROM test_31uut9.data_01814 GROUP BY key LIMIT 10
# 1.214964 [ 291 ] {7ac5de70-c26c-4e3b-bdee-3873ad1b84f1} <Trace> ContextAccess (default): Access granted: SELECT(key) ON test_31uut9.data_01814
# 1.216790 [ 291 ] {7ac5de70-c26c-4e3b-bdee-3873ad1b84f1} <Debug> test_31uut9.data_01814 (b484ad2e-0591-4faf-8110-1dcbd7cdd0db) (SelectExecutor): Key condition: unknown
# 1.227245 [ 291 ] {7ac5de70-c26c-4e3b-bdee-3873ad1b84f1} <Debug> test_31uut9.data_01814 (b484ad2e-0591-4faf-8110-1dcbd7cdd0db) (SelectExecutor): Selected 1/1 parts by partition key, 1 parts by primary key, 10/11 marks by primary key, 10 marks to read from 1 ranges
# 1.228452 [ 291 ] {7ac5de70-c26c-4e3b-bdee-3873ad1b84f1} <Trace> MergeTreeSelectProcessor: Reading 3 ranges from part all_1_1_0, approx. 100 rows starting from 0
# 1.229104 [ 291 ] {7ac5de70-c26c-4e3b-bdee-3873ad1b84f1} <Trace> InterpreterSelectQuery: FetchColumns -> WithMergeableStateAfterAggregationAndLimit
# 1.339085 [ 291 ] {7ac5de70-c26c-4e3b-bdee-3873ad1b84f1} <Information> TCPHandler: Query was cancelled.
# 1.416573 [ 291 ] {7ac5de70-c26c-4e3b-bdee-3873ad1b84f1} <Information> executeQuery: Read 20 rows, 80.00 B in 0.254374666 sec., 78 rows/sec., 314.50 B/sec.
# 1.419006 [ 291 ] {7ac5de70-c26c-4e3b-bdee-3873ad1b84f1} <Debug> MemoryTracker: Peak memory usage (for query): 0.00 B.
#
local out out_lines max_tries=20
echo 'distributed_push_down_limit=1'
for ((i = 0; i < max_tries; ++i)); do
out=$(test_distributed_push_down_limit_1)
out_lines=( $out )
if [[ ${#out_lines[@]} -gt 2 ]] && [[ ${out_lines[-1]} = 40 ]] && [[ ${out_lines[-2]} = 40 ]]; then
break
fi
done
echo "$out"
echo 'distributed_push_down_limit=1 with OFFSET'
test_distributed_push_down_limit_1_offset
}
main "$@"