Merge pull request #63684 from azat/tests/02340_parts_refcnt_mergetree

tests: attempt to fix 02340_parts_refcnt_mergetree flakiness
This commit is contained in:
Alexey Milovidov 2024-05-21 06:05:33 +02:00 committed by GitHub
commit fa57d71e0e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 36 additions and 16 deletions

View File

@ -1,2 +1,2 @@
data_02340 1_2_2_0 6
data_02340_rep 1_0_0_0 6
data_02340 1_2_2_0 1
data_02340_rep 1_0_0_0 1

View File

@ -9,40 +9,58 @@ function check_refcnt_for_table()
{
local table=$1 && shift
$CLICKHOUSE_CLIENT -q "system stop merges $table"
$CLICKHOUSE_CLIENT -nm -q "
system stop merges $table;
-- cleanup thread may hold the parts lock
system stop cleanup $table;
-- queue may hold the parts lock for awhile as well
system stop pulling replication log $table;
"
$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into $table select number, number%4 from numbers(200)"
local query_id
query_id="$table-$(random_str 10)"
SETTINGS="--format Null --max_threads 1 --max_block_size 1 --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.0"
local log_file
log_file=$(mktemp "$CUR_DIR/clickhouse-tests.XXXXXX.log")
local args=(
--format Null
--max_threads 1
--max_block_size 1
--merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.0
--query_id "$query_id"
--send_logs_level "test"
--server_logs_file "$log_file"
)
# Notes:
# - query may sleep 1*(200/4)=50 seconds maximum, it is enough to check system.parts
# - query may sleep 0.1*(200/4)=5 seconds maximum, it is enough to check system.parts
# - "part = 1" condition should prune all parts except first
# - max_block_size=1 with index_granularity=1 will allow to cancel the query earlier
$CLICKHOUSE_CLIENT $SETTINGS --query_id "$query_id" -q "select sleepEachRow(1) from $table where part = 1" &
$CLICKHOUSE_CLIENT "${args[@]}" -q "select sleepEachRow(0.1) from $table where part = 1" &
PID=$!
# wait for query to be started
while [ "$($CLICKHOUSE_CLIENT -q "select count() from system.processes where query_id = '$query_id'")" -ne 1 ]; do
sleep 0.1
done
# When the query only starts its execution it holds a reference for each part,
# however when it starts reading, partition pruning takes place,
# and it should hold only parts that are required for SELECT
#
# But to reach partition prune the function sleepEachRow() will be executed twice,
# so 2 seconds for sleepEachRow() and 3 seconds just to ensure that it enters the reading stage.
sleep $((2+3))
# So let's wait until the reading has started.
while ! grep -F -q -e "Exception" -e "MergeTreeRangeReader" "$log_file"; do
sleep 0.1
done
# NOTE: parts that are used in query will have refcount increased for each range
$CLICKHOUSE_CLIENT -q "select table, name, refcount from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount > 1"
# NOTE: parts that are used in the query will be held in multiple places, and
# this is where the magic 6 comes from. Also there could be some other
# background threads (i.e. asynchronous metrics) that use the part, so we
# simply filter parts not by "refcount > 1" but with some delta - "3", to
# avoid flakiness.
$CLICKHOUSE_CLIENT -q "select table, name, refcount>=6 from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount >= 3"
# Kill the query gracefully.
kill -INT $PID
wait $PID
grep -F Exception "$log_file" | grep -v -F QUERY_WAS_CANCELLED
rm -f "${log_file:?}"
}
# NOTE: index_granularity=1 to cancel ASAP
@ -52,11 +70,13 @@ $CLICKHOUSE_CLIENT -nmq "
create table data_02340 (key Int, part Int) engine=MergeTree() partition by part order by key settings index_granularity=1;
" || exit 1
check_refcnt_for_table data_02340
$CLICKHOUSE_CLIENT -q "drop table data_02340 sync"
$CLICKHOUSE_CLIENT -nmq "
drop table if exists data_02340_rep sync;
create table data_02340_rep (key Int, part Int) engine=ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') partition by part order by key settings index_granularity=1;
" || exit 1
check_refcnt_for_table data_02340_rep
$CLICKHOUSE_CLIENT -q "drop table data_02340_rep sync"
exit 0