mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-28 02:21:59 +00:00
Merge pull request #63684 from azat/tests/02340_parts_refcnt_mergetree
tests: attempt to fix 02340_parts_refcnt_mergetree flakiness
This commit is contained in:
commit
fa57d71e0e
@ -1,2 +1,2 @@
|
|||||||
data_02340 1_2_2_0 6
|
data_02340 1_2_2_0 1
|
||||||
data_02340_rep 1_0_0_0 6
|
data_02340_rep 1_0_0_0 1
|
||||||
|
@ -9,40 +9,58 @@ function check_refcnt_for_table()
|
|||||||
{
|
{
|
||||||
local table=$1 && shift
|
local table=$1 && shift
|
||||||
|
|
||||||
$CLICKHOUSE_CLIENT -q "system stop merges $table"
|
$CLICKHOUSE_CLIENT -nm -q "
|
||||||
|
system stop merges $table;
|
||||||
|
-- cleanup thread may hold the parts lock
|
||||||
|
system stop cleanup $table;
|
||||||
|
-- queue may hold the parts lock for a while as well
|
||||||
|
system stop pulling replication log $table;
|
||||||
|
"
|
||||||
$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into $table select number, number%4 from numbers(200)"
|
$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into $table select number, number%4 from numbers(200)"
|
||||||
|
|
||||||
local query_id
|
local query_id
|
||||||
query_id="$table-$(random_str 10)"
|
query_id="$table-$(random_str 10)"
|
||||||
|
|
||||||
SETTINGS="--format Null --max_threads 1 --max_block_size 1 --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.0"
|
local log_file
|
||||||
|
log_file=$(mktemp "$CUR_DIR/clickhouse-tests.XXXXXX.log")
|
||||||
|
local args=(
|
||||||
|
--format Null
|
||||||
|
--max_threads 1
|
||||||
|
--max_block_size 1
|
||||||
|
--merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.0
|
||||||
|
--query_id "$query_id"
|
||||||
|
--send_logs_level "test"
|
||||||
|
--server_logs_file "$log_file"
|
||||||
|
)
|
||||||
|
|
||||||
# Notes:
|
# Notes:
|
||||||
# - query may sleep 1*(200/4)=50 seconds maximum, it is enough to check system.parts
|
# - query may sleep 0.1*(200/4)=5 seconds maximum, it is enough to check system.parts
|
||||||
# - "part = 1" condition should prune all parts except first
|
# - "part = 1" condition should prune all parts except first
|
||||||
# - max_block_size=1 with index_granularity=1 will allow to cancel the query earlier
|
# - max_block_size=1 with index_granularity=1 will allow to cancel the query earlier
|
||||||
$CLICKHOUSE_CLIENT $SETTINGS --query_id "$query_id" -q "select sleepEachRow(1) from $table where part = 1" &
|
$CLICKHOUSE_CLIENT "${args[@]}" -q "select sleepEachRow(0.1) from $table where part = 1" &
|
||||||
PID=$!
|
PID=$!
|
||||||
|
|
||||||
# wait for query to be started
|
|
||||||
while [ "$($CLICKHOUSE_CLIENT -q "select count() from system.processes where query_id = '$query_id'")" -ne 1 ]; do
|
|
||||||
sleep 0.1
|
|
||||||
done
|
|
||||||
|
|
||||||
# When the query only starts its execution, it holds a reference for each part,
|
# When the query only starts its execution, it holds a reference for each part,
|
||||||
# however when it starts reading, partition pruning takes place,
|
# however when it starts reading, partition pruning takes place,
|
||||||
# and it should hold only parts that are required for SELECT
|
# and it should hold only parts that are required for SELECT
|
||||||
#
|
#
|
||||||
# But to reach partition prune the function sleepEachRow() will be executed twice,
|
# So let's wait while the reading will be started.
|
||||||
# so 2 seconds for sleepEachRow() and 3 seconds just to ensure that it enters the reading stage.
|
while ! grep -F -q -e "Exception" -e "MergeTreeRangeReader" "$log_file"; do
|
||||||
sleep $((2+3))
|
sleep 0.1
|
||||||
|
done
|
||||||
|
|
||||||
# NOTE: parts that are used in query will have refcount increased for each range
|
# NOTE: parts that are used in query will be held in multiple places, and
|
||||||
$CLICKHOUSE_CLIENT -q "select table, name, refcount from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount > 1"
|
# this is where magic 6 came from. Also there could be some other
|
||||||
|
# background threads (e.g. asynchronous metrics) that use the part, so we
|
||||||
|
# simply filter parts not by "refcount > 1" but with some delta - "3", to
|
||||||
|
# avoid flakiness.
|
||||||
|
$CLICKHOUSE_CLIENT -q "select table, name, refcount>=6 from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount >= 3"
|
||||||
|
|
||||||
# Kill the query gracefully.
|
# Kill the query gracefully.
|
||||||
kill -INT $PID
|
kill -INT $PID
|
||||||
wait $PID
|
wait $PID
|
||||||
|
grep -F Exception "$log_file" | grep -v -F QUERY_WAS_CANCELLED
|
||||||
|
rm -f "${log_file:?}"
|
||||||
}
|
}
|
||||||
|
|
||||||
# NOTE: index_granularity=1 to cancel ASAP
|
# NOTE: index_granularity=1 to cancel ASAP
|
||||||
@ -52,11 +70,13 @@ $CLICKHOUSE_CLIENT -nmq "
|
|||||||
create table data_02340 (key Int, part Int) engine=MergeTree() partition by part order by key settings index_granularity=1;
|
create table data_02340 (key Int, part Int) engine=MergeTree() partition by part order by key settings index_granularity=1;
|
||||||
" || exit 1
|
" || exit 1
|
||||||
check_refcnt_for_table data_02340
|
check_refcnt_for_table data_02340
|
||||||
|
$CLICKHOUSE_CLIENT -q "drop table data_02340 sync"
|
||||||
|
|
||||||
$CLICKHOUSE_CLIENT -nmq "
|
$CLICKHOUSE_CLIENT -nmq "
|
||||||
drop table if exists data_02340_rep sync;
|
drop table if exists data_02340_rep sync;
|
||||||
create table data_02340_rep (key Int, part Int) engine=ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') partition by part order by key settings index_granularity=1;
|
create table data_02340_rep (key Int, part Int) engine=ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') partition by part order by key settings index_granularity=1;
|
||||||
" || exit 1
|
" || exit 1
|
||||||
check_refcnt_for_table data_02340_rep
|
check_refcnt_for_table data_02340_rep
|
||||||
|
$CLICKHOUSE_CLIENT -q "drop table data_02340_rep sync"
|
||||||
|
|
||||||
exit 0
|
exit 0
|
||||||
|
Loading…
Reference in New Issue
Block a user