mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Backport #61869 to 24.3: Fix flaky tests 2 (stateless, integration)
This commit is contained in:
parent
2a929eaa7d
commit
969cc4e835
3
.gitignore
vendored
3
.gitignore
vendored
@ -164,6 +164,9 @@ tests/queries/0_stateless/*.generated-expect
|
||||
tests/queries/0_stateless/*.expect.history
|
||||
tests/integration/**/_gen
|
||||
|
||||
# pytest --pdb history
|
||||
.pdb_history
|
||||
|
||||
# rust
|
||||
/rust/**/target*
|
||||
# It is autogenerated from *.in
|
||||
|
@ -13,6 +13,34 @@ from helpers.network import _NetworkManager
|
||||
logging.raiseExceptions = False
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
def pdb_history(request):
    """
    Keep the pdb command history across test runs.

    When pytest is started with ``--pdb``, the readline history is read from
    ``.pdb_history`` before the session (a missing file is ignored) and
    written back to the same file once the session is over. Without
    ``--pdb`` the fixture does nothing.
    """
    if not request.config.getoption("--pdb"):
        # Debugger was not requested: nothing to load or save.
        yield
        return

    # Imported lazily so that they are only loaded when --pdb is in use.
    import readline  # pylint:disable=import-outside-toplevel
    import pdb  # pylint:disable=import-outside-toplevel

    try:
        readline.read_history_file(".pdb_history")
    except FileNotFoundError:
        # No history has been saved yet.
        pass

    pdb.Pdb.use_rawinput = True

    yield

    readline.write_history_file(".pdb_history")
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True, scope="session")
|
||||
def tune_local_port_range():
|
||||
# Lots of services use non-privileged ports:
|
||||
|
@ -862,12 +862,12 @@ class ClickHouseCluster:
|
||||
|
||||
def get_docker_handle(self, docker_id):
|
||||
exception = None
|
||||
for i in range(5):
|
||||
for i in range(20):
|
||||
try:
|
||||
return self.docker_client.containers.get(docker_id)
|
||||
except Exception as ex:
|
||||
print("Got exception getting docker handle", str(ex))
|
||||
time.sleep(i * 2)
|
||||
time.sleep(0.5)
|
||||
exception = ex
|
||||
raise exception
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
from time import sleep
|
||||
import pytest
|
||||
import re
|
||||
import os.path
|
||||
@ -164,8 +163,15 @@ def test_replicated_database():
|
||||
node2.query("INSERT INTO mydb.tbl VALUES (2, 'count')")
|
||||
node1.query("INSERT INTO mydb.tbl VALUES (3, 'your')")
|
||||
node2.query("INSERT INTO mydb.tbl VALUES (4, 'chickens')")
|
||||
node1.query("OPTIMIZE TABLE mydb.tbl ON CLUSTER 'cluster' FINAL")
|
||||
|
||||
node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' mydb.tbl")
|
||||
|
||||
# check data in sync
|
||||
expect = TSV([[1, "Don\\'t"], [2, "count"], [3, "your"], [4, "chickens"]])
|
||||
assert node1.query("SELECT * FROM mydb.tbl ORDER BY x") == expect
|
||||
assert node2.query("SELECT * FROM mydb.tbl ORDER BY x") == expect
|
||||
|
||||
# Make backup.
|
||||
backup_name = new_backup_name()
|
||||
node1.query(
|
||||
@ -179,14 +185,63 @@ def test_replicated_database():
|
||||
node1.query(f"RESTORE DATABASE mydb ON CLUSTER 'cluster' FROM {backup_name}")
|
||||
node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' mydb.tbl")
|
||||
|
||||
assert node1.query("SELECT * FROM mydb.tbl ORDER BY x") == TSV(
|
||||
[[1, "Don\\'t"], [2, "count"], [3, "your"], [4, "chickens"]]
|
||||
assert node1.query("SELECT * FROM mydb.tbl ORDER BY x") == expect
|
||||
assert node2.query("SELECT * FROM mydb.tbl ORDER BY x") == expect
|
||||
|
||||
|
||||
def test_replicated_database_compare_parts():
|
||||
"""
|
||||
stop merges and fetches then write data to two nodes and
|
||||
compare that parts are restored from single node (second) after backup
|
||||
replica is selected by settings replica_num=2, replica_num_in_backup=2
|
||||
"""
|
||||
node1.query(
|
||||
"CREATE DATABASE mydb ON CLUSTER 'cluster' ENGINE=Replicated('/clickhouse/path/','{shard}','{replica}')"
|
||||
)
|
||||
|
||||
assert node2.query("SELECT * FROM mydb.tbl ORDER BY x") == TSV(
|
||||
[[1, "Don\\'t"], [2, "count"], [3, "your"], [4, "chickens"]]
|
||||
node1.query(
|
||||
"CREATE TABLE mydb.tbl(x UInt8, y String) ENGINE=ReplicatedMergeTree ORDER BY x"
|
||||
)
|
||||
|
||||
node2.query("SYSTEM SYNC DATABASE REPLICA mydb")
|
||||
|
||||
node1.query("SYSTEM STOP MERGES mydb.tbl")
|
||||
node2.query("SYSTEM STOP MERGES mydb.tbl")
|
||||
|
||||
node1.query("SYSTEM STOP FETCHES mydb.tbl")
|
||||
node2.query("SYSTEM STOP FETCHES mydb.tbl")
|
||||
|
||||
node1.query("INSERT INTO mydb.tbl VALUES (1, 'a')")
|
||||
node1.query("INSERT INTO mydb.tbl VALUES (2, 'b')")
|
||||
|
||||
node2.query("INSERT INTO mydb.tbl VALUES (3, 'x')")
|
||||
node2.query("INSERT INTO mydb.tbl VALUES (4, 'y')")
|
||||
|
||||
p2 = node2.query("SELECT * FROM mydb.tbl ORDER BY x")
|
||||
|
||||
# Make backup.
|
||||
backup_name = new_backup_name()
|
||||
node1.query(
|
||||
f"BACKUP DATABASE mydb ON CLUSTER 'cluster' TO {backup_name} SETTINGS replica_num=2"
|
||||
)
|
||||
|
||||
# Drop database on both nodes.
|
||||
node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' SYNC")
|
||||
|
||||
# Restore from backup on node2.
|
||||
node1.query(
|
||||
f"RESTORE DATABASE mydb ON CLUSTER 'cluster' FROM {backup_name} SETTINGS replica_num_in_backup=2"
|
||||
)
|
||||
node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' mydb.tbl")
|
||||
|
||||
# compare parts
|
||||
p1_ = node1.query("SELECT _part, * FROM mydb.tbl ORDER BY x")
|
||||
p2_ = node2.query("SELECT _part, * FROM mydb.tbl ORDER BY x")
|
||||
assert p1_ == p2_
|
||||
|
||||
# compare data
|
||||
assert p2 == node2.query("SELECT * FROM mydb.tbl ORDER BY x")
|
||||
|
||||
|
||||
def test_different_tables_on_nodes():
|
||||
node1.query(
|
||||
@ -427,7 +482,12 @@ def test_replicated_database_async():
|
||||
node1.query("INSERT INTO mydb.tbl VALUES (22)")
|
||||
node2.query("INSERT INTO mydb.tbl2 VALUES ('a')")
|
||||
node2.query("INSERT INTO mydb.tbl2 VALUES ('bb')")
|
||||
|
||||
node1.query("OPTIMIZE TABLE mydb.tbl ON CLUSTER 'cluster' FINAL")
|
||||
node1.query("OPTIMIZE TABLE mydb.tbl2 ON CLUSTER 'cluster' FINAL")
|
||||
|
||||
node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' mydb.tbl")
|
||||
node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' mydb.tbl2")
|
||||
|
||||
backup_name = new_backup_name()
|
||||
[id, status] = node1.query(
|
||||
|
@ -37,6 +37,16 @@ def get_status(dictionary_name):
|
||||
).rstrip("\n")
|
||||
|
||||
|
||||
def get_status_retry(dictionary_name, expect, retry_count=10, sleep_time=0.5):
    """
    Poll get_status() until it returns the expected value.

    Makes up to `retry_count` attempts and sleeps `sleep_time` seconds after
    every attempt that did not match. Returns the matching status; raises
    Exception if none of the attempts produced `expect`.
    """
    for _attempt in range(retry_count):
        status = get_status(dictionary_name)
        if status != expect:
            # Not there yet: wait before asking again.
            time.sleep(sleep_time)
            continue
        return status

    raise Exception(f'Expected result "{expect}" did not occur')
|
||||
|
||||
|
||||
def get_last_exception(dictionary_name):
|
||||
return (
|
||||
instance.query(
|
||||
@ -250,6 +260,15 @@ def test_reload_after_fail_by_timer(started_cluster):
|
||||
assert expected_error in instance.query_and_get_error(
|
||||
"SELECT dictGetInt32('no_file_2', 'a', toUInt64(9))"
|
||||
)
|
||||
|
||||
# on sanitizer builds it can return 'FAILED_AND_RELOADING', which is not quite right
|
||||
# add retry for these builds
|
||||
if (
|
||||
instance.is_built_with_sanitizer()
|
||||
and get_status("no_file_2") == "FAILED_AND_RELOADING"
|
||||
):
|
||||
get_status_retry("no_file_2", expect="FAILED")
|
||||
|
||||
assert get_status("no_file_2") == "FAILED"
|
||||
|
||||
# Creating the file source makes the dictionary able to load.
|
||||
|
@ -1,4 +1,4 @@
|
||||
<clickhouse>
|
||||
<max_server_memory_usage>2000000000</max_server_memory_usage>
|
||||
<max_server_memory_usage>1500000000</max_server_memory_usage>
|
||||
<allow_use_jemalloc_memory>false</allow_use_jemalloc_memory>
|
||||
</clickhouse>
|
@ -22,7 +22,7 @@ def start_cluster():
|
||||
cluster.shutdown()
|
||||
|
||||
|
||||
GLOBAL_TEST_QUERY_A = "SELECT groupArray(number) FROM numbers(2500000) SETTINGS memory_overcommit_ratio_denominator_for_user=1"
|
||||
GLOBAL_TEST_QUERY_A = "SELECT groupArray(number) FROM numbers(5000000) SETTINGS memory_overcommit_ratio_denominator_for_user=1"
|
||||
GLOBAL_TEST_QUERY_B = "SELECT groupArray(number) FROM numbers(2500000) SETTINGS memory_overcommit_ratio_denominator_for_user=80000000"
|
||||
|
||||
|
||||
@ -42,10 +42,8 @@ def test_global_overcommit():
|
||||
|
||||
responses_A = list()
|
||||
responses_B = list()
|
||||
for i in range(100):
|
||||
if i % 2 == 0:
|
||||
for i in range(50):
|
||||
responses_A.append(node.get_query_request(GLOBAL_TEST_QUERY_A, user="A"))
|
||||
else:
|
||||
responses_B.append(node.get_query_request(GLOBAL_TEST_QUERY_B, user="B"))
|
||||
|
||||
overcommited_killed = False
|
||||
|
@ -17,6 +17,12 @@ def start_cluster():
|
||||
|
||||
|
||||
def test_memory_limit_success():
|
||||
if node.is_built_with_thread_sanitizer():
|
||||
pytest.skip(
|
||||
"tsan build is skipped because it slowly merges the parts, "
|
||||
"rather than failing over the memory limit"
|
||||
)
|
||||
|
||||
node.query(
|
||||
"CREATE TABLE test_merge_oom ENGINE=AggregatingMergeTree ORDER BY id EMPTY AS SELECT number%1024 AS id, arrayReduce('groupArrayState', arrayMap(x-> randomPrintableASCII(100), range(8192))) fat_state FROM numbers(20000)"
|
||||
)
|
||||
|
@ -1,7 +1,6 @@
|
||||
import os
|
||||
import pytest
|
||||
import shutil
|
||||
import time
|
||||
import pytest
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
|
||||
# Tests that sizes of in-memory caches (mark / uncompressed / index mark / index uncompressed / mmapped file / query cache) can be changed
|
||||
@ -101,9 +100,10 @@ def test_query_cache_size_is_runtime_configurable(start_cluster):
|
||||
node.query("SELECT 2 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
|
||||
node.query("SELECT 3 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
|
||||
|
||||
res = node.query_with_retry(
|
||||
time.sleep(2)
|
||||
node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS")
|
||||
res = node.query(
|
||||
"SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'",
|
||||
check_callback=lambda result: result == "2\n",
|
||||
)
|
||||
assert res == "2\n"
|
||||
|
||||
@ -116,9 +116,10 @@ def test_query_cache_size_is_runtime_configurable(start_cluster):
|
||||
node.query("SYSTEM RELOAD CONFIG")
|
||||
|
||||
# check that eviction worked as expected
|
||||
res = node.query_with_retry(
|
||||
time.sleep(2)
|
||||
node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS")
|
||||
res = node.query(
|
||||
"SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'",
|
||||
check_callback=lambda result: result == "2\n",
|
||||
)
|
||||
assert (
|
||||
res == "2\n"
|
||||
@ -132,9 +133,10 @@ def test_query_cache_size_is_runtime_configurable(start_cluster):
|
||||
node.query("SELECT 4 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
|
||||
node.query("SELECT 5 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
|
||||
|
||||
res = node.query_with_retry(
|
||||
time.sleep(2)
|
||||
node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS")
|
||||
res = node.query(
|
||||
"SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'",
|
||||
check_callback=lambda result: result == "1\n",
|
||||
)
|
||||
assert res == "1\n"
|
||||
|
||||
|
@ -2,13 +2,13 @@
|
||||
|
||||
SET min_execution_speed = 100000000000, timeout_before_checking_execution_speed = 0;
|
||||
SELECT count() FROM system.numbers; -- { serverError 160 }
|
||||
SELECT 'Ok (1)';
|
||||
SET min_execution_speed = 0;
|
||||
SELECT 'Ok (1)';
|
||||
|
||||
SET min_execution_speed_bytes = 800000000000, timeout_before_checking_execution_speed = 0;
|
||||
SELECT count() FROM system.numbers; -- { serverError 160 }
|
||||
SELECT 'Ok (2)';
|
||||
SET min_execution_speed_bytes = 0;
|
||||
SELECT 'Ok (2)';
|
||||
|
||||
SET max_execution_speed = 1000000;
|
||||
SET max_block_size = 100;
|
||||
|
@ -21,6 +21,10 @@ function test_completion_word()
|
||||
# - here and below you should escape variables of the expect.
|
||||
# - you should not use "expect <<..." since in this case timeout/eof will
|
||||
# not work (I guess due to attached stdin)
|
||||
|
||||
# TODO: get build sanitizer and debug/release info to dynamically change test
|
||||
# like here timeout 120 seconds is too big for release build
|
||||
# but ok for sanitizer builds
|
||||
cat > "$SCRIPT_PATH" << EOF
|
||||
# NOTE: log will be appended
|
||||
exp_internal -f $CLICKHOUSE_TMP/$(basename "${BASH_SOURCE[0]}").debuglog 0
|
||||
@ -30,7 +34,7 @@ exp_internal -f $CLICKHOUSE_TMP/$(basename "${BASH_SOURCE[0]}").debuglog 0
|
||||
set stdout_channel [open "/dev/stdout" w]
|
||||
|
||||
log_user 0
|
||||
set timeout 60
|
||||
set timeout 120
|
||||
match_max 100000
|
||||
expect_after {
|
||||
# Do not ignore eof from expect
|
||||
|
@ -1,6 +1,5 @@
|
||||
create table test (number UInt64) engine=MergeTree order by number;
|
||||
insert into test select * from numbers(100000000);
|
||||
insert into test select * from numbers(50000000);
|
||||
select ignore(number) from test where RAND() > 4292390314 limit 10;
|
||||
select count() > 0 from test where RAND() > 4292390314;
|
||||
drop table test;
|
||||
|
||||
|
@ -5,8 +5,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
db=${CLICKHOUSE_DATABASE}
|
||||
user1="user1_03006_$db_$RANDOM"
|
||||
user2="user2_03006_$db_$RANDOM"
|
||||
user1="user1_03006_${db}_$RANDOM"
|
||||
user2="user2_03006_${db}_$RANDOM"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --multiquery <<EOF
|
||||
DROP DATABASE IF EXISTS $db;
|
||||
|
Loading…
Reference in New Issue
Block a user