Backport #66218 to 24.6: Don't throw TIMEOUT_EXCEEDED for none_only_active

This commit is contained in:
robot-clickhouse 2024-07-09 12:10:45 +00:00
parent d1a517d0bb
commit fc339460a7
3 changed files with 38 additions and 7 deletions

View File

@@ -237,6 +237,7 @@ private:
Int64 timeout_seconds = 120;
bool is_replicated_database = false;
bool throw_on_timeout = true;
bool throw_on_timeout_only_active = false;
bool only_running_hosts = false;
bool timeout_exceeded = false;
@@ -316,8 +317,8 @@ DDLQueryStatusSource::DDLQueryStatusSource(
, log(getLogger("DDLQueryStatusSource"))
{
auto output_mode = context->getSettingsRef().distributed_ddl_output_mode;
throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE
|| output_mode == DistributedDDLOutputMode::NONE || output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE;
throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE;
throw_on_timeout_only_active = output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE || output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE;
if (hosts_to_wait)
{
@@ -451,7 +452,7 @@ Chunk DDLQueryStatusSource::generate()
"({} of them are currently executing the task, {} are inactive). "
"They are going to execute the query in background. Was waiting for {} seconds{}";
if (throw_on_timeout)
if (throw_on_timeout || (throw_on_timeout_only_active && !stop_waiting_offline_hosts))
{
if (!first_exception)
first_exception = std::make_unique<Exception>(Exception(ErrorCodes::TIMEOUT_EXCEEDED,

View File

@@ -12,11 +12,21 @@ t
2
rdb_default 1 1 s1 r1 1
2
2
2
skip inactive
s1 r1 OK 2 0
s1 r2 QUEUED 2 0
s2 r1 QUEUED 2 0
s1 r1 OK 2 0
s1 r2 QUEUED 2 0
s2 r1 QUEUED 2 0
timeout on active
2
2
s1 r1 OK 3 0
s1 r2 QUEUED 3 0
s2 r1 QUEUED 3 0
s9 r9 QUEUED 3 0
drop replica
2
rdb_default 1 1 s1 r1 1
rdb_default 1 2 s1 r2 0
@@ -24,6 +34,9 @@ rdb_default 1 2 s1 r2 0
2
t
t2
t22
t3
t33
t4
t44
rdb_default_4 1 1 s1 r1 1

View File

@@ -33,10 +33,27 @@ $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_na
$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it"
# Also check that it doesn't exceed distributed_ddl_task_timeout waiting for inactive replicas
timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=none_only_active -q "create table $db.t2 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED"
timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED"
echo 'skip inactive'
timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=none_only_active -q "create table $db.t2 (n int) engine=Log"
timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" | sort
timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t4 (n int) engine=Log" | sort
# And that it still throws TIMEOUT_EXCEEDED for active replicas
echo 'timeout on active'
db9="${db}_9"
$CLICKHOUSE_CLIENT -q "create database $db9 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's9', 'r9')"
$CLICKHOUSE_CLIENT -q "detach database $db9"
$CLICKHOUSE_CLIENT -q "insert into system.zookeeper(name, path, value) values ('active', '/test/$CLICKHOUSE_DATABASE/rdb/replicas/s9|r9', '$($CLICKHOUSE_CLIENT -q "select serverUUID()")')"
$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=none_only_active -q "create table $db.t22 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED"
$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t33 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED"
$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t44 (n int) engine=Log" | sort
$CLICKHOUSE_CLIENT -q "attach database $db9"
$CLICKHOUSE_CLIENT -q "drop database $db9"
echo 'drop replica'
$CLICKHOUSE_CLIENT -q "detach database $db3"
$CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db"
$CLICKHOUSE_CLIENT -q "attach database $db3" 2>/dev/null