Merge pull request #51049 from ClickHouse/ddl_replication_improvements

An optimization for ALTERs and Replicated db with one shard
This commit is contained in:
Alexey Milovidov 2023-06-17 16:57:59 +03:00 committed by GitHub
commit e913a58284
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 123 additions and 7 deletions

View File

@ -104,7 +104,7 @@ DNSResolver::IPAddresses hostByName(const std::string & host)
}
catch (const Poco::Net::DNSException & e)
{
LOG_ERROR(&Poco::Logger::get("DNSResolver"), "Cannot resolve host ({}), error {}: {}.", host, e.code(), e.name());
LOG_WARNING(&Poco::Logger::get("DNSResolver"), "Cannot resolve host ({}), error {}: {}.", host, e.code(), e.name());
addresses.clear();
}

View File

@ -36,6 +36,7 @@
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/queryToString.h>
#include <Storages/StorageKeeperMap.h>
#include <Storages/AlterCommands.h>
namespace DB
{
@ -1441,9 +1442,49 @@ bool DatabaseReplicated::shouldReplicateQuery(const ContextPtr & query_context,
return table->as<StorageKeeperMap>() != nullptr;
};
const auto is_replicated_table = [&](const ASTPtr & ast)
{
auto table_id = query_context->resolveStorageID(ast, Context::ResolveOrdinary);
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, query_context);
return table->supportsReplication();
};
const auto has_many_shards = [&]()
{
/// If there is only 1 shard then there is no need to replicate some queries.
auto current_cluster = tryGetCluster();
return
!current_cluster || /// Couldn't get the cluster, so we don't know how many shards there are.
current_cluster->getShardsInfo().size() > 1;
};
/// Some ALTERs are not replicated on database level
if (const auto * alter = query_ptr->as<const ASTAlterQuery>())
return !alter->isAttachAlter() && !alter->isFetchAlter() && !alter->isDropPartitionAlter() && !is_keeper_map_table(query_ptr);
{
if (alter->isAttachAlter() || alter->isFetchAlter() || alter->isDropPartitionAlter() || is_keeper_map_table(query_ptr))
return false;
if (has_many_shards() || !is_replicated_table(query_ptr))
return true;
try
{
/// Metadata alter should go through database
for (const auto & child : alter->command_list->children)
if (AlterCommand::parse(child->as<ASTAlterCommand>()))
return true;
/// It's ALTER PARTITION or mutation, doesn't involve database
return false;
}
catch (...)
{
tryLogCurrentException(log);
}
return true;
}
/// DROP DATABASE is not replicated
if (const auto * drop = query_ptr->as<const ASTDropQuery>())
@ -1459,11 +1500,7 @@ bool DatabaseReplicated::shouldReplicateQuery(const ContextPtr & query_context,
if (is_keeper_map_table(query_ptr))
return false;
/// If there is only 1 shard then there is no need to replicate DELETE query.
auto current_cluster = tryGetCluster();
return
!current_cluster || /// Couldn't get the cluster, so we don't know how many shards there are.
current_cluster->getShardsInfo().size() > 1;
return has_many_shards() || !is_replicated_table(query_ptr);
}
return true;

View File

@ -0,0 +1,20 @@
not allowed to execute ALTERs of different types
not allowed to execute ALTERs of different types
not allowed to execute ALTERs of different types
not allowed to execute ALTERs of different types
TIMEOUT_EXCEEDED
TIMEOUT_EXCEEDED
TIMEOUT_EXCEEDED
TIMEOUT_EXCEEDED
not allowed to execute ALTERs of different types
not allowed to execute ALTERs of different types
not allowed to execute ALTERs of different types
not allowed to execute ALTERs of different types
TIMEOUT_EXCEEDED
1 0
1 2
TIMEOUT_EXCEEDED
2 0
TIMEOUT_EXCEEDED
3 0
3 0

View File

@ -0,0 +1,59 @@
#!/usr/bin/env bash
# Functional test for ALTER and DELETE queries on tables inside a Replicated
# database. The same kinds of queries are run twice: first while the database
# has replicas in two shards (s1 and s2), then after the only s2 replica is
# dropped. Each check prints either a grep-ed error fragment or the rows of a
# SELECT, so the printed lines and their order are the test result.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# Database names are derived from $CLICKHOUSE_DATABASE (expected to come from
# shell_config.sh sourced above -- NOTE(review): confirm).
db="rdb_$CLICKHOUSE_DATABASE"
db2="${db}_2"
db3="${db}_3"
# First replica: shard 's1', replica 'r1'.
$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')"
# One plain MergeTree table and one ReplicatedMergeTree table, each seeded with rows 0 and 1.
$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db.mt (n int) engine=MergeTree order by tuple()"
$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db.rmt (n int) engine=ReplicatedMergeTree order by tuple()"
$CLICKHOUSE_CLIENT -q "insert into $db.rmt values (0), (1)"
$CLICKHOUSE_CLIENT -q "insert into $db.mt values (0), (1)"
# Two more databases on the same ZooKeeper path: a second replica of shard 's1'
# and a replica of a second shard 's2'.
$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db2 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r2')"
$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db3 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's2', 'r1')"
# Two shards: an ALTER that combines DROP PARTITION with ADD COLUMN or UPDATE
# must produce the "different types" error for both table engines.
# `grep -Eo ... | head -1` prints only the first occurrence of the matched fragment.
$CLICKHOUSE_CLIENT -q "alter table $db.mt drop partition id 'all', add column m int" 2>&1| grep -Eo "not allowed to execute ALTERs of different types" | head -1
$CLICKHOUSE_CLIENT -q "alter table $db.rmt drop partition id 'all', add column m int" 2>&1| grep -Eo "not allowed to execute ALTERs of different types" | head -1
$CLICKHOUSE_CLIENT -q "alter table $db.mt drop partition id 'all', update n = 2 where 1" 2>&1| grep -Eo "not allowed to execute ALTERs of different types" | head -1
$CLICKHOUSE_CLIENT -q "alter table $db.rmt drop partition id 'all', update n = 2 where 1" 2>&1| grep -Eo "not allowed to execute ALTERs of different types" | head -1
# Two shards: DELETE and ALTER ... UPDATE on either table are run with a
# 3-second distributed DDL timeout; each check prints TIMEOUT_EXCEEDED if that
# string appears in the client output.
$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=3 -q "delete from $db.mt where n=2" 2>&1| grep -Eo "TIMEOUT_EXCEEDED" | head -1
$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=3 -q "delete from $db.rmt where n=2" 2>&1| grep -Eo "TIMEOUT_EXCEEDED" | head -1
$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=3 -q "alter table $db.mt update n=2 where n=3" 2>&1| grep -Eo "TIMEOUT_EXCEEDED" | head -1
$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=3 -q "alter table $db.rmt update n=2 where n=3" 2>&1| grep -Eo "TIMEOUT_EXCEEDED" | head -1
# Remove the only replica that belongs to shard 's2'.
$CLICKHOUSE_CLIENT -q "drop database $db3"
# now there's only one shard
# Mixed-type ALTERs are checked for the same error as in the two-shard case.
$CLICKHOUSE_CLIENT -q "alter table $db.mt drop partition id 'all', add column m int" 2>&1| grep -Eo "not allowed to execute ALTERs of different types" | head -1
$CLICKHOUSE_CLIENT -q "alter table $db.rmt drop partition id 'all', add column m int" 2>&1| grep -Eo "not allowed to execute ALTERs of different types" | head -1
$CLICKHOUSE_CLIENT -q "alter table $db.mt drop partition id 'all', update n = 2 where 1" 2>&1| grep -Eo "not allowed to execute ALTERs of different types" | head -1
$CLICKHOUSE_CLIENT -q "alter table $db.rmt drop partition id 'all', update n = 2 where 1" 2>&1| grep -Eo "not allowed to execute ALTERs of different types" | head -1
# UPDATE on the plain MergeTree table is still checked for TIMEOUT_EXCEEDED ...
$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=3 -q "alter table $db.mt update n=2 where n=1" 2>&1| grep -Eo "TIMEOUT_EXCEEDED" | head -1
# ... while the same UPDATE on the ReplicatedMergeTree table is run without any
# error filter (mutations_sync=1), and its effect is printed by the SELECT
# tagged with the literal 1.
$CLICKHOUSE_CLIENT -q "alter table $db.rmt update n=2 where n=1 settings mutations_sync=1"
$CLICKHOUSE_CLIENT -q "select 1, * from $db.rmt order by n"
# Same pattern for DELETE: mt is checked for TIMEOUT_EXCEEDED, rmt is run
# unfiltered and its contents are printed with the tag 2.
$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=3 -q "delete from $db.mt where n=2" 2>&1| grep -Eo "TIMEOUT_EXCEEDED" | head -1
$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=3 -q "delete from $db.rmt where n=2"
$CLICKHOUSE_CLIENT -q "select 2, * from $db.rmt order by n"
# The DELETE on mt is issued a second time with the same TIMEOUT_EXCEEDED check.
$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=3 -q "delete from $db.mt where n=2" 2>&1| grep -Eo "TIMEOUT_EXCEEDED" | head -1
# Attach partition 'all' from mt into rmt, then print rmt's contents with the tag 3.
$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=3 -q "alter table $db.rmt attach partition id 'all' from $db.mt"
$CLICKHOUSE_CLIENT -q "select 3, * from $db.rmt order by n"
# Cleanup: drop the remaining two replica databases.
$CLICKHOUSE_CLIENT -q "drop database $db2"
$CLICKHOUSE_CLIENT -q "drop database $db"