ClickHouse/src/Interpreters/executeDDLQueryOnCluster.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

619 lines
22 KiB
C++
Raw Normal View History

2020-11-03 13:47:26 +00:00

#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Interpreters/DDLWorker.h>
#include <Interpreters/DDLTask.h>
#include <Interpreters/AddDefaultDatabaseVisitor.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTQueryWithOutput.h>
#include <Parsers/ASTQueryWithOnCluster.h>
#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/queryToString.h>
#include <Access/Common/AccessRightsElement.h>
#include <Access/ContextAccess.h>
#include <Common/Macros.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Databases/DatabaseReplicated.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeEnum.h>
#include <Processors/Sinks/EmptySink.h>
#include <QueryPipeline/Pipe.h>
#include <filesystem>
#include <base/sort.h>

namespace fs = std::filesystem;

namespace DB
{

namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
    extern const int TIMEOUT_EXCEEDED;
    extern const int UNFINISHED;
    extern const int QUERY_IS_PROHIBITED;
    extern const int LOGICAL_ERROR;
}

struct RetriesForDDL
{
    ZooKeeperRetriesInfo info;
    ZooKeeperRetriesControl ctl;
};
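
/// Builds the retry policy used for ZooKeeper reads while tracking a distributed DDL task.
/// The limits come from the server configuration, with defaults of 5 retries and an exponential
/// backoff from 100 ms up to 5000 ms. Illustrative server-config sketch (the exact placement of
/// these keys inside config.xml is an assumption; only the key names and default values below
/// are taken from the code):
///
///     <distributed_ddl_keeper_max_retries>5</distributed_ddl_keeper_max_retries>
///     <distributed_ddl_keeper_initial_backoff_ms>100</distributed_ddl_keeper_initial_backoff_ms>
///     <distributed_ddl_keeper_max_backoff_ms>5000</distributed_ddl_keeper_max_backoff_ms>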
static RetriesForDDL getRetriesForDistributedDDL()
{
    const auto & config_ref = Context::getGlobalContextInstance()->getConfigRef();

    auto info = ZooKeeperRetriesInfo(
        "DistributedDDL",
        &Poco::Logger::get("DDLQueryStatusSource"),
        config_ref.getInt("distributed_ddl_keeper_max_retries", 5),
        config_ref.getInt("distributed_ddl_keeper_initial_backoff_ms", 100),
        config_ref.getInt("distributed_ddl_keeper_max_backoff_ms", 5000)
    );

    auto ctl = ZooKeeperRetriesControl("executeDDLQueryOnCluster", info);
    return {info, ctl};
}

bool isSupportedAlterType(int type)
{
    assert(type != ASTAlterCommand::NO_TYPE);
    static const std::unordered_set<int> unsupported_alter_types{
        /// It's dangerous, because it may duplicate data if executed on multiple replicas. We can allow it after #18978
        ASTAlterCommand::ATTACH_PARTITION,
        /// Usually followed by ATTACH PARTITION
        ASTAlterCommand::FETCH_PARTITION,
        /// Logical error
        ASTAlterCommand::NO_TYPE,
    };

    return !unsupported_alter_types.contains(type);
}
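
/// Entry point for ON CLUSTER DDL: validates the query, expands the cluster name through macros,
/// checks access rights (assuming all servers share the same users config), writes a DDLLogEntry
/// into the distributed DDL queue via DDLWorker, and returns a BlockIO whose pipeline streams
/// per-host execution status back to the client. Illustrative usage (hypothetical database and
/// cluster names):
///
///     CREATE TABLE db.events ON CLUSTER my_cluster (d Date, x UInt64) ENGINE = MergeTree ORDER BY x;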
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, const DDLQueryOnClusterParams & params)
{
    OpenTelemetry::SpanHolder span(__FUNCTION__, OpenTelemetry::PRODUCER);

    if (context->getCurrentTransaction() && context->getSettingsRef().throw_on_unsupported_query_inside_transaction)
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ON CLUSTER queries inside transactions are not supported");

    /// Remove FORMAT <fmt> and INTO OUTFILE <file> if exists
    ASTPtr query_ptr = query_ptr_->clone();
    ASTQueryWithOutput::resetOutputASTIfExist(*query_ptr);

    // XXX: serious design flaw since `ASTQueryWithOnCluster` is not inherited from `IAST`!
    auto * query = dynamic_cast<ASTQueryWithOnCluster *>(query_ptr.get());
    if (!query)
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Distributed execution is not supported for such DDL queries");
    }

    if (!context->getSettingsRef().allow_distributed_ddl)
        throw Exception(ErrorCodes::QUERY_IS_PROHIBITED, "Distributed DDL queries are prohibited for the user");

    if (const auto * query_alter = query_ptr->as<ASTAlterQuery>())
    {
        for (const auto & command : query_alter->command_list->children)
        {
            if (!isSupportedAlterType(command->as<ASTAlterCommand&>().type))
                throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type of ALTER query");
        }
    }

    ClusterPtr cluster = params.cluster;
    if (!cluster)
    {
        query->cluster = context->getMacros()->expand(query->cluster);
        cluster = context->getCluster(query->cluster);
    }

    span.addAttribute("clickhouse.cluster", query->cluster);

    if (!cluster->areDistributedDDLQueriesAllowed())
        throw Exception(ErrorCodes::QUERY_IS_PROHIBITED, "Distributed DDL queries are prohibited for the cluster");

    /// TODO: support per-cluster grant
    context->checkAccess(AccessType::CLUSTER);

    DDLWorker & ddl_worker = context->getDDLWorker();

    /// Enumerate hosts which will be used to send query.
    auto addresses = cluster->filterAddressesByShardOrReplica(params.only_shard_num, params.only_replica_num);
    if (addresses.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "No hosts defined to execute distributed DDL query");

    std::vector<HostID> hosts;
    hosts.reserve(addresses.size());
    for (const auto * address : addresses)
        hosts.emplace_back(*address);

    /// The current database in a distributed query needs to be replaced with either
    /// the local current database or a shard's default database.
    AccessRightsElements access_to_check = params.access_to_check;
    bool need_replace_current_database = std::any_of(
        access_to_check.begin(),
        access_to_check.end(),
        [](const AccessRightsElement & elem) { return elem.isEmptyDatabase(); });
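
    /// Elements with an empty database are expanded below: either they all get the local current
    /// database, or each such element is duplicated once per distinct shard default database.
    /// Mixing hosts with and without a shard default database is not implemented and throws.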
    bool use_local_default_database = false;
    const String & current_database = context->getCurrentDatabase();

    if (need_replace_current_database)
    {
        Strings host_default_databases;
        for (const auto * address : addresses)
        {
            if (!address->default_database.empty())
                host_default_databases.push_back(address->default_database);
            else
                use_local_default_database = true;
        }
        ::sort(host_default_databases.begin(), host_default_databases.end());
        host_default_databases.erase(std::unique(host_default_databases.begin(), host_default_databases.end()), host_default_databases.end());
        assert(use_local_default_database || !host_default_databases.empty());

        if (use_local_default_database && !host_default_databases.empty())
            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Mixed local default DB and shard default DB in DDL query");

        if (use_local_default_database)
        {
            access_to_check.replaceEmptyDatabase(current_database);
        }
        else
        {
            for (size_t i = 0; i != access_to_check.size();)
            {
                auto & element = access_to_check[i];
                if (element.isEmptyDatabase())
                {
                    access_to_check.insert(access_to_check.begin() + i + 1, host_default_databases.size() - 1, element);
                    for (size_t j = 0; j != host_default_databases.size(); ++j)
                        access_to_check[i + j].replaceEmptyDatabase(host_default_databases[j]);
                    i += host_default_databases.size();
                }
                else
                    ++i;
            }
        }
    }

    AddDefaultDatabaseVisitor visitor(context, current_database, !use_local_default_database);
    visitor.visitDDL(query_ptr);

    /// Check access rights, assume that all servers have the same users config
    context->checkAccess(access_to_check);

    DDLLogEntry entry;
    entry.hosts = std::move(hosts);
    entry.query = queryToString(query_ptr);
    entry.initiator = ddl_worker.getCommonHostID();
    entry.setSettingsIfRequired(context);
    entry.tracing_context = OpenTelemetry::CurrentContext();
    String node_path = ddl_worker.enqueueQuery(entry);

    return getDistributedDDLStatus(node_path, entry, context, /* hosts_to_wait */ std::nullopt);
}
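
/// Streams the status of a distributed DDL task to the client. It polls the task node in ZooKeeper,
/// watching the "finished" (or "synced") and "active" children, and emits one row per host as that
/// host completes. The set of columns depends on whether the task belongs to a Replicated database
/// (shard, replica, status enum) or to a plain ON CLUSTER query (host, port, status code, error).
/// Behaviour on timeout is governed by distributed_ddl_task_timeout and distributed_ddl_output_mode.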
class DDLQueryStatusSource final : public ISource
{
public:
    DDLQueryStatusSource(
        const String & zk_node_path, const DDLLogEntry & entry, ContextPtr context_, const std::optional<Strings> & hosts_to_wait);

    String getName() const override { return "DDLQueryStatus"; }
    Chunk generate() override;
    Status prepare() override;

private:
    static Strings getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path);

    static Block getSampleBlock(ContextPtr context_, bool hosts_to_wait);

    Strings getNewAndUpdate(const Strings & current_list_of_finished_hosts);

    std::pair<String, UInt16> parseHostAndPort(const String & host_id) const;

    Chunk generateChunkWithUnfinishedHosts() const;

    enum ReplicatedDatabaseQueryStatus
    {
        /// Query is (successfully) finished
        OK = 0,
        /// Query is not finished yet, but replica is currently executing it
        IN_PROGRESS = 1,
        /// Replica is not available or busy with previous queries. It will process query asynchronously
        QUEUED = 2,
    };

    String node_path;
    ContextPtr context;
    Stopwatch watch;
    Poco::Logger * log;

    NameSet waiting_hosts;  /// hosts from task host list
    NameSet finished_hosts; /// finished hosts from host list
    NameSet ignoring_hosts; /// appeared hosts that are not in hosts list
    Strings current_active_hosts; /// Hosts that were in active state at the last check
    size_t num_hosts_finished = 0;

    /// Save the first detected error and throw it at the end of execution
    std::unique_ptr<Exception> first_exception;

    Int64 timeout_seconds = 120;
    bool is_replicated_database = false;
    bool throw_on_timeout = true;
    bool timeout_exceeded = false;
};
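
/// Wraps a DDLQueryStatusSource into a BlockIO. If distributed_ddl_task_timeout is 0 the initiator
/// does not wait at all and an empty BlockIO is returned; with distributed_ddl_output_mode = 'none'
/// the status rows are discarded through an EmptySink instead of being returned to the client.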
BlockIO getDistributedDDLStatus(const String & node_path, const DDLLogEntry & entry, ContextPtr context,
                                const std::optional<Strings> & hosts_to_wait)
{
    BlockIO io;
    if (context->getSettingsRef().distributed_ddl_task_timeout == 0)
        return io;

    auto source = std::make_shared<DDLQueryStatusSource>(node_path, entry, context, hosts_to_wait);
    io.pipeline = QueryPipeline(std::move(source));

    if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE)
        io.pipeline.complete(std::make_shared<EmptySink>(io.pipeline.getHeader()));

    return io;
}
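
/// Two header layouts are produced: for a Replicated database the rows are keyed by (shard, replica)
/// with an OK/IN_PROGRESS/QUEUED enum, while for plain ON CLUSTER queries they are keyed by
/// (host, port) with a numeric status code and an error string. For output modes other than
/// 'throw' and 'none', status and error become Nullable so unfinished hosts can be reported as NULL.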
Block DDLQueryStatusSource::getSampleBlock(ContextPtr context_, bool hosts_to_wait)
{
    auto output_mode = context_->getSettingsRef().distributed_ddl_output_mode;

    auto maybe_make_nullable = [&](const DataTypePtr & type) -> DataTypePtr
    {
        if (output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE)
            return type;
        return std::make_shared<DataTypeNullable>(type);
    };

    auto get_status_enum = []()
    {
        return std::make_shared<DataTypeEnum8>(
            DataTypeEnum8::Values
            {
                {"OK", static_cast<Int8>(OK)},
                {"IN_PROGRESS", static_cast<Int8>(IN_PROGRESS)},
                {"QUEUED", static_cast<Int8>(QUEUED)},
            });
    };

    if (hosts_to_wait)
    {
        return Block{
            {std::make_shared<DataTypeString>(), "shard"},
            {std::make_shared<DataTypeString>(), "replica"},
            {get_status_enum(), "status"},
            {std::make_shared<DataTypeUInt64>(), "num_hosts_remaining"},
            {std::make_shared<DataTypeUInt64>(), "num_hosts_active"},
        };
    }
    else
    {
        return Block{
            {std::make_shared<DataTypeString>(), "host"},
            {std::make_shared<DataTypeUInt16>(), "port"},
            {maybe_make_nullable(std::make_shared<DataTypeInt64>()), "status"},
            {maybe_make_nullable(std::make_shared<DataTypeString>()), "error"},
            {std::make_shared<DataTypeUInt64>(), "num_hosts_remaining"},
            {std::make_shared<DataTypeUInt64>(), "num_hosts_active"},
        };
    }
}
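
/// hosts_to_wait is set for Replicated database queries (the database already knows which replicas
/// must apply the entry); otherwise the waiting set is taken from the hosts recorded in the DDL log entry.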
DDLQueryStatusSource::DDLQueryStatusSource(
    const String & zk_node_path, const DDLLogEntry & entry, ContextPtr context_, const std::optional<Strings> & hosts_to_wait)
    : ISource(getSampleBlock(context_, hosts_to_wait.has_value()))
    , node_path(zk_node_path)
    , context(context_)
    , watch(CLOCK_MONOTONIC_COARSE)
    , log(&Poco::Logger::get("DDLQueryStatusSource"))
{
    auto output_mode = context->getSettingsRef().distributed_ddl_output_mode;
    throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE;

    if (hosts_to_wait)
    {
        waiting_hosts = NameSet(hosts_to_wait->begin(), hosts_to_wait->end());
        is_replicated_database = true;
    }
    else
    {
        for (const HostID & host : entry.hosts)
            waiting_hosts.emplace(host.toString());
    }

    addTotalRowsApprox(waiting_hosts.size());
    timeout_seconds = context->getSettingsRef().distributed_ddl_task_timeout;
}

std::pair<String, UInt16> DDLQueryStatusSource::parseHostAndPort(const String & host_id) const
{
    String host = host_id;
    UInt16 port = 0;
    if (!is_replicated_database)
    {
        auto host_and_port = Cluster::Address::fromString(host_id);
        host = host_and_port.first;
        port = host_and_port.second;
    }
    return {host, port};
}
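
/// Emits one row per host that has not finished yet: for a Replicated database the row carries
/// IN_PROGRESS or QUEUED depending on whether the host is currently active; for a plain ON CLUSTER
/// query the status and error columns are filled with NULLs.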
Chunk DDLQueryStatusSource::generateChunkWithUnfinishedHosts() const
{
    NameSet unfinished_hosts = waiting_hosts;
    for (const auto & host_id : finished_hosts)
        unfinished_hosts.erase(host_id);

    NameSet active_hosts_set = NameSet{current_active_hosts.begin(), current_active_hosts.end()};

    /// Query is not finished on the remaining hosts, so fill the corresponding rows with NULLs.
    MutableColumns columns = output.getHeader().cloneEmptyColumns();
    for (const String & host_id : unfinished_hosts)
    {
        size_t num = 0;
        if (is_replicated_database)
        {
            auto [shard, replica] = DatabaseReplicated::parseFullReplicaName(host_id);
            columns[num++]->insert(shard);
            columns[num++]->insert(replica);
            if (active_hosts_set.contains(host_id))
                columns[num++]->insert(IN_PROGRESS);
            else
                columns[num++]->insert(QUEUED);
        }
        else
        {
            auto [host, port] = parseHostAndPort(host_id);
            columns[num++]->insert(host);
            columns[num++]->insert(port);
            columns[num++]->insert(Field{});
            columns[num++]->insert(Field{});
        }
        columns[num++]->insert(unfinished_hosts.size());
        columns[num++]->insert(current_active_hosts.size());
    }
    return Chunk(std::move(columns), unfinished_hosts.size());
}
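
/// The main polling loop. Each iteration sleeps with a linear backoff capped at one second,
/// checks distributed_ddl_task_timeout, then reads the task's "finished"/"synced" and "active"
/// children from ZooKeeper under the retry policy above. Newly finished hosts are turned into
/// result rows; on timeout the behaviour depends on throw_on_timeout and on whether the task
/// belongs to a Replicated database.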
Chunk DDLQueryStatusSource::generate()
{
    bool all_hosts_finished = num_hosts_finished >= waiting_hosts.size();

    /// Seems like num_hosts_finished cannot be strictly greater than waiting_hosts.size()
    assert(num_hosts_finished <= waiting_hosts.size());

    if (all_hosts_finished || timeout_exceeded)
        return {};

    String node_to_wait = "finished";
    if (is_replicated_database && context->getSettingsRef().database_replicated_enforce_synchronous_settings)
        node_to_wait = "synced";

    size_t try_number = 0;

    while (true)
    {
        if (isCancelled())
            return {};

        if (timeout_seconds >= 0 && watch.elapsedSeconds() > timeout_seconds)
        {
            timeout_exceeded = true;

            size_t num_unfinished_hosts = waiting_hosts.size() - num_hosts_finished;
            size_t num_active_hosts = current_active_hosts.size();

            constexpr auto msg_format = "Watching task {} is executing longer than distributed_ddl_task_timeout (={}) seconds. "
                                        "There are {} unfinished hosts ({} of them are currently active), "
                                        "they are going to execute the query in background";
            if (throw_on_timeout)
            {
                if (!first_exception)
                    first_exception = std::make_unique<Exception>(Exception(ErrorCodes::TIMEOUT_EXCEEDED,
                        msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts));

                /// For a Replicated database, print a list of unfinished hosts as well. Will return an empty block on the next iteration.
                if (is_replicated_database)
                    return generateChunkWithUnfinishedHosts();
                return {};
            }

            LOG_INFO(log, msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);

            return generateChunkWithUnfinishedHosts();
        }

        if (num_hosts_finished != 0 || try_number != 0)
        {
            sleepForMilliseconds(std::min<size_t>(1000, 50 * (try_number + 1)));
        }

        bool node_exists = false;
        Strings tmp_hosts;
        Strings tmp_active_hosts;

        getRetriesForDistributedDDL().ctl.retryLoop([&]()
        {
            auto zookeeper = context->getZooKeeper();
            node_exists = zookeeper->exists(node_path);
            tmp_hosts = getChildrenAllowNoNode(zookeeper, fs::path(node_path) / node_to_wait);
            tmp_active_hosts = getChildrenAllowNoNode(zookeeper, fs::path(node_path) / "active");
        });

        if (!node_exists)
        {
            /// Paradoxically, this exception will be thrown even in case of "never_throw" mode.
            if (!first_exception)
                first_exception = std::make_unique<Exception>(Exception(ErrorCodes::UNFINISHED,
                    "Cannot provide query execution status. The query's node {} has been deleted by the cleaner"
                    " since it was finished (or its lifetime has expired)",
                    node_path));
            return {};
        }

        Strings new_hosts = getNewAndUpdate(tmp_hosts);
        ++try_number;
        if (new_hosts.empty())
            continue;

        current_active_hosts = std::move(tmp_active_hosts);

        MutableColumns columns = output.getHeader().cloneEmptyColumns();
        for (const String & host_id : new_hosts)
        {
            ExecutionStatus status(-1, "Cannot obtain error message");

            if (node_to_wait == "finished")
            {
                String status_data;
                bool finished_exists = false;
                getRetriesForDistributedDDL().ctl.retryLoop([&]()
                {
                    finished_exists = context->getZooKeeper()->tryGet(fs::path(node_path) / "finished" / host_id, status_data);
                });
                if (finished_exists)
                    status.tryDeserializeText(status_data);
            }
            else
            {
                status = ExecutionStatus{0};
            }

            if (status.code != 0 && !first_exception
                && context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW)
            {
                /// A Replicated database retries in case of error, so it should not write an error status here.
                if (is_replicated_database)
                    throw Exception(ErrorCodes::LOGICAL_ERROR, "There was an error on {}: {} (probably it's a bug)", host_id, status.message);

                auto [host, port] = parseHostAndPort(host_id);
                first_exception = std::make_unique<Exception>(Exception(status.code,
                    "There was an error on [{}:{}]: {}", host, port, status.message));
            }

            ++num_hosts_finished;

            size_t num = 0;
            if (is_replicated_database)
            {
                if (status.code != 0)
                    throw Exception(ErrorCodes::LOGICAL_ERROR, "There was an error on {}: {} (probably it's a bug)", host_id, status.message);

                auto [shard, replica] = DatabaseReplicated::parseFullReplicaName(host_id);
                columns[num++]->insert(shard);
                columns[num++]->insert(replica);
                columns[num++]->insert(OK);
            }
            else
            {
                auto [host, port] = parseHostAndPort(host_id);
                columns[num++]->insert(host);
                columns[num++]->insert(port);
                columns[num++]->insert(status.code);
                columns[num++]->insert(status.message);
            }
            columns[num++]->insert(waiting_hosts.size() - num_hosts_finished);
            columns[num++]->insert(current_active_hosts.size());
        }

        return Chunk(std::move(columns), new_hosts.size());
    }
}

IProcessor::Status DDLQueryStatusSource::prepare()
{
    /// This method is overridden to throw an exception after all data is read.
    /// The exception is pushed into the pipe (instead of simply being thrown) to preserve the ordering between data and the exception.

    if (finished)
    {
        if (first_exception)
        {
            if (!output.canPush())
                return Status::PortFull;

            output.pushException(std::make_exception_ptr(*first_exception));
        }

        output.finish();
        return Status::Finished;
    }
    else
        return ISource::prepare();
}

Strings DDLQueryStatusSource::getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)
{
    Strings res;
    Coordination::Error code = zookeeper->tryGetChildren(node_path, res);
    if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE)
        throw Coordination::Exception(code, node_path);
    return res;
}

Strings DDLQueryStatusSource::getNewAndUpdate(const Strings & current_list_of_finished_hosts)
{
    Strings diff;
    for (const String & host : current_list_of_finished_hosts)
    {
        if (!waiting_hosts.contains(host))
        {
            if (!ignoring_hosts.contains(host))
            {
                ignoring_hosts.emplace(host);
                LOG_INFO(log, "Unexpected host {} appeared in task {}", host, node_path);
            }
            continue;
        }

        if (!finished_hosts.contains(host))
        {
            diff.emplace_back(host);
            finished_hosts.emplace(host);
        }
    }
    return diff;
}
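
/// If the query targets a Replicated database and spells "ON CLUSTER <that same database>",
/// the clause is redundant because the query is already replicated at the database level,
/// so it is stripped here. Illustrative example (hypothetical names): for a Replicated database
/// named "repl", "DROP TABLE repl.t ON CLUSTER repl" is rewritten to "DROP TABLE repl.t".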
bool maybeRemoveOnCluster(const ASTPtr & query_ptr, ContextPtr context)
{
    const auto * query = dynamic_cast<const ASTQueryWithTableAndOutput *>(query_ptr.get());
    if (!query || !query->table)
        return false;

    String database_name = query->getDatabase();
    if (database_name.empty())
        database_name = context->getCurrentDatabase();

    auto * query_on_cluster = dynamic_cast<ASTQueryWithOnCluster *>(query_ptr.get());
    if (database_name != query_on_cluster->cluster)
        return false;

    auto database = DatabaseCatalog::instance().tryGetDatabase(database_name);
    if (database && database->shouldReplicateQuery(context, query_ptr))
    {
        /// It's a Replicated database and the query is replicated on the database level,
        /// so the ON CLUSTER clause is redundant.
        query_on_cluster->cluster.clear();
        return true;
    }

    return false;
}

}