ClickHouse/src/Interpreters/DDLTask.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

256 lines
7.9 KiB
C++
Raw Normal View History

2020-11-03 13:47:26 +00:00
#pragma once
2020-11-03 13:47:26 +00:00
#include <Core/Types.h>
#include <Interpreters/Cluster.h>
#include <Common/OpenTelemetryTraceContext.h>
#include <Common/ZooKeeper/Types.h>
#include <filesystem>
#include <utility>
2020-11-03 13:47:26 +00:00
2020-11-27 14:04:03 +00:00
/// Forward declaration: this header only passes Poco::Logger by pointer
/// (see DDLTask::findCurrentHostID and DDLTask::setClusterInfo), so the full definition is not needed here.
namespace Poco
{
class Logger;
}
2020-11-03 13:47:26 +00:00
/// Forward declaration: this header only uses zkutil::ZooKeeper through std::shared_ptr (ZooKeeperPtr).
namespace zkutil
{
class ZooKeeper;
}
/// Short alias for std::filesystem; used below to join node path components with fs::path.
namespace fs = std::filesystem;
2020-11-03 13:47:26 +00:00
namespace DB
{
2021-07-02 16:39:55 +00:00
/// Error codes referenced by the inline code of this header; only declared here, defined elsewhere.
namespace ErrorCodes
{
/// Thrown by ZooKeeperMetadataTransaction when operations are added or moved after the transaction was executed.
extern const int LOGICAL_ERROR;
}
2020-11-03 13:47:26 +00:00
class ASTQueryWithOnCluster;
2020-11-20 16:06:27 +00:00
using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
using ClusterPtr = std::shared_ptr<Cluster>;
2020-11-27 14:04:03 +00:00
class DatabaseReplicated;
2020-11-03 13:47:26 +00:00
2021-02-19 23:41:58 +00:00
class ZooKeeperMetadataTransaction;
using ZooKeeperMetadataTransactionPtr = std::shared_ptr<ZooKeeperMetadataTransaction>;
2020-12-04 20:12:32 +00:00
2020-11-03 13:47:26 +00:00
struct HostID
{
String host_name;
UInt16 port;
HostID() = default;
explicit HostID(const Cluster::Address & address)
: host_name(address.host_name), port(address.port) {}
2024-01-04 12:36:42 +00:00
HostID(const String & host_name_, UInt16 port_)
: host_name(host_name_), port(port_) {}
2020-11-03 13:47:26 +00:00
static HostID fromString(const String & host_port_str);
String toString() const
{
return Cluster::Address::toString(host_name, port);
}
String readableString() const
{
return host_name + ":" + DB::toString(port);
}
bool isLocalAddress(UInt16 clickhouse_port) const;
static String applyToString(const HostID & host_id)
{
return host_id.toString();
}
};
struct DDLLogEntry
{
static constexpr const UInt64 OLDEST_VERSION = 1;
static constexpr const UInt64 SETTINGS_IN_ZK_VERSION = 2;
static constexpr const UInt64 NORMALIZE_CREATE_ON_INITIATOR_VERSION = 3;
2022-09-23 03:32:21 +00:00
static constexpr const UInt64 OPENTELEMETRY_ENABLED_VERSION = 4;
static constexpr const UInt64 PRESERVE_INITIAL_QUERY_ID_VERSION = 5;
2023-11-07 14:58:25 +00:00
static constexpr const UInt64 BACKUP_RESTORE_FLAG_IN_ZK_VERSION = 6;
2022-09-23 03:32:21 +00:00
/// Add new version here
/// Remember to update the value below once new version is added
2023-11-07 14:58:25 +00:00
static constexpr const UInt64 DDL_ENTRY_FORMAT_MAX_VERSION = 6;
UInt64 version = 1;
2020-11-03 13:47:26 +00:00
String query;
std::vector<HostID> hosts;
String initiator; // optional
std::optional<SettingsChanges> settings;
OpenTelemetry::TracingContext tracing_context;
String initial_query_id;
2023-11-07 14:58:25 +00:00
bool is_backup_restore = false;
2020-11-03 13:47:26 +00:00
void setSettingsIfRequired(ContextPtr context);
2020-11-03 13:47:26 +00:00
String toString() const;
void parse(const String & data);
void assertVersion() const;
2020-11-03 13:47:26 +00:00
};
2020-11-27 14:04:03 +00:00
struct DDLTaskBase
{
const String entry_name;
const String entry_path;
DDLLogEntry entry;
String host_id_str;
ASTPtr query;
String query_str;
String query_for_logging;
bool is_initial_query = false;
2020-11-27 14:04:03 +00:00
bool is_circular_replicated = false;
bool execute_on_leader = false;
2020-11-03 13:47:26 +00:00
2020-12-04 20:12:32 +00:00
Coordination::Requests ops;
2020-11-27 14:04:03 +00:00
ExecutionStatus execution_status;
bool was_executed = false;
2021-01-26 17:51:25 +00:00
std::atomic_bool completely_processed = false;
2020-11-29 11:45:32 +00:00
DDLTaskBase(const String & name, const String & path) : entry_name(name), entry_path(path) {}
DDLTaskBase(const DDLTaskBase &) = delete;
2020-11-29 11:45:32 +00:00
virtual ~DDLTaskBase() = default;
virtual void parseQueryFromEntry(ContextPtr context);
void formatRewrittenQuery(ContextPtr context);
2020-11-29 11:45:32 +00:00
2020-11-27 14:04:03 +00:00
virtual String getShardID() const = 0;
2021-05-31 14:49:02 +00:00
virtual ContextMutablePtr makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper);
2022-05-06 16:37:20 +00:00
virtual Coordination::RequestPtr getOpToUpdateLogPointer() { return nullptr; }
2020-11-27 14:04:03 +00:00
2022-07-27 17:15:00 +00:00
virtual void createSyncedNodeIfNeed(const ZooKeeperPtr & /*zookeeper*/) {}
inline String getActiveNodePath() const { return fs::path(entry_path) / "active" / host_id_str; }
inline String getFinishedNodePath() const { return fs::path(entry_path) / "finished" / host_id_str; }
inline String getShardNodePath() const { return fs::path(entry_path) / "shards" / getShardID(); }
2022-07-27 17:15:00 +00:00
inline String getSyncedNodePath() const { return fs::path(entry_path) / "synced" / host_id_str; }
2020-11-27 14:04:03 +00:00
2021-02-09 15:14:20 +00:00
static String getLogEntryName(UInt32 log_entry_number);
static UInt32 getLogEntryNumber(const String & log_entry_name);
2020-11-27 14:04:03 +00:00
};
struct DDLTask : public DDLTaskBase
2020-11-03 13:47:26 +00:00
{
2020-11-27 14:04:03 +00:00
DDLTask(const String & name, const String & path) : DDLTaskBase(name, path) {}
bool findCurrentHostID(ContextPtr global_context, Poco::Logger * log, const ZooKeeperPtr & zookeeper, const std::optional<std::string> & config_host_name);
2020-11-27 14:04:03 +00:00
void setClusterInfo(ContextPtr context, Poco::Logger * log);
2020-11-03 13:47:26 +00:00
2020-11-27 14:04:03 +00:00
String getShardID() const override;
private:
bool tryFindHostInCluster();
bool tryFindHostInClusterViaResolving(ContextPtr context);
2020-11-27 14:04:03 +00:00
HostID host_id;
2020-11-03 13:47:26 +00:00
String cluster_name;
ClusterPtr cluster;
Cluster::Address address_in_cluster;
2021-05-08 15:20:40 +00:00
size_t host_shard_num = 0;
size_t host_replica_num = 0;
2020-11-27 14:04:03 +00:00
};
2020-11-03 13:47:26 +00:00
2020-11-27 14:04:03 +00:00
struct DatabaseReplicatedTask : public DDLTaskBase
{
DatabaseReplicatedTask(const String & name, const String & path, DatabaseReplicated * database_);
2020-11-03 13:47:26 +00:00
2020-11-27 14:04:03 +00:00
String getShardID() const override;
void parseQueryFromEntry(ContextPtr context) override;
2021-05-31 14:49:02 +00:00
ContextMutablePtr makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper) override;
2022-05-06 16:37:20 +00:00
Coordination::RequestPtr getOpToUpdateLogPointer() override;
2022-07-27 17:15:00 +00:00
void createSyncedNodeIfNeed(const ZooKeeperPtr & zookeeper) override;
2020-11-20 16:06:27 +00:00
2020-11-27 14:04:03 +00:00
DatabaseReplicated * database;
2020-11-03 13:47:26 +00:00
};
2021-02-19 23:41:58 +00:00
/// The main purpose of ZooKeeperMetadataTransaction is to execute all zookeeper operation related to query
/// in a single transaction when we performed all required checks and ready to "commit" changes.
/// For example, create ALTER_METADATA entry in ReplicatedMergeTree log,
/// create path/to/entry/finished/host_id node in distributed DDL queue to mark query as executed and
/// update metadata in path/to/replicated_database/metadata/table_name
/// It's used for DatabaseReplicated.
/// TODO we can also use it for ordinary ON CLUSTER queries
class ZooKeeperMetadataTransaction
2020-11-20 16:06:27 +00:00
{
2020-12-04 20:12:32 +00:00
enum State
{
CREATED,
2021-02-08 19:36:17 +00:00
COMMITTED,
2020-12-04 20:12:32 +00:00
FAILED
};
State state = CREATED;
2020-11-20 16:06:27 +00:00
ZooKeeperPtr current_zookeeper;
String zookeeper_path;
2020-11-29 11:45:32 +00:00
bool is_initial_query;
2021-07-06 10:26:03 +00:00
String task_path;
2020-11-20 16:06:27 +00:00
Coordination::Requests ops;
2022-08-03 12:13:01 +00:00
/// CREATE OR REPLACE is special query that consists of 3 separate DDL queries (CREATE, RENAME, DROP)
/// and not all changes should be applied to metadata in ZooKeeper
/// (otherwise we may get partially applied changes on connection loss).
/// So we need this flag to avoid doing unnecessary operations with metadata.
bool is_create_or_replace_query = false;
2021-02-19 23:41:58 +00:00
public:
2021-07-06 10:26:03 +00:00
ZooKeeperMetadataTransaction(const ZooKeeperPtr & current_zookeeper_, const String & zookeeper_path_, bool is_initial_query_, const String & task_path_)
2021-02-19 23:41:58 +00:00
: current_zookeeper(current_zookeeper_)
, zookeeper_path(zookeeper_path_)
, is_initial_query(is_initial_query_)
2021-07-06 10:26:03 +00:00
, task_path(task_path_)
2021-02-19 23:41:58 +00:00
{
}
bool isInitialQuery() const { return is_initial_query; }
bool isExecuted() const { return state != CREATED; }
String getDatabaseZooKeeperPath() const { return zookeeper_path; }
2021-07-06 10:26:03 +00:00
String getTaskZooKeeperPath() const { return task_path; }
2021-07-02 16:39:55 +00:00
ZooKeeperPtr getZooKeeper() const { return current_zookeeper; }
2022-08-03 12:13:01 +00:00
void setIsCreateOrReplaceQuery() { is_create_or_replace_query = true; }
bool isCreateOrReplaceQuery() const { return is_create_or_replace_query; }
2021-02-19 23:41:58 +00:00
void addOp(Coordination::RequestPtr && op)
{
2021-07-02 16:39:55 +00:00
if (isExecuted())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add ZooKeeper operation because query is executed. It's a bug.");
2021-02-19 23:41:58 +00:00
ops.emplace_back(op);
}
2021-02-08 19:36:17 +00:00
void moveOpsTo(Coordination::Requests & other_ops)
2020-11-20 16:06:27 +00:00
{
2021-07-02 16:39:55 +00:00
if (isExecuted())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add ZooKeeper operation because query is executed. It's a bug.");
2020-11-20 16:06:27 +00:00
std::move(ops.begin(), ops.end(), std::back_inserter(other_ops));
2021-01-28 19:02:39 +00:00
ops.clear();
2021-02-08 19:36:17 +00:00
state = COMMITTED;
2020-11-20 16:06:27 +00:00
}
2020-11-29 11:45:32 +00:00
void commit();
2020-12-04 20:12:32 +00:00
2021-07-30 16:34:18 +00:00
~ZooKeeperMetadataTransaction() { assert(isExecuted() || std::uncaught_exceptions() || ops.empty()); }
2020-11-20 16:06:27 +00:00
};
/// Returns the cluster that corresponds to cluster_name; declared here, defined elsewhere.
/// NOTE(review): the "try" prefix suggests a null ClusterPtr is returned when there is no such cluster - confirm against the definition.
ClusterPtr tryGetReplicatedDatabaseCluster(const String & cluster_name);
2020-11-03 13:47:26 +00:00
}