show clusters for replicated db

This commit is contained in:
Alexander Tokmakov 2021-02-08 12:14:17 +03:00
parent 18f6b5bbad
commit 7ce0ef2561
8 changed files with 186 additions and 44 deletions

View File

@ -36,8 +36,11 @@ namespace ErrorCodes
extern const int UNKNOWN_DATABASE;
extern const int NOT_IMPLEMENTED;
extern const int INCORRECT_QUERY;
extern const int ALL_CONNECTION_TRIES_FAILED;
}
static constexpr const char * DROPPED_MARK = "DROPPED";
zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
{
return global_context.getZooKeeper();
@ -68,6 +71,8 @@ DatabaseReplicated::DatabaseReplicated(
throw Exception("ZooKeeper path, shard and replica names must be non-empty", ErrorCodes::BAD_ARGUMENTS);
if (shard_name.find('/') != std::string::npos || replica_name.find('/') != std::string::npos)
throw Exception("Shard and replica names should not contain '/'", ErrorCodes::BAD_ARGUMENTS);
if (shard_name.find('|') != std::string::npos || replica_name.find('|') != std::string::npos)
throw Exception("Shard and replica names should not contain '|'", ErrorCodes::BAD_ARGUMENTS);
if (zookeeper_path.back() == '/')
zookeeper_path.resize(zookeeper_path.size() - 1);
@ -90,7 +95,7 @@ DatabaseReplicated::DatabaseReplicated(
createDatabaseNodesInZooKeeper(current_zookeeper);
}
replica_path = zookeeper_path + "/replicas/" + shard_name + "/" + replica_name;
replica_path = zookeeper_path + "/replicas/" + getFullReplicaName();
String replica_host_id;
if (current_zookeeper->tryGet(replica_path, replica_host_id))
@ -110,6 +115,93 @@ DatabaseReplicated::DatabaseReplicated(
}
}
String DatabaseReplicated::getFullReplicaName() const
{
return shard_name + '|' + replica_name;
}
std::pair<String, String> DatabaseReplicated::parseFullReplicaName(const String & name)
{
String shard;
String replica;
auto pos = name.find('|');
if (pos == std::string::npos || name.find('|', pos + 1) != std::string::npos)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect replica identifier: {}", name);
shard = name.substr(0, pos);
replica = name.substr(pos + 1);
return {shard, replica};
}
ClusterPtr DatabaseReplicated::getCluster() const
{
Strings hosts;
Strings host_ids;
auto zookeeper = global_context.getZooKeeper();
constexpr int max_retries = 10;
int iteration = 0;
bool success = false;
while (++iteration <= max_retries)
{
host_ids.resize(0);
Coordination::Stat stat;
hosts = zookeeper->getChildren(zookeeper_path + "/replicas", &stat);
if (hosts.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "No hosts found");
Int32 cver = stat.cversion;
std::vector<zkutil::ZooKeeper::FutureGet> futures;
futures.reserve(hosts.size());
host_ids.reserve(hosts.size());
for (const auto & host : hosts)
futures.emplace_back(zookeeper->asyncTryGet(zookeeper_path + "/replicas/" + host));
success = true;
for (auto & future : futures)
{
auto res = future.get();
if (res.error != Coordination::Error::ZOK)
success = false;
host_ids.emplace_back(res.data);
}
zookeeper->get(zookeeper_path + "/replicas", &stat);
if (success && cver == stat.version)
break;
}
if (!success)
throw Exception(ErrorCodes::ALL_CONNECTION_TRIES_FAILED, "Cannot get consistent cluster snapshot");
assert(!hosts.empty());
assert(hosts.size() == host_ids.size());
std::sort(hosts.begin(), hosts.end());
String current_shard = parseFullReplicaName(hosts.front()).first;
std::vector<Strings> shards;
shards.emplace_back();
for (size_t i = 0; i < hosts.size(); ++i)
{
const auto & id = host_ids[i];
if (id == DROPPED_MARK)
continue;
auto [shard, replica] = parseFullReplicaName(hosts[i]);
auto pos = id.find(':');
String host = id.substr(0, pos);
if (shard != current_shard)
{
current_shard = shard;
if (!shards.back().empty())
shards.emplace_back();
}
shards.back().emplace_back(unescapeForFileName(host));
}
/// TODO make it configurable
String username = "default";
String password;
return std::make_shared<Cluster>(global_context.getSettingsRef(), shards, username, password, global_context.getTCPPort(), false);
}
bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper)
{
current_zookeeper->createAncestors(zookeeper_path);
@ -139,8 +231,6 @@ bool DatabaseReplicated::createDatabaseNodesInZooKeeper(const zkutil::ZooKeeperP
void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPtr & current_zookeeper)
{
current_zookeeper->createAncestors(replica_path);
/// When creating new replica, use latest snapshot version as initial value of log_pointer
//log_entry_to_execute = 0; //FIXME
@ -296,9 +386,15 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node
void DatabaseReplicated::drop(const Context & context_)
{
auto current_zookeeper = getZooKeeper();
current_zookeeper->set(replica_path, "DROPPED");
current_zookeeper->set(replica_path, DROPPED_MARK);
DatabaseAtomic::drop(context_);
current_zookeeper->tryRemoveRecursive(replica_path);
/// TODO it may leave garbage in ZooKeeper if the last node lost connection here
if (current_zookeeper->tryRemove(zookeeper_path + "/replicas") == Coordination::Error::ZOK)
{
/// It was the last replica, remove all metadata
current_zookeeper->tryRemoveRecursive(zookeeper_path);
}
}
void DatabaseReplicated::stopReplication()
@ -318,7 +414,7 @@ void DatabaseReplicated::shutdown()
void DatabaseReplicated::dropTable(const Context & context, const String & table_name, bool no_delay)
{
auto txn = context.getMetadataTransaction();
//assert(!ddl_worker->isCurrentlyActive() || txn /*|| called from DROP DATABASE */);
assert(!ddl_worker->isCurrentlyActive() || txn);
if (txn && txn->is_initial_query)
{
String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(table_name);
@ -335,6 +431,8 @@ void DatabaseReplicated::renameTable(const Context & context, const String & tab
if (txn->is_initial_query)
{
if (this != &to_database)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Moving tables between databases is not supported for Replicated engine");
if (!isTableExist(table_name, context))
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", table_name);
if (exchange && !to_database.isTableExist(to_table_name, context))

View File

@ -15,6 +15,9 @@ namespace DB
class DatabaseReplicatedDDLWorker;
using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
class Cluster;
using ClusterPtr = std::shared_ptr<Cluster>;
/** DatabaseReplicated engine
* supports replication of metadata
* via DDL log being written to ZooKeeper
@ -67,7 +70,10 @@ public:
void loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach) override;
String getFullReplicaName() const { return shard_name + '|' + replica_name; }
String getFullReplicaName() const;
static std::pair<String, String> parseFullReplicaName(const String & name);
ClusterPtr getCluster() const;
//FIXME
friend struct DatabaseReplicatedTask;

View File

@ -208,7 +208,7 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
if (task->is_initial_query)
{
assert(!zookeeper->exists(entry_path + "/try"));
assert(zookeeper->exists(entry_path + "/committed") == (zookeeper->get(task->getFinishedNodePath()) == "0"));
assert(zookeeper->exists(entry_path + "/committed") == (zookeeper->get(task->getFinishedNodePath()) == ExecutionStatus(0).serializeText()));
out_reason = fmt::format("Entry {} has been executed as initial query", entry_name);
return {};
}

View File

@ -488,7 +488,7 @@ void DDLWorker::processTask(DDLTaskBase & task)
/// updating metadata in Replicated database), so we make create request for finished_node_path with status "0",
/// which means that query executed successfully.
task.ops.emplace_back(zkutil::makeRemoveRequest(active_node_path, -1));
task.ops.emplace_back(zkutil::makeCreateRequest(finished_node_path, "0", zkutil::CreateMode::Persistent));
task.ops.emplace_back(zkutil::makeCreateRequest(finished_node_path, ExecutionStatus(0).serializeText(), zkutil::CreateMode::Persistent));
try
{

View File

@ -827,17 +827,28 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
if (create.attach_from_path)
{
fs::path data_path = fs::path(*create.attach_from_path).lexically_normal();
fs::path user_files = fs::path(context.getUserFilesPath()).lexically_normal();
if (data_path.is_relative())
data_path = (user_files / data_path).lexically_normal();
if (!startsWith(data_path, user_files))
throw Exception(ErrorCodes::PATH_ACCESS_DENIED,
"Data directory {} must be inside {} to attach it", String(data_path), String(user_files));
fs::path root_path = fs::path(context.getPath()).lexically_normal();
/// Data path must be relative to root_path
create.attach_from_path = fs::relative(data_path, root_path) / "";
if (context.getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY)
{
fs::path data_path = fs::path(*create.attach_from_path).lexically_normal();
if (data_path.is_relative())
data_path = (user_files / data_path).lexically_normal();
if (!startsWith(data_path, user_files))
throw Exception(ErrorCodes::PATH_ACCESS_DENIED,
"Data directory {} must be inside {} to attach it", String(data_path), String(user_files));
/// Data path must be relative to root_path
create.attach_from_path = fs::relative(data_path, root_path) / "";
}
else
{
fs::path data_path = (root_path / *create.attach_from_path).lexically_normal();
if (!startsWith(data_path, user_files))
throw Exception(ErrorCodes::PATH_ACCESS_DENIED,
"Data directory {} must be inside {} to attach it", String(data_path), String(user_files));
}
}
else if (create.attach && !create.attach_short_syntax && context.getClientInfo().query_kind != ClientInfo::QueryKind::REPLICATED_LOG_QUERY)
{

View File

@ -3,6 +3,7 @@
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <Storages/System/StorageSystemClusters.h>
#include <Databases/DatabaseReplicated.h>
namespace DB
{
@ -26,40 +27,51 @@ NamesAndTypesList StorageSystemClusters::getNamesAndTypes()
};
}
void StorageSystemClusters::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const
{
for (const auto & name_and_cluster : context.getClusters().getContainer())
writeCluster(res_columns, name_and_cluster);
const auto databases = DatabaseCatalog::instance().getDatabases();
for (const auto & name_and_database : databases)
{
const String & cluster_name = name_and_cluster.first;
const ClusterPtr & cluster = name_and_cluster.second;
const auto & shards_info = cluster->getShardsInfo();
const auto & addresses_with_failover = cluster->getShardsAddresses();
if (const auto * replicated = typeid_cast<const DatabaseReplicated *>(name_and_database.second.get()))
writeCluster(res_columns, {name_and_database.first, replicated->getCluster()});
}
}
for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index)
void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster) const
{
const String & cluster_name = name_and_cluster.first;
const ClusterPtr & cluster = name_and_cluster.second;
const auto & shards_info = cluster->getShardsInfo();
const auto & addresses_with_failover = cluster->getShardsAddresses();
for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index)
{
const auto & shard_info = shards_info[shard_index];
const auto & shard_addresses = addresses_with_failover[shard_index];
const auto pool_status = shard_info.pool->getStatus();
for (size_t replica_index = 0; replica_index < shard_addresses.size(); ++replica_index)
{
const auto & shard_info = shards_info[shard_index];
const auto & shard_addresses = addresses_with_failover[shard_index];
const auto pool_status = shard_info.pool->getStatus();
size_t i = 0;
const auto & address = shard_addresses[replica_index];
for (size_t replica_index = 0; replica_index < shard_addresses.size(); ++replica_index)
{
size_t i = 0;
const auto & address = shard_addresses[replica_index];
res_columns[i++]->insert(cluster_name);
res_columns[i++]->insert(shard_info.shard_num);
res_columns[i++]->insert(shard_info.weight);
res_columns[i++]->insert(replica_index + 1);
res_columns[i++]->insert(address.host_name);
auto resolved = address.getResolvedAddress();
res_columns[i++]->insert(resolved ? resolved->host().toString() : String());
res_columns[i++]->insert(address.port);
res_columns[i++]->insert(address.is_local);
res_columns[i++]->insert(address.user);
res_columns[i++]->insert(address.default_database);
res_columns[i++]->insert(pool_status[replica_index].error_count);
res_columns[i++]->insert(pool_status[replica_index].estimated_recovery_time.count());
}
res_columns[i++]->insert(cluster_name);
res_columns[i++]->insert(shard_info.shard_num);
res_columns[i++]->insert(shard_info.weight);
res_columns[i++]->insert(replica_index + 1);
res_columns[i++]->insert(address.host_name);
auto resolved = address.getResolvedAddress();
res_columns[i++]->insert(resolved ? resolved->host().toString() : String());
res_columns[i++]->insert(address.port);
res_columns[i++]->insert(address.is_local);
res_columns[i++]->insert(address.user);
res_columns[i++]->insert(address.default_database);
res_columns[i++]->insert(pool_status[replica_index].error_count);
res_columns[i++]->insert(pool_status[replica_index].estimated_recovery_time.count());
}
}
}

View File

@ -10,6 +10,7 @@ namespace DB
{
class Context;
class Cluster;
/** Implements system table 'clusters'
* that allows to obtain information about available clusters
@ -25,8 +26,10 @@ public:
protected:
using IStorageSystemOneBlock::IStorageSystemOneBlock;
using NameAndCluster = std::pair<String, std::shared_ptr<Cluster>>;
void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const override;
void writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster) const;
};
}

View File

@ -103,7 +103,19 @@
"memory_tracking", /// FIXME remove it before merge
"memory_tracking",
"memory_usage",
"01686_rocksdb",
"01550_mutation_subquery",
"01070_mutations_with_dependencies",
"01070_materialize_ttl",
"01055_compact_parts",
"01017_mutations_with_nondeterministic_functions_zookeeper",
"00926_adaptive_index_granularity_pk",
"00910_zookeeper_test_alter_compression_codecs",
"00908_bloom_filter_index",
"00616_final_single_part",
"00446_clear_column_in_partition_zookeeper",
"01533_multiple_nested",
"01213_alter_rename_column_zookeeper",
"01575_disable_detach_table_of_dictionary",
"01457_create_as_table_function_structure",
"01415_inconsistent_merge_tree_settings",