mirror of https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00

minor improvements

This commit is contained in:
parent 2a51286527
commit 7b5de16da9
@ -70,23 +70,21 @@ Resets the mark cache. Used in development of ClickHouse and performance tests.

## DROP REPLICA {#query_language-system-drop-replica}

Replicas can be dropped using the following syntax:
Dead replicas can be dropped using the following syntax:

```sql
SYSTEM DROP REPLICA 'replica_name';
SYSTEM DROP REPLICA 'replica_name' FROM DATABASE database;
SYSTEM DROP REPLICA 'replica_name' FROM TABLE database.table;
```

Queries will remove the replica path in ZooKeeper; this is useful when you want to decrease your replication factor. It will only drop the inactive/stale replica, and it can't drop the local replica; please use `DROP TABLE` for that.

If you want to drop an inactive/stale replicated table that does not have a local replica, you can use the following syntax:

```sql
SYSTEM DROP REPLICA 'replica_name' FROM DATABASE database;
SYSTEM DROP REPLICA 'replica_name';
SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk';
```

It's useful for removing the metadata of a dead replica from ZooKeeper. The right way to decrease the replication factor is `DROP TABLE`.
Queries will remove the replica path in ZooKeeper. This is useful when the replica is dead and its metadata cannot be removed from ZooKeeper by `DROP TABLE` because there is no such table anymore. It will only drop the inactive/stale replica, and it can't drop the local replica; please use `DROP TABLE` for that. `DROP REPLICA` does not drop any tables and does not remove any data or metadata from disk.

The first one removes the metadata of the `'replica_name'` replica of the `database.table` table.
The second one does the same for all replicated tables in the database.
The third one does the same for all replicated tables on the local server.
The fourth one is useful for removing the metadata of a dead replica when all other replicas of the table have been dropped. It requires the table path to be specified explicitly. It must be the same path as was passed as the first argument of the `ReplicatedMergeTree` engine on table creation.

## DROP UNCOMPRESSED CACHE {#query_language-system-drop-uncompressed-cache}
@ -49,6 +49,7 @@ namespace ErrorCodes
    extern const int CANNOT_KILL;
    extern const int NOT_IMPLEMENTED;
    extern const int TIMEOUT_EXCEEDED;
    extern const int TABLE_WAS_NOT_DROPPED;
}

@ -405,113 +406,107 @@ void InterpreterSystemQuery::restartReplicas(Context & system_context)

void InterpreterSystemQuery::dropReplica(ASTSystemQuery & query)
{
    StorageReplicatedMergeTree::Status status;
    auto zookeeper = context.getZooKeeper();
    if (query.replica.empty())
        throw Exception("Replica name is empty", ErrorCodes::BAD_ARGUMENTS);

    if (!table_id.empty())
    {
        context.checkAccess(AccessType::SYSTEM_DROP_REPLICA, table_id);
        StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context);

        if (auto * storage_replicated = dynamic_cast<StorageReplicatedMergeTree *>(table.get()))
        {
            storage_replicated->getStatus(status);
            if (query.replica == status.replica_name)
                throw Exception("We can't drop local replica, please use `DROP TABLE` if you want to clean the data and drop this replica", ErrorCodes::LOGICAL_ERROR);
            if (zookeeper->exists(status.zookeeper_path + "/replicas/" + query.replica + "/is_active"))
                throw Exception("Can't drop replica: " + query.replica + ", because it's active",
                                ErrorCodes::LOGICAL_ERROR);
            storage_replicated->dropReplica(zookeeper, status.zookeeper_path, query.replica, status.is_readonly);
            LOG_TRACE(log, "DROP REPLICA " + table_id.getNameForLogs() + " [" + query.replica + "]: OK");
        }
        else if (!dropReplicaImpl(query, table))
            throw Exception("Table " + table_id.getNameForLogs() + " is not replicated", ErrorCodes::BAD_ARGUMENTS);
    }
    else if (!query.database.empty())
    {
        DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(query.database);
        if (database.get() != NULL)
        {
            for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next())
            {
                if (auto * storage_replicated = dynamic_cast<StorageReplicatedMergeTree *>(iterator->table().get()))
                {
                    context.checkAccess(AccessType::SYSTEM_DROP_REPLICA, iterator->table()->getStorageID());
                    storage_replicated->getStatus(status);
                    if (query.replica == status.replica_name)
                        throw Exception("We can't drop local replica, please use `DROP TABLE` if you want to clean the data and drop this replica", ErrorCodes::LOGICAL_ERROR);
                    if (zookeeper->exists(status.zookeeper_path + "/replicas/" + query.replica + "/is_active"))
                        throw Exception("Can't drop replica: " + query.replica + ", because it's active",
                                        ErrorCodes::LOGICAL_ERROR);
                    storage_replicated->dropReplica(zookeeper, status.zookeeper_path, query.replica, status.is_readonly);
                }
            }
            LOG_TRACE(log, "DROP REPLICA " + query.replica + " DATABASE " + database->getDatabaseName() + ": OK");
        }
        else
            throw Exception("DATABASE " + query.database + " doesn't exist", ErrorCodes::BAD_ARGUMENTS);
    }
    else if (!query.replica_zk_path.empty())
    {
        auto remote_replica_path = query.replica_zk_path + "/replicas/" + query.replica;
        auto & catalog = DatabaseCatalog::instance();

        for (auto & elem : catalog.getDatabases())
        {
            DatabasePtr & database = elem.second;
            for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next())
            {
                if (auto * storage_replicated = dynamic_cast<StorageReplicatedMergeTree *>(iterator->table().get()))
                {
                    storage_replicated->getStatus(status);
                    if (status.replica_path.compare(remote_replica_path) == 0)
                        throw Exception("We can't drop local replica, please use `DROP TABLE` if you want to clean the data and drop this replica",
                                        ErrorCodes::LOGICAL_ERROR);
                    if (status.replica_path.compare(query.replica_zk_path + "/replicas/" + status.replica_name) == 0)
                    {
                        if (zookeeper->exists(query.replica_zk_path + "/replicas/" + query.replica + "/is_active"))
                            throw Exception("Can't drop replica: " + query.replica + ", because it's active",
                                            ErrorCodes::LOGICAL_ERROR);
                        storage_replicated->dropReplica(zookeeper, query.replica_zk_path, query.replica, status.is_readonly);
                        return;
                    }
                }
            }
        }

        /// It may leave some garbage if the replica_path subtree is concurrently modified.
        /// Check that the replica is not active when dropping other replicas.
        if (zookeeper->exists(remote_replica_path + "/is_active"))
            throw Exception("Can't remove replica: " + query.replica + ", because it's active",
                            ErrorCodes::LOGICAL_ERROR);

        zookeeper->tryRemoveRecursive(remote_replica_path);
        LOG_INFO(log, "Removing replica {}", query.replica_zk_path + "/replicas/" + query.replica);
        context.checkAccess(AccessType::SYSTEM_DROP_REPLICA, query.database);
        DatabasePtr database = DatabaseCatalog::instance().getDatabase(query.database);
        for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next())
            dropReplicaImpl(query, iterator->table());
        LOG_TRACE(log, "Dropped replica {} from database {}", query.replica, backQuoteIfNeed(database->getDatabaseName()));
    }
    else if (query.is_drop_whole_replica)
    {
        context.checkAccess(AccessType::SYSTEM_DROP_REPLICA);
        auto databases = DatabaseCatalog::instance().getDatabases();

        for (auto & elem : databases)
        {
            DatabasePtr & database = elem.second;
            for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next())
                dropReplicaImpl(query, iterator->table());
            LOG_TRACE(log, "Dropped replica {} from database {}", query.replica, backQuoteIfNeed(database->getDatabaseName()));
        }
    }
    else if (!query.replica_zk_path.empty())
    {
        context.checkAccess(AccessType::SYSTEM_DROP_REPLICA);
        auto remote_replica_path = query.replica_zk_path + "/replicas/" + query.replica;

        /// This check is actually redundant, but it may prevent some user mistakes
        for (auto & elem : DatabaseCatalog::instance().getDatabases())
        {
            DatabasePtr & database = elem.second;
            for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next())
            {
                if (auto * storage_replicated = dynamic_cast<StorageReplicatedMergeTree *>(iterator->table().get()))
                {
                    context.checkAccess(AccessType::SYSTEM_DROP_REPLICA, iterator->table()->getStorageID());
                    StorageReplicatedMergeTree::Status status;
                    storage_replicated->getStatus(status);
                    if (query.replica == status.replica_name)
                        throw Exception("We can't drop local replica, please use `DROP TABLE` if you want to clean the data and drop this replica", ErrorCodes::LOGICAL_ERROR);
                    if (zookeeper->exists(status.zookeeper_path + "/replicas/" + query.replica + "/is_active"))
                        throw Exception("Can't drop replica: " + query.replica + ", because it's active",
                                        ErrorCodes::LOGICAL_ERROR);
                    storage_replicated->dropReplica(zookeeper, status.zookeeper_path, query.replica, status.is_readonly);
                    if (status.zookeeper_path == query.replica_zk_path)
                        throw Exception("There is a local table " + storage_replicated->getStorageID().getNameForLogs() +
                                        ", which has the same table path in ZooKeeper. Please check the path in query. "
                                        "If you want to drop replica of this table, use `DROP TABLE` "
                                        "or `SYSTEM DROP REPLICA 'name' FROM db.table`", ErrorCodes::TABLE_WAS_NOT_DROPPED);
                }
            }
            LOG_TRACE(log, "DROP REPLICA " + query.replica + " DATABASE " + database->getDatabaseName() + ": OK");
        }

        auto zookeeper = context.getZooKeeper();

        bool looks_like_table_path = zookeeper->exists(query.replica_zk_path + "/replicas") ||
                                     zookeeper->exists(query.replica_zk_path + "/dropped");
        if (!looks_like_table_path)
            throw Exception("Specified path " + query.replica_zk_path + " does not look like a table path",
                            ErrorCodes::TABLE_WAS_NOT_DROPPED);

        if (zookeeper->exists(remote_replica_path + "/is_active"))
            throw Exception("Can't remove replica: " + query.replica + ", because it's active",
                            ErrorCodes::TABLE_WAS_NOT_DROPPED);

        StorageReplicatedMergeTree::dropReplica(zookeeper, query.replica_zk_path, query.replica, log);
        LOG_INFO(log, "Dropped replica {}", remote_replica_path);
    }
    else
        throw Exception("Invalid query", ErrorCodes::LOGICAL_ERROR);
}

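To keep the branches above straight, here is how each SQL form maps onto the dispatch in `dropReplica()`. This is a sketch in comments, read directly from the conditions in the new code:

```cpp
/// SYSTEM DROP REPLICA 'r' FROM TABLE db.t   -> !table_id.empty():             drop replica 'r' of one table
/// SYSTEM DROP REPLICA 'r' FROM DATABASE db  -> !query.database.empty():       drop replica 'r' of every replicated table in db
/// SYSTEM DROP REPLICA 'r'                   -> query.is_drop_whole_replica:   drop replica 'r' of every replicated table in every database
/// SYSTEM DROP REPLICA 'r' FROM ZKPATH '/p'  -> !query.replica_zk_path.empty(): drop by raw ZooKeeper path, no local table needed
```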
bool InterpreterSystemQuery::dropReplicaImpl(ASTSystemQuery & query, const StoragePtr & table)
{
    auto * storage_replicated = dynamic_cast<StorageReplicatedMergeTree *>(table.get());
    if (!storage_replicated)
        return false;

    StorageReplicatedMergeTree::Status status;
    auto zookeeper = context.getZooKeeper();
    storage_replicated->getStatus(status);

    /// Do not allow dropping local replicas or active remote replicas
    if (query.replica == status.replica_name)
        throw Exception("We can't drop local replica, please use `DROP TABLE` "
                        "if you want to clean the data and drop this replica", ErrorCodes::TABLE_WAS_NOT_DROPPED);

    /// NOTE it's not atomic: the replica may become active after this check, but before dropReplica(...).
    /// However, the main use case is to drop a dead replica, which cannot become active.
    /// This check only prevents an accidental drop of some other replica.
    if (zookeeper->exists(status.zookeeper_path + "/replicas/" + query.replica + "/is_active"))
        throw Exception("Can't drop replica: " + query.replica + ", because it's active",
                        ErrorCodes::TABLE_WAS_NOT_DROPPED);

    storage_replicated->dropReplica(zookeeper, status.zookeeper_path, query.replica, log);
    LOG_TRACE(log, "Dropped replica {} of {}", query.replica, table->getStorageID().getNameForLogs());

    return true;
}

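The non-atomic check noted above hinges on a ZooKeeper convention: each live replica keeps an ephemeral `is_active` node under its replica path, which the server deletes automatically when the replica's session ends. A minimal sketch of that convention, reusing the zkutil calls that appear elsewhere in this diff (the header path and helper names are assumptions for illustration):

```cpp
#include <Common/ZooKeeper/ZooKeeper.h>  /// zkutil API as used in this diff; header path assumed

/// Hypothetical helper: a live replica announces itself with an ephemeral node.
/// ZooKeeper removes the node when the session ends, so a dead replica
/// eventually stops looking active without any explicit cleanup.
static void announceReplicaActive(zkutil::ZooKeeperPtr zookeeper, const String & replica_path)
{
    zookeeper->create(replica_path + "/is_active", "", zkutil::CreateMode::Ephemeral);
}

/// Hypothetical helper: the same liveness probe dropReplicaImpl() performs.
/// It is inherently racy (the node can appear right after we looked), which is
/// acceptable because the intended target is a dead replica that can never
/// become active again; the check only guards against dropping the wrong one.
static bool isReplicaActive(zkutil::ZooKeeperPtr zookeeper, const String & replica_path)
{
    return zookeeper->exists(replica_path + "/is_active");
}
```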
void InterpreterSystemQuery::syncReplica(ASTSystemQuery &)

@ -52,6 +52,7 @@ private:
    void restartReplicas(Context & system_context);
    void syncReplica(ASTSystemQuery & query);
    void dropReplica(ASTSystemQuery & query);
    bool dropReplicaImpl(ASTSystemQuery & query, const StoragePtr & table);
    void flushDistributed(ASTSystemQuery & query);

    AccessRightsElements getRequiredAccessForDDLOnCluster() const;

@ -9,7 +9,7 @@ namespace DB

namespace ErrorCodes
{
    extern const int BAD_TYPE_OF_FIELD;
    extern const int LOGICAL_ERROR;
}

@ -84,15 +84,15 @@ const char * ASTSystemQuery::typeToString(Type type)
        case Type::FLUSH_LOGS:
            return "FLUSH LOGS";
        default:
            throw Exception("Unknown SYSTEM query command", ErrorCodes::BAD_TYPE_OF_FIELD);
            throw Exception("Unknown SYSTEM query command", ErrorCodes::LOGICAL_ERROR);
    }
}


void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
{
    settings.ostr << (settings.hilite ? hilite_keyword : "") << "SYSTEM " << (settings.hilite ? hilite_none : "");
    settings.ostr << typeToString(type);
    settings.ostr << (settings.hilite ? hilite_keyword : "") << "SYSTEM ";
    settings.ostr << typeToString(type) << (settings.hilite ? hilite_none : "");

    auto print_database_table = [&]
    {
@ -119,22 +119,24 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
    };

    auto print_drop_replica = [&]
    {
        settings.ostr << " " << quoteString(replica) << (settings.hilite ? hilite_none : "");
        settings.ostr << " " << quoteString(replica);
        if (!table.empty())
        {
            settings.ostr << " FROM TABLE";
            settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM TABLE"
                          << (settings.hilite ? hilite_none : "");
            print_database_table();
        }
        else if (!replica_zk_path.empty())
        {
            settings.ostr << " FROM ZKPATH " << (settings.hilite ? hilite_identifier : "") << quoteString(replica_zk_path)
                          << (settings.hilite ? hilite_none : "");
            settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM ZKPATH "
                          << (settings.hilite ? hilite_none : "") << quoteString(replica_zk_path);
        }
        else if (!database.empty())
        {
            settings.ostr << " FROM DATABASE ";
            settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM DATABASE "
                          << (settings.hilite ? hilite_none : "");
            settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(database)
                          << (settings.hilite ? hilite_none : "");
        }
    };

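For orientation, the branches of `print_drop_replica` above render the AST back to text roughly as follows. A sketch only: the hilite escape sequences are omitted and the identifiers are hypothetical:

```cpp
/// table set:             SYSTEM DROP REPLICA 'replica_name' FROM TABLE db.table
/// replica_zk_path set:   SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk'
/// database set:          SYSTEM DROP REPLICA 'replica_name' FROM DATABASE db
/// none of the above:     SYSTEM DROP REPLICA 'replica_name'
```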
@ -620,7 +620,8 @@ void StorageReplicatedMergeTree::createReplica()

void StorageReplicatedMergeTree::drop()
{
    /// There is also the case when user has configured ClickHouse to wrong ZooKeeper cluster,
    /// There is also the case when user has configured ClickHouse to wrong ZooKeeper cluster
    /// or the metadata of a stale replica was removed manually.
    /// In this case has_metadata_in_zookeeper = false, and we also permit dropping the table.

    if (has_metadata_in_zookeeper)
@ -632,12 +633,102 @@ void StorageReplicatedMergeTree::drop()
            throw Exception("Can't drop readonly replicated table (need to drop data in ZooKeeper as well)", ErrorCodes::TABLE_IS_READ_ONLY);

        shutdown();
        dropReplica(zookeeper, zookeeper_path, replica_name, is_readonly);
        dropReplica(zookeeper, zookeeper_path, replica_name, log);
    }

    dropAllData();
}

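A condensed view of the resulting control flow in `drop()` after this change. This is a sketch for orientation, not the full method; `tryGetZooKeeper()` is an assumed accessor, since the hunk does not show how the session is obtained:

```cpp
void StorageReplicatedMergeTree::drop()  /// sketch, assuming members shown in this diff
{
    if (has_metadata_in_zookeeper)
    {
        auto zookeeper = tryGetZooKeeper();  /// assumed accessor
        /// If there probably is metadata in ZooKeeper, refuse to drop a readonly table.
        if (is_readonly || !zookeeper)
            throw Exception("Can't drop readonly replicated table (need to drop data in ZooKeeper as well)",
                            ErrorCodes::TABLE_IS_READ_ONLY);

        shutdown();
        /// The static dropReplica() now takes a logger instead of is_readonly,
        /// so the same helper serves both DROP TABLE and SYSTEM DROP REPLICA.
        dropReplica(zookeeper, zookeeper_path, replica_name, log);
    }

    dropAllData();
}
```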
void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, Poco::Logger * logger)
{
    if (zookeeper->expired())
        throw Exception("Table was not dropped because ZooKeeper session has expired.", ErrorCodes::TABLE_WAS_NOT_DROPPED);

    auto remote_replica_path = zookeeper_path + "/replicas/" + replica;
    LOG_INFO(logger, "Removing replica {}", remote_replica_path);
    /// It may leave some garbage if the replica_path subtree is concurrently modified
    zookeeper->tryRemoveRecursive(remote_replica_path);
    if (zookeeper->exists(remote_replica_path))
        LOG_ERROR(logger, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", remote_replica_path);

    /// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of the previous line.
    Strings replicas;
    if (Coordination::Error::ZOK != zookeeper->tryGetChildren(zookeeper_path + "/replicas", replicas) || !replicas.empty())
        return;

    LOG_INFO(logger, "{} is the last replica, will remove table", remote_replica_path);

    /** At this moment, another replica can be created and we cannot remove the table.
      * Try to remove the /replicas node first. If we successfully removed it,
      * it guarantees that we are the only replica that proceeds to remove the table,
      * and no new replicas can be created after that moment (creating one requires the existence of the /replicas node).
      * The table also cannot be recreated with a new /replicas node on other servers while we are removing data,
      * because table creation is executed in a single transaction that will conflict with the remaining nodes.
      */

    Coordination::Requests ops;
    Coordination::Responses responses;
    ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/replicas", -1));
    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/dropped", "", zkutil::CreateMode::Persistent));
    Coordination::Error code = zookeeper->tryMulti(ops, responses);

    if (code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS)
    {
        LOG_WARNING(logger, "Removal of table {} has already been started by another replica right now", remote_replica_path);
    }
    else if (code == Coordination::Error::ZNOTEMPTY)
    {
        LOG_WARNING(logger, "Another replica was suddenly created, will keep the table {}", remote_replica_path);
    }
    else if (code != Coordination::Error::ZOK)
    {
        zkutil::KeeperMultiException::check(code, ops, responses);
    }
    else
    {
        LOG_INFO(logger, "Removing table {} (this might take several minutes)", zookeeper_path);

        Strings children;
        code = zookeeper->tryGetChildren(zookeeper_path, children);
        if (code == Coordination::Error::ZNONODE)
        {
            LOG_WARNING(logger, "Removal of table {} has already been finished by another replica right now", remote_replica_path);
        }
        else
        {
            for (const auto & child : children)
                if (child != "dropped")
                    zookeeper->tryRemoveRecursive(zookeeper_path + "/" + child);

            ops.clear();
            responses.clear();
            ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/dropped", -1));
            ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path, -1));
            code = zookeeper->tryMulti(ops, responses);

            if (code == Coordination::Error::ZNONODE)
            {
                LOG_WARNING(logger, "Removal of table {} has already been finished by another replica right now", remote_replica_path);
            }
            else if (code == Coordination::Error::ZNOTEMPTY)
            {
                LOG_ERROR(logger, "Table was not completely removed from ZooKeeper, {} still exists and may contain some garbage.",
                          zookeeper_path);
            }
            else if (code != Coordination::Error::ZOK)
            {
                /// It is still possible that the ZooKeeper session expired or the server was killed in the middle of the delete operation.
                zkutil::KeeperMultiException::check(code, ops, responses);
            }
            else
            {
                LOG_INFO(logger, "Table {} was successfully removed from ZooKeeper", zookeeper_path);
            }
        }
    }
}

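The comment block above is the heart of this function: the last replica must atomically swap the empty `/replicas` node for a `/dropped` marker before it may delete the rest of the tree, because any concurrent `CREATE TABLE` transaction needs `/replicas` to exist. A condensed sketch of just that handshake, reusing the zkutil calls from this hunk (header path assumed, helper name hypothetical):

```cpp
#include <Common/ZooKeeper/ZooKeeper.h>  /// header path assumed

/// Hypothetical helper: returns true if this caller won the right to delete the
/// whole table subtree, i.e. it removed the empty /replicas node and created the
/// /dropped marker in a single multi-op. Losing any other way means another
/// replica either started the removal first or was concurrently created.
static bool tryMarkTableDropped(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path)
{
    Coordination::Requests ops;
    Coordination::Responses responses;
    ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/replicas", -1));
    ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/dropped", "", zkutil::CreateMode::Persistent));

    Coordination::Error code = zookeeper->tryMulti(ops, responses);
    if (code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS)
        return false;  /// another replica already started the removal
    if (code == Coordination::Error::ZNOTEMPTY)
        return false;  /// a new replica appeared between the emptiness check and the multi-op
    zkutil::KeeperMultiException::check(code, ops, responses);  /// rethrows on any other failure
    return true;       /// code == ZOK: we own the removal from here on
}
```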
/** Verify that list of columns and table storage_settings_ptr match those specified in ZK (/ metadata).
  * If not, throw an exception.
  */

@ -750,100 +841,6 @@ static time_t tryGetPartCreateTime(zkutil::ZooKeeperPtr & zookeeper, const Strin
    return res;
}

void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, bool is_readonly)
{
    static Poco::Logger * log = &Poco::Logger::get("StorageReplicatedMergeTree::dropReplica");

    /// If there probably is metadata in ZooKeeper, we don't allow dropping the table.
    if (is_readonly || !zookeeper)
        throw Exception("Can't drop readonly replicated table (need to drop data in ZooKeeper as well)", ErrorCodes::TABLE_IS_READ_ONLY);

    if (zookeeper->expired())
        throw Exception("Table was not dropped because ZooKeeper session has expired.", ErrorCodes::TABLE_WAS_NOT_DROPPED);

    auto remote_replica_path = zookeeper_path + "/replicas" + "/" + replica;
    LOG_INFO(log, "Removing replica {}", remote_replica_path);
    /// It may leave some garbage if the replica_path subtree is concurrently modified
    zookeeper->tryRemoveRecursive(remote_replica_path);
    if (zookeeper->exists(remote_replica_path))
        LOG_ERROR(log, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", remote_replica_path);

    /// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of the previous line.
    Strings replicas;
    if (Coordination::Error::ZOK == zookeeper->tryGetChildren(zookeeper_path + "/replicas", replicas) && replicas.empty())
    {
        LOG_INFO(log, "{} is the last replica, will remove table", remote_replica_path);

        /** At this moment, another replica can be created and we cannot remove the table.
          * Try to remove the /replicas node first. If we successfully removed it,
          * it guarantees that we are the only replica that proceeds to remove the table,
          * and no new replicas can be created after that moment (creating one requires the existence of the /replicas node).
          * The table also cannot be recreated with a new /replicas node on other servers while we are removing data,
          * because table creation is executed in a single transaction that will conflict with the remaining nodes.
          */

        Coordination::Requests ops;
        Coordination::Responses responses;
        ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/replicas", -1));
        ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/dropped", "", zkutil::CreateMode::Persistent));
        Coordination::Error code = zookeeper->tryMulti(ops, responses);

        if (code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS)
        {
            LOG_WARNING(log, "Removal of table {} has already been started by another replica right now", remote_replica_path);
        }
        else if (code == Coordination::Error::ZNOTEMPTY)
        {
            LOG_WARNING(log, "Another replica was suddenly created, will keep the table {}", remote_replica_path);
        }
        else if (code != Coordination::Error::ZOK)
        {
            zkutil::KeeperMultiException::check(code, ops, responses);
        }
        else
        {
            LOG_INFO(log, "Removing table {} (this might take several minutes)", zookeeper_path);

            Strings children;
            code = zookeeper->tryGetChildren(zookeeper_path, children);
            if (code == Coordination::Error::ZNONODE)
            {
                LOG_WARNING(log, "Removal of table {} has already been finished by another replica right now", remote_replica_path);
            }
            else
            {
                for (const auto & child : children)
                    if (child != "dropped")
                        zookeeper->tryRemoveRecursive(zookeeper_path + "/" + child);

                ops.clear();
                responses.clear();
                ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/dropped", -1));
                ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path, -1));
                code = zookeeper->tryMulti(ops, responses);

                if (code == Coordination::Error::ZNONODE)
                {
                    LOG_WARNING(log, "Removal of table {} has already been finished by another replica right now", remote_replica_path);
                }
                else if (code == Coordination::Error::ZNOTEMPTY)
                {
                    LOG_ERROR(log, "Table was not completely removed from ZooKeeper, {} still exists and may contain some garbage.",
                              zookeeper_path);
                }
                else if (code != Coordination::Error::ZOK)
                {
                    /// It is still possible that the ZooKeeper session expired or the server was killed in the middle of the delete operation.
                    zkutil::KeeperMultiException::check(code, ops, responses);
                }
                else
                {
                    LOG_INFO(log, "Table {} was successfully removed from ZooKeeper", zookeeper_path);
                }
            }
        }
    }
}

void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
{

@ -4046,6 +4043,7 @@ void StorageReplicatedMergeTree::checkPartitionCanBeDropped(const ASTPtr & parti
    global_context.checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, partition_size);
}


void StorageReplicatedMergeTree::rename(const String & new_path_to_table_data, const StorageID & new_table_id)
{
    MergeTreeData::rename(new_path_to_table_data, new_table_id);

@ -182,7 +182,7 @@ public:

    /** Remove a specific replica from zookeeper.
      */
    static void dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, bool is_readonly);
    static void dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, Poco::Logger * logger);

private:

@ -83,13 +83,17 @@ def test_drop_replica(start_cluster):
    assert "can't drop local replica" in node_1_1.query_and_get_error("SYSTEM DROP REPLICA 'node_1_1'")
    assert "can't drop local replica" in node_1_1.query_and_get_error("SYSTEM DROP REPLICA 'node_1_1' FROM DATABASE test")
    assert "can't drop local replica" in node_1_1.query_and_get_error("SYSTEM DROP REPLICA 'node_1_1' FROM TABLE test.test_table")
    assert "can't drop local replica" in \
        node_1_1.query_and_get_error("SYSTEM DROP REPLICA 'node_1_1' FROM ZKPATH '/clickhouse/tables/test/{shard}/replicated/test_table'".format(shard=1))
    assert "it's active" in node_1_2.query_and_get_error("SYSTEM DROP REPLICA 'node_1_1'")
    assert "it's active" in node_1_2.query_and_get_error("SYSTEM DROP REPLICA 'node_1_1' FROM DATABASE test")
    assert "it's active" in node_1_2.query_and_get_error("SYSTEM DROP REPLICA 'node_1_1' FROM TABLE test.test_table")
    assert "it's active" in \
        node_1_3.query_and_get_error("SYSTEM DROP REPLICA 'node_1_1' FROM ZKPATH '/clickhouse/tables/test/{shard}/replicated/test_table'".format(shard=1))
    assert "There is a local table" in \
        node_1_2.query_and_get_error("SYSTEM DROP REPLICA 'node_1_1' FROM ZKPATH '/clickhouse/tables/test/{shard}/replicated/test_table'".format(shard=1))
    assert "There is a local table" in \
        node_1_1.query_and_get_error("SYSTEM DROP REPLICA 'node_1_1' FROM ZKPATH '/clickhouse/tables/test/{shard}/replicated/test_table'".format(shard=1))
    assert "does not look like a table path" in \
        node_1_3.query_and_get_error("SYSTEM DROP REPLICA 'node_1_1' FROM ZKPATH '/clickhouse/tables/test'")

    with PartitionManager() as pm:
        ## make node_1_1 dead
@ -117,7 +121,7 @@ def test_drop_replica(start_cluster):
    exists_replica_1_1 = zk.exists("/clickhouse/tables/test1/{shard}/replicated/test_table/replicas/{replica}".format(shard=1, replica='node_1_1'))
    assert (exists_replica_1_1 is None)

    node_1_2.query("SYSTEM DROP REPLICA 'node_1_1' FROM ZKPATH '/clickhouse/tables/test3/{shard}/replicated/test_table'".format(shard=1))
    node_1_3.query("SYSTEM DROP REPLICA 'node_1_1' FROM ZKPATH '/clickhouse/tables/test3/{shard}/replicated/test_table'".format(shard=1))
    exists_replica_1_1 = zk.exists("/clickhouse/tables/test3/{shard}/replicated/test_table/replicas/{replica}".format(shard=1, replica='node_1_1'))
    assert (exists_replica_1_1 is None)