fix DROP vs OPTIMIZE race in ReplicatedMergeTree

This commit is contained in:
Alexander Tokmakov 2020-04-16 18:30:18 +03:00
parent fa8e4e4735
commit 605f3b2119
4 changed files with 18 additions and 6 deletions

View File

@ -16,6 +16,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
extern const int UNEXPECTED_NODE_IN_ZOOKEEPER; extern const int UNEXPECTED_NODE_IN_ZOOKEEPER;
extern const int UNFINISHED; extern const int UNFINISHED;
extern const int ABORTED;
} }
@ -426,6 +427,8 @@ bool ReplicatedMergeTreeQueue::removeFromVirtualParts(const MergeTreePartInfo &
void ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback) void ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback)
{ {
std::lock_guard lock(pull_logs_to_queue_mutex); std::lock_guard lock(pull_logs_to_queue_mutex);
if (pull_log_blocker.isCancelled())
throw Exception("Log pulling is cancelled", ErrorCodes::ABORTED);
String index_str = zookeeper->get(replica_path + "/log_pointer"); String index_str = zookeeper->get(replica_path + "/log_pointer");
UInt64 index; UInt64 index;

View File

@ -356,6 +356,9 @@ public:
/// A blocker that stops selects from the queue /// A blocker that stops selects from the queue
ActionBlocker actions_blocker; ActionBlocker actions_blocker;
/// A blocker that stops pulling entries from replication log to queue
ActionBlocker pull_log_blocker;
/// Adds a subscriber /// Adds a subscriber
SubscriberHandler addSubscriber(SubscriberCallBack && callback); SubscriberHandler addSubscriber(SubscriberCallBack && callback);

View File

@ -300,8 +300,6 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
} }
createNewZooKeeperNodes(); createNewZooKeeperNodes();
} }
@ -2905,6 +2903,7 @@ void StorageReplicatedMergeTree::shutdown()
fetcher.blocker.cancelForever(); fetcher.blocker.cancelForever();
merger_mutator.merges_blocker.cancelForever(); merger_mutator.merges_blocker.cancelForever();
parts_mover.moves_blocker.cancelForever(); parts_mover.moves_blocker.cancelForever();
queue.pull_log_blocker.cancelForever();
restarting_thread.shutdown(); restarting_thread.shutdown();
@ -3641,7 +3640,11 @@ void StorageReplicatedMergeTree::drop(TableStructureWriteLockHolder &)
LOG_INFO(log, "Removing replica " << replica_path); LOG_INFO(log, "Removing replica " << replica_path);
replica_is_active_node = nullptr; replica_is_active_node = nullptr;
/// It may leave some garbage if the replica_path subtree is concurrently modified
zookeeper->tryRemoveRecursive(replica_path); zookeeper->tryRemoveRecursive(replica_path);
if (zookeeper->exists(replica_path))
LOG_ERROR(log, "Replica was not completely removed from ZooKeeper, "
<< replica_path << " still exists and may contain some garbage.");
/// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of previous line. /// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of previous line.
Strings replicas; Strings replicas;
@ -3649,6 +3652,9 @@ void StorageReplicatedMergeTree::drop(TableStructureWriteLockHolder &)
{ {
LOG_INFO(log, "Removing table " << zookeeper_path << " (this might take several minutes)"); LOG_INFO(log, "Removing table " << zookeeper_path << " (this might take several minutes)");
zookeeper->tryRemoveRecursive(zookeeper_path); zookeeper->tryRemoveRecursive(zookeeper_path);
if (zookeeper->exists(zookeeper_path))
LOG_ERROR(log, "Table was not completely removed from ZooKeeper, "
<< zookeeper_path << " still exists and may contain some garbage.");
} }
} }

View File

@ -72,28 +72,28 @@ timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null & timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null & timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null & timeout $TIMEOUT bash -c thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread6 2> /dev/null & timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from ZooKeeper" &
timeout $TIMEOUT bash -c thread1 2> /dev/null & timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null & timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null & timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null & timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null & timeout $TIMEOUT bash -c thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread6 2> /dev/null & timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from ZooKeeper" &
timeout $TIMEOUT bash -c thread1 2> /dev/null & timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null & timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null & timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null & timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null & timeout $TIMEOUT bash -c thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread6 2> /dev/null & timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from ZooKeeper" &
timeout $TIMEOUT bash -c thread1 2> /dev/null & timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null & timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null & timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null & timeout $TIMEOUT bash -c thread4 2> /dev/null &
timeout $TIMEOUT bash -c thread5 2> /dev/null & timeout $TIMEOUT bash -c thread5 2> /dev/null &
timeout $TIMEOUT bash -c thread6 2> /dev/null & timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from ZooKeeper" &
wait wait