Merge pull request #9901 from ClickHouse/fix_optimize_drop_race

Fix race condition between optimize and drop
This commit is contained in:
alexey-milovidov 2020-03-28 04:32:38 +03:00 committed by GitHub
commit e75cf7b6af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 82 additions and 5 deletions

View File

@ -1549,6 +1549,10 @@ std::vector<MergeTreeMutationStatus> ReplicatedMergeTreeQueue::getMutationsStatu
return result;
}
/// Acquires state_mutex, pull_logs_to_queue_mutex and update_mutations_mutex
/// together and hands the resulting guard object back to the caller.
ReplicatedMergeTreeQueue::QueueLocks ReplicatedMergeTreeQueue::lockQueue()
{
    return QueueLocks{state_mutex, pull_logs_to_queue_mutex, update_mutations_mutex};
}
ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate(
ReplicatedMergeTreeQueue & queue_, zkutil::ZooKeeperPtr & zookeeper)

View File

@ -384,6 +384,15 @@ public:
std::vector<MergeTreeMutationStatus> getMutationsStatus() const;
void removeCurrentPartsFromMutations();
/// Guard type that holds three std::mutex objects locked at once.
using QueueLocks = std::scoped_lock<std::mutex, std::mutex, std::mutex>;
/// This method locks all important queue mutexes: state_mutex,
/// pull_logs_to_queue_mutex and update_mutations_mutex. It should be used
/// only once, when we want to shut down the queue and remove its task from
/// the pool. It is needed because the queue itself can trigger its task
/// handler, in which case a race condition is possible.
/// The mutexes stay locked for as long as the returned object is alive.
QueueLocks lockQueue();
};
class ReplicatedMergeTreeMergePredicate

View File

@ -334,7 +334,6 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown()
storage.partial_shutdown_called = true;
storage.partial_shutdown_event.set();
storage.alter_query_event->set();
storage.replica_is_active_node = nullptr;
LOG_TRACE(log, "Waiting for threads to finish");

View File

@ -2882,7 +2882,14 @@ void StorageReplicatedMergeTree::shutdown()
if (queue_task_handle)
global_context.getBackgroundPool().removeTask(queue_task_handle);
queue_task_handle.reset();
{
/// The queue can trigger queue_task_handle itself, so we make sure that all
/// queue processes have finished before resetting queue_task_handle.
auto lock = queue.lockQueue();
queue_task_handle.reset();
}
if (move_parts_task_handle)
global_context.getBackgroundMovePool().removeTask(move_parts_task_handle);

View File

@ -284,9 +284,6 @@ private:
/// A thread that processes reconnection to ZooKeeper when the session expires.
ReplicatedMergeTreeRestartingThread restarting_thread;
/// An event that awakens `alter` method from waiting for the completion of the ALTER query.
zkutil::EventPtr alter_query_event = std::make_shared<Poco::Event>();
/// True if replica was created for existing table with fixed granularity
bool other_replicas_fixed_granularity = false;

View File

@ -0,0 +1,61 @@
#!/usr/bin/env bash
# Stress test: runs INSERT, OPTIMIZE ... FINAL and DROP/CREATE concurrently
# against a single ReplicatedMergeTree table.

# Absolute directory of this script, used to locate the shared test config.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# Quoted so that a checkout path containing spaces or glob characters is not
# split or expanded before the file is sourced.
. "$CURDIR"/../shell_config.sh

# Abort this (parent) shell on the first failing command.
set -e
# Worker: endlessly inserts batches of 10000 generated rows into
# concurrent_optimize_table. It never returns on its own.
thread1()
{
    while :
    do
        $CLICKHOUSE_CLIENT -q "INSERT INTO concurrent_optimize_table SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(10000)"
    done
}
# Worker: endlessly issues OPTIMIZE ... FINAL on concurrent_optimize_table,
# pausing for a random sub-second interval between attempts.
thread2()
{
    while :
    do
        $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE concurrent_optimize_table FINAL"
        sleep "0.$RANDOM"
    done
}
# Worker: endlessly drops and re-creates concurrent_optimize_table in one
# multi-statement query, then pauses for three random sub-second intervals.
thread3()
{
    while :
    do
        # The SQL text continues on the next line inside the same quoted string.
        $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS concurrent_optimize_table;
CREATE TABLE concurrent_optimize_table (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/concurrent_optimize_table', '1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0;"
        for _ in 1 2 3
        do
            sleep "0.$RANDOM"
        done
    done
}
# Export the workers so the child `bash -c` shells started below can call them.
export -f thread1 thread2 thread3

# Each worker is stopped by `timeout` after this many seconds.
TIMEOUT=15

# Start four background instances of every worker, in thread1/2/3 order per
# round, with their stderr discarded.
for _ in 1 2 3 4
do
    for worker in thread1 thread2 thread3
    do
        timeout $TIMEOUT bash -c "$worker" 2> /dev/null &
    done
done

# Block until every background worker has exited.
wait

# Final cleanup of the table used by the workers.
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS concurrent_optimize_table"