// ClickHouse/dbms/src/Common/RWLock.cpp
#include "RWLock.h"
#include <Common/Stopwatch.h>
#include <Common/Exception.h>
#include <Common/CurrentMetrics.h>
#include <Common/ProfileEvents.h>
/// Profile event counters used by RWLockImpl::getLock(); they are only declared here.
namespace ProfileEvents
{
    /// Incremented once per acquired lock, by lock type.
    extern const Event RWLockAcquiredReadLocks;
    extern const Event RWLockAcquiredWriteLocks;

    /// Incremented by the number of milliseconds spent inside getLock(), by lock type.
    extern const Event RWLockReadersWaitMilliseconds;
    extern const Event RWLockWritersWaitMilliseconds;
}
/// Gauge-style metrics used by the lock implementation; they are only declared here.
namespace CurrentMetrics
{
    /// Held by getLock() for the duration of the call, by lock type.
    extern const Metric RWLockWaitingReaders;
    extern const Metric RWLockWaitingWriters;

    /// Held by every LockHolderImpl instance, by lock type.
    extern const Metric RWLockActiveReaders;
    extern const Metric RWLockActiveWriters;
}
namespace DB
{
/// Error codes thrown by this translation unit; they are only declared here.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int DEADLOCK_AVOIDED;
}
/// A single-use object that represents ownership of the lock.
/// For the purpose of exception safety guarantees it is to be used in two steps:
/// 1. Create an instance, allocating all the memory needed.
/// 2. Associate the instance with the lock (attach it to the lock and to the prepared group of locking requests).
class RWLockImpl::LockHolderImpl
{
    /// Becomes true in bind_with(); the destructor does nothing for an unbound holder.
    bool bound{false};
    Type lock_type;
    String query_id;
    /// Constructed with RWLockActiveReaders or RWLockActiveWriters depending on the lock type
    /// (presumably a scoped counter that is released together with the holder).
    CurrentMetrics::Increment active_client_increment;
    /// The lock this holder belongs to; empty until bind_with() is called.
    RWLock parent;
    /// The group of locking requests this holder is counted in; valid only when bound.
    GroupsContainer::iterator it_group;

public:
    LockHolderImpl(const LockHolderImpl & other) = delete;
    LockHolderImpl & operator=(const LockHolderImpl & other) = delete;

    /// Implicit memory allocation for query_id is done here
    LockHolderImpl(const String & query_id_, Type type)
        : lock_type{type}
        , query_id{query_id_}
        , active_client_increment{
            type == Type::Read ? CurrentMetrics::RWLockActiveReaders : CurrentMetrics::RWLockActiveWriters}
    {
    }

    ~LockHolderImpl();

private:
    /// A separate method which binds the holder to the owned lock.
    /// N.B. It is very important that this method produces no allocations.
    ///
    /// Takes ownership of `parent_`, remembers the group and adds one referrer to it.
    /// Returns false (and changes nothing) if the holder has already been bound, true otherwise.
    bool bind_with(RWLock && parent_, GroupsContainer::iterator it_group_) noexcept
    {
        if (bound)
            return false;

        it_group = it_group_;
        parent = std::move(parent_);
        ++it_group->refererrs;
        bound = true;
        return true;
    }

    friend class RWLockImpl;
};
2019-09-01 01:32:44 +00:00
namespace
{
/// Global information about all read locks that query has. It is needed to avoid some type of deadlocks.
class QueryLockInfo
{
private:
mutable std::mutex mutex;
2019-09-01 01:32:44 +00:00
std::map<std::string, size_t> queries;
public:
void add(const String & query_id)
{
std::lock_guard lock(mutex);
const auto res = queries.emplace(query_id, 1); // may throw
if (!res.second)
++res.first->second;
2019-09-01 01:32:44 +00:00
}
void remove(const String & query_id) noexcept
2019-09-01 01:32:44 +00:00
{
std::lock_guard lock(mutex);
const auto query_it = queries.find(query_id);
if (query_it != queries.cend() && --query_it->second == 0)
queries.erase(query_it);
2019-09-01 01:32:44 +00:00
}
void check(const String & query_id) const
2019-09-01 01:32:44 +00:00
{
std::lock_guard lock(mutex);
if (queries.find(query_id) != queries.cend())
2019-09-01 19:21:00 +00:00
throw Exception("Possible deadlock avoided. Client should retry.", ErrorCodes::DEADLOCK_AVOIDED);
2019-09-01 01:32:44 +00:00
}
};
QueryLockInfo all_read_locks;
}
/// Acquires the lock in the requested mode on behalf of `query_id` and returns a holder that
/// releases it on destruction. Blocks until the request's group reaches the front of the queue.
///
/// Throws LOGICAL_ERROR if the same query already owns this lock and asks for a Write lock,
/// or asks for a Read lock while the owning group is a Write group.
/// Throws DEADLOCK_AVOIDED (from all_read_locks.check()) if the request would have to wait
/// while the query is registered as holding a read lock.
///
/// To guarantee that we do not get any piece of our data corrupted:
///   1. Perform all actions that include allocations before changing lock's internal state
///   2. Roll back any changes that make the state inconsistent
///
/// Note: "SM" in the commentaries below stands for STATE MODIFICATION
RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id)
{
    const bool request_has_query_id = query_id != NO_QUERY;

    Stopwatch watch(CLOCK_MONOTONIC_COARSE);
    CurrentMetrics::Increment waiting_client_increment((type == Read) ? CurrentMetrics::RWLockWaitingReaders
                                                                      : CurrentMetrics::RWLockWaitingWriters);
    auto finalize_metrics = [type, &watch] ()
    {
        ProfileEvents::increment((type == Read) ? ProfileEvents::RWLockAcquiredReadLocks
                                                : ProfileEvents::RWLockAcquiredWriteLocks);
        ProfileEvents::increment((type == Read) ? ProfileEvents::RWLockReadersWaitMilliseconds
                                                : ProfileEvents::RWLockWritersWaitMilliseconds, watch.elapsedMilliseconds());
    };

    /// This object is placed above unique_lock, because it may lock in destructor.
    auto lock_holder = std::make_shared<LockHolderImpl>(query_id, type);

    std::unique_lock lock(mutex);

    /// The FastPath:
    /// Check if the same query_id already holds the required lock in which case we can proceed without waiting
    if (request_has_query_id)
    {
        const auto it_query = owner_queries.find(query_id);
        if (it_query != owner_queries.end())
        {
            const auto current_owner_group = queue.begin();

            /// XXX: it means we can't upgrade lock from read to write!
            if (type == Write)
                throw Exception(
                    "RWLockImpl::getLock(): Cannot acquire exclusive lock while RWLock is already locked",
                    ErrorCodes::LOGICAL_ERROR);

            if (current_owner_group->type == Write)
                throw Exception(
                    "RWLockImpl::getLock(): RWLock is already locked in exclusive mode",
                    ErrorCodes::LOGICAL_ERROR);

            /// N.B. Type is Read here, query_id is not empty and it_query is a valid iterator
            all_read_locks.add(query_id);                                     /// SM1: may throw on insertion (nothing to roll back)
            ++it_query->second;                                               /// SM2: nothrow
            lock_holder->bind_with(shared_from_this(), current_owner_group);  /// SM3: nothrow

            finalize_metrics();
            return lock_holder;
        }
    }

    /** If the query already has any active read lock and tries to acquire another read lock
      *  but it is not in front of the queue and has to wait, deadlock is possible:
      *
      * Example (four queries, two RWLocks - 'a' and 'b'):
      *
      *     --> time -->
      *
      * q1: ra          rb
      * q2:    wa
      * q3:       rb       ra
      * q4:          wb
      *
      * We will throw an exception instead.
      */

    if (type == Type::Write || queue.empty() || queue.back().type == Type::Write)
    {
        if (type == Type::Read && request_has_query_id && !queue.empty())
            all_read_locks.check(query_id);

        /// Create a new group of locking requests
        queue.emplace_back(type);                          /// SM1: may throw (nothing to roll back)
    }
    else if (request_has_query_id && queue.size() > 1)
        all_read_locks.check(query_id);

    GroupsContainer::iterator it_group = std::prev(queue.end());

    /// We need to reference the associated group before waiting to guarantee
    /// that this group does not get deleted prematurely
    ++it_group->refererrs;

    /// Wait for a notification until our group becomes the first one in the queue.
    it_group->cv.wait(lock, [&] () { return it_group == queue.begin(); });

    --it_group->refererrs;

    if (request_has_query_id)
    {
        /// Tracks whether SM2 has really been applied. If add() itself throws, nothing was registered
        /// for this request, and calling remove() would decrement a counter that belongs to another
        /// read lock held by the same query.
        bool read_lock_registered = false;

        try
        {
            if (type == Type::Read)
            {
                all_read_locks.add(query_id);              /// SM2: may throw on insertion
                read_lock_registered = true;
            }

            const auto emplace_res =
                    owner_queries.emplace(query_id, 1);    /// SM3: may throw on insertion
            if (!emplace_res.second)
                ++emplace_res.first->second;               /// SM4: nothrow
        }
        catch (...)
        {
            /// Methods std::list<>::emplace_back() and std::unordered_map<>::emplace() provide strong exception safety
            /// We only need to roll back the changes to these objects: all_read_locks and the locking queue
            if (read_lock_registered)
                all_read_locks.remove(query_id);           /// Rollback(SM2): nothrow

            /// Nobody else references the group, so it must not stay in the queue.
            if (it_group->refererrs == 0)
            {
                const auto next = queue.erase(it_group);   /// Rollback(SM1): nothrow
                if (next != queue.end())
                    next->cv.notify_all();
            }

            throw;
        }
    }

    lock_holder->bind_with(shared_from_this(), it_group);  /// SM: nothrow

    finalize_metrics();
    return lock_holder;
}
/// The sequence points of acquiring lock's ownership by an instance of LockHolderImpl:
///   1. all_read_locks is updated
///   2. owner_queries is updated
///   3. request group is updated by LockHolderImpl which in turn becomes "bound"
///
/// Reasoning as to why this algorithm is correct:
/// If by the time when destructor of LockHolderImpl is called the instance has been "bound",
/// it is guaranteed that all three steps have been executed successfully and the resulting state is consistent.
/// With the mutex locked the order of steps to restore the lock's state can be arbitrary
///
/// We do not employ try-catch: if something bad happens, there is nothing we can do =(
RWLockImpl::LockHolderImpl::~LockHolderImpl()
{
    /// A holder that was never bound owns nothing.
    if (!bound || parent == nullptr)
        return;

    std::lock_guard lock(parent->mutex);  // throws - must catch and print warning??

    /// The associated group must exist (and be the beginning of the queue?)
    if (parent->queue.empty() || it_group != parent->queue.begin())
        return;

    /// If query_id is not empty it must be listed in parent->owner_queries
    if (query_id != RWLockImpl::NO_QUERY)
    {
        const auto owner_it = parent->owner_queries.find(query_id);
        if (owner_it != parent->owner_queries.end())
        {
            if (--owner_it->second == 0)                  /// SM: nothrow
                parent->owner_queries.erase(owner_it);    /// SM: nothrow

            if (lock_type == RWLockImpl::Read)
                all_read_locks.remove(query_id);          /// SM: nothrow
        }
    }

    /// If we are the last remaining referrer, remove the group and notify the next group
    if (--it_group->refererrs == 0)                       /// SM: nothrow
    {
        const auto next = parent->queue.erase(it_group);  /// SM: nothrow
        if (next != parent->queue.end())
            next->cv.notify_all();
    }
}
}