mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-23 10:10:50 +00:00
CLICKHOUSE-3847 add "is_lost". load() in Queue can check duplicate records
This commit is contained in:
parent
e3495a6484
commit
75900f494f
@ -93,6 +93,9 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
|
|||||||
: 0];
|
: 0];
|
||||||
String min_pointer_inactive_replica_str;
|
String min_pointer_inactive_replica_str;
|
||||||
|
|
||||||
|
std::unordered_map<String, String> log_pointers_lost_replicas;
|
||||||
|
std::unordered_map<String, UInt32> log_pointers_version;
|
||||||
|
|
||||||
for (const String & replica : replicas)
|
for (const String & replica : replicas)
|
||||||
{
|
{
|
||||||
zkutil::Stat log_pointer_stat;
|
zkutil::Stat log_pointer_stat;
|
||||||
@ -110,6 +113,8 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
|
|||||||
min_pointer_active_replica = std::min(min_pointer_active_replica, log_pointer);
|
min_pointer_active_replica = std::min(min_pointer_active_replica, log_pointer);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
log_pointers_lost_replicas[replica] = log_pointer_str;
|
||||||
|
log_pointers_version[replica] = log_pointer_stat.version;
|
||||||
|
|
||||||
if (log_pointer_str >= min_saved_record_log_str)
|
if (log_pointer_str >= min_saved_record_log_str)
|
||||||
{
|
{
|
||||||
@ -132,11 +137,21 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
|
|||||||
/// We will not touch records that are no less than `min_pointer_active_replica`.
|
/// We will not touch records that are no less than `min_pointer_active_replica`.
|
||||||
entries.erase(std::lower_bound(entries.begin(), entries.end(), min_pointer_replica_str), entries.end());
|
entries.erase(std::lower_bound(entries.begin(), entries.end(), min_pointer_replica_str), entries.end());
|
||||||
|
|
||||||
/// We must check if we are only active_node
|
|
||||||
|
|
||||||
if (entries.empty())
|
if (entries.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
/// We must mark lost replicas.
|
||||||
|
try
|
||||||
|
{
|
||||||
|
markLostReplicas(log_pointers_lost_replicas, log_pointers_version, entries[0], zookeeper);
|
||||||
|
}
|
||||||
|
catch (const zkutil::KeeperException & e)
|
||||||
|
{
|
||||||
|
if (e.code != ZooKeeperImpl::ZooKeeper::ZNODEEXISTS)
|
||||||
|
throw e;
|
||||||
|
else return;
|
||||||
|
}
|
||||||
|
|
||||||
zkutil::Requests ops;
|
zkutil::Requests ops;
|
||||||
for (size_t i = 0; i < entries.size(); ++i)
|
for (size_t i = 0; i < entries.size(); ++i)
|
||||||
{
|
{
|
||||||
@ -155,6 +170,30 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ReplicatedMergeTreeCleanupThread::markLostReplicas(const std::unordered_map<String, String> & log_pointers_lost_replicas,
|
||||||
|
const std::unordered_map<String, UInt32> & log_pointers_version,
|
||||||
|
const String & remove_border, const zkutil::ZooKeeperPtr & zookeeper)
|
||||||
|
{
|
||||||
|
std::vector<zkutil::ZooKeeper::FutureMulti> futures;
|
||||||
|
|
||||||
|
for (auto pair : log_pointers_lost_replicas)
|
||||||
|
{
|
||||||
|
String replica = pair.first;
|
||||||
|
if (pair.second <= remove_border)
|
||||||
|
{
|
||||||
|
zkutil::Requests ops;
|
||||||
|
/// If log pointer changes version we can not mark replicas, so we check it.
|
||||||
|
ops.emplace_back(zkutil::makeCheckRequest(storage.zookeeper_path + "/replicas/" + replica + "/log_pointer", log_pointers_version.at(replica)));
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(storage.zookeeper_path + "/replicas/" + replica + "/is_lost", "", zkutil::CreateMode::Persistent));
|
||||||
|
futures.push_back(zookeeper->asyncMulti(ops));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto & future : futures)
|
||||||
|
future.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
struct ReplicatedMergeTreeCleanupThread::NodeWithStat
|
struct ReplicatedMergeTreeCleanupThread::NodeWithStat
|
||||||
{
|
{
|
||||||
String node;
|
String node;
|
||||||
|
@ -39,6 +39,11 @@ private:
|
|||||||
/// Remove old records from ZooKeeper.
|
/// Remove old records from ZooKeeper.
|
||||||
void clearOldLogs();
|
void clearOldLogs();
|
||||||
|
|
||||||
|
/// Mark lost replicas.
|
||||||
|
void markLostReplicas(const std::unordered_map<String, String> & log_pointers_lost_replicas,
|
||||||
|
const std::unordered_map <String, UInt32> & log_pointers_version,
|
||||||
|
const String & remove_border, const zkutil::ZooKeeperPtr & zookeeper);
|
||||||
|
|
||||||
/// Remove old block hashes from ZooKeeper. This is done by the leader replica.
|
/// Remove old block hashes from ZooKeeper. This is done by the leader replica.
|
||||||
void clearOldBlocks();
|
void clearOldBlocks();
|
||||||
|
|
||||||
|
@ -47,13 +47,25 @@ bool ReplicatedMergeTreeQueue::load(zkutil::ZooKeeperPtr zookeeper)
|
|||||||
std::optional<time_t> min_unprocessed_insert_time_changed;
|
std::optional<time_t> min_unprocessed_insert_time_changed;
|
||||||
|
|
||||||
{
|
{
|
||||||
std::lock_guard lock(state_mutex);
|
std::lock_guard pull_logs_lock(pull_logs_to_queue_mutex);
|
||||||
|
|
||||||
String log_pointer_str = zookeeper->get(replica_path + "/log_pointer");
|
String log_pointer_str = zookeeper->get(replica_path + "/log_pointer");
|
||||||
log_pointer = log_pointer_str.empty() ? 0 : parse<UInt64>(log_pointer_str);
|
log_pointer = log_pointer_str.empty() ? 0 : parse<UInt64>(log_pointer_str);
|
||||||
|
|
||||||
Strings children = zookeeper->getChildren(queue_path);
|
Strings children = zookeeper->getChildren(queue_path);
|
||||||
LOG_DEBUG(log, "Having " << children.size() << " queue entries to load.");
|
|
||||||
|
std::unordered_set<String> already_loaded_paths;
|
||||||
|
auto to_remove_it = std::remove_if(
|
||||||
|
children.begin(), children.end(), [&](const String & path)
|
||||||
|
{
|
||||||
|
return already_loaded_paths.count(path);
|
||||||
|
});
|
||||||
|
|
||||||
|
LOG_DEBUG(log,
|
||||||
|
"Having " << (to_remove_it - children.begin()) << " queue entries to load, "
|
||||||
|
<< (children.end() - to_remove_it) << " entries already loaded.");
|
||||||
|
children.erase(to_remove_it, children.end());
|
||||||
|
|
||||||
std::sort(children.begin(), children.end());
|
std::sort(children.begin(), children.end());
|
||||||
|
|
||||||
zkutil::AsyncResponses<zkutil::GetResponse> futures;
|
zkutil::AsyncResponses<zkutil::GetResponse> futures;
|
||||||
@ -68,6 +80,8 @@ bool ReplicatedMergeTreeQueue::load(zkutil::ZooKeeperPtr zookeeper)
|
|||||||
LogEntryPtr entry = LogEntry::parse(res.data, res.stat);
|
LogEntryPtr entry = LogEntry::parse(res.data, res.stat);
|
||||||
entry->znode_name = future.first;
|
entry->znode_name = future.first;
|
||||||
|
|
||||||
|
std::lock_guard lock(state_mutex);
|
||||||
|
|
||||||
insertUnlocked(entry, min_unprocessed_insert_time_changed, lock);
|
insertUnlocked(entry, min_unprocessed_insert_time_changed, lock);
|
||||||
|
|
||||||
updated = true;
|
updated = true;
|
||||||
@ -93,7 +107,6 @@ void ReplicatedMergeTreeQueue::initialize(
|
|||||||
log = &Logger::get(logger_name);
|
log = &Logger::get(logger_name);
|
||||||
|
|
||||||
addVirtualParts(parts);
|
addVirtualParts(parts);
|
||||||
load(zookeeper);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -147,9 +147,6 @@ private:
|
|||||||
/// Put a set of (already existing) parts in virtual_parts.
|
/// Put a set of (already existing) parts in virtual_parts.
|
||||||
void addVirtualParts(const MergeTreeData::DataParts & parts);
|
void addVirtualParts(const MergeTreeData::DataParts & parts);
|
||||||
|
|
||||||
/// Load (initialize) a queue from ZooKeeper (/replicas/me/queue/).
|
|
||||||
bool load(zkutil::ZooKeeperPtr zookeeper);
|
|
||||||
|
|
||||||
void insertUnlocked(
|
void insertUnlocked(
|
||||||
const LogEntryPtr & entry, std::optional<time_t> & min_unprocessed_insert_time_changed,
|
const LogEntryPtr & entry, std::optional<time_t> & min_unprocessed_insert_time_changed,
|
||||||
std::lock_guard<std::mutex> & state_lock);
|
std::lock_guard<std::mutex> & state_lock);
|
||||||
@ -233,6 +230,9 @@ public:
|
|||||||
*/
|
*/
|
||||||
bool remove(zkutil::ZooKeeperPtr zookeeper, const String & part_name);
|
bool remove(zkutil::ZooKeeperPtr zookeeper, const String & part_name);
|
||||||
|
|
||||||
|
/// Load (initialize) a queue from ZooKeeper (/replicas/me/queue/).
|
||||||
|
bool load(zkutil::ZooKeeperPtr zookeeper);
|
||||||
|
|
||||||
bool removeFromVirtualParts(const MergeTreePartInfo & part_info);
|
bool removeFromVirtualParts(const MergeTreePartInfo & part_info);
|
||||||
|
|
||||||
/** Copy the new entries from the shared log to the queue of this replica. Set the log_pointer to the appropriate value.
|
/** Copy the new entries from the shared log to the queue of this replica. Set the log_pointer to the appropriate value.
|
||||||
|
@ -200,11 +200,15 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup()
|
|||||||
|
|
||||||
storage.cloneReplicaIfNeeded();
|
storage.cloneReplicaIfNeeded();
|
||||||
|
|
||||||
|
const auto & zookeeper = storage.getZooKeeper();
|
||||||
|
|
||||||
|
storage.queue.load(zookeeper);
|
||||||
|
|
||||||
/// pullLogsToQueue() after we mark replica 'is_active' and clone();
|
/// pullLogsToQueue() after we mark replica 'is_active' and clone();
|
||||||
/// because cleanup_thread don't del our log_pointer.
|
/// because cleanup_thread don't del our log_pointer.
|
||||||
storage.queue.pullLogsToQueue(storage.getZooKeeper());
|
storage.queue.pullLogsToQueue(storage.getZooKeeper());
|
||||||
storage.last_queue_update_finish_time.store(time(nullptr));
|
storage.last_queue_update_finish_time.store(time(nullptr));
|
||||||
|
|
||||||
storage.cloneReplicaIfNeeded();
|
storage.cloneReplicaIfNeeded();
|
||||||
updateQuorumIfWeHavePart();
|
updateQuorumIfWeHavePart();
|
||||||
|
|
||||||
|
@ -1977,11 +1977,10 @@ bool StorageReplicatedMergeTree::cloneReplica(const String & source_replica, zku
|
|||||||
|
|
||||||
Strings entries = zookeeper->getChildren(zookeeper_path + "/log");
|
Strings entries = zookeeper->getChildren(zookeeper_path + "/log");
|
||||||
|
|
||||||
if (!entries.empty())
|
if (entries.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
std::sort(entries.begin(), entries.end());
|
std::sort(entries.begin(), entries.end());
|
||||||
|
|
||||||
if ("log-" + padIndex(parse<UInt64>(raw_log_pointer)) < entries[0])
|
if ("log-" + padIndex(parse<UInt64>(raw_log_pointer)) < entries[0])
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -2053,12 +2052,14 @@ void StorageReplicatedMergeTree::cloneReplicaIfNeeded()
|
|||||||
{
|
{
|
||||||
String source_replica_path = zookeeper_path + "/replicas/" + replica_name;
|
String source_replica_path = zookeeper_path + "/replicas/" + replica_name;
|
||||||
String source_log_pointer_raw = zookeeper->get(source_replica_path + "/log_pointer");
|
String source_log_pointer_raw = zookeeper->get(source_replica_path + "/log_pointer");
|
||||||
if ((source_replica_path != replica_path) && (!source_log_pointer_raw.empty()) && ("log-" + padIndex(parse<UInt64>(source_log_pointer_raw)) >= entries[0]))
|
if ((source_replica_path != replica_path) && (!zookeeper->exists(source_replica_path + "/is_lost")))
|
||||||
source_replica = replica_name;
|
source_replica = replica_name;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} while (cloneReplica(source_replica, zookeeper));
|
} while (!cloneReplica(source_replica, zookeeper));
|
||||||
|
|
||||||
|
queueUpdatingTask();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user