From 682eb6bcc99696a74059e91270634152c370b9b5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Jun 2022 15:10:45 +0200 Subject: [PATCH 1/4] Add checks for numChildren --- src/Coordination/Changelog.cpp | 2 +- src/Coordination/KeeperSnapshotManager.cpp | 22 ++++++++++++++++++++++ src/Coordination/KeeperStorage.cpp | 10 ++++++++-- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index e61a827472e..3e2ef297ea4 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -405,7 +405,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin if (last_log_read_result->last_read_index == 0 || last_log_read_result->error) /// If it's broken log then remove it { - LOG_INFO(log, "Removing log {} because it's empty or read finished with error", description.path); + LOG_INFO(log, "Removing chagelog {} because it's empty or read finished with error", description.path); std::filesystem::remove(description.path); existing_changelogs.erase(last_log_read_result->log_start_index); std::erase_if(logs, [last_log_read_result] (const auto & item) { return item.first >= last_log_read_result->log_start_index; }); diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 1481767add3..5b1c75b8352 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB { @@ -20,6 +21,7 @@ namespace ErrorCodes { extern const int UNKNOWN_FORMAT_VERSION; extern const int UNKNOWN_SNAPSHOT; + extern const int LOGICAL_ERROR; } namespace @@ -296,6 +298,26 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial } } + for (const auto & itr : storage.container) + { + if (itr.key != "/") + { + if (itr.value.stat.numChildren != static_cast(itr.value.getChildren().size())) + { +#ifdef NDEBUG + LOG_ERROR(&Poco::Logger::get("KeeperSnapshotManager"), "Children counter in stat.numChildren {}" + " is different from actual children size {} for node {}", itr.value.stat.numChildren, itr.value.getChildren().size(), itr.key); + + itr.value.stat.numChildren = static_cast(itr.value.getChildren().size()); +#else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Children counter in stat.numChildren {}" + " is different from actual children size {} for node {}", itr.value.stat.numChildren, itr.value.getChildren().size(), itr.key); +#endif + } + } + } + + size_t active_sessions_size; readBinary(active_sessions_size, in); diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index f6121b238b3..f23394e09ee 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -349,7 +349,9 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr container.updateValue(parent_path, [child_path, zxid, &prev_parent_zxid, parent_cversion, &prev_parent_cversion] (KeeperStorage::Node & parent) { + ++parent.stat.numChildren; parent.addChild(child_path); + prev_parent_cversion = parent.stat.cversion; prev_parent_zxid = parent.stat.pzxid; @@ -363,7 +365,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr if (zxid > parent.stat.pzxid) parent.stat.pzxid = zxid; - ++parent.stat.numChildren; + assert(parent.stat.numChildren == static_cast(parent.getChildren().size())); }); response.path_created = path_created; @@ -385,6 +387,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr undo_parent.stat.cversion = prev_parent_cversion; undo_parent.stat.pzxid = prev_parent_zxid; undo_parent.removeChild(child_path); + assert(undo_parent.stat.numChildren == static_cast(undo_parent.getChildren().size())); }); storage.container.erase(path_created); @@ -494,7 +497,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr { response.error = Coordination::Error::ZBADVERSION; } - else if (it->value.stat.numChildren) + else if (!it->value.getChildren().empty()) { response.error = Coordination::Error::ZNOTEMPTY; } @@ -519,6 +522,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr --parent.stat.numChildren; ++parent.stat.cversion; parent.removeChild(child_basename); + assert(parent.stat.numChildren == static_cast(parent.getChildren().size())); }); response.error = Coordination::Error::ZOK; @@ -540,6 +544,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr ++parent.stat.numChildren; --parent.stat.cversion; parent.addChild(child_name); + assert(parent.stat.numChildren == static_cast(parent.getChildren().size())); }); }; } @@ -1110,6 +1115,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina ++parent.stat.cversion; auto base_name = getBaseName(ephemeral_path); parent.removeChild(base_name); + assert(parent.stat.numChildren == static_cast(parent.getChildren().size())); }); container.erase(ephemeral_path); From 4c574f30afc1f44a92784fe1fa8ad04a8d77749e Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Jun 2022 15:12:14 +0200 Subject: [PATCH 2/4] Add comment --- src/Coordination/KeeperSnapshotManager.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 5b1c75b8352..e5627a7cd12 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -308,6 +308,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial LOG_ERROR(&Poco::Logger::get("KeeperSnapshotManager"), "Children counter in stat.numChildren {}" " is different from actual children size {} for node {}", itr.value.stat.numChildren, itr.value.getChildren().size(), itr.key); + /// TODO (alesapin) remove this, it should be always CORRUPTED_DATA. itr.value.stat.numChildren = static_cast(itr.value.getChildren().size()); #else throw Exception(ErrorCodes::LOGICAL_ERROR, "Children counter in stat.numChildren {}" From aa1fb9b2e7cd29c7efee9482ade0d0fefac5e77c Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Jun 2022 15:27:27 +0200 Subject: [PATCH 3/4] Remove trash --- src/Coordination/KeeperSnapshotManager.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index e5627a7cd12..cb0da723453 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -305,11 +305,9 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial if (itr.value.stat.numChildren != static_cast(itr.value.getChildren().size())) { #ifdef NDEBUG + /// TODO (alesapin) remove this, it should be always CORRUPTED_DATA. LOG_ERROR(&Poco::Logger::get("KeeperSnapshotManager"), "Children counter in stat.numChildren {}" " is different from actual children size {} for node {}", itr.value.stat.numChildren, itr.value.getChildren().size(), itr.key); - - /// TODO (alesapin) remove this, it should be always CORRUPTED_DATA. - itr.value.stat.numChildren = static_cast(itr.value.getChildren().size()); #else throw Exception(ErrorCodes::LOGICAL_ERROR, "Children counter in stat.numChildren {}" " is different from actual children size {} for node {}", itr.value.stat.numChildren, itr.value.getChildren().size(), itr.key); From aff6b7a8d63aaf0d2897efe7aa26bc5191404465 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 12 Jun 2022 00:01:41 +0200 Subject: [PATCH 4/4] Fix keeper storage --- src/Coordination/KeeperStorage.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index f23394e09ee..e47a0905194 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include namespace DB { @@ -365,7 +365,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr if (zxid > parent.stat.pzxid) parent.stat.pzxid = zxid; - assert(parent.stat.numChildren == static_cast(parent.getChildren().size())); + chassert(parent.stat.numChildren == static_cast(parent.getChildren().size())); }); response.path_created = path_created; @@ -387,7 +387,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr undo_parent.stat.cversion = prev_parent_cversion; undo_parent.stat.pzxid = prev_parent_zxid; undo_parent.removeChild(child_path); - assert(undo_parent.stat.numChildren == static_cast(undo_parent.getChildren().size())); + chassert(undo_parent.stat.numChildren == static_cast(undo_parent.getChildren().size())); }); storage.container.erase(path_created); @@ -522,7 +522,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr --parent.stat.numChildren; ++parent.stat.cversion; parent.removeChild(child_basename); - assert(parent.stat.numChildren == static_cast(parent.getChildren().size())); + chassert(parent.stat.numChildren == static_cast(parent.getChildren().size())); }); response.error = Coordination::Error::ZOK; @@ -544,7 +544,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr ++parent.stat.numChildren; --parent.stat.cversion; parent.addChild(child_name); - assert(parent.stat.numChildren == static_cast(parent.getChildren().size())); + chassert(parent.stat.numChildren == static_cast(parent.getChildren().size())); }); }; } @@ -1115,7 +1115,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina ++parent.stat.cversion; auto base_name = getBaseName(ephemeral_path); parent.removeChild(base_name); - assert(parent.stat.numChildren == static_cast(parent.getChildren().size())); + chassert(parent.stat.numChildren == static_cast(parent.getChildren().size())); }); container.erase(ephemeral_path);