diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp index 639e0957f49..5f2787f8930 100644 --- a/programs/keeper-converter/KeeperConverter.cpp +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -45,16 +45,17 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) keeper_context->setDigestEnabled(true); keeper_context->setSnapshotDisk(std::make_shared("Keeper-snapshots", options["output-dir"].as())); - DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); + /// TODO(hanfei): support rocksdb here + DB::KeeperMemoryStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); DB::deserializeKeeperStorageFromSnapshotsDir(storage, options["zookeeper-snapshots-dir"].as(), logger); storage.initializeSystemNodes(); DB::deserializeLogsAndApplyToStorage(storage, options["zookeeper-logs-dir"].as(), logger); DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(storage.getZXID(), 1, std::make_shared()); - DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta); + DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta); - DB::KeeperSnapshotManager manager(1, keeper_context); + DB::KeeperSnapshotManager manager(1, keeper_context); auto snp = manager.serializeSnapshotToBuffer(snapshot); auto file_info = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID()); std::cout << "Snapshot serialized to path:" << fs::path(file_info->disk->getPath()) / file_info->path << std::endl; diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index 6e23a56ef97..35a23fc9e78 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -55,6 +55,7 @@ struct Settings; M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \ M(UInt64, raft_limits_reconnect_limit, 50, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0) \ M(Bool, async_replication, false, "Enable async replication. All write and read guarantees are preserved while better performance is achieved. Settings is disabled by default to not break backwards compatibility.", 0) \ + M(Bool, experimental_use_rocksdb, false, "Use rocksdb as backend storage", 0) \ M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \ M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \ M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \ diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 4eab03e3c2c..1f66882ecad 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -5,18 +5,27 @@ #include #include +#include +#include +#include +#include +#include +#include #include #include #include -#include #include -#include -#include -#include #include #include +#include "config.h" +#if USE_ROCKSDB +#include +#include +#include +#endif + namespace DB { @@ -24,6 +33,8 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; +extern const int LOGICAL_ERROR; +extern const int ROCKSDB_ERROR; } @@ -41,6 +52,95 @@ KeeperContext::KeeperContext(bool standalone_keeper_, CoordinationSettingsPtr co system_nodes_with_data[keeper_api_version_path] = toString(static_cast(KeeperApiVersion::WITH_MULTI_READ)); } +#if USE_ROCKSDB +using RocksDBOptions = std::unordered_map; + +static RocksDBOptions getOptionsFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & path) +{ + RocksDBOptions options; + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(path, keys); + + for (const auto & key : keys) + { + const String key_path = path + "." + key; + options[key] = config.getString(key_path); + } + + return options; +} + +static rocksdb::Options getRocksDBOptionsFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + rocksdb::Status status; + rocksdb::Options base; + + base.create_if_missing = true; + base.compression = rocksdb::CompressionType::kZSTD; + base.statistics = rocksdb::CreateDBStatistics(); + /// It is too verbose by default, and in fact we don't care about rocksdb logs at all. + base.info_log_level = rocksdb::ERROR_LEVEL; + + rocksdb::Options merged = base; + rocksdb::BlockBasedTableOptions table_options; + + if (config.has("keeper_server.rocksdb.options")) + { + auto config_options = getOptionsFromConfig(config, "keeper_server.rocksdb.options"); + status = rocksdb::GetDBOptionsFromMap(merged, config_options, &merged); + if (!status.ok()) + { + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.options' : {}", + status.ToString()); + } + } + if (config.has("rocksdb.column_family_options")) + { + auto column_family_options = getOptionsFromConfig(config, "rocksdb.column_family_options"); + status = rocksdb::GetColumnFamilyOptionsFromMap(merged, column_family_options, &merged); + if (!status.ok()) + { + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.column_family_options' at: {}", status.ToString()); + } + } + if (config.has("rocksdb.block_based_table_options")) + { + auto block_based_table_options = getOptionsFromConfig(config, "rocksdb.block_based_table_options"); + status = rocksdb::GetBlockBasedTableOptionsFromMap(table_options, block_based_table_options, &table_options); + if (!status.ok()) + { + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.block_based_table_options' at: {}", status.ToString()); + } + } + + merged.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options)); + return merged; +} +#endif + +KeeperContext::Storage KeeperContext::getRocksDBPathFromConfig(const Poco::Util::AbstractConfiguration & config) const +{ + const auto create_local_disk = [](const auto & path) + { + if (fs::exists(path)) + fs::remove_all(path); + fs::create_directories(path); + + return std::make_shared("LocalRocksDBDisk", path); + }; + if (config.has("keeper_server.rocksdb_path")) + return create_local_disk(config.getString("keeper_server.rocksdb_path")); + + if (config.has("keeper_server.storage_path")) + return create_local_disk(std::filesystem::path{config.getString("keeper_server.storage_path")} / "rocksdb"); + + if (standalone_keeper) + return create_local_disk(std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "rocksdb"); + else + return create_local_disk(std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/rocksdb"); +} + void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config, KeeperDispatcher * dispatcher_) { dispatcher = dispatcher_; @@ -59,6 +159,14 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config, initializeFeatureFlags(config); initializeDisks(config); + + #if USE_ROCKSDB + if (config.getBool("keeper_server.coordination_settings.experimental_use_rocksdb", false)) + { + rocksdb_options = std::make_shared(getRocksDBOptionsFromConfig(config)); + digest_enabled = false; /// TODO: support digest + } + #endif } namespace @@ -94,6 +202,8 @@ void KeeperContext::initializeDisks(const Poco::Util::AbstractConfiguration & co { disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance(), diskValidator); + rocksdb_storage = getRocksDBPathFromConfig(config); + log_storage = getLogsPathFromConfig(config); if (config.has("keeper_server.latest_log_storage_disk")) @@ -262,6 +372,37 @@ void KeeperContext::dumpConfiguration(WriteBufferFromOwnString & buf) const } } + +void KeeperContext::setRocksDBDisk(DiskPtr disk) +{ + rocksdb_storage = std::move(disk); +} + +DiskPtr KeeperContext::getTemporaryRocksDBDisk() const +{ + DiskPtr rocksdb_disk = getDisk(rocksdb_storage); + if (!rocksdb_disk) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "rocksdb storage is not initialized"); + } + auto uuid_str = formatUUID(UUIDHelpers::generateV4()); + String path_to_create = "rocks_" + std::string(uuid_str.data(), uuid_str.size()); + rocksdb_disk->createDirectory(path_to_create); + return std::make_shared("LocalTmpRocksDBDisk", fullPath(rocksdb_disk, path_to_create)); +} + +void KeeperContext::setRocksDBOptions(std::shared_ptr rocksdb_options_) +{ + if (rocksdb_options_ != nullptr) + rocksdb_options = rocksdb_options_; + else + { + #if USE_ROCKSDB + rocksdb_options = std::make_shared(getRocksDBOptionsFromConfig(Poco::Util::JSONConfiguration())); + #endif + } +} + KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const { const auto create_local_disk = [](const auto & path) diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index e283e65dffa..38013725f56 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -6,6 +6,11 @@ #include #include +namespace rocksdb +{ +struct Options; +} + namespace DB { @@ -62,6 +67,12 @@ public: constexpr KeeperDispatcher * getDispatcher() const { return dispatcher; } + void setRocksDBDisk(DiskPtr disk); + DiskPtr getTemporaryRocksDBDisk() const; + + void setRocksDBOptions(std::shared_ptr rocksdb_options_ = nullptr); + std::shared_ptr getRocksDBOptions() const { return rocksdb_options; } + UInt64 getKeeperMemorySoftLimit() const { return memory_soft_limit; } void updateKeeperMemorySoftLimit(const Poco::Util::AbstractConfiguration & config); @@ -90,6 +101,7 @@ private: void initializeFeatureFlags(const Poco::Util::AbstractConfiguration & config); void initializeDisks(const Poco::Util::AbstractConfiguration & config); + Storage getRocksDBPathFromConfig(const Poco::Util::AbstractConfiguration & config) const; Storage getLogsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const; Storage getSnapshotsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const; Storage getStatePathFromConfig(const Poco::Util::AbstractConfiguration & config) const; @@ -111,12 +123,15 @@ private: std::shared_ptr disk_selector; + Storage rocksdb_storage; Storage log_storage; Storage latest_log_storage; Storage snapshot_storage; Storage latest_snapshot_storage; Storage state_file_storage; + std::shared_ptr rocksdb_options; + std::vector old_log_disk_names; std::vector old_snapshot_disk_names; diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 8c7e6405153..4a350077596 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -117,13 +117,13 @@ void KeeperDispatcher::requestThread() RaftAppendResult prev_result = nullptr; /// Requests from previous iteration. We store them to be able /// to send errors to the client. - KeeperStorage::RequestsForSessions prev_batch; + KeeperStorageBase::RequestsForSessions prev_batch; const auto & shutdown_called = keeper_context->isShutdownCalled(); while (!shutdown_called) { - KeeperStorage::RequestForSession request; + KeeperStorageBase::RequestForSession request; auto coordination_settings = configuration_and_settings->coordination_settings; uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds(); @@ -153,7 +153,7 @@ void KeeperDispatcher::requestThread() continue; } - KeeperStorage::RequestsForSessions current_batch; + KeeperStorageBase::RequestsForSessions current_batch; size_t current_batch_bytes_size = 0; bool has_read_request = false; @@ -311,7 +311,7 @@ void KeeperDispatcher::responseThread() const auto & shutdown_called = keeper_context->isShutdownCalled(); while (!shutdown_called) { - KeeperStorage::ResponseForSession response_for_session; + KeeperStorageBase::ResponseForSession response_for_session; uint64_t max_wait = configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds(); @@ -402,7 +402,7 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ return false; } - KeeperStorage::RequestForSession request_info; + KeeperStorageBase::RequestForSession request_info; request_info.request = request; using namespace std::chrono; request_info.time = duration_cast(system_clock::now().time_since_epoch()).count(); @@ -448,7 +448,7 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf snapshots_queue, keeper_context, snapshot_s3, - [this](uint64_t /*log_idx*/, const KeeperStorage::RequestForSession & request_for_session) + [this](uint64_t /*log_idx*/, const KeeperStorageBase::RequestForSession & request_for_session) { { /// check if we have queue of read requests depending on this request to be committed @@ -540,7 +540,7 @@ void KeeperDispatcher::shutdown() update_configuration_thread.join(); } - KeeperStorage::RequestForSession request_for_session; + KeeperStorageBase::RequestForSession request_for_session; /// Set session expired for all pending requests while (requests_queue && requests_queue->tryPop(request_for_session)) @@ -551,7 +551,7 @@ void KeeperDispatcher::shutdown() setResponse(request_for_session.session_id, response); } - KeeperStorage::RequestsForSessions close_requests; + KeeperStorageBase::RequestsForSessions close_requests; { /// Clear all registered sessions std::lock_guard lock(session_to_response_callback_mutex); @@ -565,7 +565,7 @@ void KeeperDispatcher::shutdown() auto request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close); request->xid = Coordination::CLOSE_XID; using namespace std::chrono; - KeeperStorage::RequestForSession request_info + KeeperStorageBase::RequestForSession request_info { .session_id = session, .time = duration_cast(system_clock::now().time_since_epoch()).count(), @@ -663,7 +663,7 @@ void KeeperDispatcher::sessionCleanerTask() auto request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close); request->xid = Coordination::CLOSE_XID; using namespace std::chrono; - KeeperStorage::RequestForSession request_info + KeeperStorageBase::RequestForSession request_info { .session_id = dead_session, .time = duration_cast(system_clock::now().time_since_epoch()).count(), @@ -711,16 +711,16 @@ void KeeperDispatcher::finishSession(int64_t session_id) } } -void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSessions & requests_for_sessions, Coordination::Error error) +void KeeperDispatcher::addErrorResponses(const KeeperStorageBase::RequestsForSessions & requests_for_sessions, Coordination::Error error) { for (const auto & request_for_session : requests_for_sessions) { - KeeperStorage::ResponsesForSessions responses; + KeeperStorageBase::ResponsesForSessions responses; auto response = request_for_session.request->makeResponse(); response->xid = request_for_session.request->xid; response->zxid = 0; response->error = error; - if (!responses_queue.push(DB::KeeperStorage::ResponseForSession{request_for_session.session_id, response})) + if (!responses_queue.push(DB::KeeperStorageBase::ResponseForSession{request_for_session.session_id, response})) throw Exception(ErrorCodes::SYSTEM_ERROR, "Could not push error response xid {} zxid {} error message {} to responses queue", response->xid, @@ -730,7 +730,7 @@ void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSession } nuraft::ptr KeeperDispatcher::forceWaitAndProcessResult( - RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success) + RaftAppendResult & result, KeeperStorageBase::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success) { if (!result->has_result()) result->get(); @@ -755,7 +755,7 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) { /// New session id allocation is a special request, because we cannot process it in normal /// way: get request -> put to raft -> set response for registered callback. - KeeperStorage::RequestForSession request_info; + KeeperStorageBase::RequestForSession request_info; std::shared_ptr request = std::make_shared(); /// Internal session id. It's a temporary number which is unique for each client on this server /// but can be same on different servers. diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 41fb8451ee4..651fd0e1c88 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -26,7 +26,7 @@ using ZooKeeperResponseCallback = std::function; + using RequestsQueue = ConcurrentBoundedQueue; using SessionToResponseCallback = std::unordered_map; using ClusterUpdateQueue = ConcurrentBoundedQueue; @@ -95,18 +95,18 @@ private: /// Add error responses for requests to responses queue. /// Clears requests. - void addErrorResponses(const KeeperStorage::RequestsForSessions & requests_for_sessions, Coordination::Error error); + void addErrorResponses(const KeeperStorageBase::RequestsForSessions & requests_for_sessions, Coordination::Error error); /// Forcefully wait for result and sets errors if something when wrong. /// Clears both arguments nuraft::ptr forceWaitAndProcessResult( - RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success); + RaftAppendResult & result, KeeperStorageBase::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success); public: std::mutex read_request_queue_mutex; /// queue of read requests that can be processed after a request with specific session ID and XID is committed - std::unordered_map> read_request_queue; + std::unordered_map> read_request_queue; /// Just allocate some objects, real initialization is done by `intialize method` KeeperDispatcher(); @@ -192,7 +192,7 @@ public: Keeper4LWInfo getKeeper4LWInfo() const; - const KeeperStateMachine & getStateMachine() const + const IKeeperStateMachine & getStateMachine() const { return *server->getKeeperStateMachine(); } diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index dc9658e895f..d40e5ef2e50 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -123,7 +123,7 @@ KeeperServer::KeeperServer( SnapshotsQueue & snapshots_queue_, KeeperContextPtr keeper_context_, KeeperSnapshotManagerS3 & snapshot_manager_s3, - KeeperStateMachine::CommitCallback commit_callback) + IKeeperStateMachine::CommitCallback commit_callback) : server_id(configuration_and_settings_->server_id) , log(getLogger("KeeperServer")) , is_recovering(config.getBool("keeper_server.force_recovery", false)) @@ -134,13 +134,28 @@ KeeperServer::KeeperServer( if (keeper_context->getCoordinationSettings()->quorum_reads) LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower."); - state_machine = nuraft::cs_new( - responses_queue_, - snapshots_queue_, - keeper_context, - config.getBool("keeper_server.upload_snapshot_on_exit", false) ? &snapshot_manager_s3 : nullptr, - commit_callback, - checkAndGetSuperdigest(configuration_and_settings_->super_digest)); +#if USE_ROCKSDB + const auto & coordination_settings = keeper_context->getCoordinationSettings(); + if (coordination_settings->experimental_use_rocksdb) + { + state_machine = nuraft::cs_new>( + responses_queue_, + snapshots_queue_, + keeper_context, + config.getBool("keeper_server.upload_snapshot_on_exit", false) ? &snapshot_manager_s3 : nullptr, + commit_callback, + checkAndGetSuperdigest(configuration_and_settings_->super_digest)); + LOG_WARNING(log, "Use RocksDB as Keeper backend storage."); + } + else +#endif + state_machine = nuraft::cs_new>( + responses_queue_, + snapshots_queue_, + keeper_context, + config.getBool("keeper_server.upload_snapshot_on_exit", false) ? &snapshot_manager_s3 : nullptr, + commit_callback, + checkAndGetSuperdigest(configuration_and_settings_->super_digest)); state_manager = nuraft::cs_new( server_id, @@ -522,7 +537,7 @@ namespace { // Serialize the request for the log entry -nuraft::ptr getZooKeeperLogEntry(const KeeperStorage::RequestForSession & request_for_session) +nuraft::ptr getZooKeeperLogEntry(const KeeperStorageBase::RequestForSession & request_for_session) { DB::WriteBufferFromNuraftBuffer write_buf; DB::writeIntBinary(request_for_session.session_id, write_buf); @@ -530,7 +545,7 @@ nuraft::ptr getZooKeeperLogEntry(const KeeperStorage::RequestFor DB::writeIntBinary(request_for_session.time, write_buf); /// we fill with dummy values to eliminate unnecessary copy later on when we will write correct values DB::writeIntBinary(static_cast(0), write_buf); /// zxid - DB::writeIntBinary(KeeperStorage::DigestVersion::NO_DIGEST, write_buf); /// digest version or NO_DIGEST flag + DB::writeIntBinary(KeeperStorageBase::DigestVersion::NO_DIGEST, write_buf); /// digest version or NO_DIGEST flag DB::writeIntBinary(static_cast(0), write_buf); /// digest value /// if new fields are added, update KeeperStateMachine::ZooKeeperLogSerializationVersion along with parseRequest function and PreAppendLog callback handler return write_buf.getBuffer(); @@ -538,7 +553,7 @@ nuraft::ptr getZooKeeperLogEntry(const KeeperStorage::RequestFor } -void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession & request_for_session) +void KeeperServer::putLocalReadRequest(const KeeperStorageBase::RequestForSession & request_for_session) { if (!request_for_session.request->isReadRequest()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot process non-read request locally"); @@ -546,7 +561,7 @@ void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession & state_machine->processReadRequest(request_for_session); } -RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions) +RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorageBase::RequestsForSessions & requests_for_sessions) { std::vector> entries; entries.reserve(requests_for_sessions.size()); @@ -789,7 +804,7 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ auto entry_buf = entry->get_buf_ptr(); - KeeperStateMachine::ZooKeeperLogSerializationVersion serialization_version; + IKeeperStateMachine::ZooKeeperLogSerializationVersion serialization_version; auto request_for_session = state_machine->parseRequest(*entry_buf, /*final=*/false, &serialization_version); request_for_session->zxid = next_zxid; if (!state_machine->preprocess(*request_for_session)) @@ -799,10 +814,10 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ /// older versions of Keeper can send logs that are missing some fields size_t bytes_missing = 0; - if (serialization_version < KeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME) + if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME) bytes_missing += sizeof(request_for_session->time); - if (serialization_version < KeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_ZXID_DIGEST) + if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_ZXID_DIGEST) bytes_missing += sizeof(request_for_session->zxid) + sizeof(request_for_session->digest->version) + sizeof(request_for_session->digest->value); if (bytes_missing != 0) @@ -816,19 +831,19 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ size_t write_buffer_header_size = sizeof(request_for_session->zxid) + sizeof(request_for_session->digest->version) + sizeof(request_for_session->digest->value); - if (serialization_version < KeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME) + if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME) write_buffer_header_size += sizeof(request_for_session->time); auto * buffer_start = reinterpret_cast(entry_buf->data_begin() + entry_buf->size() - write_buffer_header_size); WriteBufferFromPointer write_buf(buffer_start, write_buffer_header_size); - if (serialization_version < KeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME) + if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME) writeIntBinary(request_for_session->time, write_buf); writeIntBinary(request_for_session->zxid, write_buf); writeIntBinary(request_for_session->digest->version, write_buf); - if (request_for_session->digest->version != KeeperStorage::NO_DIGEST) + if (request_for_session->digest->version != KeeperStorageBase::NO_DIGEST) writeIntBinary(request_for_session->digest->value, write_buf); write_buf.finalize(); diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 2eb630b4dd6..f082b5d377e 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -24,7 +24,7 @@ class KeeperServer private: const int server_id; - nuraft::ptr state_machine; + nuraft::ptr state_machine; nuraft::ptr state_manager; @@ -79,26 +79,26 @@ public: SnapshotsQueue & snapshots_queue_, KeeperContextPtr keeper_context_, KeeperSnapshotManagerS3 & snapshot_manager_s3, - KeeperStateMachine::CommitCallback commit_callback); + IKeeperStateMachine::CommitCallback commit_callback); /// Load state machine from the latest snapshot and load log storage. Start NuRaft with required settings. void startup(const Poco::Util::AbstractConfiguration & config, bool enable_ipv6 = true); /// Put local read request and execute in state machine directly and response into /// responses queue - void putLocalReadRequest(const KeeperStorage::RequestForSession & request); + void putLocalReadRequest(const KeeperStorageBase::RequestForSession & request); bool isRecovering() const { return is_recovering; } bool reconfigEnabled() const { return enable_reconfiguration; } /// Put batch of requests into Raft and get result of put. Responses will be set separately into /// responses_queue. - RaftAppendResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests); + RaftAppendResult putRequestBatch(const KeeperStorageBase::RequestsForSessions & requests); /// Return set of the non-active sessions std::vector getDeadSessions(); - nuraft::ptr getKeeperStateMachine() const { return state_machine; } + nuraft::ptr getKeeperStateMachine() const { return state_machine; } void forceRecovery(); diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 8bf42b434cd..3f5ac055470 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -66,7 +66,8 @@ namespace return base; } - void writeNode(const KeeperStorage::Node & node, SnapshotVersion version, WriteBuffer & out) + template + void writeNode(const Node & node, SnapshotVersion version, WriteBuffer & out) { writeBinary(node.getData(), out); @@ -86,7 +87,7 @@ namespace writeBinary(node.aversion, out); writeBinary(node.ephemeralOwner(), out); if (version < SnapshotVersion::V6) - writeBinary(static_cast(node.data_size), out); + writeBinary(static_cast(node.getData().size()), out); writeBinary(node.numChildren(), out); writeBinary(node.pzxid, out); @@ -96,7 +97,8 @@ namespace writeBinary(node.sizeInBytes(), out); } - void readNode(KeeperStorage::Node & node, ReadBuffer & in, SnapshotVersion version, ACLMap & acl_map) + template + void readNode(Node & node, ReadBuffer & in, SnapshotVersion version, ACLMap & acl_map) { readVarUInt(node.data_size, in); if (node.data_size != 0) @@ -195,7 +197,8 @@ namespace } } -void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out, KeeperContextPtr keeper_context) +template +void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out, KeeperContextPtr keeper_context) { writeBinary(static_cast(snapshot.version), out); serializeSnapshotMetadata(snapshot.snapshot_meta, out); @@ -205,11 +208,11 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr writeBinary(snapshot.zxid, out); if (keeper_context->digestEnabled()) { - writeBinary(static_cast(KeeperStorage::CURRENT_DIGEST_VERSION), out); + writeBinary(static_cast(Storage::CURRENT_DIGEST_VERSION), out); writeBinary(snapshot.nodes_digest, out); } else - writeBinary(static_cast(KeeperStorage::NO_DIGEST), out); + writeBinary(static_cast(Storage::NO_DIGEST), out); } writeBinary(snapshot.session_id, out); @@ -255,7 +258,6 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr /// slightly bigger than required. if (node.mzxid > snapshot.zxid) break; - writeBinary(path, out); writeNode(node, snapshot.version, out); @@ -282,7 +284,7 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr writeBinary(session_id, out); writeBinary(timeout, out); - KeeperStorage::AuthIDs ids; + KeeperStorageBase::AuthIDs ids; if (snapshot.session_and_auth.contains(session_id)) ids = snapshot.session_and_auth.at(session_id); @@ -303,7 +305,8 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr } } -void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context) +template +void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context) { uint8_t version; readBinary(version, in); @@ -312,7 +315,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported snapshot version {}", version); deserialization_result.snapshot_meta = deserializeSnapshotMetadata(in); - KeeperStorage & storage = *deserialization_result.storage; + Storage & storage = *deserialization_result.storage; bool recalculate_digest = keeper_context->digestEnabled(); if (version >= SnapshotVersion::V5) @@ -320,11 +323,11 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial readBinary(storage.zxid, in); uint8_t digest_version; readBinary(digest_version, in); - if (digest_version != KeeperStorage::DigestVersion::NO_DIGEST) + if (digest_version != Storage::DigestVersion::NO_DIGEST) { uint64_t nodes_digest; readBinary(nodes_digest, in); - if (digest_version == KeeperStorage::CURRENT_DIGEST_VERSION) + if (digest_version == Storage::CURRENT_DIGEST_VERSION) { storage.nodes_digest = nodes_digest; recalculate_digest = false; @@ -374,8 +377,8 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial size_t snapshot_container_size; readBinary(snapshot_container_size, in); - - storage.container.reserve(snapshot_container_size); + if constexpr (!use_rocksdb) + storage.container.reserve(snapshot_container_size); if (recalculate_digest) storage.nodes_digest = 0; @@ -389,7 +392,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial in.readStrict(path_data.get(), path_size); std::string_view path{path_data.get(), path_size}; - KeeperStorage::Node node{}; + typename Storage::Node node{}; readNode(node, in, current_version, storage.acl_map); using enum Coordination::PathMatchResult; @@ -421,7 +424,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial if (keeper_context->ignoreSystemPathOnStartup() || keeper_context->getServerState() != KeeperContext::Phase::INIT) { LOG_ERROR(getLogger("KeeperSnapshotManager"), "{}. Ignoring it", get_error_msg()); - node = KeeperStorage::Node{}; + node = typename Storage::Node{}; } else throw Exception( @@ -433,8 +436,9 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial } auto ephemeral_owner = node.ephemeralOwner(); - if (!node.isEphemeral() && node.numChildren() > 0) - node.getChildren().reserve(node.numChildren()); + if constexpr (!use_rocksdb) + if (!node.isEphemeral() && node.numChildren() > 0) + node.getChildren().reserve(node.numChildren()); if (ephemeral_owner != 0) storage.ephemerals[node.ephemeralOwner()].insert(std::string{path}); @@ -447,36 +451,38 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial LOG_TRACE(getLogger("KeeperSnapshotManager"), "Building structure for children nodes"); - for (const auto & itr : storage.container) + if constexpr (!use_rocksdb) { - if (itr.key != "/") + for (const auto & itr : storage.container) { - auto parent_path = parentNodePath(itr.key); - storage.container.updateValue( - parent_path, [path = itr.key](KeeperStorage::Node & value) { value.addChild(getBaseNodeName(path)); }); - } - } - - for (const auto & itr : storage.container) - { - if (itr.key != "/") - { - if (itr.value.numChildren() != static_cast(itr.value.getChildren().size())) + if (itr.key != "/") { + auto parent_path = parentNodePath(itr.key); + storage.container.updateValue( + parent_path, [path = itr.key](typename Storage::Node & value) { value.addChild(getBaseNodeName(path)); }); + } + } + + for (const auto & itr : storage.container) + { + if (itr.key != "/") + { + if (itr.value.numChildren() != static_cast(itr.value.getChildren().size())) + { #ifdef NDEBUG - /// TODO (alesapin) remove this, it should be always CORRUPTED_DATA. - LOG_ERROR(getLogger("KeeperSnapshotManager"), "Children counter in stat.numChildren {}" - " is different from actual children size {} for node {}", itr.value.numChildren(), itr.value.getChildren().size(), itr.key); + /// TODO (alesapin) remove this, it should be always CORRUPTED_DATA. + LOG_ERROR(getLogger("KeeperSnapshotManager"), "Children counter in stat.numChildren {}" + " is different from actual children size {} for node {}", itr.value.numChildren(), itr.value.getChildren().size(), itr.key); #else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Children counter in stat.numChildren {}" - " is different from actual children size {} for node {}", - itr.value.numChildren(), itr.value.getChildren().size(), itr.key); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Children counter in stat.numChildren {}" + " is different from actual children size {} for node {}", + itr.value.numChildren(), itr.value.getChildren().size(), itr.key); #endif + } } } } - size_t active_sessions_size; readBinary(active_sessions_size, in); @@ -493,14 +499,14 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial size_t session_auths_size; readBinary(session_auths_size, in); - KeeperStorage::AuthIDs ids; + typename Storage::AuthIDs ids; size_t session_auth_counter = 0; while (session_auth_counter < session_auths_size) { String scheme, id; readBinary(scheme, in); readBinary(id, in); - ids.emplace_back(KeeperStorage::AuthID{scheme, id}); + ids.emplace_back(typename Storage::AuthID{scheme, id}); session_auth_counter++; } @@ -523,7 +529,8 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial } } -KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_) +template +KeeperStorageSnapshot::KeeperStorageSnapshot(Storage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_) : storage(storage_) , snapshot_meta(std::make_shared(up_to_log_idx_, 0, std::make_shared())) , session_id(storage->session_id_counter) @@ -540,8 +547,9 @@ KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t session_and_auth = storage->session_and_auth; } -KeeperStorageSnapshot::KeeperStorageSnapshot( - KeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_) +template +KeeperStorageSnapshot::KeeperStorageSnapshot( + Storage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_) : storage(storage_) , snapshot_meta(snapshot_meta_) , session_id(storage->session_id_counter) @@ -558,12 +566,14 @@ KeeperStorageSnapshot::KeeperStorageSnapshot( session_and_auth = storage->session_and_auth; } -KeeperStorageSnapshot::~KeeperStorageSnapshot() +template +KeeperStorageSnapshot::~KeeperStorageSnapshot() { storage->disableSnapshotMode(); } -KeeperSnapshotManager::KeeperSnapshotManager( +template +KeeperSnapshotManager::KeeperSnapshotManager( size_t snapshots_to_keep_, const KeeperContextPtr & keeper_context_, bool compress_snapshots_zstd_, @@ -651,7 +661,8 @@ KeeperSnapshotManager::KeeperSnapshotManager( moveSnapshotsIfNeeded(); } -SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx) +template +SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx) { ReadBufferFromNuraftBuffer reader(buffer); @@ -680,7 +691,8 @@ SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft: return snapshot_file_info; } -nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBufferFromDisk() +template +nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBufferFromDisk() { while (!existing_snapshots.empty()) { @@ -701,7 +713,8 @@ nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBuff return nullptr; } -nuraft::ptr KeeperSnapshotManager::deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const +template +nuraft::ptr KeeperSnapshotManager::deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const { const auto & [snapshot_path, snapshot_disk, size] = *existing_snapshots.at(up_to_log_idx); WriteBufferFromNuraftBuffer writer; @@ -710,7 +723,8 @@ nuraft::ptr KeeperSnapshotManager::deserializeSnapshotBufferFrom return writer.getBuffer(); } -nuraft::ptr KeeperSnapshotManager::serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const +template +nuraft::ptr KeeperSnapshotManager::serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const { std::unique_ptr writer = std::make_unique(); auto * buffer_raw_ptr = writer.get(); @@ -720,13 +734,13 @@ nuraft::ptr KeeperSnapshotManager::serializeSnapshotToBuffer(con else compressed_writer = std::make_unique(*writer); - KeeperStorageSnapshot::serialize(snapshot, *compressed_writer, keeper_context); + KeeperStorageSnapshot::serialize(snapshot, *compressed_writer, keeper_context); compressed_writer->finalize(); return buffer_raw_ptr->getBuffer(); } - -bool KeeperSnapshotManager::isZstdCompressed(nuraft::ptr buffer) +template +bool KeeperSnapshotManager::isZstdCompressed(nuraft::ptr buffer) { static constexpr unsigned char ZSTD_COMPRESSED_MAGIC[4] = {0x28, 0xB5, 0x2F, 0xFD}; @@ -737,7 +751,8 @@ bool KeeperSnapshotManager::isZstdCompressed(nuraft::ptr buffer) return memcmp(magic_from_buffer, ZSTD_COMPRESSED_MAGIC, 4) == 0; } -SnapshotDeserializationResult KeeperSnapshotManager::deserializeSnapshotFromBuffer(nuraft::ptr buffer) const +template +SnapshotDeserializationResult KeeperSnapshotManager::deserializeSnapshotFromBuffer(nuraft::ptr buffer) const { bool is_zstd_compressed = isZstdCompressed(buffer); @@ -749,14 +764,15 @@ SnapshotDeserializationResult KeeperSnapshotManager::deserializeSnapshotFromBuff else compressed_reader = std::make_unique(*reader); - SnapshotDeserializationResult result; - result.storage = std::make_unique(storage_tick_time, superdigest, keeper_context, /* initialize_system_nodes */ false); - KeeperStorageSnapshot::deserialize(result, *compressed_reader, keeper_context); + SnapshotDeserializationResult result; + result.storage = std::make_unique(storage_tick_time, superdigest, keeper_context, /* initialize_system_nodes */ false); + KeeperStorageSnapshot::deserialize(result, *compressed_reader, keeper_context); result.storage->initializeSystemNodes(); return result; } -SnapshotDeserializationResult KeeperSnapshotManager::restoreFromLatestSnapshot() +template +SnapshotDeserializationResult KeeperSnapshotManager::restoreFromLatestSnapshot() { if (existing_snapshots.empty()) return {}; @@ -767,23 +783,27 @@ SnapshotDeserializationResult KeeperSnapshotManager::restoreFromLatestSnapshot() return deserializeSnapshotFromBuffer(buffer); } -DiskPtr KeeperSnapshotManager::getDisk() const +template +DiskPtr KeeperSnapshotManager::getDisk() const { return keeper_context->getSnapshotDisk(); } -DiskPtr KeeperSnapshotManager::getLatestSnapshotDisk() const +template +DiskPtr KeeperSnapshotManager::getLatestSnapshotDisk() const { return keeper_context->getLatestSnapshotDisk(); } -void KeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded() +template +void KeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded() { while (existing_snapshots.size() > snapshots_to_keep) removeSnapshot(existing_snapshots.begin()->first); } -void KeeperSnapshotManager::moveSnapshotsIfNeeded() +template +void KeeperSnapshotManager::moveSnapshotsIfNeeded() { /// move snapshots to correct disks @@ -813,7 +833,8 @@ void KeeperSnapshotManager::moveSnapshotsIfNeeded() } -void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx) +template +void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx) { auto itr = existing_snapshots.find(log_idx); if (itr == existing_snapshots.end()) @@ -823,7 +844,8 @@ void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx) existing_snapshots.erase(itr); } -SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot) +template +SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot) { auto up_to_log_idx = snapshot.snapshot_meta->get_last_log_idx(); auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd); @@ -842,7 +864,7 @@ SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperS else compressed_writer = std::make_unique(*writer); - KeeperStorageSnapshot::serialize(snapshot, *compressed_writer, keeper_context); + KeeperStorageSnapshot::serialize(snapshot, *compressed_writer, keeper_context); compressed_writer->finalize(); compressed_writer->sync(); @@ -864,14 +886,16 @@ SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperS return snapshot_file_info; } -size_t KeeperSnapshotManager::getLatestSnapshotIndex() const +template +size_t KeeperSnapshotManager::getLatestSnapshotIndex() const { if (!existing_snapshots.empty()) return existing_snapshots.rbegin()->first; return 0; } -SnapshotFileInfoPtr KeeperSnapshotManager::getLatestSnapshotInfo() const +template +SnapshotFileInfoPtr KeeperSnapshotManager::getLatestSnapshotInfo() const { if (!existing_snapshots.empty()) { @@ -890,4 +914,10 @@ SnapshotFileInfoPtr KeeperSnapshotManager::getLatestSnapshotInfo() const return nullptr; } +template struct KeeperStorageSnapshot; +template class KeeperSnapshotManager; +#if USE_ROCKSDB +template struct KeeperStorageSnapshot; +template class KeeperSnapshotManager; +#endif } diff --git a/src/Coordination/KeeperSnapshotManager.h b/src/Coordination/KeeperSnapshotManager.h index 36bf6c8775c..be9ce386ab1 100644 --- a/src/Coordination/KeeperSnapshotManager.h +++ b/src/Coordination/KeeperSnapshotManager.h @@ -34,10 +34,11 @@ enum SnapshotVersion : uint8_t static constexpr auto CURRENT_SNAPSHOT_VERSION = SnapshotVersion::V6; /// What is stored in binary snapshot +template struct SnapshotDeserializationResult { /// Storage - KeeperStoragePtr storage; + std::unique_ptr storage; /// Snapshot metadata (up_to_log_idx and so on) SnapshotMetadataPtr snapshot_meta; /// Cluster config @@ -52,21 +53,31 @@ struct SnapshotDeserializationResult /// /// This representation of snapshot have to be serialized into NuRaft /// buffer and send over network or saved to file. +template struct KeeperStorageSnapshot { +#if USE_ROCKSDB + static constexpr bool use_rocksdb = std::is_same_v; +#else + static constexpr bool use_rocksdb = false; +#endif + public: - KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_ = nullptr); + KeeperStorageSnapshot(Storage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_ = nullptr); KeeperStorageSnapshot( - KeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_ = nullptr); + Storage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_ = nullptr); + + KeeperStorageSnapshot(const KeeperStorageSnapshot&) = delete; + KeeperStorageSnapshot(KeeperStorageSnapshot&&) = default; ~KeeperStorageSnapshot(); - static void serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out, KeeperContextPtr keeper_context); + static void serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out, KeeperContextPtr keeper_context); - static void deserialize(SnapshotDeserializationResult & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context); + static void deserialize(SnapshotDeserializationResult & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context); - KeeperStorage * storage; + Storage * storage; SnapshotVersion version = CURRENT_SNAPSHOT_VERSION; /// Snapshot metadata @@ -77,11 +88,11 @@ public: /// so we have for loop for (i = 0; i < snapshot_container_size; ++i) { doSmth(begin + i); } size_t snapshot_container_size; /// Iterator to the start of the storage - KeeperStorage::Container::const_iterator begin; + Storage::Container::const_iterator begin; /// Active sessions and their timeouts SessionAndTimeout session_and_timeout; /// Sessions credentials - KeeperStorage::SessionAndAuth session_and_auth; + Storage::SessionAndAuth session_and_auth; /// ACLs cache for better performance. Without we cannot deserialize storage. std::unordered_map acl_map; /// Cluster config from snapshot, can be empty @@ -105,14 +116,16 @@ struct SnapshotFileInfo }; using SnapshotFileInfoPtr = std::shared_ptr; - -using KeeperStorageSnapshotPtr = std::shared_ptr; -using CreateSnapshotCallback = std::function(KeeperStorageSnapshotPtr &&, bool)>; - -using SnapshotMetaAndStorage = std::pair; +#if USE_ROCKSDB +using KeeperStorageSnapshotPtr = std::variant>, std::shared_ptr>>; +#else +using KeeperStorageSnapshotPtr = std::variant>>; +#endif +using CreateSnapshotCallback = std::function; /// Class responsible for snapshots serialization and deserialization. Each snapshot /// has it's path on disk and log index. +template class KeeperSnapshotManager { public: @@ -124,18 +137,18 @@ public: size_t storage_tick_time_ = 500); /// Restore storage from latest available snapshot - SnapshotDeserializationResult restoreFromLatestSnapshot(); + SnapshotDeserializationResult restoreFromLatestSnapshot(); /// Compress snapshot and serialize it to buffer - nuraft::ptr serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const; + nuraft::ptr serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const; /// Serialize already compressed snapshot to disk (return path) SnapshotFileInfoPtr serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx); /// Serialize snapshot directly to disk - SnapshotFileInfoPtr serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot); + SnapshotFileInfoPtr serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot); - SnapshotDeserializationResult deserializeSnapshotFromBuffer(nuraft::ptr buffer) const; + SnapshotDeserializationResult deserializeSnapshotFromBuffer(nuraft::ptr buffer) const; /// Deserialize snapshot with log index up_to_log_idx from disk into compressed nuraft buffer. nuraft::ptr deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index d9b9ba528ee..a4aa1b18746 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -44,7 +44,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -KeeperStateMachine::KeeperStateMachine( +IKeeperStateMachine::IKeeperStateMachine( ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, const KeeperContextPtr & keeper_context_, @@ -52,12 +52,6 @@ KeeperStateMachine::KeeperStateMachine( CommitCallback commit_callback_, const std::string & superdigest_) : commit_callback(commit_callback_) - , snapshot_manager( - keeper_context_->getCoordinationSettings()->snapshots_to_keep, - keeper_context_, - keeper_context_->getCoordinationSettings()->compress_snapshots_with_zstd_format, - superdigest_, - keeper_context_->getCoordinationSettings()->dead_session_check_period_ms.totalMilliseconds()) , responses_queue(responses_queue_) , snapshots_queue(snapshots_queue_) , min_request_size_to_cache(keeper_context_->getCoordinationSettings()->min_request_size_for_cache) @@ -68,6 +62,32 @@ KeeperStateMachine::KeeperStateMachine( { } +template +KeeperStateMachine::KeeperStateMachine( + ResponsesQueue & responses_queue_, + SnapshotsQueue & snapshots_queue_, + // const CoordinationSettingsPtr & coordination_settings_, + const KeeperContextPtr & keeper_context_, + KeeperSnapshotManagerS3 * snapshot_manager_s3_, + IKeeperStateMachine::CommitCallback commit_callback_, + const std::string & superdigest_) + : IKeeperStateMachine( + responses_queue_, + snapshots_queue_, + /// coordination_settings_, + keeper_context_, + snapshot_manager_s3_, + commit_callback_, + superdigest_), + snapshot_manager( + keeper_context_->getCoordinationSettings()->snapshots_to_keep, + keeper_context_, + keeper_context_->getCoordinationSettings()->compress_snapshots_with_zstd_format, + superdigest_, + keeper_context_->getCoordinationSettings()->dead_session_check_period_ms.totalMilliseconds()) +{ +} + namespace { @@ -78,7 +98,8 @@ bool isLocalDisk(const IDisk & disk) } -void KeeperStateMachine::init() +template +void KeeperStateMachine::init() { /// Do everything without mutexes, no other threads exist. LOG_DEBUG(log, "Totally have {} snapshots", snapshot_manager.totalSnapshots()); @@ -123,7 +144,7 @@ void KeeperStateMachine::init() LOG_DEBUG(log, "No existing snapshots, last committed log index {}", last_committed_idx); if (!storage) - storage = std::make_unique( + storage = std::make_unique( keeper_context->getCoordinationSettings()->dead_session_check_period_ms.totalMilliseconds(), superdigest, keeper_context); } @@ -131,13 +152,13 @@ namespace { void assertDigest( - const KeeperStorage::Digest & expected, - const KeeperStorage::Digest & actual, + const KeeperStorageBase::Digest & expected, + const KeeperStorageBase::Digest & actual, const Coordination::ZooKeeperRequest & request, uint64_t log_idx, bool committing) { - if (!KeeperStorage::checkDigest(expected, actual)) + if (!KeeperStorageBase::checkDigest(expected, actual)) { LOG_FATAL( getLogger("KeeperStateMachine"), @@ -170,7 +191,8 @@ struct TSA_SCOPED_LOCKABLE LockGuardWithStats final } -nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data) +template +nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data) { auto result = nuraft::buffer::alloc(sizeof(log_idx)); nuraft::buffer_serializer ss(result); @@ -191,10 +213,10 @@ nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nur return result; } -std::shared_ptr KeeperStateMachine::parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version) +std::shared_ptr IKeeperStateMachine::parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version) { ReadBufferFromNuraftBuffer buffer(data); - auto request_for_session = std::make_shared(); + auto request_for_session = std::make_shared(); readIntBinary(request_for_session->session_id, buffer); int32_t length; @@ -267,7 +289,7 @@ std::shared_ptr KeeperStateMachine::parseReque request_for_session->digest.emplace(); readIntBinary(request_for_session->digest->version, buffer); - if (request_for_session->digest->version != KeeperStorage::DigestVersion::NO_DIGEST || !buffer.eof()) + if (request_for_session->digest->version != KeeperStorageBase::DigestVersion::NO_DIGEST || !buffer.eof()) readIntBinary(request_for_session->digest->value, buffer); } @@ -283,7 +305,8 @@ std::shared_ptr KeeperStateMachine::parseReque return request_for_session; } -bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & request_for_session) +template +bool KeeperStateMachine::preprocess(const KeeperStorageBase::RequestForSession & request_for_session) { const auto op_num = request_for_session.request->getOpNum(); if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig) @@ -317,10 +340,11 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req return true; } -void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& request_for_session) +template +void KeeperStateMachine::reconfigure(const KeeperStorageBase::RequestForSession& request_for_session) { LockGuardWithStats lock(storage_and_responses_lock); - KeeperStorage::ResponseForSession response = processReconfiguration(request_for_session); + KeeperStorageBase::ResponseForSession response = processReconfiguration(request_for_session); if (!responses_queue.push(response)) { ProfileEvents::increment(ProfileEvents::KeeperCommitsFailed); @@ -330,8 +354,9 @@ void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& req } } -KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( - const KeeperStorage::RequestForSession & request_for_session) +template +KeeperStorageBase::ResponseForSession KeeperStateMachine::processReconfiguration( + const KeeperStorageBase::RequestForSession & request_for_session) { ProfileEvents::increment(ProfileEvents::KeeperReconfigRequest); @@ -340,7 +365,7 @@ KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( const int64_t zxid = request_for_session.zxid; using enum Coordination::Error; - auto bad_request = [&](Coordination::Error code = ZBADARGUMENTS) -> KeeperStorage::ResponseForSession + auto bad_request = [&](Coordination::Error code = ZBADARGUMENTS) -> KeeperStorageBase::ResponseForSession { auto res = std::make_shared(); res->xid = request.xid; @@ -397,7 +422,8 @@ KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( return { session_id, std::move(response) }; } -nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data) +template +nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data) { auto request_for_session = parseRequest(data, true); if (!request_for_session->zxid) @@ -408,7 +434,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n if (!keeper_context->localLogsPreprocessed() && !preprocess(*request_for_session)) return nullptr; - auto try_push = [&](const KeeperStorage::ResponseForSession & response) + auto try_push = [&](const KeeperStorageBase::ResponseForSession & response) { if (!responses_queue.push(response)) { @@ -430,7 +456,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n std::shared_ptr response = std::make_shared(); response->internal_id = session_id_request.internal_id; response->server_id = session_id_request.server_id; - KeeperStorage::ResponseForSession response_for_session; + KeeperStorageBase::ResponseForSession response_for_session; response_for_session.session_id = -1; response_for_session.response = response; response_for_session.request = request_for_session->request; @@ -451,7 +477,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n } LockGuardWithStats lock(storage_and_responses_lock); - KeeperStorage::ResponsesForSessions responses_for_sessions + KeeperStorageBase::ResponsesForSessions responses_for_sessions = storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid); for (auto & response_for_session : responses_for_sessions) @@ -482,7 +508,8 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n return nullptr; } -bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) +template +bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) { LOG_DEBUG(log, "Applying snapshot {}", s.get_last_log_idx()); nuraft::ptr latest_snapshot_ptr; @@ -509,7 +536,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) { /// deserialize and apply snapshot to storage LockGuardWithStats lock(storage_and_responses_lock); - SnapshotDeserializationResult snapshot_deserialization_result; + SnapshotDeserializationResult snapshot_deserialization_result; if (latest_snapshot_ptr) snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr); else @@ -530,7 +557,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) } -void KeeperStateMachine::commit_config(const uint64_t log_idx, nuraft::ptr & new_conf) +void IKeeperStateMachine::commit_config(const uint64_t log_idx, nuraft::ptr & new_conf) { std::lock_guard lock(cluster_config_lock); auto tmp = new_conf->serialize(); @@ -538,7 +565,7 @@ void KeeperStateMachine::commit_config(const uint64_t log_idx, nuraft::ptrsetLastCommitIndex(log_idx); } -void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data) +void IKeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data) { /// Don't rollback anything until the first commit because nothing was preprocessed if (!keeper_context->localLogsPreprocessed()) @@ -554,7 +581,8 @@ void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data) rollbackRequest(*request_for_session, false); } -void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing) +template +void KeeperStateMachine::rollbackRequest(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing) { if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) return; @@ -563,7 +591,8 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession storage->rollbackRequest(request_for_session.zxid, allow_missing); } -void KeeperStateMachine::rollbackRequestNoLock(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing) +template +void KeeperStateMachine::rollbackRequestNoLock(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing) { if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) return; @@ -571,14 +600,15 @@ void KeeperStateMachine::rollbackRequestNoLock(const KeeperStorage::RequestForSe storage->rollbackRequest(request_for_session.zxid, allow_missing); } -nuraft::ptr KeeperStateMachine::last_snapshot() +nuraft::ptr IKeeperStateMachine::last_snapshot() { /// Just return the latest snapshot. std::lock_guard lock(snapshots_lock); return latest_snapshot_meta; } -void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_result::handler_type & when_done) +template +void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_result::handler_type & when_done) { LOG_DEBUG(log, "Creating snapshot {}", s.get_last_log_idx()); @@ -587,14 +617,15 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res CreateSnapshotTask snapshot_task; { /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking. LockGuardWithStats lock(storage_and_responses_lock); - snapshot_task.snapshot = std::make_shared(storage.get(), snapshot_meta_copy, getClusterConfig()); + snapshot_task.snapshot = std::make_shared>(storage.get(), snapshot_meta_copy, getClusterConfig()); } /// create snapshot task for background execution (in snapshot thread) - snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot, bool execute_only_cleanup) + snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot_, bool execute_only_cleanup) { nuraft::ptr exception(nullptr); bool ret = false; + auto && snapshot = std::get>>(std::move(snapshot_)); if (!execute_only_cleanup) { try @@ -683,7 +714,8 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res LOG_WARNING(log, "Cannot push snapshot task into queue"); } -void KeeperStateMachine::save_logical_snp_obj( +template +void KeeperStateMachine::save_logical_snp_obj( nuraft::snapshot & s, uint64_t & obj_id, nuraft::buffer & data, bool /*is_first_obj*/, bool /*is_last_obj*/) { LOG_DEBUG(log, "Saving snapshot {} obj_id {}", s.get_last_log_idx(), obj_id); @@ -748,7 +780,7 @@ static int bufferFromFile(LoggerPtr log, const std::string & path, nuraft::ptr & data_out, bool & is_last_obj) { LOG_DEBUG(log, "Reading snapshot {} obj_id {}", s.get_last_log_idx(), obj_id); @@ -788,7 +820,8 @@ int KeeperStateMachine::read_logical_snp_obj( return 1; } -void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session) +template +void KeeperStateMachine::processReadRequest(const KeeperStorageBase::RequestForSession & request_for_session) { /// Pure local request, just process it with storage LockGuardWithStats lock(storage_and_responses_lock); @@ -804,103 +837,120 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi } } -void KeeperStateMachine::shutdownStorage() +template +void KeeperStateMachine::shutdownStorage() { LockGuardWithStats lock(storage_and_responses_lock); storage->finalize(); } -std::vector KeeperStateMachine::getDeadSessions() +template +std::vector KeeperStateMachine::getDeadSessions() { LockGuardWithStats lock(storage_and_responses_lock); return storage->getDeadSessions(); } -int64_t KeeperStateMachine::getNextZxid() const +template +int64_t KeeperStateMachine::getNextZxid() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getNextZXID(); } -KeeperStorage::Digest KeeperStateMachine::getNodesDigest() const +template +KeeperStorageBase::Digest KeeperStateMachine::getNodesDigest() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getNodesDigest(false); } -uint64_t KeeperStateMachine::getLastProcessedZxid() const +template +uint64_t KeeperStateMachine::getLastProcessedZxid() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getZXID(); } -uint64_t KeeperStateMachine::getNodesCount() const +template +uint64_t KeeperStateMachine::getNodesCount() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getNodesCount(); } -uint64_t KeeperStateMachine::getTotalWatchesCount() const +template +uint64_t KeeperStateMachine::getTotalWatchesCount() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getTotalWatchesCount(); } -uint64_t KeeperStateMachine::getWatchedPathsCount() const +template +uint64_t KeeperStateMachine::getWatchedPathsCount() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getWatchedPathsCount(); } -uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const +template +uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getSessionsWithWatchesCount(); } -uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const +template +uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getTotalEphemeralNodesCount(); } -uint64_t KeeperStateMachine::getSessionWithEphemeralNodesCount() const +template +uint64_t KeeperStateMachine::getSessionWithEphemeralNodesCount() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getSessionWithEphemeralNodesCount(); } -void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const +template +void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const { LockGuardWithStats lock(storage_and_responses_lock); storage->dumpWatches(buf); } -void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const +template +void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const { LockGuardWithStats lock(storage_and_responses_lock); storage->dumpWatchesByPath(buf); } -void KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const +template +void KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const { LockGuardWithStats lock(storage_and_responses_lock); storage->dumpSessionsAndEphemerals(buf); } -uint64_t KeeperStateMachine::getApproximateDataSize() const +template +uint64_t KeeperStateMachine::getApproximateDataSize() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getApproximateDataSize(); } -uint64_t KeeperStateMachine::getKeyArenaSize() const +template +uint64_t KeeperStateMachine::getKeyArenaSize() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getArenaDataSize(); } -uint64_t KeeperStateMachine::getLatestSnapshotSize() const +template +uint64_t KeeperStateMachine::getLatestSnapshotSize() const { auto snapshot_info = [&] { @@ -923,7 +973,7 @@ uint64_t KeeperStateMachine::getLatestSnapshotSize() const return size; } -ClusterConfigPtr KeeperStateMachine::getClusterConfig() const +ClusterConfigPtr IKeeperStateMachine::getClusterConfig() const { std::lock_guard lock(cluster_config_lock); if (cluster_config) @@ -935,11 +985,18 @@ ClusterConfigPtr KeeperStateMachine::getClusterConfig() const return nullptr; } -void KeeperStateMachine::recalculateStorageStats() +template +void KeeperStateMachine::recalculateStorageStats() { LockGuardWithStats lock(storage_and_responses_lock); LOG_INFO(log, "Recalculating storage stats"); storage->recalculateStats(); LOG_INFO(log, "Done recalculating storage stats"); } + +template class KeeperStateMachine; +#if USE_ROCKSDB +template class KeeperStateMachine; +#endif + } diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 6357fd170df..6afd413d782 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -11,26 +11,24 @@ namespace DB { -using ResponsesQueue = ConcurrentBoundedQueue; +using ResponsesQueue = ConcurrentBoundedQueue; using SnapshotsQueue = ConcurrentBoundedQueue; -/// ClickHouse Keeper state machine. Wrapper for KeeperStorage. -/// Responsible for entries commit, snapshots creation and so on. -class KeeperStateMachine : public nuraft::state_machine +class IKeeperStateMachine : public nuraft::state_machine { public: - using CommitCallback = std::function; + using CommitCallback = std::function; - KeeperStateMachine( + IKeeperStateMachine( ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, const KeeperContextPtr & keeper_context_, KeeperSnapshotManagerS3 * snapshot_manager_s3_, - CommitCallback commit_callback_ = {}, - const std::string & superdigest_ = ""); + CommitCallback commit_callback_, + const std::string & superdigest_); /// Read state from the latest snapshot - void init(); + virtual void init() = 0; enum ZooKeeperLogSerializationVersion { @@ -47,89 +45,66 @@ public: /// /// final - whether it's the final time we will fetch the request so we can safely remove it from cache /// serialization_version - information about which fields were parsed from the buffer so we can modify the buffer accordingly - std::shared_ptr parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version = nullptr); + std::shared_ptr parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version = nullptr); - bool preprocess(const KeeperStorage::RequestForSession & request_for_session); + virtual bool preprocess(const KeeperStorageBase::RequestForSession & request_for_session) = 0; - nuraft::ptr pre_commit(uint64_t log_idx, nuraft::buffer & data) override; - - nuraft::ptr commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT - - /// Save new cluster config to our snapshot (copy of the config stored in StateManager) void commit_config(const uint64_t log_idx, nuraft::ptr & new_conf) override; /// NOLINT void rollback(uint64_t log_idx, nuraft::buffer & data) override; // allow_missing - whether the transaction we want to rollback can be missing from storage // (can happen in case of exception during preprocessing) - void rollbackRequest(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing); - - void rollbackRequestNoLock( - const KeeperStorage::RequestForSession & request_for_session, - bool allow_missing) TSA_NO_THREAD_SAFETY_ANALYSIS; + virtual void rollbackRequest(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing) = 0; uint64_t last_commit_index() override { return keeper_context->lastCommittedIndex(); } - /// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state. - bool apply_snapshot(nuraft::snapshot & s) override; - nuraft::ptr last_snapshot() override; /// Create new snapshot from current state. - void create_snapshot(nuraft::snapshot & s, nuraft::async_result::handler_type & when_done) override; + void create_snapshot(nuraft::snapshot & s, nuraft::async_result::handler_type & when_done) override = 0; /// Save snapshot which was send by leader to us. After that we will apply it in apply_snapshot. - void save_logical_snp_obj(nuraft::snapshot & s, uint64_t & obj_id, nuraft::buffer & data, bool is_first_obj, bool is_last_obj) override; + void save_logical_snp_obj(nuraft::snapshot & s, uint64_t & obj_id, nuraft::buffer & data, bool is_first_obj, bool is_last_obj) override = 0; - /// Better name is `serialize snapshot` -- save existing snapshot (created by create_snapshot) into - /// in-memory buffer data_out. int read_logical_snp_obj( nuraft::snapshot & s, void *& user_snp_ctx, uint64_t obj_id, nuraft::ptr & data_out, bool & is_last_obj) override; - // This should be used only for tests or keeper-data-dumper because it violates - // TSA -- we can't acquire the lock outside of this class or return a storage under lock - // in a reasonable way. - KeeperStorage & getStorageUnsafe() TSA_NO_THREAD_SAFETY_ANALYSIS - { - return *storage; - } - - void shutdownStorage(); + virtual void shutdownStorage() = 0; ClusterConfigPtr getClusterConfig() const; - /// Process local read request - void processReadRequest(const KeeperStorage::RequestForSession & request_for_session); + virtual void processReadRequest(const KeeperStorageBase::RequestForSession & request_for_session) = 0; - std::vector getDeadSessions(); + virtual std::vector getDeadSessions() = 0; - int64_t getNextZxid() const; + virtual int64_t getNextZxid() const = 0; - KeeperStorage::Digest getNodesDigest() const; + virtual KeeperStorageBase::Digest getNodesDigest() const = 0; /// Introspection functions for 4lw commands - uint64_t getLastProcessedZxid() const; + virtual uint64_t getLastProcessedZxid() const = 0; - uint64_t getNodesCount() const; - uint64_t getTotalWatchesCount() const; - uint64_t getWatchedPathsCount() const; - uint64_t getSessionsWithWatchesCount() const; + virtual uint64_t getNodesCount() const = 0; + virtual uint64_t getTotalWatchesCount() const = 0; + virtual uint64_t getWatchedPathsCount() const = 0; + virtual uint64_t getSessionsWithWatchesCount() const = 0; - void dumpWatches(WriteBufferFromOwnString & buf) const; - void dumpWatchesByPath(WriteBufferFromOwnString & buf) const; - void dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const; + virtual void dumpWatches(WriteBufferFromOwnString & buf) const = 0; + virtual void dumpWatchesByPath(WriteBufferFromOwnString & buf) const = 0; + virtual void dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const = 0; - uint64_t getSessionWithEphemeralNodesCount() const; - uint64_t getTotalEphemeralNodesCount() const; - uint64_t getApproximateDataSize() const; - uint64_t getKeyArenaSize() const; - uint64_t getLatestSnapshotSize() const; + virtual uint64_t getSessionWithEphemeralNodesCount() const = 0; + virtual uint64_t getTotalEphemeralNodesCount() const = 0; + virtual uint64_t getApproximateDataSize() const = 0; + virtual uint64_t getKeyArenaSize() const = 0; + virtual uint64_t getLatestSnapshotSize() const = 0; - void recalculateStorageStats(); + virtual void recalculateStorageStats() = 0; - void reconfigure(const KeeperStorage::RequestForSession& request_for_session); + virtual void reconfigure(const KeeperStorageBase::RequestForSession& request_for_session) = 0; -private: +protected: CommitCallback commit_callback; /// In our state machine we always have a single snapshot which is stored /// in memory in compressed (serialized) format. @@ -137,12 +112,9 @@ private: std::shared_ptr latest_snapshot_info; nuraft::ptr latest_snapshot_buf = nullptr; - /// Main state machine logic - KeeperStoragePtr storage TSA_PT_GUARDED_BY(storage_and_responses_lock); + CoordinationSettingsPtr coordination_settings; /// Save/Load and Serialize/Deserialize logic for snapshots. - KeeperSnapshotManager snapshot_manager; - /// Put processed responses into this queue ResponsesQueue & responses_queue; @@ -159,7 +131,7 @@ private: /// for request. mutable std::mutex storage_and_responses_lock; - std::unordered_map>> parsed_request_cache; + std::unordered_map>> parsed_request_cache; uint64_t min_request_size_to_cache{0}; /// we only need to protect the access to the map itself /// requests can be modified from anywhere without lock because a single request @@ -181,7 +153,104 @@ private: KeeperSnapshotManagerS3 * snapshot_manager_s3; - KeeperStorage::ResponseForSession processReconfiguration(const KeeperStorage::RequestForSession & request_for_session) - TSA_REQUIRES(storage_and_responses_lock); + virtual KeeperStorageBase::ResponseForSession processReconfiguration( + const KeeperStorageBase::RequestForSession& request_for_session) + TSA_REQUIRES(storage_and_responses_lock) = 0; + +}; + +/// ClickHouse Keeper state machine. Wrapper for KeeperStorage. +/// Responsible for entries commit, snapshots creation and so on. +template +class KeeperStateMachine : public IKeeperStateMachine +{ +public: + /// using CommitCallback = std::function; + + KeeperStateMachine( + ResponsesQueue & responses_queue_, + SnapshotsQueue & snapshots_queue_, + /// const CoordinationSettingsPtr & coordination_settings_, + const KeeperContextPtr & keeper_context_, + KeeperSnapshotManagerS3 * snapshot_manager_s3_, + CommitCallback commit_callback_ = {}, + const std::string & superdigest_ = ""); + + /// Read state from the latest snapshot + void init() override; + + bool preprocess(const KeeperStorageBase::RequestForSession & request_for_session) override; + + nuraft::ptr pre_commit(uint64_t log_idx, nuraft::buffer & data) override; + + nuraft::ptr commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT + + // allow_missing - whether the transaction we want to rollback can be missing from storage + // (can happen in case of exception during preprocessing) + void rollbackRequest(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing) override; + + void rollbackRequestNoLock( + const KeeperStorageBase::RequestForSession & request_for_session, + bool allow_missing) TSA_NO_THREAD_SAFETY_ANALYSIS; + + /// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state. + bool apply_snapshot(nuraft::snapshot & s) override; + + /// Create new snapshot from current state. + void create_snapshot(nuraft::snapshot & s, nuraft::async_result::handler_type & when_done) override; + + /// Save snapshot which was send by leader to us. After that we will apply it in apply_snapshot. + void save_logical_snp_obj(nuraft::snapshot & s, uint64_t & obj_id, nuraft::buffer & data, bool is_first_obj, bool is_last_obj) override; + + // This should be used only for tests or keeper-data-dumper because it violates + // TSA -- we can't acquire the lock outside of this class or return a storage under lock + // in a reasonable way. + Storage & getStorageUnsafe() TSA_NO_THREAD_SAFETY_ANALYSIS + { + return *storage; + } + + void shutdownStorage() override; + + /// Process local read request + void processReadRequest(const KeeperStorageBase::RequestForSession & request_for_session) override; + + std::vector getDeadSessions() override; + + int64_t getNextZxid() const override; + + KeeperStorageBase::Digest getNodesDigest() const override; + + /// Introspection functions for 4lw commands + uint64_t getLastProcessedZxid() const override; + + uint64_t getNodesCount() const override; + uint64_t getTotalWatchesCount() const override; + uint64_t getWatchedPathsCount() const override; + uint64_t getSessionsWithWatchesCount() const override; + + void dumpWatches(WriteBufferFromOwnString & buf) const override; + void dumpWatchesByPath(WriteBufferFromOwnString & buf) const override; + void dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const override; + + uint64_t getSessionWithEphemeralNodesCount() const override; + uint64_t getTotalEphemeralNodesCount() const override; + uint64_t getApproximateDataSize() const override; + uint64_t getKeyArenaSize() const override; + uint64_t getLatestSnapshotSize() const override; + + void recalculateStorageStats() override; + + void reconfigure(const KeeperStorageBase::RequestForSession& request_for_session) override; + +private: + /// Main state machine logic + std::unique_ptr storage; //TSA_PT_GUARDED_BY(storage_and_responses_lock); + + /// Save/Load and Serialize/Deserialize logic for snapshots. + KeeperSnapshotManager snapshot_manager; + + KeeperStorageBase::ResponseForSession processReconfiguration(const KeeperStorageBase::RequestForSession & request_for_session) + TSA_REQUIRES(storage_and_responses_lock) override; }; } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index efb57ae96e2..acdf209baae 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -64,10 +65,11 @@ String getSHA1(const String & userdata) return String{digest_id.begin(), digest_id.end()}; } +template bool fixupACL( const std::vector & request_acls, int64_t session_id, - const KeeperStorage::UncommittedState & uncommitted_state, + const UncommittedState & uncommitted_state, std::vector & result_acls) { if (request_acls.empty()) @@ -80,7 +82,7 @@ bool fixupACL( { uncommitted_state.forEachAuthInSession( session_id, - [&](const KeeperStorage::AuthID & auth_id) + [&](const KeeperStorageBase::AuthID & auth_id) { valid_found = true; Coordination::ACL new_acl = request_acl; @@ -111,10 +113,10 @@ bool fixupACL( return valid_found; } -KeeperStorage::ResponsesForSessions processWatchesImpl( - const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type) +KeeperStorageBase::ResponsesForSessions processWatchesImpl( + const String & path, KeeperStorageBase::Watches & watches, KeeperStorageBase::Watches & list_watches, Coordination::Event event_type) { - KeeperStorage::ResponsesForSessions result; + KeeperStorageBase::ResponsesForSessions result; auto watch_it = watches.find(path); if (watch_it != watches.end()) { @@ -125,7 +127,7 @@ KeeperStorage::ResponsesForSessions processWatchesImpl( watch_response->type = event_type; watch_response->state = Coordination::State::CONNECTED; for (auto watcher_session : watch_it->second) - result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_response}); + result.push_back(KeeperStorageBase::ResponseForSession{watcher_session, watch_response}); watches.erase(watch_it); } @@ -161,7 +163,7 @@ KeeperStorage::ResponsesForSessions processWatchesImpl( watch_list_response->state = Coordination::State::CONNECTED; for (auto watcher_session : watch_it->second) - result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response}); + result.push_back(KeeperStorageBase::ResponseForSession{watcher_session, watch_list_response}); list_watches.erase(watch_it); } @@ -170,7 +172,8 @@ KeeperStorage::ResponsesForSessions processWatchesImpl( } // When this function is updated, update CURRENT_DIGEST_VERSION!! -uint64_t calculateDigest(std::string_view path, const KeeperStorage::Node & node) +template +uint64_t calculateDigest(std::string_view path, const Node & node) { SipHash hash; @@ -205,7 +208,71 @@ uint64_t calculateDigest(std::string_view path, const KeeperStorage::Node & node } -KeeperStorage::Node & KeeperStorage::Node::operator=(const Node & other) +void KeeperRocksNodeInfo::copyStats(const Coordination::Stat & stat) +{ + czxid = stat.czxid; + mzxid = stat.mzxid; + pzxid = stat.pzxid; + + mtime = stat.mtime; + setCtime(stat.ctime); + + version = stat.version; + cversion = stat.cversion; + aversion = stat.aversion; + + if (stat.ephemeralOwner == 0) + { + is_ephemeral_and_ctime.is_ephemeral = false; + setNumChildren(stat.numChildren); + } + else + { + setEphemeralOwner(stat.ephemeralOwner); + } +} + +void KeeperRocksNode::invalidateDigestCache() const +{ + if (serialized) + throw Exception(ErrorCodes::LOGICAL_ERROR, "We modify node after serialized it"); + digest = 0; +} + +UInt64 KeeperRocksNode::getDigest(std::string_view path) const +{ + if (!digest) + digest = calculateDigest(path, *this); + return digest; +} + +String KeeperRocksNode::getEncodedString() +{ + if (serialized) + throw Exception(ErrorCodes::LOGICAL_ERROR, "We modify node after serialized it"); + serialized = true; + + WriteBufferFromOwnString buffer; + const KeeperRocksNodeInfo & node_info = *this; + writePODBinary(node_info, buffer); + writeBinary(getData(), buffer); + return buffer.str(); +} + +void KeeperRocksNode::decodeFromString(const String &buffer_str) +{ + ReadBufferFromOwnString buffer(buffer_str); + KeeperRocksNodeInfo & node_info = *this; + readPODBinary(node_info, buffer); + readVarUInt(data_size, buffer); + if (data_size) + { + data = std::unique_ptr(new char[data_size]); + buffer.readStrict(data.get(), data_size); + } +} + +KeeperMemNode & KeeperMemNode::operator=(const KeeperMemNode & other) { if (this == &other) return *this; @@ -233,12 +300,12 @@ KeeperStorage::Node & KeeperStorage::Node::operator=(const Node & other) return *this; } -KeeperStorage::Node::Node(const Node & other) +KeeperMemNode::KeeperMemNode(const KeeperMemNode & other) { *this = other; } -KeeperStorage::Node & KeeperStorage::Node::operator=(Node && other) noexcept +KeeperMemNode & KeeperMemNode::operator=(KeeperMemNode && other) noexcept { if (this == &other) return *this; @@ -265,17 +332,17 @@ KeeperStorage::Node & KeeperStorage::Node::operator=(Node && other) noexcept return *this; } -KeeperStorage::Node::Node(Node && other) noexcept +KeeperMemNode::KeeperMemNode(KeeperMemNode && other) noexcept { *this = std::move(other); } -bool KeeperStorage::Node::empty() const +bool KeeperMemNode::empty() const { return data_size == 0 && mzxid == 0; } -void KeeperStorage::Node::copyStats(const Coordination::Stat & stat) +void KeeperMemNode::copyStats(const Coordination::Stat & stat) { czxid = stat.czxid; mzxid = stat.mzxid; @@ -299,7 +366,7 @@ void KeeperStorage::Node::copyStats(const Coordination::Stat & stat) } } -void KeeperStorage::Node::setResponseStat(Coordination::Stat & response_stat) const +void KeeperMemNode::setResponseStat(Coordination::Stat & response_stat) const { response_stat.czxid = czxid; response_stat.mzxid = mzxid; @@ -315,12 +382,12 @@ void KeeperStorage::Node::setResponseStat(Coordination::Stat & response_stat) co } -uint64_t KeeperStorage::Node::sizeInBytes() const +uint64_t KeeperMemNode::sizeInBytes() const { - return sizeof(Node) + children.size() * sizeof(StringRef) + data_size; + return sizeof(KeeperMemNode) + children.size() * sizeof(StringRef) + data_size; } -void KeeperStorage::Node::setData(const String & new_data) +void KeeperMemNode::setData(const String & new_data) { data_size = static_cast(new_data.size()); if (data_size != 0) @@ -330,22 +397,22 @@ void KeeperStorage::Node::setData(const String & new_data) } } -void KeeperStorage::Node::addChild(StringRef child_path) +void KeeperMemNode::addChild(StringRef child_path) { children.insert(child_path); } -void KeeperStorage::Node::removeChild(StringRef child_path) +void KeeperMemNode::removeChild(StringRef child_path) { children.erase(child_path); } -void KeeperStorage::Node::invalidateDigestCache() const +void KeeperMemNode::invalidateDigestCache() const { cached_digest = 0; } -UInt64 KeeperStorage::Node::getDigest(const std::string_view path) const +UInt64 KeeperMemNode::getDigest(const std::string_view path) const { if (cached_digest == 0) cached_digest = calculateDigest(path, *this); @@ -353,7 +420,7 @@ UInt64 KeeperStorage::Node::getDigest(const std::string_view path) const return cached_digest; }; -void KeeperStorage::Node::shallowCopy(const KeeperStorage::Node & other) +void KeeperMemNode::shallowCopy(const KeeperMemNode & other) { czxid = other.czxid; mzxid = other.mzxid; @@ -380,19 +447,25 @@ void KeeperStorage::Node::shallowCopy(const KeeperStorage::Node & other) cached_digest = other.cached_digest; } -KeeperStorage::KeeperStorage( + +template +KeeperStorage::KeeperStorage( int64_t tick_time_ms, const String & superdigest_, const KeeperContextPtr & keeper_context_, const bool initialize_system_nodes) : session_expiry_queue(tick_time_ms), keeper_context(keeper_context_), superdigest(superdigest_) { + if constexpr (use_rocksdb) + container.initialize(keeper_context); Node root_node; container.insert("/", root_node); - addDigest(root_node, "/"); + if constexpr (!use_rocksdb) + addDigest(root_node, "/"); if (initialize_system_nodes) initializeSystemNodes(); } -void KeeperStorage::initializeSystemNodes() +template +void KeeperStorage::initializeSystemNodes() { if (initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes initialized twice"); @@ -404,21 +477,25 @@ void KeeperStorage::initializeSystemNodes() container.insert(keeper_system_path, system_node); // store digest for the empty node because we won't update // its stats - addDigest(system_node, keeper_system_path); + if constexpr (!use_rocksdb) + addDigest(system_node, keeper_system_path); // update root and the digest based on it auto current_root_it = container.find("/"); chassert(current_root_it != container.end()); - removeDigest(current_root_it->value, "/"); + if constexpr (!use_rocksdb) + removeDigest(current_root_it->value, "/"); auto updated_root_it = container.updateValue( "/", [](KeeperStorage::Node & node) { node.increaseNumChildren(); - node.addChild(getBaseNodeName(keeper_system_path)); + if constexpr (!use_rocksdb) + node.addChild(getBaseNodeName(keeper_system_path)); } ); - addDigest(updated_root_it->value, "/"); + if constexpr (!use_rocksdb) + addDigest(updated_root_it->value, "/"); } // insert child system nodes @@ -427,17 +504,22 @@ void KeeperStorage::initializeSystemNodes() chassert(path.starts_with(keeper_system_path)); Node child_system_node; child_system_node.setData(data); - auto [map_key, _] = container.insert(std::string{path}, child_system_node); - /// Take child path from key owned by map. - auto child_path = getBaseNodeName(map_key->getKey()); - container.updateValue( - parentNodePath(StringRef(path)), - [child_path](auto & parent) - { - // don't update stats so digest is okay - parent.addChild(child_path); - } - ); + if constexpr (use_rocksdb) + container.insert(std::string{path}, child_system_node); + else + { + auto [map_key, _] = container.insert(std::string{path}, child_system_node); + /// Take child path from key owned by map. + auto child_path = getBaseNodeName(map_key->getKey()); + container.updateValue( + parentNodePath(StringRef(path)), + [child_path](auto & parent) + { + // don't update stats so digest is okay + parent.addChild(child_path); + } + ); + } } initialized = true; @@ -454,12 +536,13 @@ struct Overloaded : Ts... template Overloaded(Ts...) -> Overloaded; -std::shared_ptr KeeperStorage::UncommittedState::tryGetNodeFromStorage(StringRef path) const +template +std::shared_ptr KeeperStorage::UncommittedState::tryGetNodeFromStorage(StringRef path) const { if (auto node_it = storage.container.find(path); node_it != storage.container.end()) { const auto & committed_node = node_it->value; - auto node = std::make_shared(); + auto node = std::make_shared::Node>(); node->shallowCopy(committed_node); return node; } @@ -467,7 +550,8 @@ std::shared_ptr KeeperStorage::UncommittedState::tryGetNode return nullptr; } -void KeeperStorage::UncommittedState::applyDelta(const Delta & delta) +template +void KeeperStorage::UncommittedState::applyDelta(const Delta & delta) { chassert(!delta.path.empty()); if (!nodes.contains(delta.path)) @@ -514,7 +598,8 @@ void KeeperStorage::UncommittedState::applyDelta(const Delta & delta) delta.operation); } -bool KeeperStorage::UncommittedState::hasACL(int64_t session_id, bool is_local, std::function predicate) const +template +bool KeeperStorage::UncommittedState::hasACL(int64_t session_id, bool is_local, std::function predicate) const { const auto check_auth = [&](const auto & auth_ids) { @@ -552,7 +637,8 @@ bool KeeperStorage::UncommittedState::hasACL(int64_t session_id, bool is_local, return check_auth(auth_it->second); } -void KeeperStorage::UncommittedState::addDelta(Delta new_delta) +template +void KeeperStorage::UncommittedState::addDelta(Delta new_delta) { const auto & added_delta = deltas.emplace_back(std::move(new_delta)); @@ -572,13 +658,15 @@ void KeeperStorage::UncommittedState::addDelta(Delta new_delta) } } -void KeeperStorage::UncommittedState::addDeltas(std::vector new_deltas) +template +void KeeperStorage::UncommittedState::addDeltas(std::vector new_deltas) { for (auto & delta : new_deltas) addDelta(std::move(delta)); } -void KeeperStorage::UncommittedState::commit(int64_t commit_zxid) +template +void KeeperStorage::UncommittedState::commit(int64_t commit_zxid) { chassert(deltas.empty() || deltas.front().zxid >= commit_zxid); @@ -638,7 +726,8 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid) } } -void KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid) +template +void KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid) { // we can only rollback the last zxid (if there is any) // if there is a delta with a larger zxid, we have invalid state @@ -734,7 +823,8 @@ void KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid) } } -std::shared_ptr KeeperStorage::UncommittedState::getNode(StringRef path) const +template +std::shared_ptr KeeperStorage::UncommittedState::getNode(StringRef path) const { if (auto node_it = nodes.find(path.toView()); node_it != nodes.end()) return node_it->second.node; @@ -742,7 +832,8 @@ std::shared_ptr KeeperStorage::UncommittedState::getNode(St return tryGetNodeFromStorage(path); } -Coordination::ACLs KeeperStorage::UncommittedState::getACLs(StringRef path) const +template +Coordination::ACLs KeeperStorage::UncommittedState::getACLs(StringRef path) const { if (auto node_it = nodes.find(path.toView()); node_it != nodes.end()) return node_it->second.acls; @@ -754,7 +845,8 @@ Coordination::ACLs KeeperStorage::UncommittedState::getACLs(StringRef path) cons return storage.acl_map.convertNumber(node_it->value.acl_id); } -void KeeperStorage::UncommittedState::forEachAuthInSession(int64_t session_id, std::function func) const +template +void KeeperStorage::UncommittedState::forEachAuthInSession(int64_t session_id, std::function func) const { const auto call_for_each_auth = [&func](const auto & auth_ids) { @@ -793,7 +885,8 @@ namespace } -void KeeperStorage::applyUncommittedState(KeeperStorage & other, int64_t last_log_idx) +template +void KeeperStorage::applyUncommittedState(KeeperStorage & other, int64_t last_log_idx) { std::unordered_set zxids_to_apply; for (const auto & transaction : uncommitted_transactions) @@ -819,7 +912,8 @@ void KeeperStorage::applyUncommittedState(KeeperStorage & other, int64_t last_lo } } -Coordination::Error KeeperStorage::commit(int64_t commit_zxid) +template +Coordination::Error KeeperStorage::commit(int64_t commit_zxid) { // Deltas are added with increasing ZXIDs // If there are no deltas for the commit_zxid (e.g. read requests), we instantly return @@ -833,7 +927,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) auto result = std::visit( [&, &path = delta.path](DeltaType & operation) -> Coordination::Error { - if constexpr (std::same_as) + if constexpr (std::same_as) { if (!createNode( path, @@ -844,7 +938,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) return Coordination::Error::ZOK; } - else if constexpr (std::same_as) + else if constexpr (std::same_as) { auto node_it = container.find(path); if (node_it == container.end()) @@ -853,20 +947,22 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) if (operation.version != -1 && operation.version != node_it->value.version) onStorageInconsistency(); - removeDigest(node_it->value, path); + if constexpr (!use_rocksdb) + removeDigest(node_it->value, path); auto updated_node = container.updateValue(path, operation.update_fn); - addDigest(updated_node->value, path); + if constexpr (!use_rocksdb) + addDigest(updated_node->value, path); return Coordination::Error::ZOK; } - else if constexpr (std::same_as) + else if constexpr (std::same_as) { if (!removeNode(path, operation.version)) onStorageInconsistency(); return Coordination::Error::ZOK; } - else if constexpr (std::same_as) + else if constexpr (std::same_as) { auto node_it = container.find(path); if (node_it == container.end()) @@ -880,18 +976,18 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) uint64_t acl_id = acl_map.convertACLs(operation.acls); acl_map.addUsage(acl_id); - container.updateValue(path, [acl_id](KeeperStorage::Node & node) { node.acl_id = acl_id; }); + container.updateValue(path, [acl_id](Node & node) { node.acl_id = acl_id; }); return Coordination::Error::ZOK; } - else if constexpr (std::same_as) + else if constexpr (std::same_as) return operation.error; - else if constexpr (std::same_as) + else if constexpr (std::same_as) { finish_subdelta = true; return Coordination::Error::ZOK; } - else if constexpr (std::same_as) + else if constexpr (std::same_as) { session_and_auth[operation.session_id].emplace_back(std::move(operation.auth_id)); return Coordination::Error::ZOK; @@ -918,7 +1014,8 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) return Coordination::Error::ZOK; } -bool KeeperStorage::createNode( +template +bool KeeperStorage::createNode( const std::string & path, String data, const Coordination::Stat & stat, @@ -936,7 +1033,7 @@ bool KeeperStorage::createNode( if (container.contains(path)) return false; - KeeperStorage::Node created_node; + Node created_node; uint64_t acl_id = acl_map.convertACLs(node_acls); acl_map.addUsage(acl_id); @@ -944,23 +1041,31 @@ bool KeeperStorage::createNode( created_node.acl_id = acl_id; created_node.copyStats(stat); created_node.setData(data); - auto [map_key, _] = container.insert(path, created_node); - /// Take child path from key owned by map. - auto child_path = getBaseNodeName(map_key->getKey()); - container.updateValue( - parent_path, - [child_path](KeeperStorage::Node & parent) - { - parent.addChild(child_path); - chassert(parent.numChildren() == static_cast(parent.getChildren().size())); - } - ); + if constexpr (use_rocksdb) + { + container.insert(path, created_node); + } + else + { + auto [map_key, _] = container.insert(path, created_node); + /// Take child path from key owned by map. + auto child_path = getBaseNodeName(map_key->getKey()); + container.updateValue( + parent_path, + [child_path](KeeperMemNode & parent) + { + parent.addChild(child_path); + chassert(parent.numChildren() == static_cast(parent.getChildren().size())); + } + ); - addDigest(map_key->getMapped()->value, map_key->getKey().toView()); + addDigest(map_key->getMapped()->value, map_key->getKey().toView()); + } return true; }; -bool KeeperStorage::removeNode(const std::string & path, int32_t version) +template +bool KeeperStorage::removeNode(const std::string & path, int32_t version) { auto node_it = container.find(path); if (node_it == container.end()) @@ -976,71 +1081,84 @@ bool KeeperStorage::removeNode(const std::string & path, int32_t version) prev_node.shallowCopy(node_it->value); acl_map.removeUsage(node_it->value.acl_id); - container.updateValue( - parentNodePath(path), - [child_basename = getBaseNodeName(node_it->key)](KeeperStorage::Node & parent) - { - parent.removeChild(child_basename); - chassert(parent.numChildren() == static_cast(parent.getChildren().size())); - } - ); + if constexpr (use_rocksdb) + container.erase(path); + else + { + container.updateValue( + parentNodePath(path), + [child_basename = getBaseNodeName(node_it->key)](KeeperMemNode & parent) + { + parent.removeChild(child_basename); + chassert(parent.numChildren() == static_cast(parent.getChildren().size())); + } + ); - container.erase(path); + container.erase(path); - removeDigest(prev_node, path); + removeDigest(prev_node, path); + } return true; } +template struct KeeperStorageRequestProcessor { Coordination::ZooKeeperRequestPtr zk_request; explicit KeeperStorageRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) : zk_request(zk_request_) { } - virtual Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const = 0; - virtual std::vector - preprocess(KeeperStorage & /*storage*/, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const + + virtual Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const = 0; + + virtual std::vector + preprocess(Storage & /*storage*/, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const { return {}; } // process the request using locally committed data virtual Coordination::ZooKeeperResponsePtr - processLocal(KeeperStorage & /*storage*/, int64_t /*zxid*/) const + processLocal(Storage & /*storage*/, int64_t /*zxid*/) const { throw Exception{DB::ErrorCodes::LOGICAL_ERROR, "Cannot process the request locally"}; } - virtual KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & /*watches*/, KeeperStorage::Watches & /*list_watches*/) const + virtual KeeperStorageBase::ResponsesForSessions + processWatches(KeeperStorageBase::Watches & /*watches*/, KeeperStorageBase::Watches & /*list_watches*/) const { return {}; } - virtual bool checkAuth(KeeperStorage & /*storage*/, int64_t /*session_id*/, bool /*is_local*/) const { return true; } + + virtual bool checkAuth(Storage & /*storage*/, int64_t /*session_id*/, bool /*is_local*/) const { return true; } virtual ~KeeperStorageRequestProcessor() = default; }; -struct KeeperStorageHeartbeatRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageHeartbeatRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + Coordination::ZooKeeperResponsePtr - process(KeeperStorage & storage, int64_t zxid) const override + process(Storage & storage, int64_t zxid) const override { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); response_ptr->error = storage.commit(zxid); return response_ptr; } }; -struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + Coordination::ZooKeeperResponsePtr - process(KeeperStorage & /* storage */, int64_t /* zxid */) const override + process(Storage & /* storage */, int64_t /* zxid */) const override { - auto response = zk_request->makeResponse(); + auto response = this->zk_request->makeResponse(); dynamic_cast(*response).path - = dynamic_cast(*zk_request).path; + = dynamic_cast(*this->zk_request).path; return response; } }; @@ -1048,7 +1166,8 @@ struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProc namespace { -Coordination::ACLs getNodeACLs(KeeperStorage & storage, StringRef path, bool is_local) +template +Coordination::ACLs getNodeACLs(Storage & storage, StringRef path, bool is_local) { if (is_local) { @@ -1076,7 +1195,8 @@ void handleSystemNodeModification(const KeeperContext & keeper_context, std::str } -bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session_id, bool is_local) +template +bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session_id, bool is_local) { const auto node_acls = getNodeACLs(*this, path, is_local); if (node_acls.empty()) @@ -1103,7 +1223,8 @@ bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session return false; } -void KeeperStorage::unregisterEphemeralPath(int64_t session_id, const std::string & path) +template +void KeeperStorage::unregisterEphemeralPath(int64_t session_id, const std::string & path) { auto ephemerals_it = ephemerals.find(session_id); if (ephemerals_it == ephemerals.end()) @@ -1114,43 +1235,44 @@ void KeeperStorage::unregisterEphemeralPath(int64_t session_id, const std::strin ephemerals.erase(ephemerals_it); } -struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorageBase::ResponsesForSessions + processWatches(KeeperStorageBase::Watches & watches, KeeperStorageBase::Watches & list_watches) const override { - return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED); + return processWatchesImpl(this->zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED); } - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - auto path = zk_request->getPath(); + auto path = this->zk_request->getPath(); return storage.checkACL(parentNodePath(path), Coordination::ACL::Create, session_id, is_local); } - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t session_id, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override { ProfileEvents::increment(ProfileEvents::KeeperCreateRequest); - Coordination::ZooKeeperCreateRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperCreateRequest & request = dynamic_cast(*this->zk_request); - std::vector new_deltas; + std::vector new_deltas; auto parent_path = parentNodePath(request.path); auto parent_node = storage.uncommitted_state.getNode(parent_path); if (parent_node == nullptr) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; else if (parent_node->isEphemeral()) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNOCHILDRENFOREPHEMERALS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNOCHILDRENFOREPHEMERALS}}; std::string path_created = request.path; if (request.is_sequential) { if (request.not_exists) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; auto seq_num = parent_node->seqNum(); @@ -1166,30 +1288,30 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr auto error_msg = fmt::format("Trying to create a node inside the internal Keeper path ({}) which is not allowed. Path: {}", keeper_system_path, path_created); handleSystemNodeModification(keeper_context, error_msg); - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; } if (storage.uncommitted_state.getNode(path_created)) { - if (zk_request->getOpNum() == Coordination::OpNum::CreateIfNotExists) + if (this->zk_request->getOpNum() == Coordination::OpNum::CreateIfNotExists) return new_deltas; - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNODEEXISTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNODEEXISTS}}; } if (getBaseNodeName(path_created).size == 0) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; Coordination::ACLs node_acls; if (!fixupACL(request.acls, session_id, storage.uncommitted_state, node_acls)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZINVALIDACL}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZINVALIDACL}}; if (request.is_ephemeral) storage.ephemerals[session_id].emplace(path_created); int32_t parent_cversion = request.parent_cversion; - auto parent_update = [parent_cversion, zxid](KeeperStorage::Node & node) + auto parent_update = [parent_cversion, zxid](Storage::Node & node) { /// Increment sequential number even if node is not sequential node.increaseSeqNum(); @@ -1202,7 +1324,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr node.increaseNumChildren(); }; - new_deltas.emplace_back(std::string{parent_path}, zxid, KeeperStorage::UpdateNodeDelta{std::move(parent_update)}); + new_deltas.emplace_back(std::string{parent_path}, zxid, typename Storage::UpdateNodeDelta{std::move(parent_update)}); Coordination::Stat stat; stat.czxid = zxid; @@ -1219,20 +1341,20 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr new_deltas.emplace_back( std::move(path_created), zxid, - KeeperStorage::CreateNodeDelta{stat, std::move(node_acls), request.data}); + typename Storage::CreateNodeDelta{stat, std::move(node_acls), request.data}); digest = storage.calculateNodesDigest(digest, new_deltas); return new_deltas; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperCreateResponse & response = dynamic_cast(*response_ptr); if (storage.uncommitted_state.deltas.begin()->zxid != zxid) { - response.path_created = zk_request->getPath(); + response.path_created = this->zk_request->getPath(); response.error = Coordination::Error::ZOK; return response_ptr; } @@ -1248,7 +1370,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr deltas.begin(), deltas.end(), [zxid](const auto & delta) - { return delta.zxid == zxid && std::holds_alternative(delta.operation); }); + { return delta.zxid == zxid && std::holds_alternative(delta.operation); }); response.path_created = create_delta_it->path; response.error = Coordination::Error::ZOK; @@ -1256,20 +1378,21 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr } }; -struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); + return storage.checkACL(this->zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); } - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { ProfileEvents::increment(ProfileEvents::KeeperGetRequest); - Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperGetRequest & request = dynamic_cast(*this->zk_request); if (request.path == Coordination::keeper_api_feature_flags_path || request.path == Coordination::keeper_config_path @@ -1277,17 +1400,17 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce return {}; if (!storage.uncommitted_state.getNode(request.path)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; return {}; } template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid) const + Coordination::ZooKeeperResponsePtr processImpl(Storage & storage, int64_t zxid) const { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperGetResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperGetRequest & request = dynamic_cast(*this->zk_request); if constexpr (!local) { @@ -1327,40 +1450,42 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { return processImpl(storage, zxid); } - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr processLocal(Storage & storage, int64_t zxid) const override { ProfileEvents::increment(ProfileEvents::KeeperGetRequest); return processImpl(storage, zxid); } }; -struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - return storage.checkACL(parentNodePath(zk_request->getPath()), Coordination::ACL::Delete, session_id, is_local); + return storage.checkACL(parentNodePath(this->zk_request->getPath()), Coordination::ACL::Delete, session_id, is_local); } - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & digest, const KeeperContext & keeper_context) const override + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & digest, const KeeperContext & keeper_context) const override { ProfileEvents::increment(ProfileEvents::KeeperRemoveRequest); - Coordination::ZooKeeperRemoveRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperRemoveRequest & request = dynamic_cast(*this->zk_request); - std::vector new_deltas; + std::vector new_deltas; if (Coordination::matchPath(request.path, keeper_system_path) != Coordination::PathMatchResult::NOT_MATCH) { auto error_msg = fmt::format("Trying to delete an internal Keeper path ({}) which is not allowed", request.path); handleSystemNodeModification(keeper_context, error_msg); - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; } const auto update_parent_pzxid = [&]() @@ -1372,9 +1497,9 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr new_deltas.emplace_back( std::string{parent_path}, zxid, - KeeperStorage::UpdateNodeDelta + typename Storage::UpdateNodeDelta { - [zxid](KeeperStorage::Node & parent) + [zxid](Storage::Node & parent) { parent.pzxid = std::max(parent.pzxid, zxid); } @@ -1388,12 +1513,12 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr { if (request.restored_from_zookeeper_log) update_parent_pzxid(); - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; } else if (request.version != -1 && request.version != node->version) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADVERSION}}; else if (node->numChildren() != 0) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNOTEMPTY}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNOTEMPTY}}; if (request.restored_from_zookeeper_log) update_parent_pzxid(); @@ -1401,13 +1526,13 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr new_deltas.emplace_back( std::string{parentNodePath(request.path)}, zxid, - KeeperStorage::UpdateNodeDelta{[](KeeperStorage::Node & parent) + typename Storage::UpdateNodeDelta{[](typename Storage::Node & parent) { ++parent.cversion; parent.decreaseNumChildren(); }}); - new_deltas.emplace_back(request.path, zxid, KeeperStorage::RemoveNodeDelta{request.version, node->ephemeralOwner()}); + new_deltas.emplace_back(request.path, zxid, typename Storage::RemoveNodeDelta{request.version, node->ephemeralOwner()}); if (node->isEphemeral()) storage.unregisterEphemeralPath(node->ephemeralOwner(), request.path); @@ -1417,44 +1542,45 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr return new_deltas; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperRemoveResponse & response = dynamic_cast(*response_ptr); response.error = storage.commit(zxid); return response_ptr; } - KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorageBase::ResponsesForSessions + processWatches(KeeperStorageBase::Watches & watches, KeeperStorageBase::Watches & list_watches) const override { - return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::DELETED); + return processWatchesImpl(this->zk_request->getPath(), watches, list_watches, Coordination::Event::DELETED); } }; -struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { ProfileEvents::increment(ProfileEvents::KeeperExistsRequest); - Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*this->zk_request); if (!storage.uncommitted_state.getNode(request.path)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; return {}; } template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid) const + Coordination::ZooKeeperResponsePtr processImpl(Storage & storage, int64_t zxid) const { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperExistsResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*this->zk_request); if constexpr (!local) { @@ -1483,55 +1609,57 @@ struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestPr return response_ptr; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { return processImpl(storage, zxid); } - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr processLocal(Storage & storage, int64_t zxid) const override { ProfileEvents::increment(ProfileEvents::KeeperExistsRequest); return processImpl(storage, zxid); } }; -struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Write, session_id, is_local); + return storage.checkACL(this->zk_request->getPath(), Coordination::ACL::Write, session_id, is_local); } - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override { ProfileEvents::increment(ProfileEvents::KeeperSetRequest); - Coordination::ZooKeeperSetRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperSetRequest & request = dynamic_cast(*this->zk_request); - std::vector new_deltas; + std::vector new_deltas; if (Coordination::matchPath(request.path, keeper_system_path) != Coordination::PathMatchResult::NOT_MATCH) { auto error_msg = fmt::format("Trying to update an internal Keeper path ({}) which is not allowed", request.path); handleSystemNodeModification(keeper_context, error_msg); - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; } if (!storage.uncommitted_state.getNode(request.path)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; auto node = storage.uncommitted_state.getNode(request.path); if (request.version != -1 && request.version != node->version) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADVERSION}}; new_deltas.emplace_back( request.path, zxid, - KeeperStorage::UpdateNodeDelta{ - [zxid, data = request.data, time](KeeperStorage::Node & value) + typename Storage::UpdateNodeDelta{ + [zxid, data = request.data, time](typename Storage::Node & value) { value.version++; value.mzxid = zxid; @@ -1543,9 +1671,9 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce new_deltas.emplace_back( parentNodePath(request.path).toString(), zxid, - KeeperStorage::UpdateNodeDelta + typename Storage::UpdateNodeDelta { - [](KeeperStorage::Node & parent) + [](Storage::Node & parent) { parent.cversion++; } @@ -1556,13 +1684,13 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce return new_deltas; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { auto & container = storage.container; - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperSetResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperSetRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperSetRequest & request = dynamic_cast(*this->zk_request); if (const auto result = storage.commit(zxid); result != Coordination::Error::ZOK) { @@ -1580,40 +1708,41 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce return response_ptr; } - KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorageBase::ResponsesForSessions + processWatches(typename Storage::Watches & watches, typename Storage::Watches & list_watches) const override { - return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED); + return processWatchesImpl(this->zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED); } }; -struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); + return storage.checkACL(this->zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); } - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { ProfileEvents::increment(ProfileEvents::KeeperListRequest); - Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperListRequest & request = dynamic_cast(*this->zk_request); if (!storage.uncommitted_state.getNode(request.path)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; return {}; } - template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid) const + Coordination::ZooKeeperResponsePtr processImpl(Storage & storage, int64_t zxid) const { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperListResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperListRequest & request = dynamic_cast(*this->zk_request); if constexpr (!local) { @@ -1640,33 +1769,55 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc if (path_prefix.empty()) throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Path cannot be empty"); - const auto & children = node_it->value.getChildren(); + const auto & get_children = [&]() + { + if constexpr (Storage::use_rocksdb) + return container.getChildren(request.path); + else + return node_it->value.getChildren(); + }; + const auto & children = get_children(); response.names.reserve(children.size()); - const auto add_child = [&](const auto child) + const auto add_child = [&](const auto & child) { using enum Coordination::ListRequestType; auto list_request_type = ALL; if (auto * filtered_list = dynamic_cast(&request)) + { list_request_type = filtered_list->list_request_type; + } if (list_request_type == ALL) return true; - auto child_path = (std::filesystem::path(request.path) / child.toView()).generic_string(); - auto child_it = container.find(child_path); - if (child_it == container.end()) - onStorageInconsistency(); + bool is_ephemeral; + if constexpr (!Storage::use_rocksdb) + { + auto child_path = (std::filesystem::path(request.path) / child.toView()).generic_string(); + auto child_it = container.find(child_path); + if (child_it == container.end()) + onStorageInconsistency(); + is_ephemeral = child_it->value.isEphemeral(); + } + else + { + is_ephemeral = child.second.isEphemeral(); + } - const auto is_ephemeral = child_it->value.isEphemeral(); return (is_ephemeral && list_request_type == EPHEMERAL_ONLY) || (!is_ephemeral && list_request_type == PERSISTENT_ONLY); }; - for (const auto child : children) + for (const auto & child : children) { if (add_child(child)) - response.names.push_back(child.toString()); + { + if constexpr (Storage::use_rocksdb) + response.names.push_back(child.first); + else + response.names.push_back(child.toString()); + } } node_it->value.setResponseStat(response.stat); @@ -1676,63 +1827,64 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc return response_ptr; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { return processImpl(storage, zxid); } - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr processLocal(Storage & storage, int64_t zxid) const override { ProfileEvents::increment(ProfileEvents::KeeperListRequest); return processImpl(storage, zxid); } }; -struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestProcessor { explicit KeeperStorageCheckRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) - : KeeperStorageRequestProcessor(zk_request_) + : KeeperStorageRequestProcessor(zk_request_) { - check_not_exists = zk_request->getOpNum() == Coordination::OpNum::CheckNotExists; + check_not_exists = this->zk_request->getOpNum() == Coordination::OpNum::CheckNotExists; } - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - auto path = zk_request->getPath(); + auto path = this->zk_request->getPath(); return storage.checkACL(check_not_exists ? parentNodePath(path) : path, Coordination::ACL::Read, session_id, is_local); } - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { ProfileEvents::increment(ProfileEvents::KeeperCheckRequest); - Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*this->zk_request); auto node = storage.uncommitted_state.getNode(request.path); if (check_not_exists) { if (node && (request.version == -1 || request.version == node->version)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNODEEXISTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNODEEXISTS}}; } else { if (!node) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; if (request.version != -1 && request.version != node->version) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADVERSION}}; } return {}; } template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid) const + Coordination::ZooKeeperResponsePtr processImpl(Storage & storage, int64_t zxid) const { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperCheckResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*this->zk_request); if constexpr (!local) { @@ -1774,12 +1926,12 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro return response_ptr; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { return processImpl(storage, zxid); } - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr processLocal(Storage & storage, int64_t zxid) const override { ProfileEvents::increment(ProfileEvents::KeeperCheckRequest); return processImpl(storage, zxid); @@ -1790,55 +1942,56 @@ private: }; -struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Admin, session_id, is_local); + return storage.checkACL(this->zk_request->getPath(), Coordination::ACL::Admin, session_id, is_local); } - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/, uint64_t & digest, const KeeperContext & keeper_context) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/, uint64_t & digest, const KeeperContext & keeper_context) const override { - Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*this->zk_request); if (Coordination::matchPath(request.path, keeper_system_path) != Coordination::PathMatchResult::NOT_MATCH) { auto error_msg = fmt::format("Trying to update an internal Keeper path ({}) which is not allowed", request.path); handleSystemNodeModification(keeper_context, error_msg); - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; } auto & uncommitted_state = storage.uncommitted_state; if (!uncommitted_state.getNode(request.path)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; auto node = uncommitted_state.getNode(request.path); if (request.version != -1 && request.version != node->aversion) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADVERSION}}; Coordination::ACLs node_acls; if (!fixupACL(request.acls, session_id, uncommitted_state, node_acls)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZINVALIDACL}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZINVALIDACL}}; - std::vector new_deltas + std::vector new_deltas { { request.path, zxid, - KeeperStorage::SetACLDelta{std::move(node_acls), request.version} + typename Storage::SetACLDelta{std::move(node_acls), request.version} }, { request.path, zxid, - KeeperStorage::UpdateNodeDelta + typename Storage::UpdateNodeDelta { - [](KeeperStorage::Node & n) { ++n.aversion; } + [](typename Storage::Node & n) { ++n.aversion; } } } }; @@ -1848,11 +2001,11 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr return new_deltas; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperSetACLResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*this->zk_request); if (const auto result = storage.commit(zxid); result != Coordination::Error::ZOK) { @@ -1870,32 +2023,33 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr } }; -struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Admin | Coordination::ACL::Read, session_id, is_local); + return storage.checkACL(this->zk_request->getPath(), Coordination::ACL::Admin | Coordination::ACL::Read, session_id, is_local); } - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { - Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*this->zk_request); if (!storage.uncommitted_state.getNode(request.path)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; return {}; } template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid) const + Coordination::ZooKeeperResponsePtr processImpl(Storage & storage, int64_t zxid) const { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperGetACLResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*this->zk_request); if constexpr (!local) { @@ -1924,23 +2078,24 @@ struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestPr return response_ptr; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { return processImpl(storage, zxid); } - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr processLocal(Storage & storage, int64_t zxid) const override { return processImpl(storage, zxid); } }; -struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestProcessor { using OperationType = Coordination::ZooKeeperMultiRequest::OperationType; std::optional operation_type; - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { for (const auto & concrete_request : concrete_requests) if (!concrete_request->checkAuth(storage, session_id, is_local)) @@ -1948,11 +2103,11 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro return true; } - std::vector concrete_requests; + std::vector>> concrete_requests; explicit KeeperStorageMultiRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) - : KeeperStorageRequestProcessor(zk_request_) + : KeeperStorageRequestProcessor(zk_request_) { - Coordination::ZooKeeperMultiRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperMultiRequest & request = dynamic_cast(*this->zk_request); concrete_requests.reserve(request.requests.size()); const auto check_operation_type = [&](OperationType type) @@ -1970,34 +2125,34 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro case Coordination::OpNum::Create: case Coordination::OpNum::CreateIfNotExists: check_operation_type(OperationType::Write); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; case Coordination::OpNum::Remove: check_operation_type(OperationType::Write); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; case Coordination::OpNum::Set: check_operation_type(OperationType::Write); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; case Coordination::OpNum::Check: case Coordination::OpNum::CheckNotExists: check_operation_type(OperationType::Write); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; case Coordination::OpNum::Get: check_operation_type(OperationType::Read); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; case Coordination::OpNum::Exists: check_operation_type(OperationType::Read); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; case Coordination::OpNum::List: case Coordination::OpNum::FilteredList: case Coordination::OpNum::SimpleList: check_operation_type(OperationType::Read); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; default: throw DB::Exception( @@ -2010,8 +2165,8 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro chassert(request.requests.empty() || operation_type.has_value()); } - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t session_id, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override { ProfileEvents::increment(ProfileEvents::KeeperMultiRequest); std::vector response_errors; @@ -2023,7 +2178,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro if (!new_deltas.empty()) { - if (auto * error = std::get_if(&new_deltas.back().operation); + if (auto * error = std::get_if(&new_deltas.back().operation); error && *operation_type == OperationType::Write) { storage.uncommitted_state.rollback(zxid); @@ -2034,10 +2189,10 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro response_errors.push_back(Coordination::Error::ZRUNTIMEINCONSISTENCY); } - return {KeeperStorage::Delta{zxid, KeeperStorage::FailedMultiDelta{std::move(response_errors)}}}; + return {typename Storage::Delta{zxid, typename Storage::FailedMultiDelta{std::move(response_errors)}}}; } } - new_deltas.emplace_back(zxid, KeeperStorage::SubDeltaEnd{}); + new_deltas.emplace_back(zxid, typename Storage::SubDeltaEnd{}); response_errors.push_back(Coordination::Error::ZOK); // manually add deltas so that the result of previous request in the transaction is used in the next request @@ -2049,15 +2204,15 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro return {}; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); auto & deltas = storage.uncommitted_state.deltas; // the deltas will have at least SubDeltaEnd or FailedMultiDelta chassert(!deltas.empty()); - if (auto * failed_multi = std::get_if(&deltas.front().operation)) + if (auto * failed_multi = std::get_if(&deltas.front().operation)) { for (size_t i = 0; i < concrete_requests.size(); ++i) { @@ -2079,10 +2234,10 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro return response_ptr; } - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr processLocal(Storage & storage, int64_t zxid) const override { ProfileEvents::increment(ProfileEvents::KeeperMultiReadRequest); - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); for (size_t i = 0; i < concrete_requests.size(); ++i) @@ -2094,10 +2249,10 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro return response_ptr; } - KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorageBase::ResponsesForSessions + processWatches(typename Storage::Watches & watches, typename Storage::Watches & list_watches) const override { - KeeperStorage::ResponsesForSessions result; + typename Storage::ResponsesForSessions result; for (const auto & generic_request : concrete_requests) { auto responses = generic_request->processWatches(watches, list_watches); @@ -2107,47 +2262,50 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro } }; -struct KeeperStorageCloseRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageCloseRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - Coordination::ZooKeeperResponsePtr process(KeeperStorage &, int64_t) const override + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + Coordination::ZooKeeperResponsePtr process(Storage &, int64_t) const override { throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Called process on close request"); } }; -struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { - Coordination::ZooKeeperAuthRequest & auth_request = dynamic_cast(*zk_request); - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperAuthRequest & auth_request = dynamic_cast(*this->zk_request); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); if (auth_request.scheme != "digest" || std::count(auth_request.data.begin(), auth_request.data.end(), ':') != 1) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZAUTHFAILED}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZAUTHFAILED}}; - std::vector new_deltas; - auto auth_digest = KeeperStorage::generateDigest(auth_request.data); + std::vector new_deltas; + auto auth_digest = Storage::generateDigest(auth_request.data); if (auth_digest == storage.superdigest) { - KeeperStorage::AuthID auth{"super", ""}; - new_deltas.emplace_back(zxid, KeeperStorage::AddAuthDelta{session_id, std::move(auth)}); + typename Storage::AuthID auth{"super", ""}; + new_deltas.emplace_back(zxid, typename Storage::AddAuthDelta{session_id, std::move(auth)}); } else { - KeeperStorage::AuthID new_auth{auth_request.scheme, auth_digest}; + typename Storage::AuthID new_auth{auth_request.scheme, auth_digest}; if (!storage.uncommitted_state.hasACL(session_id, false, [&](const auto & auth_id) { return new_auth == auth_id; })) - new_deltas.emplace_back(zxid, KeeperStorage::AddAuthDelta{session_id, std::move(new_auth)}); + new_deltas.emplace_back(zxid, typename Storage::AddAuthDelta{session_id, std::move(new_auth)}); } return new_deltas; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperAuthResponse & auth_response = dynamic_cast(*response_ptr); if (const auto result = storage.commit(zxid); result != Coordination::Error::ZOK) @@ -2157,7 +2315,8 @@ struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProc } }; -void KeeperStorage::finalize() +template +void KeeperStorage::finalize() { if (finalized) throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage already finalized"); @@ -2172,25 +2331,26 @@ void KeeperStorage::finalize() session_expiry_queue.clear(); } -bool KeeperStorage::isFinalized() const +template +bool KeeperStorage::isFinalized() const { return finalized; } - +template class KeeperStorageRequestProcessorsFactory final : private boost::noncopyable { public: - using Creator = std::function; + using Creator = std::function>(const Coordination::ZooKeeperRequestPtr &)>; using OpNumToRequest = std::unordered_map; - static KeeperStorageRequestProcessorsFactory & instance() + static KeeperStorageRequestProcessorsFactory & instance() { - static KeeperStorageRequestProcessorsFactory factory; + static KeeperStorageRequestProcessorsFactory factory; return factory; } - KeeperStorageRequestProcessorPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const + std::shared_ptr> get(const Coordination::ZooKeeperRequestPtr & zk_request) const { auto request_it = op_num_to_request.find(zk_request->getOpNum()); if (request_it == op_num_to_request.end()) @@ -2210,39 +2370,41 @@ private: KeeperStorageRequestProcessorsFactory(); }; -template -void registerKeeperRequestProcessor(KeeperStorageRequestProcessorsFactory & factory) +template +void registerKeeperRequestProcessor(Factory & factory) { factory.registerRequest( num, [](const Coordination::ZooKeeperRequestPtr & zk_request) { return std::make_shared(zk_request); }); } -KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() +template +KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() { - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); } -UInt64 KeeperStorage::calculateNodesDigest(UInt64 current_digest, const std::vector & new_deltas) const +template +UInt64 KeeperStorage::calculateNodesDigest(UInt64 current_digest, const std::vector & new_deltas) const { if (!keeper_context->digestEnabled()) return current_digest; @@ -2277,7 +2439,7 @@ UInt64 KeeperStorage::calculateNodesDigest(UInt64 current_digest, const std::vec auto updated_node_it = updated_nodes.find(delta.path); if (updated_node_it == updated_nodes.end()) { - node = std::make_shared(); + node = std::make_shared(); node->shallowCopy(*uncommitted_state.getNode(delta.path)); current_digest -= node->getDigest(delta.path); updated_nodes.emplace(delta.path, node); @@ -2303,7 +2465,8 @@ UInt64 KeeperStorage::calculateNodesDigest(UInt64 current_digest, const std::vec return current_digest; } -void KeeperStorage::preprocessRequest( +template +void KeeperStorage::preprocessRequest( const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, int64_t time, @@ -2376,7 +2539,7 @@ void KeeperStorage::preprocessRequest( uncommitted_state.addDeltas(std::move(new_deltas)); }); - KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); + auto request_processor = KeeperStorageRequestProcessorsFactory>::instance().get(zk_request); if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special { @@ -2419,7 +2582,8 @@ void KeeperStorage::preprocessRequest( new_deltas = request_processor->preprocess(*this, transaction.zxid, session_id, time, new_digest, *keeper_context); } -KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( +template +KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, std::optional new_last_zxid, @@ -2460,7 +2624,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( uncommitted_transactions.pop_front(); } - KeeperStorage::ResponsesForSessions results; + ResponsesForSessions results; /// ZooKeeper update sessions expirity for each request, not only for heartbeats session_expiry_queue.addNewSessionOrUpdate(session_id, session_and_timeout[session_id]); @@ -2496,7 +2660,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( } else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat) /// Heartbeat request is also special { - KeeperStorageRequestProcessorPtr storage_request = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); + auto storage_request = KeeperStorageRequestProcessorsFactory>::instance().get(zk_request); auto response = storage_request->process(*this, zxid); response->xid = zk_request->xid; response->zxid = getZXID(); @@ -2505,7 +2669,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( } else /// normal requests proccession { - KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); + auto request_processor = KeeperStorageRequestProcessorsFactory>::instance().get(zk_request); Coordination::ZooKeeperResponsePtr response; if (is_local) @@ -2568,7 +2732,8 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( return results; } -void KeeperStorage::rollbackRequest(int64_t rollback_zxid, bool allow_missing) +template +void KeeperStorage::rollbackRequest(int64_t rollback_zxid, bool allow_missing) { if (allow_missing && (uncommitted_transactions.empty() || uncommitted_transactions.back().zxid < rollback_zxid)) return; @@ -2594,7 +2759,8 @@ void KeeperStorage::rollbackRequest(int64_t rollback_zxid, bool allow_missing) } } -KeeperStorage::Digest KeeperStorage::getNodesDigest(bool committed) const +template +KeeperStorageBase::Digest KeeperStorage::getNodesDigest(bool committed) const { if (!keeper_context->digestEnabled()) return {.version = DigestVersion::NO_DIGEST}; @@ -2605,13 +2771,15 @@ KeeperStorage::Digest KeeperStorage::getNodesDigest(bool committed) const return uncommitted_transactions.back().nodes_digest; } -void KeeperStorage::removeDigest(const Node & node, const std::string_view path) +template +void KeeperStorage::removeDigest(const Node & node, const std::string_view path) { if (keeper_context->digestEnabled()) nodes_digest -= node.getDigest(path); } -void KeeperStorage::addDigest(const Node & node, const std::string_view path) +template +void KeeperStorage::addDigest(const Node & node, const std::string_view path) { if (keeper_context->digestEnabled()) { @@ -2620,7 +2788,8 @@ void KeeperStorage::addDigest(const Node & node, const std::string_view path) } } -void KeeperStorage::clearDeadWatches(int64_t session_id) +template +void KeeperStorage::clearDeadWatches(int64_t session_id) { /// Clear all watches for this session auto watches_it = sessions_and_watchers.find(session_id); @@ -2653,7 +2822,8 @@ void KeeperStorage::clearDeadWatches(int64_t session_id) } } -void KeeperStorage::dumpWatches(WriteBufferFromOwnString & buf) const +template +void KeeperStorage::dumpWatches(WriteBufferFromOwnString & buf) const { for (const auto & [session_id, watches_paths] : sessions_and_watchers) { @@ -2663,7 +2833,8 @@ void KeeperStorage::dumpWatches(WriteBufferFromOwnString & buf) const } } -void KeeperStorage::dumpWatchesByPath(WriteBufferFromOwnString & buf) const +template +void KeeperStorage::dumpWatchesByPath(WriteBufferFromOwnString & buf) const { auto write_int_container = [&buf](const auto & session_ids) { @@ -2686,7 +2857,8 @@ void KeeperStorage::dumpWatchesByPath(WriteBufferFromOwnString & buf) const } } -void KeeperStorage::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const +template +void KeeperStorage::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const { auto write_str_set = [&buf](const std::unordered_set & ephemeral_paths) { @@ -2711,7 +2883,8 @@ void KeeperStorage::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) co } } -uint64_t KeeperStorage::getTotalWatchesCount() const +template +uint64_t KeeperStorage::getTotalWatchesCount() const { uint64_t ret = 0; for (const auto & [session, paths] : sessions_and_watchers) @@ -2720,12 +2893,14 @@ uint64_t KeeperStorage::getTotalWatchesCount() const return ret; } -uint64_t KeeperStorage::getSessionsWithWatchesCount() const +template +uint64_t KeeperStorage::getSessionsWithWatchesCount() const { return sessions_and_watchers.size(); } -uint64_t KeeperStorage::getTotalEphemeralNodesCount() const +template +uint64_t KeeperStorage::getTotalEphemeralNodesCount() const { uint64_t ret = 0; for (const auto & [session_id, nodes] : ephemerals) @@ -2734,12 +2909,13 @@ uint64_t KeeperStorage::getTotalEphemeralNodesCount() const return ret; } -void KeeperStorage::recalculateStats() +template +void KeeperStorage::recalculateStats() { container.recalculateDataSize(); } -bool KeeperStorage::checkDigest(const Digest & first, const Digest & second) +bool KeeperStorageBase::checkDigest(const Digest & first, const Digest & second) { if (first.version != second.version) return true; @@ -2750,13 +2926,18 @@ bool KeeperStorage::checkDigest(const Digest & first, const Digest & second) return first.value == second.value; } -String KeeperStorage::generateDigest(const String & userdata) +template +String KeeperStorage::generateDigest(const String & userdata) { std::vector user_password; boost::split(user_password, userdata, [](char character) { return character == ':'; }); return user_password[0] + ":" + base64Encode(getSHA1(userdata)); } +template class KeeperStorage>; +#if USE_ROCKSDB +template class KeeperStorage>; +#endif } diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index f7812ad8877..4a9286d4835 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -8,188 +8,384 @@ #include +#include "config.h" +#if USE_ROCKSDB +#include +#endif + namespace DB { class KeeperContext; using KeeperContextPtr = std::shared_ptr; -struct KeeperStorageRequestProcessor; -using KeeperStorageRequestProcessorPtr = std::shared_ptr; using ResponseCallback = std::function; using ChildrenSet = absl::flat_hash_set; using SessionAndTimeout = std::unordered_map; -struct KeeperStorageSnapshot; - -/// Keeper state machine almost equal to the ZooKeeper's state machine. -/// Implements all logic of operations, data changes, sessions allocation. -/// In-memory and not thread safe. -class KeeperStorage +/// KeeperRocksNodeInfo is used in RocksDB keeper. +/// It is serialized directly as POD to RocksDB. +struct KeeperRocksNodeInfo { -public: - /// Node should have as minimal size as possible to reduce memory footprint - /// of stored nodes - /// New fields should be added to the struct only if it's really necessary - struct Node + int64_t czxid{0}; + int64_t mzxid{0}; + int64_t pzxid{0}; + uint64_t acl_id = 0; /// 0 -- no ACL by default + + int64_t mtime{0}; + + int32_t version{0}; + int32_t cversion{0}; + int32_t aversion{0}; + + int32_t seq_num = 0; + mutable UInt64 digest = 0; /// we cached digest for this node. + + /// as ctime can't be negative because it stores the timestamp when the + /// node was created, we can use the MSB for a bool + struct { - int64_t czxid{0}; - int64_t mzxid{0}; - int64_t pzxid{0}; - uint64_t acl_id = 0; /// 0 -- no ACL by default + bool is_ephemeral : 1; + int64_t ctime : 63; + } is_ephemeral_and_ctime{false, 0}; - int64_t mtime{0}; - - std::unique_ptr data{nullptr}; - uint32_t data_size{0}; - - int32_t version{0}; - int32_t cversion{0}; - int32_t aversion{0}; - - mutable uint64_t cached_digest = 0; - - Node() = default; - - Node & operator=(const Node & other); - Node(const Node & other); - - Node & operator=(Node && other) noexcept; - Node(Node && other) noexcept; - - bool empty() const; - - bool isEphemeral() const - { - return is_ephemeral_and_ctime.is_ephemeral; - } - - int64_t ephemeralOwner() const - { - if (isEphemeral()) - return ephemeral_or_children_data.ephemeral_owner; - - return 0; - } - - void setEphemeralOwner(int64_t ephemeral_owner) - { - is_ephemeral_and_ctime.is_ephemeral = ephemeral_owner != 0; - ephemeral_or_children_data.ephemeral_owner = ephemeral_owner; - } - - int32_t numChildren() const - { - if (isEphemeral()) - return 0; - - return ephemeral_or_children_data.children_info.num_children; - } - - void setNumChildren(int32_t num_children) - { - ephemeral_or_children_data.children_info.num_children = num_children; - } - - void increaseNumChildren() - { - chassert(!isEphemeral()); - ++ephemeral_or_children_data.children_info.num_children; - } - - void decreaseNumChildren() - { - chassert(!isEphemeral()); - --ephemeral_or_children_data.children_info.num_children; - } - - int32_t seqNum() const - { - if (isEphemeral()) - return 0; - - return ephemeral_or_children_data.children_info.seq_num; - } - - void setSeqNum(int32_t seq_num) - { - ephemeral_or_children_data.children_info.seq_num = seq_num; - } - - void increaseSeqNum() - { - chassert(!isEphemeral()); - ++ephemeral_or_children_data.children_info.seq_num; - } - - int64_t ctime() const - { - return is_ephemeral_and_ctime.ctime; - } - - void setCtime(uint64_t ctime) - { - is_ephemeral_and_ctime.ctime = ctime; - } - - void copyStats(const Coordination::Stat & stat); - - void setResponseStat(Coordination::Stat & response_stat) const; - - /// Object memory size - uint64_t sizeInBytes() const; - - void setData(const String & new_data); - - std::string_view getData() const noexcept { return {data.get(), data_size}; } - - void addChild(StringRef child_path); - - void removeChild(StringRef child_path); - - const auto & getChildren() const noexcept { return children; } - auto & getChildren() { return children; } - - // Invalidate the calculated digest so it's recalculated again on the next - // getDigest call - void invalidateDigestCache() const; - - // get the calculated digest of the node - UInt64 getDigest(std::string_view path) const; - - // copy only necessary information for preprocessing and digest calculation - // (e.g. we don't need to copy list of children) - void shallowCopy(const Node & other); - private: - /// as ctime can't be negative because it stores the timestamp when the - /// node was created, we can use the MSB for a bool + /// ephemeral notes cannot have children so a node can set either + /// ephemeral_owner OR seq_num + num_children + union + { + int64_t ephemeral_owner; struct { - bool is_ephemeral : 1; - int64_t ctime : 63; - } is_ephemeral_and_ctime{false, 0}; + int32_t seq_num; + int32_t num_children; + } children_info; + } ephemeral_or_children_data{0}; - /// ephemeral notes cannot have children so a node can set either - /// ephemeral_owner OR seq_num + num_children - union - { - int64_t ephemeral_owner; - struct - { - int32_t seq_num; - int32_t num_children; - } children_info; - } ephemeral_or_children_data{0}; + bool isEphemeral() const + { + return is_ephemeral_and_ctime.is_ephemeral; + } - ChildrenSet children{}; - }; + int64_t ephemeralOwner() const + { + if (isEphemeral()) + return ephemeral_or_children_data.ephemeral_owner; -#if !defined(ADDRESS_SANITIZER) && !defined(MEMORY_SANITIZER) - static_assert( - sizeof(ListNode) <= 144, - "std::list node containing ListNode is > 160 bytes (sizeof(ListNode) + 16 bytes for pointers) which will increase " - "memory consumption"); + return 0; + } + + void setEphemeralOwner(int64_t ephemeral_owner) + { + is_ephemeral_and_ctime.is_ephemeral = ephemeral_owner != 0; + ephemeral_or_children_data.ephemeral_owner = ephemeral_owner; + } + + int32_t numChildren() const + { + if (isEphemeral()) + return 0; + + return ephemeral_or_children_data.children_info.num_children; + } + + void setNumChildren(int32_t num_children) + { + ephemeral_or_children_data.children_info.num_children = num_children; + } + + /// dummy interface for test + void addChild(StringRef) {} + auto getChildren() const + { + return std::vector(numChildren()); + } + + void increaseNumChildren() + { + chassert(!isEphemeral()); + ++ephemeral_or_children_data.children_info.num_children; + } + + void decreaseNumChildren() + { + chassert(!isEphemeral()); + --ephemeral_or_children_data.children_info.num_children; + } + + int32_t seqNum() const + { + if (isEphemeral()) + return 0; + + return ephemeral_or_children_data.children_info.seq_num; + } + + void setSeqNum(int32_t seq_num_) + { + ephemeral_or_children_data.children_info.seq_num = seq_num_; + } + + void increaseSeqNum() + { + chassert(!isEphemeral()); + ++ephemeral_or_children_data.children_info.seq_num; + } + + int64_t ctime() const + { + return is_ephemeral_and_ctime.ctime; + } + + void setCtime(uint64_t ctime) + { + is_ephemeral_and_ctime.ctime = ctime; + } + + void copyStats(const Coordination::Stat & stat); +}; + +/// KeeperRocksNode is the memory structure used by RocksDB +struct KeeperRocksNode : public KeeperRocksNodeInfo +{ +#if USE_ROCKSDB + friend struct RocksDBContainer; #endif + using Meta = KeeperRocksNodeInfo; + + uint64_t size_bytes = 0; // only for compatible, should be deprecated + + uint64_t sizeInBytes() const { return data_size + sizeof(KeeperRocksNodeInfo); } + void setData(String new_data) + { + data_size = static_cast(new_data.size()); + if (data_size != 0) + { + data = std::unique_ptr(new char[new_data.size()]); + memcpy(data.get(), new_data.data(), data_size); + } + } + + void shallowCopy(const KeeperRocksNode & other) + { + czxid = other.czxid; + mzxid = other.mzxid; + pzxid = other.pzxid; + acl_id = other.acl_id; /// 0 -- no ACL by default + + mtime = other.mtime; + + is_ephemeral_and_ctime = other.is_ephemeral_and_ctime; + + ephemeral_or_children_data = other.ephemeral_or_children_data; + + data_size = other.data_size; + if (data_size != 0) + { + data = std::unique_ptr(new char[data_size]); + memcpy(data.get(), other.data.get(), data_size); + } + + version = other.version; + cversion = other.cversion; + aversion = other.aversion; + + /// cached_digest = other.cached_digest; + } + void invalidateDigestCache() const; + UInt64 getDigest(std::string_view path) const; + String getEncodedString(); + void decodeFromString(const String & buffer_str); + void recalculateSize() {} + std::string_view getData() const noexcept { return {data.get(), data_size}; } + + void setResponseStat(Coordination::Stat & response_stat) const + { + response_stat.czxid = czxid; + response_stat.mzxid = mzxid; + response_stat.ctime = ctime(); + response_stat.mtime = mtime; + response_stat.version = version; + response_stat.cversion = cversion; + response_stat.aversion = aversion; + response_stat.ephemeralOwner = ephemeralOwner(); + response_stat.dataLength = static_cast(data_size); + response_stat.numChildren = numChildren(); + response_stat.pzxid = pzxid; + } + + void reset() + { + serialized = false; + } + bool empty() const + { + return data_size == 0 && mzxid == 0; + } + std::unique_ptr data{nullptr}; + uint32_t data_size{0}; +private: + bool serialized = false; +}; + +/// KeeperMemNode should have as minimal size as possible to reduce memory footprint +/// of stored nodes +/// New fields should be added to the struct only if it's really necessary +struct KeeperMemNode +{ + int64_t czxid{0}; + int64_t mzxid{0}; + int64_t pzxid{0}; + uint64_t acl_id = 0; /// 0 -- no ACL by default + + int64_t mtime{0}; + + std::unique_ptr data{nullptr}; + uint32_t data_size{0}; + + int32_t version{0}; + int32_t cversion{0}; + int32_t aversion{0}; + + mutable uint64_t cached_digest = 0; + + KeeperMemNode() = default; + + KeeperMemNode & operator=(const KeeperMemNode & other); + KeeperMemNode(const KeeperMemNode & other); + + KeeperMemNode & operator=(KeeperMemNode && other) noexcept; + KeeperMemNode(KeeperMemNode && other) noexcept; + + bool empty() const; + + bool isEphemeral() const + { + return is_ephemeral_and_ctime.is_ephemeral; + } + + int64_t ephemeralOwner() const + { + if (isEphemeral()) + return ephemeral_or_children_data.ephemeral_owner; + + return 0; + } + + void setEphemeralOwner(int64_t ephemeral_owner) + { + is_ephemeral_and_ctime.is_ephemeral = ephemeral_owner != 0; + ephemeral_or_children_data.ephemeral_owner = ephemeral_owner; + } + + int32_t numChildren() const + { + if (isEphemeral()) + return 0; + + return ephemeral_or_children_data.children_info.num_children; + } + + void setNumChildren(int32_t num_children) + { + ephemeral_or_children_data.children_info.num_children = num_children; + } + + void increaseNumChildren() + { + chassert(!isEphemeral()); + ++ephemeral_or_children_data.children_info.num_children; + } + + void decreaseNumChildren() + { + chassert(!isEphemeral()); + --ephemeral_or_children_data.children_info.num_children; + } + + int32_t seqNum() const + { + if (isEphemeral()) + return 0; + + return ephemeral_or_children_data.children_info.seq_num; + } + + void setSeqNum(int32_t seq_num) + { + ephemeral_or_children_data.children_info.seq_num = seq_num; + } + + void increaseSeqNum() + { + chassert(!isEphemeral()); + ++ephemeral_or_children_data.children_info.seq_num; + } + + int64_t ctime() const + { + return is_ephemeral_and_ctime.ctime; + } + + void setCtime(uint64_t ctime) + { + is_ephemeral_and_ctime.ctime = ctime; + } + + void copyStats(const Coordination::Stat & stat); + + void setResponseStat(Coordination::Stat & response_stat) const; + + /// Object memory size + uint64_t sizeInBytes() const; + + void setData(const String & new_data); + + std::string_view getData() const noexcept { return {data.get(), data_size}; } + + void addChild(StringRef child_path); + + void removeChild(StringRef child_path); + + const auto & getChildren() const noexcept { return children; } + auto & getChildren() { return children; } + + // Invalidate the calculated digest so it's recalculated again on the next + // getDigest call + void invalidateDigestCache() const; + + // get the calculated digest of the node + UInt64 getDigest(std::string_view path) const; + + // copy only necessary information for preprocessing and digest calculation + // (e.g. we don't need to copy list of children) + void shallowCopy(const KeeperMemNode & other); +private: + /// as ctime can't be negative because it stores the timestamp when the + /// node was created, we can use the MSB for a bool + struct + { + bool is_ephemeral : 1; + int64_t ctime : 63; + } is_ephemeral_and_ctime{false, 0}; + + /// ephemeral notes cannot have children so a node can set either + /// ephemeral_owner OR seq_num + num_children + union + { + int64_t ephemeral_owner; + struct + { + int32_t seq_num; + int32_t num_children; + } children_info; + } ephemeral_or_children_data{0}; + + ChildrenSet children{}; +}; + +class KeeperStorageBase +{ +public: enum DigestVersion : uint8_t { @@ -200,7 +396,11 @@ public: V4 = 4 // 0 is not a valid digest value }; - static constexpr auto CURRENT_DIGEST_VERSION = DigestVersion::V4; + struct Digest + { + DigestVersion version{DigestVersion::NO_DIGEST}; + uint64_t value{0}; + }; struct ResponseForSession { @@ -210,16 +410,6 @@ public: }; using ResponsesForSessions = std::vector; - struct Digest - { - DigestVersion version{DigestVersion::NO_DIGEST}; - uint64_t value{0}; - }; - - static bool checkDigest(const Digest & first, const Digest & second); - - static String generateDigest(const String & userdata); - struct RequestForSession { int64_t session_id; @@ -229,6 +419,7 @@ public: std::optional digest; int64_t log_idx{0}; }; + using RequestsForSessions = std::vector; struct AuthID { @@ -238,9 +429,6 @@ public: bool operator==(const AuthID & other) const { return scheme == other.scheme && id == other.id; } }; - using RequestsForSessions = std::vector; - - using Container = SnapshotableHashTable; using Ephemerals = std::unordered_map>; using SessionAndWatcher = std::unordered_map>; using SessionIDs = std::unordered_set; @@ -250,6 +438,38 @@ public: using SessionAndAuth = std::unordered_map; using Watches = std::unordered_map; + static bool checkDigest(const Digest & first, const Digest & second); + +}; + +/// Keeper state machine almost equal to the ZooKeeper's state machine. +/// Implements all logic of operations, data changes, sessions allocation. +/// In-memory and not thread safe. +template +class KeeperStorage : public KeeperStorageBase +{ +public: + using Container = Container_; + using Node = Container::Node; + +#if !defined(ADDRESS_SANITIZER) && !defined(MEMORY_SANITIZER) + static_assert( + sizeof(ListNode) <= 144, + "std::list node containing ListNode is > 160 bytes (sizeof(ListNode) + 16 bytes for pointers) which will increase " + "memory consumption"); +#endif + + +#if USE_ROCKSDB + static constexpr bool use_rocksdb = std::is_same_v>; +#else + static constexpr bool use_rocksdb = false; +#endif + + static constexpr auto CURRENT_DIGEST_VERSION = DigestVersion::V4; + + static String generateDigest(const String & userdata); + int64_t session_id_counter{1}; SessionAndAuth session_and_auth; @@ -393,7 +613,7 @@ public: std::unordered_map, Hash, Equal> deltas_for_path; std::list deltas; - KeeperStorage & storage; + KeeperStorage & storage; }; UncommittedState uncommitted_state{*this}; @@ -530,10 +750,16 @@ public: /// Set of methods for creating snapshots /// Turn on snapshot mode, so data inside Container is not deleted, but replaced with new version. - void enableSnapshotMode(size_t up_to_version) { container.enableSnapshotMode(up_to_version); } + void enableSnapshotMode(size_t up_to_version) + { + container.enableSnapshotMode(up_to_version); + } /// Turn off snapshot mode. - void disableSnapshotMode() { container.disableSnapshotMode(); } + void disableSnapshotMode() + { + container.disableSnapshotMode(); + } Container::const_iterator getSnapshotIteratorBegin() const { return container.begin(); } @@ -572,6 +798,9 @@ private: void addDigest(const Node & node, std::string_view path); }; -using KeeperStoragePtr = std::unique_ptr; +using KeeperMemoryStorage = KeeperStorage>; +#if USE_ROCKSDB +using KeeperRocksStorage = KeeperStorage>; +#endif } diff --git a/src/Coordination/RocksDBContainer.h b/src/Coordination/RocksDBContainer.h new file mode 100644 index 00000000000..12b40bbb87e --- /dev/null +++ b/src/Coordination/RocksDBContainer.h @@ -0,0 +1,460 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ROCKSDB_ERROR; + extern const int LOGICAL_ERROR; +} + +/// The key-value format of rocks db will be +/// - key: Int8 (depth of the path) + String (path) +/// - value: SizeOf(keeperRocksNodeInfo) (meta of the node) + String (data) + +template +struct RocksDBContainer +{ + using Node = Node_; + +private: + /// MockNode is only use in test to mock `getChildren()` and `getData()` + struct MockNode + { + std::vector children; + std::string data; + MockNode(size_t children_num, std::string_view data_) + : children(std::vector(children_num)), + data(data_) + { + } + + std::vector getChildren() { return children; } + std::string getData() { return data; } + }; + + UInt16 getKeyDepth(const std::string & key) + { + UInt16 depth = 0; + for (size_t i = 0; i < key.size(); i++) + { + if (key[i] == '/' && i + 1 != key.size()) + depth ++; + } + return depth; + } + + std::string getEncodedKey(const std::string & key, bool child_prefix = false) + { + WriteBufferFromOwnString key_buffer; + UInt16 depth = getKeyDepth(key) + (child_prefix ? 1 : 0); + writeIntBinary(depth, key_buffer); + writeString(key, key_buffer); + return key_buffer.str(); + } + + static std::string_view getDecodedKey(const std::string_view & key) + { + return std::string_view(key.begin() + 2, key.end()); + } + + + struct KVPair + { + StringRef key; + Node value; + }; + + using ValueUpdater = std::function; + +public: + + /// This is an iterator wrapping rocksdb iterator and the kv result. + struct const_iterator + { + std::shared_ptr iter; + + std::shared_ptr pair; + + const_iterator() = default; + + explicit const_iterator(std::shared_ptr pair_) : pair(std::move(pair_)) {} + + explicit const_iterator(rocksdb::Iterator * iter_) : iter(iter_) + { + updatePairFromIter(); + } + + const KVPair & operator * () const + { + return *pair; + } + + const KVPair * operator->() const + { + return pair.get(); + } + + bool operator != (const const_iterator & other) const + { + return !(*this == other); + } + + bool operator == (const const_iterator & other) const + { + if (pair == nullptr && other == nullptr) + return true; + if (pair == nullptr || other == nullptr) + return false; + return pair->key.toView() == other->key.toView() && iter == other.iter; + } + + bool operator == (std::nullptr_t) const + { + return iter == nullptr; + } + + bool operator != (std::nullptr_t) const + { + return iter != nullptr; + } + + explicit operator bool() const + { + return iter != nullptr; + } + + const_iterator & operator ++() + { + iter->Next(); + updatePairFromIter(); + return *this; + } + + private: + void updatePairFromIter() + { + if (iter && iter->Valid()) + { + auto new_pair = std::make_shared(); + new_pair->key = StringRef(getDecodedKey(iter->key().ToStringView())); + ReadBufferFromOwnString buffer(iter->value().ToStringView()); + typename Node::Meta & meta = new_pair->value; + readPODBinary(meta, buffer); + readVarUInt(new_pair->value.data_size, buffer); + if (new_pair->value.data_size) + { + new_pair->value.data = std::unique_ptr(new char[new_pair->value.data_size]); + buffer.readStrict(new_pair->value.data.get(), new_pair->value.data_size); + } + pair = new_pair; + } + else + { + pair = nullptr; + iter = nullptr; + } + } + }; + + bool initialized = false; + + const const_iterator end_ptr; + + void initialize(const KeeperContextPtr & context) + { + DiskPtr disk = context->getTemporaryRocksDBDisk(); + if (disk == nullptr) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get rocksdb disk"); + } + auto options = context->getRocksDBOptions(); + if (options == nullptr) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get rocksdb options"); + } + rocksdb_dir = disk->getPath(); + rocksdb::DB * db; + auto status = rocksdb::DB::Open(*options, rocksdb_dir, &db); + if (!status.ok()) + { + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}", + rocksdb_dir, status.ToString()); + } + rocksdb_ptr = std::unique_ptr(db); + write_options.disableWAL = true; + initialized = true; + } + + ~RocksDBContainer() + { + if (initialized) + { + rocksdb_ptr->Close(); + rocksdb_ptr = nullptr; + + std::filesystem::remove_all(rocksdb_dir); + } + } + + std::vector> getChildren(const std::string & key_) + { + rocksdb::ReadOptions read_options; + read_options.total_order_seek = true; + + std::string key = key_; + if (!key.ends_with('/')) + key += '/'; + size_t len = key.size() + 2; + + auto iter = std::unique_ptr(rocksdb_ptr->NewIterator(read_options)); + std::string encoded_string = getEncodedKey(key, true); + rocksdb::Slice prefix(encoded_string); + std::vector> result; + for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); iter->Next()) + { + Node node; + ReadBufferFromOwnString buffer(iter->value().ToStringView()); + typename Node::Meta & meta = node; + /// We do not read data here + readPODBinary(meta, buffer); + std::string real_key(iter->key().data() + len, iter->key().size() - len); + // std::cout << "real key: " << real_key << std::endl; + result.emplace_back(std::move(real_key), std::move(node)); + } + + return result; + } + + bool contains(const std::string & path) + { + const std::string & encoded_key = getEncodedKey(path); + std::string buffer_str; + rocksdb::Status status = rocksdb_ptr->Get(rocksdb::ReadOptions(), encoded_key, &buffer_str); + if (status.IsNotFound()) + return false; + if (!status.ok()) + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during executing contains. The error message is {}.", status.ToString()); + return true; + } + + const_iterator find(StringRef key_) + { + /// rocksdb::PinnableSlice slice; + const std::string & encoded_key = getEncodedKey(key_.toString()); + std::string buffer_str; + rocksdb::Status status = rocksdb_ptr->Get(rocksdb::ReadOptions(), encoded_key, &buffer_str); + if (status.IsNotFound()) + return end(); + if (!status.ok()) + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during executing find. The error message is {}.", status.ToString()); + ReadBufferFromOwnString buffer(buffer_str); + auto kv = std::make_shared(); + kv->key = key_; + typename Node::Meta & meta = kv->value; + readPODBinary(meta, buffer); + /// TODO: Sometimes we don't need to load data. + readVarUInt(kv->value.data_size, buffer); + if (kv->value.data_size) + { + kv->value.data = std::unique_ptr(new char[kv->value.data_size]); + buffer.readStrict(kv->value.data.get(), kv->value.data_size); + } + return const_iterator(kv); + } + + MockNode getValue(StringRef key) + { + auto it = find(key); + chassert(it != end()); + return MockNode(it->value.numChildren(), it->value.getData()); + } + + const_iterator updateValue(StringRef key_, ValueUpdater updater) + { + /// rocksdb::PinnableSlice slice; + const std::string & key = key_.toString(); + const std::string & encoded_key = getEncodedKey(key); + std::string buffer_str; + rocksdb::Status status = rocksdb_ptr->Get(rocksdb::ReadOptions(), encoded_key, &buffer_str); + if (!status.ok()) + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during find. The error message is {}.", status.ToString()); + auto kv = std::make_shared(); + kv->key = key_; + kv->value.decodeFromString(buffer_str); + /// storage->removeDigest(node, key); + updater(kv->value); + insertOrReplace(key, kv->value); + return const_iterator(kv); + } + + bool insert(const std::string & key, Node & value) + { + std::string value_str; + const std::string & encoded_key = getEncodedKey(key); + rocksdb::Status status = rocksdb_ptr->Get(rocksdb::ReadOptions(), encoded_key, &value_str); + if (status.ok()) + { + return false; + } + else if (status.IsNotFound()) + { + status = rocksdb_ptr->Put(write_options, encoded_key, value.getEncodedString()); + if (status.ok()) + { + counter++; + return true; + } + } + + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during insert. The error message is {}.", status.ToString()); + } + + void insertOrReplace(const std::string & key, Node & value) + { + const std::string & encoded_key = getEncodedKey(key); + /// storage->addDigest(value, key); + std::string value_str; + rocksdb::Status status = rocksdb_ptr->Get(rocksdb::ReadOptions(), encoded_key, &value_str); + bool increase_counter = false; + if (status.IsNotFound()) + increase_counter = true; + else if (!status.ok()) + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during get. The error message is {}.", status.ToString()); + + status = rocksdb_ptr->Put(write_options, encoded_key, value.getEncodedString()); + if (status.ok()) + counter += increase_counter; + else + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during insert. The error message is {}.", status.ToString()); + } + + using KeyPtr = std::unique_ptr; + + /// To be compatible with SnapshotableHashTable, will remove later; + KeyPtr allocateKey(size_t size) + { + return KeyPtr{new char[size]}; + } + + void insertOrReplace(KeyPtr key_data, size_t key_size, Node value) + { + std::string key(key_data.get(), key_size); + insertOrReplace(key, value); + } + + bool erase(const std::string & key) + { + /// storage->removeDigest(value, key); + const std::string & encoded_key = getEncodedKey(key); + + auto status = rocksdb_ptr->Delete(write_options, encoded_key); + if (status.IsNotFound()) + return false; + if (status.ok()) + { + counter--; + return true; + } + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during erase. The error message is {}.", status.ToString()); + } + + void recalculateDataSize() {} + void reverse(size_t size_) {(void)size_;} + + uint64_t getApproximateDataSize() const + { + /// use statistics from rocksdb + return counter * sizeof(Node); + } + + void enableSnapshotMode(size_t version) + { + chassert(!snapshot_mode); + snapshot_mode = true; + snapshot_up_to_version = version; + snapshot_size = counter; + ++current_version; + + snapshot = rocksdb_ptr->GetSnapshot(); + } + + void disableSnapshotMode() + { + chassert(snapshot_mode); + snapshot_mode = false; + rocksdb_ptr->ReleaseSnapshot(snapshot); + } + + void clearOutdatedNodes() {} + + std::pair snapshotSizeWithVersion() const + { + if (!snapshot_mode) + return std::make_pair(counter, current_version); + else + return std::make_pair(snapshot_size, current_version); + } + + const_iterator begin() const + { + rocksdb::ReadOptions read_options; + read_options.total_order_seek = true; + if (snapshot_mode) + read_options.snapshot = snapshot; + auto * iter = rocksdb_ptr->NewIterator(read_options); + iter->SeekToFirst(); + return const_iterator(iter); + } + + const_iterator end() const + { + return end_ptr; + } + + size_t size() const + { + return counter; + } + + uint64_t getArenaDataSize() const + { + return 0; + } + + uint64_t keyArenaSize() const + { + return 0; + } + +private: + String rocksdb_dir; + + std::unique_ptr rocksdb_ptr; + rocksdb::WriteOptions write_options; + + const rocksdb::Snapshot * snapshot; + + bool snapshot_mode{false}; + size_t current_version{0}; + size_t snapshot_up_to_version{0}; + size_t snapshot_size{0}; + size_t counter{0}; + +}; + +} diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h index 5f2b14e17b0..85452558496 100644 --- a/src/Coordination/SnapshotableHashTable.h +++ b/src/Coordination/SnapshotableHashTable.h @@ -212,9 +212,9 @@ private: updateDataSize(INSERT_OR_REPLACE, key.size, new_value_size, old_value_size, !snapshot_mode); } - public: + using Node = V; using iterator = typename List::iterator; using const_iterator = typename List::const_iterator; using ValueUpdater = std::function; @@ -364,6 +364,7 @@ public: { auto map_it = map.find(key); if (map_it != map.end()) + /// return std::make_shared(KVPair{map_it->getMapped()->key, map_it->getMapped()->value}); return map_it->getMapped(); return list.end(); } diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index c205db942b9..99d71b85e78 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -43,7 +43,8 @@ void deserializeSnapshotMagic(ReadBuffer & in) throw Exception(ErrorCodes::CORRUPTED_DATA, "Incorrect magic header in file, expected {}, got {}", SNP_HEADER, magic_header); } -int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in) +template +int64_t deserializeSessionAndTimeout(Storage & storage, ReadBuffer & in) { int32_t count; Coordination::read(count, in); @@ -62,7 +63,8 @@ int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in) return max_session_id; } -void deserializeACLMap(KeeperStorage & storage, ReadBuffer & in) +template +void deserializeACLMap(Storage & storage, ReadBuffer & in) { int32_t count; Coordination::read(count, in); @@ -90,7 +92,8 @@ void deserializeACLMap(KeeperStorage & storage, ReadBuffer & in) } } -int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, LoggerPtr log) +template +int64_t deserializeStorageData(Storage & storage, ReadBuffer & in, LoggerPtr log) { int64_t max_zxid = 0; std::string path; @@ -98,7 +101,7 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, LoggerP size_t count = 0; while (path != "/") { - KeeperStorage::Node node{}; + typename Storage::Node node{}; String data; Coordination::read(data, in); node.setData(data); @@ -146,14 +149,15 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, LoggerP if (itr.key != "/") { auto parent_path = parentNodePath(itr.key); - storage.container.updateValue(parent_path, [my_path = itr.key] (KeeperStorage::Node & value) { value.addChild(getBaseNodeName(my_path)); value.increaseNumChildren(); }); + storage.container.updateValue(parent_path, [my_path = itr.key] (typename Storage::Node & value) { value.addChild(getBaseNodeName(my_path)); value.increaseNumChildren(); }); } } return max_zxid; } -void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::string & snapshot_path, LoggerPtr log) +template +void deserializeKeeperStorageFromSnapshot(Storage & storage, const std::string & snapshot_path, LoggerPtr log) { LOG_INFO(log, "Deserializing storage snapshot {}", snapshot_path); int64_t zxid = getZxidFromName(snapshot_path); @@ -192,9 +196,11 @@ void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::st LOG_INFO(log, "Finished, snapshot ZXID {}", storage.zxid); } -void deserializeKeeperStorageFromSnapshotsDir(KeeperStorage & storage, const std::string & path, LoggerPtr log) +namespace fs = std::filesystem; + +template +void deserializeKeeperStorageFromSnapshotsDir(Storage & storage, const std::string & path, LoggerPtr log) { - namespace fs = std::filesystem; std::map existing_snapshots; for (const auto & p : fs::directory_iterator(path)) { @@ -480,7 +486,8 @@ bool hasErrorsInMultiRequest(Coordination::ZooKeeperRequestPtr request) } -bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in, LoggerPtr /*log*/) +template +bool deserializeTxn(Storage & storage, ReadBuffer & in, LoggerPtr /*log*/) { int64_t checksum; Coordination::read(checksum, in); @@ -535,7 +542,8 @@ bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in, LoggerPtr /*log*/) return true; } -void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string & log_path, LoggerPtr log) +template +void deserializeLogAndApplyToStorage(Storage & storage, const std::string & log_path, LoggerPtr log) { ReadBufferFromFile reader(log_path); @@ -559,9 +567,9 @@ void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string LOG_INFO(log, "Finished {} deserialization, totally read {} records", log_path, counter); } -void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string & path, LoggerPtr log) +template +void deserializeLogsAndApplyToStorage(Storage & storage, const std::string & path, LoggerPtr log) { - namespace fs = std::filesystem; std::map existing_logs; for (const auto & p : fs::directory_iterator(path)) { @@ -595,4 +603,9 @@ void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string } } +template void deserializeKeeperStorageFromSnapshot(KeeperMemoryStorage & storage, const std::string & snapshot_path, LoggerPtr log); +template void deserializeKeeperStorageFromSnapshotsDir(KeeperMemoryStorage & storage, const std::string & path, LoggerPtr log); +template void deserializeLogAndApplyToStorage(KeeperMemoryStorage & storage, const std::string & log_path, LoggerPtr log); +template void deserializeLogsAndApplyToStorage(KeeperMemoryStorage & storage, const std::string & path, LoggerPtr log); + } diff --git a/src/Coordination/ZooKeeperDataReader.h b/src/Coordination/ZooKeeperDataReader.h index 648dc95adcf..5520d1d3215 100644 --- a/src/Coordination/ZooKeeperDataReader.h +++ b/src/Coordination/ZooKeeperDataReader.h @@ -5,12 +5,16 @@ namespace DB { -void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::string & snapshot_path, LoggerPtr log); +template +void deserializeKeeperStorageFromSnapshot(Storage & storage, const std::string & snapshot_path, LoggerPtr log); -void deserializeKeeperStorageFromSnapshotsDir(KeeperStorage & storage, const std::string & path, LoggerPtr log); +template +void deserializeKeeperStorageFromSnapshotsDir(Storage & storage, const std::string & path, LoggerPtr log); -void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string & log_path, LoggerPtr log); +template +void deserializeLogAndApplyToStorage(Storage & storage, const std::string & log_path, LoggerPtr log); -void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string & path, LoggerPtr log); +template +void deserializeLogsAndApplyToStorage(Storage & storage, const std::string & path, LoggerPtr log); } diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index b65e9358a72..beae6254562 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -61,10 +61,22 @@ struct CompressionParam std::string extension; }; -class CoordinationTest : public ::testing::TestWithParam +template +struct TestParam { -protected: - DB::KeeperContextPtr keeper_context = std::make_shared(true, std::make_shared()); + using Storage = Storage_; + static constexpr bool enable_compression = enable_compression_; +}; + +template +class CoordinationTest : public ::testing::Test +{ +public: + using Storage = typename TestType::Storage; + static constexpr bool enable_compression = TestType::enable_compression; + std::string extension; + + DB::KeeperContextPtr keeper_context; LoggerPtr log{getLogger("CoordinationTest")}; void SetUp() override @@ -73,7 +85,12 @@ protected: Poco::Logger::root().setChannel(channel); Poco::Logger::root().setLevel("trace"); + auto settings = std::make_shared(); + settings->experimental_use_rocksdb = true; + keeper_context = std::make_shared(true, settings); keeper_context->setLocalLogsPreprocessed(); + keeper_context->setRocksDBOptions(); + extension = enable_compression ? ".zstd" : ""; } void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path)); } @@ -83,13 +100,24 @@ protected: keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path)); } + void setRocksDBDirectory(const std::string & path) + { + keeper_context->setRocksDBDisk(std::make_shared("RocksDisk", path)); + } + void setStateFileDirectory(const std::string & path) { keeper_context->setStateFileDisk(std::make_shared("StateFile", path)); } }; -TEST_P(CoordinationTest, RaftServerConfigParse) +using Implementation = testing::Types, + TestParam, + TestParam, + TestParam>; +TYPED_TEST_SUITE(CoordinationTest, Implementation); + +TYPED_TEST(CoordinationTest, RaftServerConfigParse) { auto parse = Coordination::RaftServerConfig::parse; using Cfg = std::optional; @@ -114,7 +142,7 @@ TEST_P(CoordinationTest, RaftServerConfigParse) (Cfg{{1, "2001:0db8:85a3:0000:0000:8a2e:0370:7334:80"}})); } -TEST_P(CoordinationTest, RaftServerClusterConfigParse) +TYPED_TEST(CoordinationTest, RaftServerClusterConfigParse) { auto parse = Coordination::parseRaftServers; using Cfg = DB::RaftServerConfig; @@ -130,14 +158,14 @@ TEST_P(CoordinationTest, RaftServerClusterConfigParse) (Servers{Cfg{1, "host:80"}, Cfg{2, "host:81"}})); } -TEST_P(CoordinationTest, BuildTest) +TYPED_TEST(CoordinationTest, BuildTest) { DB::InMemoryLogStore store; DB::SummingStateMachine machine; EXPECT_EQ(1, 1); } -TEST_P(CoordinationTest, BufferSerde) +TYPED_TEST(CoordinationTest, BufferSerde) { Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Get); request->xid = 3; @@ -261,13 +289,13 @@ nuraft::ptr getBuffer(int64_t number) return ret; } -TEST_P(CoordinationTest, TestSummingRaft1) +TYPED_TEST(CoordinationTest, TestSummingRaft1) { ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); - setStateFileDirectory("."); + this->setLogDirectory("./logs"); + this->setStateFileDirectory("."); - SummingRaftServer s1(1, "localhost", 44444, keeper_context); + SummingRaftServer s1(1, "localhost", 44444, this->keeper_context); SCOPE_EXIT(if (std::filesystem::exists("./state")) std::filesystem::remove("./state");); /// Single node is leader @@ -280,7 +308,7 @@ TEST_P(CoordinationTest, TestSummingRaft1) while (s1.state_machine->getValue() != 143) { - LOG_INFO(log, "Waiting s1 to apply entry"); + LOG_INFO(this->log, "Waiting s1 to apply entry"); std::this_thread::sleep_for(std::chrono::milliseconds(100)); } @@ -296,16 +324,16 @@ DB::LogEntryPtr getLogEntry(const std::string & s, size_t term) return nuraft::cs_new(term, bufwriter.getBuffer()); } -TEST_P(CoordinationTest, ChangelogTestSimple) +TYPED_TEST(CoordinationTest, ChangelogTestSimple) { - auto params = GetParam(); + /// ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello world", 77); changelog.append(entry); @@ -328,16 +356,15 @@ void waitDurableLogs(nuraft::log_store & log_store) } -TEST_P(CoordinationTest, ChangelogTestFile) +TYPED_TEST(CoordinationTest, ChangelogTestFile) { - auto params = GetParam(); ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello world", 77); changelog.append(entry); @@ -345,9 +372,9 @@ TEST_P(CoordinationTest, ChangelogTestFile) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); for (const auto & p : fs::directory_iterator("./logs")) - EXPECT_EQ(p.path(), "./logs/changelog_1_5.bin" + params.extension); + EXPECT_EQ(p.path(), "./logs/changelog_1_5.bin" + this->extension); changelog.append(entry); changelog.append(entry); @@ -358,20 +385,20 @@ TEST_P(CoordinationTest, ChangelogTestFile) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); } -TEST_P(CoordinationTest, ChangelogReadWrite) +TYPED_TEST(CoordinationTest, ChangelogReadWrite) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 1000}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) @@ -386,9 +413,9 @@ TEST_P(CoordinationTest, ChangelogReadWrite) waitDurableLogs(changelog); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 1000}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 10); EXPECT_EQ(changelog_reader.last_entry()->get_term(), changelog.last_entry()->get_term()); @@ -404,16 +431,16 @@ TEST_P(CoordinationTest, ChangelogReadWrite) EXPECT_EQ(10, entries_from_range->size()); } -TEST_P(CoordinationTest, ChangelogWriteAt) +TYPED_TEST(CoordinationTest, ChangelogWriteAt) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 1000}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) { @@ -436,9 +463,9 @@ TEST_P(CoordinationTest, ChangelogWriteAt) EXPECT_EQ(changelog.next_slot(), 8); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 1000}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), changelog.size()); @@ -448,16 +475,16 @@ TEST_P(CoordinationTest, ChangelogWriteAt) } -TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) +TYPED_TEST(CoordinationTest, ChangelogTestAppendAfterRead) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 7; ++i) { @@ -470,13 +497,13 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 7); @@ -489,8 +516,8 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) EXPECT_EQ(changelog_reader.size(), 10); waitDurableLogs(changelog_reader); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); size_t logs_count = 0; for (const auto & _ [[maybe_unused]] : fs::directory_iterator("./logs")) @@ -505,9 +532,9 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) waitDurableLogs(changelog_reader); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); logs_count = 0; for (const auto & _ [[maybe_unused]] : fs::directory_iterator("./logs")) @@ -534,16 +561,16 @@ void assertFileDeleted(std::string path) } -TEST_P(CoordinationTest, ChangelogTestCompaction) +TYPED_TEST(CoordinationTest, ChangelogTestCompaction) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 3; ++i) @@ -557,7 +584,7 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) EXPECT_EQ(changelog.size(), 3); - keeper_context->setLastCommitIndex(2); + this->keeper_context->setLastCommitIndex(2); changelog.compact(2); EXPECT_EQ(changelog.size(), 1); @@ -565,7 +592,7 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) EXPECT_EQ(changelog.next_slot(), 4); EXPECT_EQ(changelog.last_entry()->get_term(), 20); // nothing should be deleted - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); auto e1 = getLogEntry("hello world", 30); changelog.append(e1); @@ -579,15 +606,15 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); - keeper_context->setLastCommitIndex(6); + this->keeper_context->setLastCommitIndex(6); changelog.compact(6); std::this_thread::sleep_for(std::chrono::microseconds(1000)); - assertFileDeleted("./logs/changelog_1_5.bin" + params.extension); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); + assertFileDeleted("./logs/changelog_1_5.bin" + this->extension); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); EXPECT_EQ(changelog.size(), 1); EXPECT_EQ(changelog.start_index(), 7); @@ -595,9 +622,9 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) EXPECT_EQ(changelog.last_entry()->get_term(), 60); /// And we able to read it DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(7, 0); EXPECT_EQ(changelog_reader.size(), 1); @@ -606,16 +633,16 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) EXPECT_EQ(changelog_reader.last_entry()->get_term(), 60); } -TEST_P(CoordinationTest, ChangelogTestBatchOperations) +TYPED_TEST(CoordinationTest, ChangelogTestBatchOperations) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) { @@ -631,9 +658,9 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperations) auto entries = changelog.pack(1, 5); DB::KeeperLogStore apply_changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); apply_changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) @@ -661,18 +688,18 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperations) EXPECT_EQ(apply_changelog.entry_at(12)->get_term(), 40); } -TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty) +TYPED_TEST(CoordinationTest, ChangelogTestBatchOperationsEmpty) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); nuraft::ptr entries; { DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) { @@ -689,11 +716,11 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty) } ChangelogDirTest test1("./logs1"); - setLogDirectory("./logs1"); + this->setLogDirectory("./logs1"); DB::KeeperLogStore changelog_new( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_new.init(1, 0); EXPECT_EQ(changelog_new.size(), 0); @@ -716,23 +743,23 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty) EXPECT_EQ(changelog_new.next_slot(), 11); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(5, 0); } -TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) +TYPED_TEST(CoordinationTest, ChangelogTestWriteAtPreviousFile) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 33; ++i) @@ -744,13 +771,13 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); EXPECT_EQ(changelog.size(), 33); @@ -764,19 +791,19 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); DB::KeeperLogStore changelog_read( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_read.init(1, 0); EXPECT_EQ(changelog_read.size(), 7); EXPECT_EQ(changelog_read.start_index(), 1); @@ -784,16 +811,16 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) EXPECT_EQ(changelog_read.last_entry()->get_term(), 5555); } -TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) +TYPED_TEST(CoordinationTest, ChangelogTestWriteAtFileBorder) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 33; ++i) @@ -805,13 +832,13 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); EXPECT_EQ(changelog.size(), 33); @@ -825,19 +852,19 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); DB::KeeperLogStore changelog_read( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_read.init(1, 0); EXPECT_EQ(changelog_read.size(), 11); EXPECT_EQ(changelog_read.start_index(), 1); @@ -845,16 +872,16 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) EXPECT_EQ(changelog_read.last_entry()->get_term(), 5555); } -TEST_P(CoordinationTest, ChangelogTestWriteAtAllFiles) +TYPED_TEST(CoordinationTest, ChangelogTestWriteAtAllFiles) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 33; ++i) { @@ -865,13 +892,13 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtAllFiles) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); EXPECT_EQ(changelog.size(), 33); @@ -885,26 +912,26 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtAllFiles) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); } -TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead) +TYPED_TEST(CoordinationTest, ChangelogTestStartNewLogAfterRead) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -916,19 +943,19 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead) EXPECT_EQ(changelog.size(), 35); waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_36_40.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_36_40.bin" + this->extension)); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); auto entry = getLogEntry("36_hello_world", 360); @@ -938,14 +965,14 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead) EXPECT_EQ(changelog_reader.size(), 36); waitDurableLogs(changelog_reader); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_36_40.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_36_40.bin" + this->extension)); } namespace @@ -966,18 +993,18 @@ void assertBrokenFileRemoved(const fs::path & directory, const fs::path & filena } -TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) +TYPED_TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) { static const fs::path log_folder{"./logs"}; - auto params = GetParam(); + ChangelogDirTest test(log_folder); - setLogDirectory(log_folder); + this->setLogDirectory(log_folder); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -989,36 +1016,36 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) EXPECT_EQ(changelog.size(), 35); waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); DB::WriteBufferFromFile plain_buf( - "./logs/changelog_11_15.bin" + params.extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); + "./logs/changelog_11_15.bin" + this->extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); plain_buf.truncate(0); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); changelog_reader.end_of_append_batch(0, 0); EXPECT_EQ(changelog_reader.size(), 10); EXPECT_EQ(changelog_reader.last_entry()->get_term(), 90); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); - assertBrokenFileRemoved(log_folder, "changelog_16_20.bin" + params.extension); - assertBrokenFileRemoved(log_folder, "changelog_21_25.bin" + params.extension); - assertBrokenFileRemoved(log_folder, "changelog_26_30.bin" + params.extension); - assertBrokenFileRemoved(log_folder, "changelog_31_35.bin" + params.extension); + assertBrokenFileRemoved(log_folder, "changelog_16_20.bin" + this->extension); + assertBrokenFileRemoved(log_folder, "changelog_21_25.bin" + this->extension); + assertBrokenFileRemoved(log_folder, "changelog_26_30.bin" + this->extension); + assertBrokenFileRemoved(log_folder, "changelog_31_35.bin" + this->extension); auto entry = getLogEntry("h", 7777); changelog_reader.append(entry); @@ -1028,35 +1055,35 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) waitDurableLogs(changelog_reader); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); - assertBrokenFileRemoved(log_folder, "changelog_16_20.bin" + params.extension); - assertBrokenFileRemoved(log_folder, "changelog_21_25.bin" + params.extension); - assertBrokenFileRemoved(log_folder, "changelog_26_30.bin" + params.extension); - assertBrokenFileRemoved(log_folder, "changelog_31_35.bin" + params.extension); + assertBrokenFileRemoved(log_folder, "changelog_16_20.bin" + this->extension); + assertBrokenFileRemoved(log_folder, "changelog_21_25.bin" + this->extension); + assertBrokenFileRemoved(log_folder, "changelog_26_30.bin" + this->extension); + assertBrokenFileRemoved(log_folder, "changelog_31_35.bin" + this->extension); DB::KeeperLogStore changelog_reader2( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader2.init(1, 0); EXPECT_EQ(changelog_reader2.size(), 11); EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777); } /// Truncating all entries -TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) +TYPED_TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -1067,22 +1094,22 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) changelog.end_of_append_batch(0, 0); waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + this->extension)); DB::WriteBufferFromFile plain_buf( - "./logs/changelog_1_20.bin" + params.extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); + "./logs/changelog_1_20.bin" + this->extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); plain_buf.truncate(30); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 0); - EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + params.extension)); - assertBrokenFileRemoved("./logs", "changelog_21_40.bin" + params.extension); + EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + this->extension)); + assertBrokenFileRemoved("./logs", "changelog_21_40.bin" + this->extension); auto entry = getLogEntry("hello_world", 7777); changelog_reader.append(entry); changelog_reader.end_of_append_batch(0, 0); @@ -1093,9 +1120,9 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777); DB::KeeperLogStore changelog_reader2( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 1}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader2.init(1, 0); EXPECT_EQ(changelog_reader2.size(), 1); EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777); @@ -1104,15 +1131,15 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) /// Truncating only some entries from the end /// For compressed logs we have no reliable way of knowing how many log entries were lost /// after we truncate some bytes from the end -TEST_F(CoordinationTest, ChangelogTestReadAfterBrokenTruncate3) +TYPED_TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate3) { ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( DB::LogFileSettings{.force_sync = true, .compress_logs = false, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -1134,7 +1161,7 @@ TEST_F(CoordinationTest, ChangelogTestReadAfterBrokenTruncate3) DB::KeeperLogStore changelog_reader( DB::LogFileSettings{.force_sync = true, .compress_logs = false, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 19); @@ -1151,10 +1178,10 @@ TEST_F(CoordinationTest, ChangelogTestReadAfterBrokenTruncate3) EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777); } -TEST_F(CoordinationTest, ChangelogTestMixedLogTypes) +TYPED_TEST(CoordinationTest, ChangelogTestMixedLogTypes) { ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); std::vector changelog_files; @@ -1186,7 +1213,7 @@ TEST_F(CoordinationTest, ChangelogTestMixedLogTypes) DB::KeeperLogStore changelog( DB::LogFileSettings{.force_sync = true, .compress_logs = false, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -1207,7 +1234,7 @@ TEST_F(CoordinationTest, ChangelogTestMixedLogTypes) DB::KeeperLogStore changelog_compressed( DB::LogFileSettings{.force_sync = true, .compress_logs = true, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_compressed.init(1, 0); verify_changelog_files(); @@ -1229,7 +1256,7 @@ TEST_F(CoordinationTest, ChangelogTestMixedLogTypes) DB::KeeperLogStore changelog( DB::LogFileSettings{.force_sync = true, .compress_logs = false, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); verify_changelog_files(); @@ -1247,16 +1274,16 @@ TEST_F(CoordinationTest, ChangelogTestMixedLogTypes) } } -TEST_P(CoordinationTest, ChangelogTestLostFiles) +TYPED_TEST(CoordinationTest, ChangelogTestLostFiles) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -1267,30 +1294,30 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles) changelog.end_of_append_batch(0, 0); waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + this->extension)); - fs::remove("./logs/changelog_1_20.bin" + params.extension); + fs::remove("./logs/changelog_1_20.bin" + this->extension); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); /// It should print error message, but still able to start changelog_reader.init(5, 0); - assertBrokenFileRemoved("./logs", "changelog_21_40.bin" + params.extension); + assertBrokenFileRemoved("./logs", "changelog_21_40.bin" + this->extension); } -TEST_P(CoordinationTest, ChangelogTestLostFiles2) +TYPED_TEST(CoordinationTest, ChangelogTestLostFiles2) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 10}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -1302,24 +1329,24 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles2) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_40.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_40.bin" + this->extension)); // we have a gap in our logs, we need to remove all the logs after the gap - fs::remove("./logs/changelog_21_30.bin" + params.extension); + fs::remove("./logs/changelog_21_30.bin" + this->extension); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 10}, DB::FlushSettings(), - keeper_context); + this->keeper_context); /// It should print error message, but still able to start changelog_reader.init(5, 0); - EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_20.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_20.bin" + this->extension)); - assertBrokenFileRemoved("./logs", "changelog_31_40.bin" + params.extension); + assertBrokenFileRemoved("./logs", "changelog_31_40.bin" + this->extension); } struct IntNode { @@ -1335,7 +1362,7 @@ struct IntNode bool operator!=(const int & rhs) const { return rhs != this->value; } }; -TEST_P(CoordinationTest, SnapshotableHashMapSimple) +TYPED_TEST(CoordinationTest, SnapshotableHashMapSimple) { DB::SnapshotableHashTable hello; EXPECT_TRUE(hello.insert("hello", 5).second); @@ -1350,7 +1377,7 @@ TEST_P(CoordinationTest, SnapshotableHashMapSimple) EXPECT_EQ(hello.size(), 0); } -TEST_P(CoordinationTest, SnapshotableHashMapTrySnapshot) +TYPED_TEST(CoordinationTest, SnapshotableHashMapTrySnapshot) { DB::SnapshotableHashTable map_snp; EXPECT_TRUE(map_snp.insert("/hello", 7).second); @@ -1427,7 +1454,7 @@ TEST_P(CoordinationTest, SnapshotableHashMapTrySnapshot) map_snp.disableSnapshotMode(); } -TEST_P(CoordinationTest, SnapshotableHashMapDataSize) +TYPED_TEST(CoordinationTest, SnapshotableHashMapDataSize) { /// int DB::SnapshotableHashTable hello; @@ -1465,7 +1492,7 @@ TEST_P(CoordinationTest, SnapshotableHashMapDataSize) EXPECT_EQ(hello.getApproximateDataSize(), 0); /// Node - using Node = DB::KeeperStorage::Node; + using Node = DB::KeeperMemoryStorage::Node; DB::SnapshotableHashTable world; Node n1; n1.setData("1234"); @@ -1504,9 +1531,10 @@ TEST_P(CoordinationTest, SnapshotableHashMapDataSize) EXPECT_EQ(world.getApproximateDataSize(), 0); } -void addNode(DB::KeeperStorage & storage, const std::string & path, const std::string & data, int64_t ephemeral_owner = 0) +template +void addNode(Storage & storage, const std::string & path, const std::string & data, int64_t ephemeral_owner = 0) { - using Node = DB::KeeperStorage::Node; + using Node = typename Storage::Node; Node node{}; node.setData(data); node.setEphemeralOwner(ephemeral_owner); @@ -1522,15 +1550,20 @@ void addNode(DB::KeeperStorage & storage, const std::string & path, const std::s }); } -TEST_P(CoordinationTest, TestStorageSnapshotSimple) +TYPED_TEST(CoordinationTest, TestStorageSnapshotSimple) { - auto params = GetParam(); + ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); + using Storage = typename TestFixture::Storage; - DB::KeeperStorage storage(500, "", keeper_context); + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + DB::KeeperSnapshotManager manager(3, this->keeper_context, this->enable_compression); + + Storage storage(500, "", this->keeper_context); addNode(storage, "/hello1", "world", 1); addNode(storage, "/hello2", "somedata", 3); storage.session_id_counter = 5; @@ -1540,7 +1573,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotSimple) storage.getSessionID(130); storage.getSessionID(130); - DB::KeeperStorageSnapshot snapshot(&storage, 2); + DB::KeeperStorageSnapshot snapshot(&storage, 2); EXPECT_EQ(snapshot.snapshot_meta->get_last_log_idx(), 2); EXPECT_EQ(snapshot.session_id, 7); @@ -1549,7 +1582,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotSimple) auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, 2); - EXPECT_TRUE(fs::exists("./snapshots/snapshot_2.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_2.bin" + this->extension)); auto debuf = manager.deserializeSnapshotBufferFromDisk(2); @@ -1572,15 +1605,20 @@ TEST_P(CoordinationTest, TestStorageSnapshotSimple) EXPECT_EQ(restored_storage->session_and_timeout.size(), 2); } -TEST_P(CoordinationTest, TestStorageSnapshotMoreWrites) +TYPED_TEST(CoordinationTest, TestStorageSnapshotMoreWrites) { - auto params = GetParam(); + ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); + using Storage = typename TestFixture::Storage; - DB::KeeperStorage storage(500, "", keeper_context); + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + DB::KeeperSnapshotManager manager(3, this->keeper_context, this->enable_compression); + + Storage storage(500, "", this->keeper_context); storage.getSessionID(130); for (size_t i = 0; i < 50; ++i) @@ -1588,7 +1626,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotMoreWrites) addNode(storage, "/hello_" + std::to_string(i), "world_" + std::to_string(i)); } - DB::KeeperStorageSnapshot snapshot(&storage, 50); + DB::KeeperStorageSnapshot snapshot(&storage, 50); EXPECT_EQ(snapshot.snapshot_meta->get_last_log_idx(), 50); EXPECT_EQ(snapshot.snapshot_container_size, 54); @@ -1601,7 +1639,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotMoreWrites) auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, 50); - EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + this->extension)); auto debuf = manager.deserializeSnapshotBufferFromDisk(50); @@ -1615,15 +1653,20 @@ TEST_P(CoordinationTest, TestStorageSnapshotMoreWrites) } -TEST_P(CoordinationTest, TestStorageSnapshotManySnapshots) +TYPED_TEST(CoordinationTest, TestStorageSnapshotManySnapshots) { - auto params = GetParam(); + ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); + using Storage = typename TestFixture::Storage; - DB::KeeperStorage storage(500, "", keeper_context); + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + DB::KeeperSnapshotManager manager(3, this->keeper_context, this->enable_compression); + + Storage storage(500, "", this->keeper_context); storage.getSessionID(130); for (size_t j = 1; j <= 5; ++j) @@ -1633,17 +1676,17 @@ TEST_P(CoordinationTest, TestStorageSnapshotManySnapshots) addNode(storage, "/hello_" + std::to_string(i), "world_" + std::to_string(i)); } - DB::KeeperStorageSnapshot snapshot(&storage, j * 50); + DB::KeeperStorageSnapshot snapshot(&storage, j * 50); auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, j * 50); - EXPECT_TRUE(fs::exists(std::string{"./snapshots/snapshot_"} + std::to_string(j * 50) + ".bin" + params.extension)); + EXPECT_TRUE(fs::exists(std::string{"./snapshots/snapshot_"} + std::to_string(j * 50) + ".bin" + this->extension)); } - EXPECT_FALSE(fs::exists("./snapshots/snapshot_50.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./snapshots/snapshot_100.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./snapshots/snapshot_150.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./snapshots/snapshot_200.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./snapshots/snapshot_250.bin" + params.extension)); + EXPECT_FALSE(fs::exists("./snapshots/snapshot_50.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./snapshots/snapshot_100.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_150.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_200.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_250.bin" + this->extension)); auto [restored_storage, meta, _] = manager.restoreFromLatestSnapshot(); @@ -1656,21 +1699,26 @@ TEST_P(CoordinationTest, TestStorageSnapshotManySnapshots) } } -TEST_P(CoordinationTest, TestStorageSnapshotMode) +TYPED_TEST(CoordinationTest, TestStorageSnapshotMode) { - auto params = GetParam(); - ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); - DB::KeeperStorage storage(500, "", keeper_context); + ChangelogDirTest test("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + DB::KeeperSnapshotManager manager(3, this->keeper_context, this->enable_compression); + Storage storage(500, "", this->keeper_context); for (size_t i = 0; i < 50; ++i) { addNode(storage, "/hello_" + std::to_string(i), "world_" + std::to_string(i)); } { - DB::KeeperStorageSnapshot snapshot(&storage, 50); + DB::KeeperStorageSnapshot snapshot(&storage, 50); for (size_t i = 0; i < 50; ++i) { addNode(storage, "/hello_" + std::to_string(i), "wlrd_" + std::to_string(i)); @@ -1685,12 +1733,15 @@ TEST_P(CoordinationTest, TestStorageSnapshotMode) storage.container.erase("/hello_" + std::to_string(i)); } EXPECT_EQ(storage.container.size(), 29); - EXPECT_EQ(storage.container.snapshotSizeWithVersion().first, 105); + if constexpr (Storage::use_rocksdb) + EXPECT_EQ(storage.container.snapshotSizeWithVersion().first, 54); + else + EXPECT_EQ(storage.container.snapshotSizeWithVersion().first, 105); EXPECT_EQ(storage.container.snapshotSizeWithVersion().second, 1); auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, 50); } - EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + this->extension)); EXPECT_EQ(storage.container.size(), 29); storage.clearGarbageAfterSnapshot(); EXPECT_EQ(storage.container.snapshotSizeWithVersion().first, 29); @@ -1710,28 +1761,33 @@ TEST_P(CoordinationTest, TestStorageSnapshotMode) } } -TEST_P(CoordinationTest, TestStorageSnapshotBroken) +TYPED_TEST(CoordinationTest, TestStorageSnapshotBroken) { - auto params = GetParam(); - ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); - DB::KeeperStorage storage(500, "", keeper_context); + ChangelogDirTest test("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + DB::KeeperSnapshotManager manager(3, this->keeper_context, this->enable_compression); + Storage storage(500, "", this->keeper_context); for (size_t i = 0; i < 50; ++i) { addNode(storage, "/hello_" + std::to_string(i), "world_" + std::to_string(i)); } { - DB::KeeperStorageSnapshot snapshot(&storage, 50); + DB::KeeperStorageSnapshot snapshot(&storage, 50); auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, 50); } - EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + this->extension)); /// Let's corrupt file DB::WriteBufferFromFile plain_buf( - "./snapshots/snapshot_50.bin" + params.extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); + "./snapshots/snapshot_50.bin" + this->extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); plain_buf.truncate(34); plain_buf.sync(); @@ -1747,7 +1803,7 @@ nuraft::ptr getBufferFromZKRequest(int64_t session_id, int64_t z auto time = duration_cast(system_clock::now().time_since_epoch()).count(); DB::writeIntBinary(time, buf); DB::writeIntBinary(zxid, buf); - DB::writeIntBinary(DB::KeeperStorage::DigestVersion::NO_DIGEST, buf); + DB::writeIntBinary(DB::KeeperMemoryStorage::DigestVersion::NO_DIGEST, buf); return buf.getBuffer(); } @@ -1758,6 +1814,7 @@ getLogEntryFromZKRequest(size_t term, int64_t session_id, int64_t zxid, const Co return nuraft::cs_new(term, buffer); } +template void testLogAndStateMachine( DB::CoordinationSettingsPtr settings, uint64_t total_logs, @@ -1768,12 +1825,15 @@ void testLogAndStateMachine( ChangelogDirTest snapshots("./snapshots"); ChangelogDirTest logs("./logs"); + ChangelogDirTest rocks("./rocksdb"); auto get_keeper_context = [&] { auto local_keeper_context = std::make_shared(true, settings); local_keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); local_keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs")); + local_keeper_context->setRocksDBDisk(std::make_shared("RocksDisk", "./rocksdb")); + local_keeper_context->setRocksDBOptions(); return local_keeper_context; }; @@ -1781,7 +1841,7 @@ void testLogAndStateMachine( SnapshotsQueue snapshots_queue{1}; auto keeper_context = get_keeper_context(); - auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); + auto state_machine = std::make_shared>(queue, snapshots_queue, keeper_context, nullptr); state_machine->init(); DB::KeeperLogStore changelog( @@ -1828,7 +1888,7 @@ void testLogAndStateMachine( SnapshotsQueue snapshots_queue1{1}; keeper_context = get_keeper_context(); - auto restore_machine = std::make_shared(queue, snapshots_queue1, keeper_context, nullptr); + auto restore_machine = std::make_shared>(queue, snapshots_queue1, keeper_context, nullptr); restore_machine->init(); EXPECT_EQ(restore_machine->last_commit_index(), total_logs - total_logs % settings->snapshot_distance); @@ -1864,11 +1924,12 @@ void testLogAndStateMachine( } } -TEST_P(CoordinationTest, TestStateMachineAndLogStore) +TYPED_TEST(CoordinationTest, TestStateMachineAndLogStore) { using namespace Coordination; using namespace DB; - auto params = GetParam(); + + using Storage = typename TestFixture::Storage; { CoordinationSettingsPtr settings = std::make_shared(); @@ -1876,78 +1937,83 @@ TEST_P(CoordinationTest, TestStateMachineAndLogStore) settings->reserved_log_items = 10; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 37, params.enable_compression); + testLogAndStateMachine(settings, 37, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 10; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 11, params.enable_compression); + testLogAndStateMachine(settings, 11, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 10; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 40, params.enable_compression); + testLogAndStateMachine(settings, 40, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 20; settings->rotate_log_storage_interval = 30; - testLogAndStateMachine(settings, 40, params.enable_compression); + testLogAndStateMachine(settings, 40, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 0; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 40, params.enable_compression); + testLogAndStateMachine(settings, 40, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 1; settings->reserved_log_items = 1; settings->rotate_log_storage_interval = 32; - testLogAndStateMachine(settings, 32, params.enable_compression); + testLogAndStateMachine(settings, 32, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 7; settings->rotate_log_storage_interval = 1; - testLogAndStateMachine(settings, 33, params.enable_compression); + testLogAndStateMachine(settings, 33, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 37; settings->reserved_log_items = 1000; settings->rotate_log_storage_interval = 5000; - testLogAndStateMachine(settings, 33, params.enable_compression); + testLogAndStateMachine(settings, 33, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 37; settings->reserved_log_items = 1000; settings->rotate_log_storage_interval = 5000; - testLogAndStateMachine(settings, 45, params.enable_compression); + testLogAndStateMachine(settings, 45, this->enable_compression); } } -TEST_P(CoordinationTest, TestEphemeralNodeRemove) +TYPED_TEST(CoordinationTest, TestEphemeralNodeRemove) { using namespace Coordination; using namespace DB; ChangelogDirTest snapshots("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); + auto state_machine = std::make_shared>(queue, snapshots_queue, this->keeper_context, nullptr); state_machine->init(); std::shared_ptr request_c = std::make_shared(); @@ -1970,21 +2036,27 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) } -TEST_P(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitted) +TYPED_TEST(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitted) { using namespace Coordination; using namespace DB; ChangelogDirTest snapshots("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); + auto state_machine = std::make_shared>(queue, snapshots_queue, this->keeper_context, nullptr); state_machine->init(); String user_auth_data = "test_user:test_password"; - String digest = KeeperStorage::generateDigest(user_auth_data); + String digest = KeeperMemoryStorage::generateDigest(user_auth_data); std::shared_ptr auth_req = std::make_shared(); auth_req->scheme = "digest"; @@ -2020,20 +2092,25 @@ TEST_P(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitte EXPECT_EQ(acls[0].permissions, 31); } -TEST_P(CoordinationTest, TestPreprocessWhenCloseSessionIsPrecommitted) +TYPED_TEST(CoordinationTest, TestPreprocessWhenCloseSessionIsPrecommitted) { using namespace Coordination; using namespace DB; ChangelogDirTest snapshots("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; int64_t session_without_auth = 1; int64_t session_with_auth = 2; size_t term = 0; - auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); + auto state_machine = std::make_shared>(queue, snapshots_queue, this->keeper_context, nullptr); state_machine->init(); auto & storage = state_machine->getStorageUnsafe(); @@ -2200,22 +2277,26 @@ TEST_P(CoordinationTest, TestPreprocessWhenCloseSessionIsPrecommitted) } } -TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) +TYPED_TEST(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) { using namespace Coordination; using namespace DB; ChangelogDirTest snapshots("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); + using Storage = typename TestFixture::Storage; + auto state_machine = std::make_shared>(queue, snapshots_queue, this->keeper_context, nullptr); state_machine->init(); String user_auth_data = "test_user:test_password"; - String digest = KeeperStorage::generateDigest(user_auth_data); + String digest = KeeperMemoryStorage::generateDigest(user_auth_data); std::shared_ptr auth_req = std::make_shared(); auth_req->scheme = "digest"; @@ -2258,17 +2339,17 @@ TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) } -TEST_P(CoordinationTest, TestRotateIntervalChanges) +TYPED_TEST(CoordinationTest, TestRotateIntervalChanges) { using namespace Coordination; - auto params = GetParam(); + ChangelogDirTest snapshots("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); { DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(0, 3); for (size_t i = 1; i < 55; ++i) @@ -2284,12 +2365,12 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) } - EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + this->extension)); DB::KeeperLogStore changelog_1( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 10}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_1.init(0, 50); for (size_t i = 0; i < 55; ++i) { @@ -2302,13 +2383,13 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) waitDurableLogs(changelog_1); - EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin" + this->extension)); DB::KeeperLogStore changelog_2( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 7}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 7}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_2.init(98, 55); for (size_t i = 0; i < 17; ++i) @@ -2322,20 +2403,20 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) waitDurableLogs(changelog_2); - keeper_context->setLastCommitIndex(105); + this->keeper_context->setLastCommitIndex(105); changelog_2.compact(105); std::this_thread::sleep_for(std::chrono::microseconds(1000)); - assertFileDeleted("./logs/changelog_1_100.bin" + params.extension); - EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_111_117.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_118_124.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin" + params.extension)); + assertFileDeleted("./logs/changelog_1_100.bin" + this->extension); + EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_111_117.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_118_124.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin" + this->extension)); DB::KeeperLogStore changelog_3( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_3.init(116, 3); for (size_t i = 0; i < 17; ++i) { @@ -2348,20 +2429,20 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) waitDurableLogs(changelog_3); - keeper_context->setLastCommitIndex(125); + this->keeper_context->setLastCommitIndex(125); changelog_3.compact(125); std::this_thread::sleep_for(std::chrono::microseconds(1000)); - assertFileDeleted("./logs/changelog_101_110.bin" + params.extension); - assertFileDeleted("./logs/changelog_111_117.bin" + params.extension); - assertFileDeleted("./logs/changelog_118_124.bin" + params.extension); + assertFileDeleted("./logs/changelog_101_110.bin" + this->extension); + assertFileDeleted("./logs/changelog_111_117.bin" + this->extension); + assertFileDeleted("./logs/changelog_118_124.bin" + this->extension); - EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_132_136.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_137_141.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_142_146.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_132_136.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_137_141.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_142_146.bin" + this->extension)); } -TEST_P(CoordinationTest, TestSessionExpiryQueue) +TYPED_TEST(CoordinationTest, TestSessionExpiryQueue) { using namespace Coordination; SessionExpiryQueue queue(500); @@ -2379,16 +2460,15 @@ TEST_P(CoordinationTest, TestSessionExpiryQueue) } -TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) +TYPED_TEST(CoordinationTest, TestCompressedLogsMultipleRewrite) { using namespace Coordination; - auto test_params = GetParam(); ChangelogDirTest logs("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(0, 3); for (size_t i = 1; i < 55; ++i) @@ -2403,9 +2483,9 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) waitDurableLogs(changelog); DB::KeeperLogStore changelog1( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog1.init(0, 3); for (size_t i = 55; i < 70; ++i) { @@ -2419,9 +2499,9 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) waitDurableLogs(changelog1); DB::KeeperLogStore changelog2( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog2.init(0, 3); for (size_t i = 70; i < 80; ++i) { @@ -2433,16 +2513,21 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) } } -TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions) +TYPED_TEST(CoordinationTest, TestStorageSnapshotDifferentCompressions) { - auto params = GetParam(); + ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); + using Storage = typename TestFixture::Storage; - DB::KeeperStorage storage(500, "", keeper_context); + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + DB::KeeperSnapshotManager manager(3, this->keeper_context, this->enable_compression); + + Storage storage(500, "", this->keeper_context); addNode(storage, "/hello1", "world", 1); addNode(storage, "/hello2", "somedata", 3); storage.session_id_counter = 5; @@ -2452,13 +2537,13 @@ TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions) storage.getSessionID(130); storage.getSessionID(130); - DB::KeeperStorageSnapshot snapshot(&storage, 2); + DB::KeeperStorageSnapshot snapshot(&storage, 2); auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, 2); - EXPECT_TRUE(fs::exists("./snapshots/snapshot_2.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_2.bin" + this->extension)); - DB::KeeperSnapshotManager new_manager(3, keeper_context, !params.enable_compression); + DB::KeeperSnapshotManager new_manager(3, this->keeper_context, !this->enable_compression); auto debuf = new_manager.deserializeSnapshotBufferFromDisk(2); @@ -2480,17 +2565,17 @@ TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions) EXPECT_EQ(restored_storage->session_and_timeout.size(), 2); } -TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) +TYPED_TEST(CoordinationTest, ChangelogInsertThreeTimesSmooth) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); { - LOG_INFO(log, "================First time====================="); + LOG_INFO(this->log, "================First time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -2500,11 +2585,11 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) } { - LOG_INFO(log, "================Second time====================="); + LOG_INFO(this->log, "================Second time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -2514,11 +2599,11 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) } { - LOG_INFO(log, "================Third time====================="); + LOG_INFO(this->log, "================Third time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -2528,11 +2613,11 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) } { - LOG_INFO(log, "================Fourth time====================="); + LOG_INFO(this->log, "================Fourth time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -2543,18 +2628,18 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) } -TEST_P(CoordinationTest, ChangelogInsertMultipleTimesSmooth) +TYPED_TEST(CoordinationTest, ChangelogInsertMultipleTimesSmooth) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); for (size_t i = 0; i < 36; ++i) { - LOG_INFO(log, "================First time====================="); + LOG_INFO(this->log, "================First time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t j = 0; j < 7; ++j) { @@ -2566,24 +2651,24 @@ TEST_P(CoordinationTest, ChangelogInsertMultipleTimesSmooth) } DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); EXPECT_EQ(changelog.next_slot(), 36 * 7 + 1); } -TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) +TYPED_TEST(CoordinationTest, ChangelogInsertThreeTimesHard) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); { - LOG_INFO(log, "================First time====================="); + LOG_INFO(this->log, "================First time====================="); DB::KeeperLogStore changelog1( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog1.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog1.append(entry); @@ -2593,11 +2678,11 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) } { - LOG_INFO(log, "================Second time====================="); + LOG_INFO(this->log, "================Second time====================="); DB::KeeperLogStore changelog2( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog2.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog2.append(entry); @@ -2607,11 +2692,11 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) } { - LOG_INFO(log, "================Third time====================="); + LOG_INFO(this->log, "================Third time====================="); DB::KeeperLogStore changelog3( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog3.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog3.append(entry); @@ -2621,11 +2706,11 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) } { - LOG_INFO(log, "================Fourth time====================="); + LOG_INFO(this->log, "================Fourth time====================="); DB::KeeperLogStore changelog4( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog4.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog4.append(entry); @@ -2635,18 +2720,23 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) } } -TEST_P(CoordinationTest, TestStorageSnapshotEqual) +TYPED_TEST(CoordinationTest, TestStorageSnapshotEqual) { - auto params = GetParam(); + ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); std::optional snapshot_hash; for (size_t i = 0; i < 15; ++i) { - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); + DB::KeeperSnapshotManager manager(3, this->keeper_context, this->enable_compression); - DB::KeeperStorage storage(500, "", keeper_context); + Storage storage(500, "", this->keeper_context); addNode(storage, "/hello", ""); for (size_t j = 0; j < 5000; ++j) { @@ -2662,7 +2752,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotEqual) for (size_t j = 0; j < 3333; ++j) storage.getSessionID(130 * j); - DB::KeeperStorageSnapshot snapshot(&storage, storage.zxid); + DB::KeeperStorageSnapshot snapshot(&storage, storage.zxid); auto buf = manager.serializeSnapshotToBuffer(snapshot); @@ -2679,17 +2769,16 @@ TEST_P(CoordinationTest, TestStorageSnapshotEqual) } -TEST_P(CoordinationTest, TestLogGap) +TYPED_TEST(CoordinationTest, TestLogGap) { using namespace Coordination; - auto test_params = GetParam(); ChangelogDirTest logs("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(0, 3); for (size_t i = 1; i < 55; ++i) @@ -2702,13 +2791,13 @@ TEST_P(CoordinationTest, TestLogGap) } DB::KeeperLogStore changelog1( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog1.init(61, 3); /// Logs discarded - EXPECT_FALSE(fs::exists("./logs/changelog_1_100.bin" + test_params.extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_1_100.bin" + this->extension)); EXPECT_EQ(changelog1.start_index(), 61); EXPECT_EQ(changelog1.next_slot(), 61); } @@ -2720,12 +2809,17 @@ ResponseType getSingleResponse(const auto & responses) return dynamic_cast(*responses[0].response); } -TEST_P(CoordinationTest, TestUncommittedStateBasicCrud) +TYPED_TEST(CoordinationTest, TestUncommittedStateBasicCrud) { using namespace DB; using namespace Coordination; - DB::KeeperStorage storage{500, "", keeper_context}; + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + Storage storage{500, "", this->keeper_context}; constexpr std::string_view path = "/test"; @@ -2837,12 +2931,17 @@ TEST_P(CoordinationTest, TestUncommittedStateBasicCrud) ASSERT_FALSE(get_committed_data()); } -TEST_P(CoordinationTest, TestListRequestTypes) +TYPED_TEST(CoordinationTest, TestListRequestTypes) { using namespace DB; using namespace Coordination; - KeeperStorage storage{500, "", keeper_context}; + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + Storage storage{500, "", this->keeper_context}; int32_t zxid = 0; @@ -2919,18 +3018,18 @@ TEST_P(CoordinationTest, TestListRequestTypes) } } -TEST_P(CoordinationTest, TestDurableState) +TYPED_TEST(CoordinationTest, TestDurableState) { ChangelogDirTest logs("./logs"); - setLogDirectory("./logs"); - setStateFileDirectory("."); + this->setLogDirectory("./logs"); + this->setStateFileDirectory("."); auto state = nuraft::cs_new(); std::optional state_manager; const auto reload_state_manager = [&] { - state_manager.emplace(1, "localhost", 9181, keeper_context); + state_manager.emplace(1, "localhost", 9181, this->keeper_context); state_manager->loadLogStore(1, 0); }; @@ -2993,10 +3092,15 @@ TEST_P(CoordinationTest, TestDurableState) } } -TEST_P(CoordinationTest, TestFeatureFlags) +TYPED_TEST(CoordinationTest, TestFeatureFlags) { using namespace Coordination; - KeeperStorage storage{500, "", keeper_context}; + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + Storage storage{500, "", this->keeper_context}; auto request = std::make_shared(); request->path = DB::keeper_api_feature_flags_path; auto responses = storage.processRequest(request, 0, std::nullopt, true, true); @@ -3008,14 +3112,19 @@ TEST_P(CoordinationTest, TestFeatureFlags) ASSERT_FALSE(feature_flags.isEnabled(KeeperFeatureFlag::CHECK_NOT_EXISTS)); } -TEST_P(CoordinationTest, TestSystemNodeModify) +TYPED_TEST(CoordinationTest, TestSystemNodeModify) { using namespace Coordination; int64_t zxid{0}; + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + // On INIT we abort when a system path is modified - keeper_context->setServerState(KeeperContext::Phase::RUNNING); - KeeperStorage storage{500, "", keeper_context}; + this->keeper_context->setServerState(KeeperContext::Phase::RUNNING); + Storage storage{500, "", this->keeper_context}; const auto assert_create = [&](const std::string_view path, const auto expected_code) { auto request = std::make_shared(); @@ -3040,11 +3149,11 @@ TEST_P(CoordinationTest, TestSystemNodeModify) assert_create("/keeper1/test", Error::ZOK); } -TEST_P(CoordinationTest, ChangelogTestMaxLogSize) +TYPED_TEST(CoordinationTest, ChangelogTestMaxLogSize) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); uint64_t last_entry_index{0}; size_t i{0}; @@ -3052,9 +3161,9 @@ TEST_P(CoordinationTest, ChangelogTestMaxLogSize) SCOPED_TRACE("Small rotation interval, big size limit"); DB::KeeperLogStore changelog( DB::LogFileSettings{ - .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20, .max_size = 50 * 1024 * 1024}, + .force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 20, .max_size = 50 * 1024 * 1024}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (; i < 100; ++i) @@ -3072,9 +3181,9 @@ TEST_P(CoordinationTest, ChangelogTestMaxLogSize) SCOPED_TRACE("Large rotation interval, small size limit"); DB::KeeperLogStore changelog( DB::LogFileSettings{ - .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100'000, .max_size = 4000}, + .force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100'000, .max_size = 4000}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); ASSERT_EQ(changelog.entry_at(last_entry_index)->get_term(), (i - 1 + 44) * 10); @@ -3094,20 +3203,25 @@ TEST_P(CoordinationTest, ChangelogTestMaxLogSize) SCOPED_TRACE("Final verify all logs"); DB::KeeperLogStore changelog( DB::LogFileSettings{ - .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100'000, .max_size = 4000}, + .force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100'000, .max_size = 4000}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); ASSERT_EQ(changelog.entry_at(last_entry_index)->get_term(), (i - 1 + 44) * 10); } } -TEST_P(CoordinationTest, TestCheckNotExistsRequest) +TYPED_TEST(CoordinationTest, TestCheckNotExistsRequest) { using namespace DB; using namespace Coordination; - KeeperStorage storage{500, "", keeper_context}; + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + Storage storage{500, "", this->keeper_context}; int32_t zxid = 0; @@ -3175,18 +3289,23 @@ TEST_P(CoordinationTest, TestCheckNotExistsRequest) } } -TEST_P(CoordinationTest, TestReapplyingDeltas) +TYPED_TEST(CoordinationTest, TestReapplyingDeltas) { using namespace DB; using namespace Coordination; + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + static constexpr int64_t initial_zxid = 100; const auto create_request = std::make_shared(); create_request->path = "/test/data"; create_request->is_sequential = true; - const auto process_create = [](KeeperStorage & storage, const auto & request, int64_t zxid) + const auto process_create = [](Storage & storage, const auto & request, int64_t zxid) { storage.preprocessRequest(request, 1, 0, zxid); auto responses = storage.processRequest(request, 1, zxid); @@ -3207,19 +3326,19 @@ TEST_P(CoordinationTest, TestReapplyingDeltas) process_create(storage, create_request, zxid); }; - KeeperStorage storage1{500, "", keeper_context}; + Storage storage1{500, "", this->keeper_context}; commit_initial_data(storage1); for (int64_t zxid = initial_zxid + 1; zxid < initial_zxid + 50; ++zxid) storage1.preprocessRequest(create_request, 1, 0, zxid, /*check_acl=*/true, /*digest=*/std::nullopt, /*log_idx=*/zxid); /// create identical new storage - KeeperStorage storage2{500, "", keeper_context}; + Storage storage2{500, "", this->keeper_context}; commit_initial_data(storage2); storage1.applyUncommittedState(storage2, initial_zxid); - const auto commit_unprocessed = [&](KeeperStorage & storage) + const auto commit_unprocessed = [&](Storage & storage) { for (int64_t zxid = initial_zxid + 1; zxid < initial_zxid + 50; ++zxid) { @@ -3232,7 +3351,7 @@ TEST_P(CoordinationTest, TestReapplyingDeltas) commit_unprocessed(storage1); commit_unprocessed(storage2); - const auto get_children = [&](KeeperStorage & storage) + const auto get_children = [&](Storage & storage) { const auto list_request = std::make_shared(); list_request->path = "/test"; @@ -3252,8 +3371,8 @@ TEST_P(CoordinationTest, TestReapplyingDeltas) ASSERT_TRUE(children1_set == children2_set); } -INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite, - CoordinationTest, - ::testing::ValuesIn(std::initializer_list{CompressionParam{true, ".zstd"}, CompressionParam{false, ""}})); +/// INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite, +/// CoordinationTest, +/// ::testing::ValuesIn(std::initializer_list{CompressionParam{true, ".zstd"}, CompressionParam{false, ""}})); #endif diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index cd7f1437cf7..59761d827e1 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -786,7 +786,7 @@ struct SetupNodeCollector if (snapshot_result.storage == nullptr) { std::cerr << "No initial snapshot found" << std::endl; - initial_storage = std::make_unique( + initial_storage = std::make_unique( /* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); initial_storage->initializeSystemNodes(); } @@ -932,7 +932,7 @@ struct SetupNodeCollector std::cerr << "Generating snapshot with starting data" << std::endl; DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(initial_storage->getZXID(), 1, std::make_shared()); - DB::KeeperStorageSnapshot snapshot(initial_storage.get(), snapshot_meta); + DB::KeeperStorageSnapshot snapshot(initial_storage.get(), snapshot_meta); snapshot_manager->serializeSnapshotToDisk(snapshot); new_nodes = false; @@ -940,9 +940,9 @@ struct SetupNodeCollector std::mutex nodes_mutex; DB::KeeperContextPtr keeper_context; - Coordination::KeeperStoragePtr initial_storage; + std::shared_ptr initial_storage; std::unordered_set nodes_created_during_replay; - std::optional snapshot_manager; + std::optional> snapshot_manager; bool new_nodes = false; }; diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index db6dac52190..60170d67a2e 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -16,7 +16,7 @@ using namespace Coordination; using namespace DB; -void dumpMachine(std::shared_ptr machine) +void dumpMachine(std::shared_ptr> machine) { auto & storage = machine->getStorageUnsafe(); std::queue keys; @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", argv[1])); - auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); + auto state_machine = std::make_shared>(queue, snapshots_queue, keeper_context, nullptr); state_machine->init(); size_t last_commited_index = state_machine->last_commit_index();