From 7fc8ee726e3ef2dfb7d778fbb1a70fb147a33067 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Thu, 18 Jul 2024 12:32:16 +0200 Subject: [PATCH 01/15] add replication lag and recovery time metrics --- src/Databases/DatabaseReplicated.cpp | 44 +++++++++---------- src/Databases/DatabaseReplicated.h | 10 ++++- src/Databases/DatabaseReplicatedWorker.cpp | 4 ++ src/Databases/DatabaseReplicatedWorker.h | 4 ++ src/Storages/System/StorageSystemClusters.cpp | 33 +++++++++----- src/Storages/System/StorageSystemClusters.h | 2 +- .../test_recovery_time_metric/__init__.py | 0 .../configs/config.xml | 41 +++++++++++++++++ .../test_recovery_time_metric/test.py | 26 +++++++++++ 9 files changed, 129 insertions(+), 35 deletions(-) create mode 100644 tests/integration/test_recovery_time_metric/__init__.py create mode 100644 tests/integration/test_recovery_time_metric/configs/config.xml create mode 100644 tests/integration/test_recovery_time_metric/test.py diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 7ce2859e962..b11b9382732 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -338,42 +338,40 @@ ClusterPtr DatabaseReplicated::getClusterImpl(bool all_groups) const return std::make_shared(getContext()->getSettingsRef(), shards, params); } -std::vector DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr & cluster_) const +ReplicasInfo DatabaseReplicated::tryGetReplicasInfo(const ClusterPtr & cluster_) const { - Strings paths; + ReplicasInfo res; + + auto zookeeper = getZooKeeper(); const auto & addresses_with_failover = cluster_->getShardsAddresses(); const auto & shards_info = cluster_->getShardsInfo(); - for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) - { - for (const auto & replica : addresses_with_failover[shard_index]) - { - String full_name = getFullReplicaName(replica.database_shard_name, replica.database_replica_name); - paths.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "active"); - } - } try { - auto current_zookeeper = getZooKeeper(); - auto res = current_zookeeper->exists(paths); + UInt32 max_log_ptr = parse(zookeeper->get(zookeeper_path + "/max_log_ptr")); - std::vector statuses; - statuses.resize(paths.size()); - - for (size_t i = 0; i < res.size(); ++i) - if (res[i].error == Coordination::Error::ZOK) - statuses[i] = 1; - - return statuses; - } - catch (...) + for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) + { + for (const auto & replica : addresses_with_failover[shard_index]) + { + String full_name = getFullReplicaName(replica.database_shard_name, replica.database_replica_name); + UInt32 log_ptr = parse(zookeeper->get(fs::path(zookeeper_path) / "replicas" / full_name / "log_ptr")); + bool is_active = zookeeper->exists(fs::path(zookeeper_path) / "replicas" / full_name / "active"); + res.push_back(ReplicaInfo{ + .is_active = is_active, + .replication_lag = max_log_ptr - log_ptr, + .recovery_time = replica.is_local ? ddl_worker->getCurrentInitializationDurationMs() : 0, + }); + } + } + return res; + } catch (...) { tryLogCurrentException(log); return {}; } } - void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref) { const auto & config_prefix = fmt::format("named_collections.{}", collection_name); diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index eab5b2ff931..db02b5ef30f 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -17,6 +17,14 @@ using ZooKeeperPtr = std::shared_ptr; class Cluster; using ClusterPtr = std::shared_ptr; +struct ReplicaInfo +{ + bool is_active; + UInt32 replication_lag; + UInt64 recovery_time; +}; +using ReplicasInfo = std::vector; + class DatabaseReplicated : public DatabaseAtomic { public: @@ -84,7 +92,7 @@ public: static void dropReplica(DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica, bool throw_if_noop); - std::vector tryGetAreReplicasActive(const ClusterPtr & cluster_) const; + ReplicasInfo tryGetReplicasInfo(const ClusterPtr & cluster_) const; void renameDatabase(ContextPtr query_context, const String & new_name) override; diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 1ef88dc03bc..cea2d123f87 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -32,6 +32,8 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db bool DatabaseReplicatedDDLWorker::initializeMainThread() { + initialization_duration_timer.emplace(); + while (!stop_flag) { try @@ -69,6 +71,7 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() initializeReplication(); initialized = true; + initialization_duration_timer.reset(); return true; } catch (...) @@ -78,6 +81,7 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() } } + initialization_duration_timer.reset(); return false; } diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h index 41edf2221b8..aea3b71173d 100644 --- a/src/Databases/DatabaseReplicatedWorker.h +++ b/src/Databases/DatabaseReplicatedWorker.h @@ -36,6 +36,8 @@ public: DatabaseReplicated * const database, bool committed = false); /// NOLINT UInt32 getLogPointer() const; + + UInt64 getCurrentInitializationDurationMs() const { return initialization_duration_timer ? initialization_duration_timer->elapsedMilliseconds() : 0; } private: bool initializeMainThread() override; void initializeReplication(); @@ -56,6 +58,8 @@ private: ZooKeeperPtr active_node_holder_zookeeper; /// It will remove "active" node when database is detached zkutil::EphemeralNodeHolderPtr active_node_holder; + + std::optional initialization_duration_timer; }; } diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index 160c8d6270e..0da4bd70cbd 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -31,6 +31,8 @@ ColumnsDescription StorageSystemClusters::getColumnsDescription() {"database_shard_name", std::make_shared(), "The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database)."}, {"database_replica_name", std::make_shared(), "The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database)."}, {"is_active", std::make_shared(std::make_shared()), "The status of the Replicated database replica (for clusters that belong to a Replicated database): 1 means 'replica is online', 0 means 'replica is offline', NULL means 'unknown'."}, + {"replication_lag", std::make_shared(std::make_shared()), "The replication lag of the `Replicated` database replica (for clusters that belong to a Replicated database)."}, + {"recovery_time", std::make_shared(std::make_shared()), "The recovery time of the `Replicated` database replica (for clusters that belong to a Replicated database), in milliseconds."}, }; description.setAliases({ @@ -46,24 +48,23 @@ void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr co writeCluster(res_columns, name_and_cluster, {}); const auto databases = DatabaseCatalog::instance().getDatabases(); - for (const auto & name_and_database : databases) + for (const auto & [database_name, database] : databases) { - if (const auto * replicated = typeid_cast(name_and_database.second.get())) + if (const auto * replicated = typeid_cast(database.get())) { - if (auto database_cluster = replicated->tryGetCluster()) - writeCluster(res_columns, {name_and_database.first, database_cluster}, - replicated->tryGetAreReplicasActive(database_cluster)); + writeCluster(res_columns, {database_name, database_cluster}, + replicated->tryGetReplicasInfo(database_cluster)); if (auto database_cluster = replicated->tryGetAllGroupsCluster()) - writeCluster(res_columns, {DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX + name_and_database.first, database_cluster}, - replicated->tryGetAreReplicasActive(database_cluster)); + writeCluster(res_columns, {DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX + database_name, database_cluster}, + replicated->tryGetReplicasInfo(database_cluster)); } } } void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster, - const std::vector & is_active) + const ReplicasInfo & replicas_info) { const String & cluster_name = name_and_cluster.first; const ClusterPtr & cluster = name_and_cluster.second; @@ -99,10 +100,22 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const Nam res_columns[i++]->insert(pool_status[replica_index].estimated_recovery_time.count()); res_columns[i++]->insert(address.database_shard_name); res_columns[i++]->insert(address.database_replica_name); - if (is_active.empty()) + if (replicas_info.empty()) + { res_columns[i++]->insertDefault(); + res_columns[i++]->insertDefault(); + res_columns[i++]->insertDefault(); + } else - res_columns[i++]->insert(is_active[replica_idx++]); + { + const auto & replica_info = replicas_info[replica_idx++]; + res_columns[i++]->insert(replica_info.is_active); + res_columns[i++]->insert(replica_info.replication_lag); + if (replica_info.recovery_time != 0) + res_columns[i++]->insert(replica_info.recovery_time); + else + res_columns[i++]->insertDefault(); + } } } } diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h index 0f7c792261d..ead123aa79e 100644 --- a/src/Storages/System/StorageSystemClusters.h +++ b/src/Storages/System/StorageSystemClusters.h @@ -27,7 +27,7 @@ protected: using NameAndCluster = std::pair>; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; - static void writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster, const std::vector & is_active); + static void writeCluster(MutableColumns & res_columns, const NameAndCluster & name_and_cluster, const ReplicasInfo & replicas_info); }; } diff --git a/tests/integration/test_recovery_time_metric/__init__.py b/tests/integration/test_recovery_time_metric/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_recovery_time_metric/configs/config.xml b/tests/integration/test_recovery_time_metric/configs/config.xml new file mode 100644 index 00000000000..bad9b1fa9ea --- /dev/null +++ b/tests/integration/test_recovery_time_metric/configs/config.xml @@ -0,0 +1,41 @@ + + 9000 + + + + + + + + + default + + + + + + 2181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + 20000 + + + + 1 + localhost + 9444 + + + + + + + localhost + 2181 + + 20000 + + + diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py new file mode 100644 index 00000000000..9ceb0cce288 --- /dev/null +++ b/tests/integration/test_recovery_time_metric/test.py @@ -0,0 +1,26 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", main_configs=["configs/config.xml"], with_zookeeper=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_recovery_time_metric(start_cluster): + node.query("CREATE DATABASE rdb ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1');") + node.query("CREATE TABLE rdb.t (x UInt32) ENGINE = MergeTree ORDER BY x;") + node.exec_in_container(["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"]) + node.restart_clickhouse() + assert ( + node.query("SELECT any(recovery_time) FROM system.clusters;") + != "0\n" + ) + From a6d4db342b2fc83e385d549ba5ce9ebf9e63064e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 18 Jul 2024 16:45:40 +0000 Subject: [PATCH 02/15] Automatic style fix --- .../integration/test_recovery_time_metric/test.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py index 9ceb0cce288..90155f81ba2 100644 --- a/tests/integration/test_recovery_time_metric/test.py +++ b/tests/integration/test_recovery_time_metric/test.py @@ -2,7 +2,9 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node", main_configs=["configs/config.xml"], with_zookeeper=True) +node = cluster.add_instance( + "node", main_configs=["configs/config.xml"], with_zookeeper=True +) @pytest.fixture(scope="module") @@ -15,12 +17,10 @@ def start_cluster(): def test_recovery_time_metric(start_cluster): - node.query("CREATE DATABASE rdb ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1');") + node.query( + "CREATE DATABASE rdb ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1');" + ) node.query("CREATE TABLE rdb.t (x UInt32) ENGINE = MergeTree ORDER BY x;") node.exec_in_container(["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"]) node.restart_clickhouse() - assert ( - node.query("SELECT any(recovery_time) FROM system.clusters;") - != "0\n" - ) - + assert node.query("SELECT any(recovery_time) FROM system.clusters;") != "0\n" From 3f66b39a18a7bf271a9a9f97dfc075866e2409eb Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Fri, 19 Jul 2024 14:30:27 +0200 Subject: [PATCH 03/15] test replication lag metric --- .../0_stateless/03206_replication_lag_metric.reference | 4 ++++ .../queries/0_stateless/03206_replication_lag_metric.sql | 9 +++++++++ 2 files changed, 13 insertions(+) create mode 100644 tests/queries/0_stateless/03206_replication_lag_metric.reference create mode 100644 tests/queries/0_stateless/03206_replication_lag_metric.sql diff --git a/tests/queries/0_stateless/03206_replication_lag_metric.reference b/tests/queries/0_stateless/03206_replication_lag_metric.reference new file mode 100644 index 00000000000..02f4a7264b1 --- /dev/null +++ b/tests/queries/0_stateless/03206_replication_lag_metric.reference @@ -0,0 +1,4 @@ +0 +2 +0 +2 diff --git a/tests/queries/0_stateless/03206_replication_lag_metric.sql b/tests/queries/0_stateless/03206_replication_lag_metric.sql new file mode 100644 index 00000000000..6b86553fcaf --- /dev/null +++ b/tests/queries/0_stateless/03206_replication_lag_metric.sql @@ -0,0 +1,9 @@ +CREATE DATABASE rdb1 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica1'); +CREATE DATABASE rdb2 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica2'); + +SET distributed_ddl_task_timeout = 0; +CREATE TABLE rdb1.t (id UInt32) ENGINE = ReplicatedMergeTree ORDER BY id; +SELECT replication_lag FROM system.clusters; + +DROP DATABASE rdb1; +DROP DATABASE rdb2; From 245626e5789064fda39ccc7288b83162284a3617 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Fri, 19 Jul 2024 14:30:44 +0200 Subject: [PATCH 04/15] small fix --- src/Storages/System/StorageSystemClusters.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h index ead123aa79e..f6e08734896 100644 --- a/src/Storages/System/StorageSystemClusters.h +++ b/src/Storages/System/StorageSystemClusters.h @@ -1,10 +1,10 @@ #pragma once +#include #include #include #include - namespace DB { From 79ef630d85cb445a743ee2d5950197709d75325f Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Fri, 19 Jul 2024 15:25:08 +0200 Subject: [PATCH 05/15] fix tests --- .../0_stateless/02117_show_create_table_system.reference | 2 ++ tests/queries/0_stateless/03206_replication_lag_metric.sql | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 8f62eda9233..28356632a66 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -52,6 +52,8 @@ CREATE TABLE system.clusters `database_shard_name` String, `database_replica_name` String, `is_active` Nullable(UInt8), + `replication_lag` Nullable(UInt32), + `recovery_time` Nullable(UInt64), `name` String ALIAS cluster ) ENGINE = SystemClusters diff --git a/tests/queries/0_stateless/03206_replication_lag_metric.sql b/tests/queries/0_stateless/03206_replication_lag_metric.sql index 6b86553fcaf..998c332a11c 100644 --- a/tests/queries/0_stateless/03206_replication_lag_metric.sql +++ b/tests/queries/0_stateless/03206_replication_lag_metric.sql @@ -1,9 +1,11 @@ +-- Tags: no-parallel + CREATE DATABASE rdb1 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica1'); CREATE DATABASE rdb2 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica2'); SET distributed_ddl_task_timeout = 0; CREATE TABLE rdb1.t (id UInt32) ENGINE = ReplicatedMergeTree ORDER BY id; -SELECT replication_lag FROM system.clusters; +SELECT replication_lag FROM system.clusters WHERE cluster IN ('rdb1', 'rdb2') ORDER BY cluster ASC, replica_num ASC; DROP DATABASE rdb1; DROP DATABASE rdb2; From c2ac13291f3bf201f7189bd36f2c9be7c06aa886 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Mon, 22 Jul 2024 09:06:13 +0100 Subject: [PATCH 06/15] fix tests --- tests/integration/test_recovery_time_metric/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py index 90155f81ba2..e4a44103b76 100644 --- a/tests/integration/test_recovery_time_metric/test.py +++ b/tests/integration/test_recovery_time_metric/test.py @@ -3,7 +3,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "node", main_configs=["configs/config.xml"], with_zookeeper=True + "node", main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True, ) From d040e436f3a1f8594070b04ec10cbf7391f6994a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 22 Jul 2024 08:18:58 +0000 Subject: [PATCH 07/15] Automatic style fix --- tests/integration/test_recovery_time_metric/test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py index e4a44103b76..4dad844b950 100644 --- a/tests/integration/test_recovery_time_metric/test.py +++ b/tests/integration/test_recovery_time_metric/test.py @@ -3,7 +3,10 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "node", main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True, + "node", + main_configs=["configs/config.xml"], + with_zookeeper=True, + stay_alive=True, ) From 5fa2db8e4828b004ac10d625df62efcc8711dc98 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Mon, 22 Jul 2024 16:59:17 +0100 Subject: [PATCH 08/15] fix 01293_show_clusters stateless test --- tests/queries/0_stateless/01293_show_clusters.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01293_show_clusters.reference b/tests/queries/0_stateless/01293_show_clusters.reference index e140f207022..9569fcf2e37 100644 --- a/tests/queries/0_stateless/01293_show_clusters.reference +++ b/tests/queries/0_stateless/01293_show_clusters.reference @@ -1,3 +1,3 @@ test_shard_localhost -test_cluster_one_shard_two_replicas 1 1 0 1 127.0.0.1 127.0.0.1 9000 1 default -test_cluster_one_shard_two_replicas 1 1 0 2 127.0.0.2 127.0.0.2 9000 0 default +test_cluster_one_shard_two_replicas 1 1 0 1 127.0.0.1 127.0.0.1 9000 1 default 0 NULL +test_cluster_one_shard_two_replicas 1 1 0 2 127.0.0.2 127.0.0.2 9000 0 default 0 NULL From 7f5c58f599d34f690c4a04e4223a2f86a433d0e9 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Mon, 22 Jul 2024 20:58:56 +0100 Subject: [PATCH 09/15] fxs --- tests/queries/0_stateless/01293_show_clusters.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01293_show_clusters.reference b/tests/queries/0_stateless/01293_show_clusters.reference index 9569fcf2e37..e140f207022 100644 --- a/tests/queries/0_stateless/01293_show_clusters.reference +++ b/tests/queries/0_stateless/01293_show_clusters.reference @@ -1,3 +1,3 @@ test_shard_localhost -test_cluster_one_shard_two_replicas 1 1 0 1 127.0.0.1 127.0.0.1 9000 1 default 0 NULL -test_cluster_one_shard_two_replicas 1 1 0 2 127.0.0.2 127.0.0.2 9000 0 default 0 NULL +test_cluster_one_shard_two_replicas 1 1 0 1 127.0.0.1 127.0.0.1 9000 1 default +test_cluster_one_shard_two_replicas 1 1 0 2 127.0.0.2 127.0.0.2 9000 0 default From 932033fca9bdacbfdb544fac5389e03fa7732eeb Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Tue, 23 Jul 2024 10:55:45 +0100 Subject: [PATCH 10/15] use atomic to avoid data race --- src/Databases/DatabaseReplicatedWorker.cpp | 12 +++++++++--- src/Databases/DatabaseReplicatedWorker.h | 5 +++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index cea2d123f87..a9a74c5f56a 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -32,7 +32,8 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db bool DatabaseReplicatedDDLWorker::initializeMainThread() { - initialization_duration_timer.emplace(); + initialization_duration_timer.restart(); + initializing.store(true, std::memory_order_release); while (!stop_flag) { @@ -71,7 +72,7 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() initializeReplication(); initialized = true; - initialization_duration_timer.reset(); + initializing.store(false, std::memory_order_relaxed); return true; } catch (...) @@ -81,7 +82,7 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() } } - initialization_duration_timer.reset(); + initializing.store(false, std::memory_order_relaxed); return false; } @@ -463,4 +464,9 @@ UInt32 DatabaseReplicatedDDLWorker::getLogPointer() const return max_id.load(); } +UInt64 DatabaseReplicatedDDLWorker::getCurrentInitializationDurationMs() const +{ + return initializing.load(std::memory_order_acquire) ? initialization_duration_timer.elapsedMilliseconds() : 0; +} + } diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h index aea3b71173d..3e5887be825 100644 --- a/src/Databases/DatabaseReplicatedWorker.h +++ b/src/Databases/DatabaseReplicatedWorker.h @@ -37,7 +37,7 @@ public: UInt32 getLogPointer() const; - UInt64 getCurrentInitializationDurationMs() const { return initialization_duration_timer ? initialization_duration_timer->elapsedMilliseconds() : 0; } + UInt64 getCurrentInitializationDurationMs() const; private: bool initializeMainThread() override; void initializeReplication(); @@ -59,7 +59,8 @@ private: /// It will remove "active" node when database is detached zkutil::EphemeralNodeHolderPtr active_node_holder; - std::optional initialization_duration_timer; + Stopwatch initialization_duration_timer; + std::atomic initializing = false; }; } From aa26291ff25b16539efa3d50a540fc32a05b702d Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Tue, 30 Jul 2024 18:15:24 +0100 Subject: [PATCH 11/15] fxs --- src/Databases/DatabaseReplicated.cpp | 57 +++++++++++++++---- src/Databases/DatabaseReplicatedWorker.cpp | 21 +++++-- src/Databases/DatabaseReplicatedWorker.h | 4 +- src/Storages/System/StorageSystemClusters.cpp | 6 +- .../test_recovery_time_metric/test.py | 36 ++++++++++-- 5 files changed, 98 insertions(+), 26 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index b11b9382732..06cea65d62e 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -340,31 +341,63 @@ ClusterPtr DatabaseReplicated::getClusterImpl(bool all_groups) const ReplicasInfo DatabaseReplicated::tryGetReplicasInfo(const ClusterPtr & cluster_) const { - ReplicasInfo res; + Strings paths_get, paths_exists; + + paths_get.emplace_back(fs::path(zookeeper_path) / "max_log_ptr"); - auto zookeeper = getZooKeeper(); const auto & addresses_with_failover = cluster_->getShardsAddresses(); const auto & shards_info = cluster_->getShardsInfo(); - + for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) + { + for (const auto & replica : addresses_with_failover[shard_index]) + { + String full_name = getFullReplicaName(replica.database_shard_name, replica.database_replica_name); + paths_exists.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "active"); + paths_get.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "log_ptr"); + } + } + try { - UInt32 max_log_ptr = parse(zookeeper->get(zookeeper_path + "/max_log_ptr")); + auto current_zookeeper = getZooKeeper(); + auto get_res = current_zookeeper->get(paths_get); + auto exist_res = current_zookeeper->exists(paths_exists); + chassert(get_res.size() == exist_res.size() + 1); + auto max_log_ptr_zk = get_res[0]; + if (max_log_ptr_zk.error != Coordination::Error::ZOK) + throw Coordination::Exception(max_log_ptr_zk.error); + + UInt32 max_log_ptr = parse(max_log_ptr_zk.data); + + ReplicasInfo replicas_info; + replicas_info.resize(exist_res.size()); + + size_t global_replica_index = 0; for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) { for (const auto & replica : addresses_with_failover[shard_index]) { - String full_name = getFullReplicaName(replica.database_shard_name, replica.database_replica_name); - UInt32 log_ptr = parse(zookeeper->get(fs::path(zookeeper_path) / "replicas" / full_name / "log_ptr")); - bool is_active = zookeeper->exists(fs::path(zookeeper_path) / "replicas" / full_name / "active"); - res.push_back(ReplicaInfo{ - .is_active = is_active, - .replication_lag = max_log_ptr - log_ptr, + auto replica_active = exist_res[global_replica_index]; + auto replica_log_ptr = get_res[global_replica_index + 1]; + + if (replica_active.error != Coordination::Error::ZOK && replica_active.error != Coordination::Error::ZNONODE) + throw Coordination::Exception(replica_active.error); + + if (replica_log_ptr.error != Coordination::Error::ZOK) + throw Coordination::Exception(replica_log_ptr.error); + + replicas_info[global_replica_index] = ReplicaInfo{ + .is_active = replica_active.error == Coordination::Error::ZOK, + .replication_lag = max_log_ptr - parse(replica_log_ptr.data), .recovery_time = replica.is_local ? ddl_worker->getCurrentInitializationDurationMs() : 0, - }); + }; + + ++global_replica_index; } } - return res; + + return replicas_info; } catch (...) { tryLogCurrentException(log); diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index a9a74c5f56a..4e7408aa96e 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -32,8 +32,11 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db bool DatabaseReplicatedDDLWorker::initializeMainThread() { - initialization_duration_timer.restart(); - initializing.store(true, std::memory_order_release); + { + std::lock_guard lock(initialization_duration_timer_mutex); + initialization_duration_timer.emplace(); + initialization_duration_timer->start(); + } while (!stop_flag) { @@ -72,7 +75,10 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() initializeReplication(); initialized = true; - initializing.store(false, std::memory_order_relaxed); + { + std::lock_guard lock(initialization_duration_timer_mutex); + initialization_duration_timer.reset(); + } return true; } catch (...) @@ -82,7 +88,11 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() } } - initializing.store(false, std::memory_order_relaxed); + { + std::lock_guard lock(initialization_duration_timer_mutex); + initialization_duration_timer.reset(); + } + return false; } @@ -466,7 +476,8 @@ UInt32 DatabaseReplicatedDDLWorker::getLogPointer() const UInt64 DatabaseReplicatedDDLWorker::getCurrentInitializationDurationMs() const { - return initializing.load(std::memory_order_acquire) ? initialization_duration_timer.elapsedMilliseconds() : 0; + std::lock_guard lock(initialization_duration_timer_mutex); + return initialization_duration_timer ? initialization_duration_timer->elapsedMilliseconds() : 0; } } diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h index 3e5887be825..2309c831839 100644 --- a/src/Databases/DatabaseReplicatedWorker.h +++ b/src/Databases/DatabaseReplicatedWorker.h @@ -59,8 +59,8 @@ private: /// It will remove "active" node when database is detached zkutil::EphemeralNodeHolderPtr active_node_holder; - Stopwatch initialization_duration_timer; - std::atomic initializing = false; + std::optional initialization_duration_timer; + mutable std::mutex initialization_duration_timer_mutex; }; } diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index 0da4bd70cbd..4b9802c9089 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -71,7 +71,7 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const Nam const auto & shards_info = cluster->getShardsInfo(); const auto & addresses_with_failover = cluster->getShardsAddresses(); - size_t replica_idx = 0; + size_t global_replica_idx = 0; for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) { const auto & shard_info = shards_info[shard_index]; @@ -108,7 +108,7 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const Nam } else { - const auto & replica_info = replicas_info[replica_idx++]; + const auto & replica_info = replicas_info[global_replica_idx]; res_columns[i++]->insert(replica_info.is_active); res_columns[i++]->insert(replica_info.replication_lag); if (replica_info.recovery_time != 0) @@ -116,6 +116,8 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const Nam else res_columns[i++]->insertDefault(); } + + ++global_replica_idx; } } } diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py index 4dad844b950..8f369d7759c 100644 --- a/tests/integration/test_recovery_time_metric/test.py +++ b/tests/integration/test_recovery_time_metric/test.py @@ -5,7 +5,6 @@ cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", main_configs=["configs/config.xml"], - with_zookeeper=True, stay_alive=True, ) @@ -21,9 +20,36 @@ def start_cluster(): def test_recovery_time_metric(start_cluster): node.query( - "CREATE DATABASE rdb ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1');" + """ + CREATE DATABASE rdb + ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1') + """ ) - node.query("CREATE TABLE rdb.t (x UInt32) ENGINE = MergeTree ORDER BY x;") - node.exec_in_container(["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"]) + + node.query( + """ + CREATE TABLE rdb.t + ( + `x` UInt32 + ) + ENGINE = MergeTree + ORDER BY x + """ + ) + + node.exec_in_container( + ["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"] + ) + node.restart_clickhouse() - assert node.query("SELECT any(recovery_time) FROM system.clusters;") != "0\n" + + ret = int( + node.query( + """ + SELECT recovery_time + FROM system.clusters + WHERE cluster = 'rdb' + """ + ).strip() + ) + assert ret > 0 From 8d2b804c670d0941acc4fff059859017c1bd93c2 Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Tue, 30 Jul 2024 21:57:08 +0100 Subject: [PATCH 12/15] fxs --- tests/integration/test_recovery_time_metric/test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py index 8f369d7759c..628f2e744e0 100644 --- a/tests/integration/test_recovery_time_metric/test.py +++ b/tests/integration/test_recovery_time_metric/test.py @@ -37,9 +37,7 @@ def test_recovery_time_metric(start_cluster): """ ) - node.exec_in_container( - ["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"] - ) + node.exec_in_container(["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"]) node.restart_clickhouse() From e664a144788b48c029f56548242baaeed82a80ff Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Wed, 31 Jul 2024 08:49:14 +0100 Subject: [PATCH 13/15] fix style --- src/Databases/DatabaseReplicated.cpp | 4 ++-- src/Storages/System/StorageSystemClusters.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 06cea65d62e..b2be593d326 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -356,7 +356,7 @@ ReplicasInfo DatabaseReplicated::tryGetReplicasInfo(const ClusterPtr & cluster_) paths_get.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "log_ptr"); } } - + try { auto current_zookeeper = getZooKeeper(); @@ -396,7 +396,7 @@ ReplicasInfo DatabaseReplicated::tryGetReplicasInfo(const ClusterPtr & cluster_) ++global_replica_index; } } - + return replicas_info; } catch (...) { diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index 4b9802c9089..d03b600b6ef 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -116,7 +116,7 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const Nam else res_columns[i++]->insertDefault(); } - + ++global_replica_idx; } } From 27f4f468b976e445e8b0dbc198ea9f0a9c62855b Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Wed, 31 Jul 2024 14:55:00 +0100 Subject: [PATCH 14/15] make it possible to rerun test_recovery_time_metric multiple times --- tests/integration/test_recovery_time_metric/test.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/integration/test_recovery_time_metric/test.py b/tests/integration/test_recovery_time_metric/test.py index 628f2e744e0..6fcf2fad423 100644 --- a/tests/integration/test_recovery_time_metric/test.py +++ b/tests/integration/test_recovery_time_metric/test.py @@ -21,6 +21,7 @@ def start_cluster(): def test_recovery_time_metric(start_cluster): node.query( """ + DROP DATABASE IF EXISTS rdb; CREATE DATABASE rdb ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1') """ @@ -28,6 +29,7 @@ def test_recovery_time_metric(start_cluster): node.query( """ + DROP TABLE IF EXISTS rdb.t; CREATE TABLE rdb.t ( `x` UInt32 @@ -51,3 +53,9 @@ def test_recovery_time_metric(start_cluster): ).strip() ) assert ret > 0 + + node.query( + """ + DROP DATABASE rdb + """ + ) From 01ca36cb5a157ab961dbd4460acc7e2ebb37e72a Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Fri, 2 Aug 2024 09:37:47 +0100 Subject: [PATCH 15/15] empty