diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 27e870bda78..6a1217299d4 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -71,7 +71,10 @@ void ReplicatedMergeTreeRestartingThread::run() bool old_val = false; if (storage.is_readonly.compare_exchange_strong(old_val, true)) + { + incr_readonly = true; CurrentMetrics::add(CurrentMetrics::ReadonlyReplica); + } partialShutdown(); } @@ -112,7 +115,10 @@ void ReplicatedMergeTreeRestartingThread::run() bool old_val = true; if (storage.is_readonly.compare_exchange_strong(old_val, false)) + { + incr_readonly = false; CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica); + } first_time = false; } @@ -349,6 +355,13 @@ void ReplicatedMergeTreeRestartingThread::shutdown() task->deactivate(); LOG_TRACE(log, "Restarting thread finished"); + // For detach table query, we should reset the ReadonlyReplica metric. + if (incr_readonly) + { + CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica); + incr_readonly = false; + } + /// Stop other tasks. partialShutdown(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index bb032d9df8c..986253a2206 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -36,6 +36,9 @@ private: Poco::Logger * log; std::atomic need_stop {false}; + // We need it besides `storage.is_readonly`, bacause `shutdown()` may be called many times, that way `storage.is_readonly` will not change. + bool incr_readonly = false; + /// The random data we wrote into `/replicas/me/is_active`. String active_node_identifier; diff --git a/tests/integration/test_system_metrics/__init__.py b/tests/integration/test_system_metrics/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_system_metrics/configs/remote_servers.xml b/tests/integration/test_system_metrics/configs/remote_servers.xml new file mode 100644 index 00000000000..a6e80ce2b08 --- /dev/null +++ b/tests/integration/test_system_metrics/configs/remote_servers.xml @@ -0,0 +1,19 @@ + + + + + true + + shard_0 + node1 + 9000 + + + shard_0 + node2 + 9000 + + + + + diff --git a/tests/integration/test_system_metrics/test.py b/tests/integration/test_system_metrics/test.py new file mode 100644 index 00000000000..60035b446fe --- /dev/null +++ b/tests/integration/test_system_metrics/test.py @@ -0,0 +1,62 @@ +import time + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry +from helpers.network import PartitionManager + + +def fill_nodes(nodes, shard): + for node in nodes: + node.query( + ''' + CREATE DATABASE test; + + CREATE TABLE test.test_table(date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test{shard}/replicated', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0; + '''.format(shard=shard, replica=node.name)) + + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) +node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + fill_nodes([node1, node2], 1) + + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + +def test_readonly_metrics(start_cluster): + assert node1.query("SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'") == "0\n" + + with PartitionManager() as pm: + ## make node1 readonly -> heal -> readonly -> heal -> detach table -> heal -> attach table + pm.drop_instance_zk_connections(node1) + time.sleep(3) + assert "1\n" == node1.query("SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'") + + pm.heal_all() + time.sleep(3) + assert "0\n" == node1.query("SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'") + + pm.drop_instance_zk_connections(node1) + time.sleep(3) + assert "1\n" == node1.query("SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'") + node1.query("DETACH TABLE test.test_table") + assert "0\n" == node1.query("SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'") + + pm.heal_all() + node1.query("ATTACH TABLE test.test_table") + time.sleep(5) + assert "0\n" == node1.query("SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'")