Merge pull request #15592 from sundy-li/readonly-metrics

Subtract the ReadonlyReplica metric when detaching read-only tables
This commit is contained in:
alexey-milovidov 2020-10-20 19:17:52 +03:00 committed by GitHub
commit 75a02112bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 96 additions and 0 deletions

View File

@ -71,7 +71,10 @@ void ReplicatedMergeTreeRestartingThread::run()
bool old_val = false;
if (storage.is_readonly.compare_exchange_strong(old_val, true))
{
incr_readonly = true;
CurrentMetrics::add(CurrentMetrics::ReadonlyReplica);
}
partialShutdown();
}
@ -112,7 +115,10 @@ void ReplicatedMergeTreeRestartingThread::run()
bool old_val = true;
if (storage.is_readonly.compare_exchange_strong(old_val, false))
{
incr_readonly = false;
CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica);
}
first_time = false;
}
@ -349,6 +355,13 @@ void ReplicatedMergeTreeRestartingThread::shutdown()
task->deactivate();
LOG_TRACE(log, "Restarting thread finished");
/// For detach table query, we should reset the ReadonlyReplica metric.
if (incr_readonly)
{
CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica);
incr_readonly = false;
}
/// Stop other tasks.
partialShutdown();
}

View File

@ -36,6 +36,9 @@ private:
Poco::Logger * log;
std::atomic<bool> need_stop {false};
// We need it in addition to `storage.is_readonly`, because `shutdown()` may be called many times, and in that case `storage.is_readonly` will not change.
bool incr_readonly = false;
/// The random data we wrote into `/replicas/me/is_active`.
String active_node_identifier;

View File

@ -0,0 +1,19 @@
<!-- Cluster definition for this test: one cluster, one shard, two replicas. -->
<yandex>
    <remote_servers>
        <test_cluster>
            <shard>
                <internal_replication>true</internal_replication>
                <!-- First replica: host node1. -->
                <replica>
                    <default_database>shard_0</default_database>
                    <host>node1</host>
                    <port>9000</port>
                </replica>
                <!-- Second replica: host node2, same database and port as the first. -->
                <replica>
                    <default_database>shard_0</default_database>
                    <host>node2</host>
                    <port>9000</port>
                </replica>
            </shard>
        </test_cluster>
    </remote_servers>
</yandex>

View File

@ -0,0 +1,61 @@
import time
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import assert_eq_with_retry
from helpers.network import PartitionManager
def fill_nodes(nodes, shard):
    """Create database `test` and the replicated table `test.test_table` on every node.

    :param nodes: iterable of instances exposing `.query(sql)` and `.name`.
    :param shard: value substituted into the ZooKeeper path of the table.

    The replica name passed to ReplicatedMergeTree is the node's own name.
    """
    # Same statement text for every node; only {shard} and {replica} vary.
    ddl_template = (
        "\n"
        "CREATE DATABASE test;\n"
        "CREATE TABLE test.test_table(date Date, id UInt32)\n"
        "ENGINE = ReplicatedMergeTree('/clickhouse/tables/test{shard}/replicated', '{replica}')"
        " ORDER BY id PARTITION BY toYYYYMM(date)"
        " SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5,"
        " cleanup_delay_period=0, cleanup_delay_period_random_add=0;\n"
    )
    for instance in nodes:
        ddl = ddl_template.format(shard=shard, replica=instance.name)
        instance.query(ddl)
# Module-wide cluster: two instances, both loading the same remote_servers
# config and both started with ZooKeeper enabled.
cluster = ClickHouseCluster(__file__)
node1, node2 = [
    cluster.add_instance(instance_name, main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
    for instance_name in ('node1', 'node2')
]
@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture that starts the cluster and prepares the test table.

    Starts `cluster`, creates the replicated table on `node1` and `node2`
    (shard 1) and yields the cluster to the tests. The cluster is shut down
    afterwards in every case, including when setup fails.

    :raises Exception: any error raised while starting the cluster or
        creating the tables is printed and then propagated.
    """
    try:
        cluster.start()
        fill_nodes([node1, node2], 1)
        yield cluster
    except Exception as ex:
        # Keep the diagnostic output, but do not swallow the error: if the
        # generator ended here without yielding, pytest would report a generic
        # fixture failure instead of the real cause.
        print(ex)
        raise
    finally:
        cluster.shutdown()
def test_readonly_metrics(start_cluster):
    """Check the `ReadonlyReplica` metric on node1 across ZooKeeper outages and a DETACH/ATTACH.

    Expected values: 0 initially, 1 after node1's ZooKeeper connections are
    dropped, 0 after healing, 1 after a second drop, 0 right after the table
    is detached, and 0 again once the table is re-attached.
    """
    metric_query = "SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'"

    def wait_for_metric(expected):
        # Poll the metric on node1 until it matches, up to 300 tries, 1 second apart.
        assert_eq_with_retry(node1, metric_query, expected, retry_count=300, sleep_time=1)

    assert node1.query(metric_query) == "0\n"

    with PartitionManager() as pm:
        # Scenario: readonly -> heal -> readonly -> detach table -> heal -> attach table
        pm.drop_instance_zk_connections(node1)
        wait_for_metric("1\n")

        pm.heal_all()
        wait_for_metric("0\n")

        pm.drop_instance_zk_connections(node1)
        wait_for_metric("1\n")

        # Detaching while still read-only must reset the metric immediately (no retry).
        node1.query("DETACH TABLE test.test_table")
        assert node1.query(metric_query) == "0\n"

        pm.heal_all()
        node1.query("ATTACH TABLE test.test_table")
        wait_for_metric("0\n")