diff --git a/docs/en/operations/system-tables/replicas.md b/docs/en/operations/system-tables/replicas.md
index e711d9a7784..15426eefbcc 100644
--- a/docs/en/operations/system-tables/replicas.md
+++ b/docs/en/operations/system-tables/replicas.md
@@ -50,6 +50,7 @@ last_queue_update: 2021-10-12 14:50:08
 absolute_delay:             99
 total_replicas:             5
 active_replicas:            5
+lost_part_count:            0
 last_queue_update_exception:
 zookeeper_exception:
 replica_is_active:          {'r1':1,'r2':1}
@@ -90,6 +91,7 @@ The next 4 columns have a non-zero value only where there is an active session w
 - `absolute_delay` (`UInt64`) - How big lag in seconds the current replica has.
 - `total_replicas` (`UInt8`) - The total number of known replicas of this table.
 - `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas).
+- `lost_part_count` (`UInt64`) - The number of data parts lost in the table by all replicas in total since table creation. Value is persisted in ClickHouse Keeper and can only increase.
 - `last_queue_update_exception` (`String`) - When the queue contains broken entries. Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions.
 - `zookeeper_exception` (`String`) - The last exception message, got if the error happened when fetching the info from ClickHouse Keeper.
 - `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map between replica name and is replica active.
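A minimal monitoring query built on the new column might look like the sketch below; only the column added by this patch is assumed, and the filter and ordering are illustrative.

```sql
-- List replicated tables that have ever lost a part (illustrative query).
SELECT database, table, lost_part_count
FROM system.replicas
WHERE lost_part_count > 0
ORDER BY lost_part_count DESC;
```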
diff --git a/src/Storages/MergeTree/ReplicatedTableStatus.h b/src/Storages/MergeTree/ReplicatedTableStatus.h
index b9f84091e9b..46e971f562a 100644
--- a/src/Storages/MergeTree/ReplicatedTableStatus.h
+++ b/src/Storages/MergeTree/ReplicatedTableStatus.h
@@ -25,6 +25,7 @@ struct ReplicatedTableStatus
     UInt64 absolute_delay;
     UInt8 total_replicas;
     UInt8 active_replicas;
+    UInt64 lost_part_count;
     String last_queue_update_exception;
     /// If the error has happened fetching the info from ZooKeeper, this field will be set.
     String zookeeper_exception;
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index e538b4fbe6c..5e99426ba7b 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -652,6 +652,8 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes()
     futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/alter_partition_version", String(), zkutil::CreateMode::Persistent));
     /// For deduplication of async inserts
     futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/async_blocks", String(), zkutil::CreateMode::Persistent));
+    /// To track "lost forever" parts count, just for `system.replicas` table
+    futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/lost_part_count", String(), zkutil::CreateMode::Persistent));
 
     /// As for now, "/temp" node must exist, but we want to be able to remove it in future
     if (zookeeper->exists(zookeeper_path + "/temp"))
@@ -5968,6 +5970,7 @@ void StorageReplicatedMergeTree::getStatus(ReplicatedTableStatus & res, bool wit
     res.log_pointer = 0;
     res.total_replicas = 0;
     res.active_replicas = 0;
+    res.lost_part_count = 0;
     res.last_queue_update_exception = getLastQueueUpdateException();
 
     if (with_zk_fields && !res.is_session_expired)
@@ -5984,6 +5987,7 @@ void StorageReplicatedMergeTree::getStatus(ReplicatedTableStatus & res, bool wit
 
         paths.clear();
         paths.push_back(fs::path(replica_path) / "log_pointer");
+        paths.push_back(fs::path(zookeeper_path) / "lost_part_count");
 
         for (const String & replica : all_replicas)
             paths.push_back(fs::path(zookeeper_path) / "replicas" / replica / "is_active");
@@ -6001,10 +6005,14 @@ void StorageReplicatedMergeTree::getStatus(ReplicatedTableStatus & res, bool wit
 
         res.log_pointer = log_pointer_str.empty() ? 0 : parse<UInt64>(log_pointer_str);
         res.total_replicas = all_replicas.size();
 
+        if (get_result[1].error == Coordination::Error::ZNONODE)
+            res.lost_part_count = 0;
+        else
+            res.lost_part_count = get_result[1].data.empty() ? 0 : parse<UInt64>(get_result[1].data);
+
         for (size_t i = 0, size = all_replicas.size(); i < size; ++i)
         {
-            bool is_replica_active = get_result[i + 1].error != Coordination::Error::ZNONODE;
+            bool is_replica_active = get_result[i + 2].error != Coordination::Error::ZNONODE;
             res.active_replicas += static_cast<UInt8>(is_replica_active);
             res.replica_is_active.emplace(all_replicas[i], is_replica_active);
         }
@@ -8869,6 +8877,20 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP
 
         getCommitPartOps(ops, new_data_part);
 
+        /// Increment lost_part_count
+        auto lost_part_count_path = fs::path(zookeeper_path) / "lost_part_count";
+        Coordination::Stat lost_part_count_stat;
+        String lost_part_count_str;
+        if (zookeeper->tryGet(lost_part_count_path, lost_part_count_str, &lost_part_count_stat))
+        {
+            UInt64 lost_part_count = lost_part_count_str.empty() ? 0 : parse<UInt64>(lost_part_count_str);
+            ops.emplace_back(zkutil::makeSetRequest(lost_part_count_path, toString(lost_part_count + 1), lost_part_count_stat.version));
+        }
+        else
+        {
+            ops.emplace_back(zkutil::makeCreateRequest(lost_part_count_path, "1", zkutil::CreateMode::Persistent));
+        }
+
         Coordination::Responses responses;
         if (auto code = zookeeper->tryMulti(ops, responses); code == Coordination::Error::ZOK)
         {
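Since the counter is stored as a plain Keeper node under the table's `zookeeper_path`, it can also be inspected directly through `system.zookeeper`. A sketch, assuming a made-up path of `/clickhouse/tables/01/visits`:

```sql
-- Read the raw Keeper node backing the column.
-- The path is hypothetical; substitute your table's actual zookeeper_path.
SELECT name, value
FROM system.zookeeper
WHERE path = '/clickhouse/tables/01/visits'
  AND name = 'lost_part_count';
```

Note that the increment above is added to the same multi-op that commits the empty part, using the version obtained from `tryGet`, so a concurrent increment fails the whole `tryMulti` rather than silently losing an update.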
diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp
index 8832d9d4983..f1e807a3313 100644
--- a/src/Storages/System/StorageSystemReplicas.cpp
+++ b/src/Storages/System/StorageSystemReplicas.cpp
@@ -60,6 +60,7 @@ StorageSystemReplicas::StorageSystemReplicas(const StorageID & table_id_)
     { "absolute_delay", std::make_shared<DataTypeUInt64>() },
     { "total_replicas", std::make_shared<DataTypeUInt8>() },
     { "active_replicas", std::make_shared<DataTypeUInt8>() },
+    { "lost_part_count", std::make_shared<DataTypeUInt64>() },
     { "last_queue_update_exception", std::make_shared<DataTypeString>() },
     { "zookeeper_exception", std::make_shared<DataTypeString>() },
     { "replica_is_active", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt8>()) }
@@ -113,6 +114,7 @@ Pipe StorageSystemReplicas::read(
             || column_name == "log_pointer"
             || column_name == "total_replicas"
             || column_name == "active_replicas"
+            || column_name == "lost_part_count"
             || column_name == "zookeeper_exception"
             || column_name == "replica_is_active")
         {
@@ -213,6 +215,7 @@ Pipe StorageSystemReplicas::read(
         res_columns[col_num++]->insert(status.absolute_delay);
         res_columns[col_num++]->insert(status.total_replicas);
         res_columns[col_num++]->insert(status.active_replicas);
+        res_columns[col_num++]->insert(status.lost_part_count);
         res_columns[col_num++]->insert(status.last_queue_update_exception);
         res_columns[col_num++]->insert(status.zookeeper_exception);
diff --git a/tests/queries/0_stateless/01165_lost_part_empty_partition.reference b/tests/queries/0_stateless/01165_lost_part_empty_partition.reference
index e69de29bb2d..6ed281c757a 100644
--- a/tests/queries/0_stateless/01165_lost_part_empty_partition.reference
+++ b/tests/queries/0_stateless/01165_lost_part_empty_partition.reference
@@ -0,0 +1,2 @@
+1
+1
diff --git a/tests/queries/0_stateless/01165_lost_part_empty_partition.sql b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql
index dc41b15118f..924798b0050 100644
--- a/tests/queries/0_stateless/01165_lost_part_empty_partition.sql
+++ b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql
@@ -10,6 +10,7 @@ insert into rmt1 values (now(), rand());
 drop table rmt1;
 
 system sync replica rmt2;
+select lost_part_count from system.replicas where database = currentDatabase() and table = 'rmt2';
 drop table rmt2;
 
@@ -21,6 +22,7 @@ insert into rmt1 values (now(), rand());
 drop table rmt1;
 
 system sync replica rmt2;
+select lost_part_count from system.replicas where database = currentDatabase() and table = 'rmt2';
 drop table rmt2;
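The scenario the updated test exercises condenses to roughly the following sketch. The schema, replica names, and ZooKeeper path are made up, and the stop-sends step is one way to ensure the part never reaches the second replica before the first one is dropped:

```sql
-- Illustrative reproduction; table schema and names are made up.
create table rmt1 (d DateTime, x UInt32)
    engine = ReplicatedMergeTree('/clickhouse/test/lost_part/rmt', 'r1') order by d;
create table rmt2 (d DateTime, x UInt32)
    engine = ReplicatedMergeTree('/clickhouse/test/lost_part/rmt', 'r2') order by d;

-- Keep the inserted part on rmt1 only, then drop rmt1: the part is lost forever.
system stop replicated sends rmt1;
insert into rmt1 values (now(), rand());
drop table rmt1;

-- rmt2 covers the lost part with an empty one and increments the counter.
system sync replica rmt2;
select lost_part_count from system.replicas
where database = currentDatabase() and table = 'rmt2';  -- expected: 1
```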
diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference
index bc7d3416065..e3a2ac07c66 100644
--- a/tests/queries/0_stateless/02117_show_create_table_system.reference
+++ b/tests/queries/0_stateless/02117_show_create_table_system.reference
@@ -867,6 +867,7 @@ CREATE TABLE system.replicas
     `absolute_delay` UInt64,
     `total_replicas` UInt8,
     `active_replicas` UInt8,
+    `lost_part_count` UInt64,
     `last_queue_update_exception` String,
     `zookeeper_exception` String,
     `replica_is_active` Map(String, UInt8)
diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference
index 60d3c78d740..53b44764d5c 100644
--- a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference
+++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference
@@ -36,6 +36,8 @@ log
 log
 log
 log_pointer
 log_pointer
+lost_part_count
+lost_part_count
 max_processed_insert_time
 max_processed_insert_time
 metadata
diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference
index c59be6a3af5..ccc3064ccbd 100644
--- a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference
+++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference
@@ -17,6 +17,7 @@ leader_election
 leader_election-0
 log
 log_pointer
+lost_part_count
 max_processed_insert_time
 metadata
 metadata
@@ -58,6 +59,7 @@ leader_election
 leader_election-0
 log
 log_pointer
+lost_part_count
 max_processed_insert_time
 metadata
 metadata
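The reference-file updates above can also be cross-checked interactively; both statements below are standard ClickHouse introspection, shown here as a quick sanity check rather than part of the patch:

```sql
-- Confirm the new column appears in the table definition.
SHOW CREATE TABLE system.replicas;

-- Or check its declared type directly.
SELECT name, type
FROM system.columns
WHERE database = 'system' AND table = 'replicas' AND name = 'lost_part_count';
```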