mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-12 09:22:05 +00:00
improve replica recovery
This commit is contained in:
parent
766107df0a
commit
bb78bf1c70
@ -2407,6 +2407,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo
|
|||||||
std::vector<QueueEntryInfo> source_queue;
|
std::vector<QueueEntryInfo> source_queue;
|
||||||
ActiveDataPartSet get_part_set{format_version};
|
ActiveDataPartSet get_part_set{format_version};
|
||||||
ActiveDataPartSet drop_range_set{format_version};
|
ActiveDataPartSet drop_range_set{format_version};
|
||||||
|
std::unordered_set<String> exact_part_names;
|
||||||
|
|
||||||
{
|
{
|
||||||
std::vector<zkutil::ZooKeeper::FutureGet> queue_get_futures;
|
std::vector<zkutil::ZooKeeper::FutureGet> queue_get_futures;
|
||||||
@ -2444,14 +2445,22 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo
|
|||||||
info.parsed_entry->znode_name = source_queue_names[i];
|
info.parsed_entry->znode_name = source_queue_names[i];
|
||||||
|
|
||||||
if (info.parsed_entry->type == LogEntry::DROP_RANGE)
|
if (info.parsed_entry->type == LogEntry::DROP_RANGE)
|
||||||
|
{
|
||||||
drop_range_set.add(info.parsed_entry->new_part_name);
|
drop_range_set.add(info.parsed_entry->new_part_name);
|
||||||
|
}
|
||||||
if (info.parsed_entry->type == LogEntry::GET_PART)
|
else if (info.parsed_entry->type == LogEntry::GET_PART)
|
||||||
{
|
{
|
||||||
String maybe_covering_drop_range = drop_range_set.getContainingPart(info.parsed_entry->new_part_name);
|
String maybe_covering_drop_range = drop_range_set.getContainingPart(info.parsed_entry->new_part_name);
|
||||||
if (maybe_covering_drop_range.empty())
|
if (maybe_covering_drop_range.empty())
|
||||||
get_part_set.add(info.parsed_entry->new_part_name);
|
get_part_set.add(info.parsed_entry->new_part_name);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/// We should keep local parts if they are present in the queue of the source replica.
|
||||||
|
/// There's a chance that we are the only replica that has these parts.
|
||||||
|
Strings entry_virtual_parts = info.parsed_entry->getVirtualPartNames(format_version);
|
||||||
|
std::move(entry_virtual_parts.begin(), entry_virtual_parts.end(), std::inserter(exact_part_names, exact_part_names.end()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2471,12 +2480,18 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo
|
|||||||
|
|
||||||
for (const auto & part : local_parts_in_zk)
|
for (const auto & part : local_parts_in_zk)
|
||||||
{
|
{
|
||||||
if (get_part_set.getContainingPart(part).empty())
|
/// We look for exact match (and not for any covering part)
|
||||||
{
|
/// because our part might be dropped and covering part might be merged through gap.
|
||||||
|
/// (avoid resurrection of data that was removed a long time ago)
|
||||||
|
if (get_part_set.getContainingPart(part) == part)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (exact_part_names.contains(part))
|
||||||
|
continue;
|
||||||
|
|
||||||
parts_to_remove_from_zk.emplace_back(part);
|
parts_to_remove_from_zk.emplace_back(part);
|
||||||
LOG_WARNING(log, "Source replica does not have part {}. Removing it from ZooKeeper.", part);
|
LOG_WARNING(log, "Source replica does not have part {}. Removing it from ZooKeeper.", part);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
{
|
{
|
||||||
/// Check "is_lost" version after retrieving queue and parts.
|
/// Check "is_lost" version after retrieving queue and parts.
|
||||||
@ -2497,12 +2512,15 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo
|
|||||||
|
|
||||||
for (const auto & part : local_active_parts)
|
for (const auto & part : local_active_parts)
|
||||||
{
|
{
|
||||||
if (get_part_set.getContainingPart(part->name).empty())
|
if (get_part_set.getContainingPart(part->name) == part->name)
|
||||||
{
|
continue;
|
||||||
|
|
||||||
|
if (exact_part_names.contains(part->name))
|
||||||
|
continue;
|
||||||
|
|
||||||
parts_to_remove_from_working_set.emplace_back(part);
|
parts_to_remove_from_working_set.emplace_back(part);
|
||||||
LOG_WARNING(log, "Source replica does not have part {}. Removing it from working set.", part->name);
|
LOG_WARNING(log, "Source replica does not have part {}. Removing it from working set.", part->name);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (getSettings()->detach_old_local_parts_when_cloning_replica)
|
if (getSettings()->detach_old_local_parts_when_cloning_replica)
|
||||||
{
|
{
|
||||||
|
@ -0,0 +1,13 @@
|
|||||||
|
1 [2,3,4,5]
|
||||||
|
2 [1,2,3,4,5]
|
||||||
|
3 [1,2,3,4,5]
|
||||||
|
4 [3,4,5]
|
||||||
|
5 [1,2,3,4,5]
|
||||||
|
6 [1,2,3,4,5]
|
||||||
|
7 [1,2,3,4,5,20,30,40,50]
|
||||||
|
8 [1,2,3,4,5,10,20,30,40,50]
|
||||||
|
9 [1,2,3,4,5,10,20,30,40,50]
|
||||||
|
['all_18_23_1','all_7_17_2_13']
|
||||||
|
10 [1,2,3,4,5,10,20,30,40,50]
|
||||||
|
11 [1,2,3,4,5,10,20,30,40,50,100,300,400,500,600]
|
||||||
|
12 [1,2,3,4,5,10,20,30,40,50,100,300,400,500,600]
|
147
tests/queries/0_stateless/02448_clone_replica_lost_part.sql
Normal file
147
tests/queries/0_stateless/02448_clone_replica_lost_part.sql
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
|
||||||
|
drop table if exists rmt1;
|
||||||
|
drop table if exists rmt2;
|
||||||
|
create table rmt1 (n int) engine=ReplicatedMergeTree('/test/02448/{database}/rmt', '1') order by tuple()
|
||||||
|
settings min_replicated_logs_to_keep=1, max_replicated_logs_to_keep=2, cleanup_delay_period=0, cleanup_delay_period_random_add=1, old_parts_lifetime=0, max_parts_to_merge_at_once=5;
|
||||||
|
create table rmt2 (n int) engine=ReplicatedMergeTree('/test/02448/{database}/rmt', '2') order by tuple()
|
||||||
|
settings min_replicated_logs_to_keep=1, max_replicated_logs_to_keep=2, cleanup_delay_period=0, cleanup_delay_period_random_add=1, old_parts_lifetime=0, max_parts_to_merge_at_once=5;
|
||||||
|
|
||||||
|
-- insert part only on one replica
|
||||||
|
system stop replicated sends rmt1;
|
||||||
|
insert into rmt1 values (1);
|
||||||
|
detach table rmt1; -- make replica inactive
|
||||||
|
system start replicated sends rmt1;
|
||||||
|
|
||||||
|
-- trigger log rotation, rmt1 will be lost
|
||||||
|
insert into rmt2 values (2);
|
||||||
|
insert into rmt2 values (3);
|
||||||
|
insert into rmt2 values (4);
|
||||||
|
insert into rmt2 values (5);
|
||||||
|
-- check that entry was not removed from the queue (part is not lost)
|
||||||
|
set receive_timeout=5;
|
||||||
|
system sync replica rmt2; -- {serverError TIMEOUT_EXCEEDED}
|
||||||
|
set receive_timeout=300;
|
||||||
|
|
||||||
|
select 1, arraySort(groupArray(n)) from rmt2;
|
||||||
|
|
||||||
|
-- rmt1 will mimic rmt2
|
||||||
|
attach table rmt1;
|
||||||
|
system sync replica rmt1;
|
||||||
|
system sync replica rmt2;
|
||||||
|
|
||||||
|
-- check that no parts are lost
|
||||||
|
select 2, arraySort(groupArray(n)) from rmt1;
|
||||||
|
select 3, arraySort(groupArray(n)) from rmt2;
|
||||||
|
|
||||||
|
|
||||||
|
truncate table rmt1;
|
||||||
|
truncate table rmt2;
|
||||||
|
|
||||||
|
|
||||||
|
-- insert parts only on one replica and merge them
|
||||||
|
system stop replicated sends rmt2;
|
||||||
|
insert into rmt2 values (1);
|
||||||
|
insert into rmt2 values (2);
|
||||||
|
system sync replica rmt2;
|
||||||
|
optimize table rmt2 final;
|
||||||
|
system sync replica rmt2;
|
||||||
|
-- give it a chance to remove source parts
|
||||||
|
select sleep(2) format Null; -- increases probability of reproducing the issue
|
||||||
|
detach table rmt2;
|
||||||
|
system start replicated sends rmt2;
|
||||||
|
|
||||||
|
|
||||||
|
-- trigger log rotation, rmt2 will be lost
|
||||||
|
insert into rmt1 values (3);
|
||||||
|
insert into rmt1 values (4);
|
||||||
|
insert into rmt1 values (5);
|
||||||
|
set receive_timeout=5;
|
||||||
|
-- check that entry was not removed from the queue (part is not lost)
|
||||||
|
system sync replica rmt1; -- {serverError TIMEOUT_EXCEEDED}
|
||||||
|
set receive_timeout=300;
|
||||||
|
|
||||||
|
select 4, arraySort(groupArray(n)) from rmt1;
|
||||||
|
|
||||||
|
-- rmt2 will mimic rmt1
|
||||||
|
system stop fetches rmt1;
|
||||||
|
attach table rmt2;
|
||||||
|
system sync replica rmt2;
|
||||||
|
-- give rmt2 a chance to remove merged part (but it should not do it)
|
||||||
|
select sleep(2) format Null; -- increases probability of reproducing the issue
|
||||||
|
system start fetches rmt1;
|
||||||
|
system sync replica rmt1;
|
||||||
|
|
||||||
|
-- check that no parts are lost
|
||||||
|
select 5, arraySort(groupArray(n)) from rmt1;
|
||||||
|
select 6, arraySort(groupArray(n)) from rmt2;
|
||||||
|
|
||||||
|
|
||||||
|
-- insert part only on one replica
|
||||||
|
system stop replicated sends rmt1;
|
||||||
|
insert into rmt1 values (123);
|
||||||
|
alter table rmt1 update n=10 where n=123 settings mutations_sync=1;
|
||||||
|
-- give it a chance to remove source part
|
||||||
|
select sleep(2) format Null; -- increases probability of reproducing the issue
|
||||||
|
detach table rmt1; -- make replica inactive
|
||||||
|
system start replicated sends rmt1;
|
||||||
|
|
||||||
|
-- trigger log rotation, rmt1 will be lost
|
||||||
|
insert into rmt2 values (20);
|
||||||
|
insert into rmt2 values (30);
|
||||||
|
insert into rmt2 values (40);
|
||||||
|
insert into rmt2 values (50);
|
||||||
|
-- check that entry was not removed from the queue (part is not lost)
|
||||||
|
set receive_timeout=5;
|
||||||
|
system sync replica rmt2; -- {serverError TIMEOUT_EXCEEDED}
|
||||||
|
set receive_timeout=300;
|
||||||
|
|
||||||
|
select 7, arraySort(groupArray(n)) from rmt2;
|
||||||
|
|
||||||
|
-- rmt1 will mimic rmt2
|
||||||
|
system stop fetches rmt2;
|
||||||
|
attach table rmt1;
|
||||||
|
system sync replica rmt1;
|
||||||
|
-- give rmt1 a chance to remove mutated part (but it should not do it)
|
||||||
|
select sleep(2) format Null; -- increases probability of reproducing the issue
|
||||||
|
system start fetches rmt2;
|
||||||
|
system sync replica rmt2;
|
||||||
|
|
||||||
|
-- check that no parts are lost
|
||||||
|
select 8, arraySort(groupArray(n)) from rmt1;
|
||||||
|
select 9, arraySort(groupArray(n)) from rmt2;
|
||||||
|
|
||||||
|
-- avoid arbitrary merges after inserting
|
||||||
|
optimize table rmt2 final;
|
||||||
|
-- insert parts (all_18_18_0, all_19_19_0) on both replicas (will be deduplicated, but it does not matter)
|
||||||
|
insert into rmt1 values (100);
|
||||||
|
insert into rmt2 values (100);
|
||||||
|
insert into rmt1 values (200);
|
||||||
|
insert into rmt2 values (200);
|
||||||
|
detach table rmt1;
|
||||||
|
|
||||||
|
-- create a gap in block numbers by dropping part
|
||||||
|
insert into rmt2 values (300);
|
||||||
|
alter table rmt2 drop part 'all_19_19_0';
|
||||||
|
insert into rmt2 values (400);
|
||||||
|
insert into rmt2 values (500);
|
||||||
|
insert into rmt2 values (600);
|
||||||
|
system sync replica rmt2;
|
||||||
|
-- merge through gap
|
||||||
|
optimize table rmt2;
|
||||||
|
select arraySort(groupArrayDistinct(_part)) from rmt2;
|
||||||
|
-- give it a chance to cleanup log
|
||||||
|
select sleep(2) format Null; -- increases probability of reproducing the issue
|
||||||
|
|
||||||
|
-- rmt1 will mimic rmt2, but will not be able to fetch parts for a while
|
||||||
|
system stop replicated sends rmt2;
|
||||||
|
attach table rmt1;
|
||||||
|
-- rmt1 should not show the value (100) from dropped part
|
||||||
|
select 10, arraySort(groupArray(n)) from rmt1;
|
||||||
|
select 11, arraySort(groupArray(n)) from rmt2;
|
||||||
|
|
||||||
|
system start replicated sends rmt2;
|
||||||
|
system sync replica rmt1;
|
||||||
|
select 12, arraySort(groupArray(n)) from rmt1;
|
||||||
|
|
||||||
|
drop table rmt1;
|
||||||
|
drop table rmt2;
|
Loading…
Reference in New Issue
Block a user