Fix backup of parts with projections but without projections in metadata

This commit is contained in:
kssenii 2024-05-06 19:46:57 +02:00
parent a65e208892
commit ba4f5f9b3f
3 changed files with 92 additions and 6 deletions

View File

@ -5361,20 +5361,50 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
&temp_dirs,
false, false);
auto projection_parts = part->getProjectionParts();
for (const auto & [projection_name, projection_part] : projection_parts)
auto backup_projection = [&](IDataPartStorage & storage, IMergeTreeDataPart & projection_part)
{
projection_part->getDataPartStorage().backup(
projection_part->checksums,
projection_part->getFileNamesWithoutChecksums(),
storage.backup(
projection_part.checksums,
projection_part.getFileNamesWithoutChecksums(),
fs::path{data_path_in_backup} / part->name,
backup_settings,
read_settings,
make_temporary_hard_links,
backup_entries_from_part,
&temp_dirs,
projection_part->is_broken,
projection_part.is_broken,
backup_settings.allow_backup_broken_projections);
};
auto projection_parts = part->getProjectionParts();
std::string proj_suffix = ".proj";
std::unordered_set<String> defined_projections;
for (const auto & [projection_name, projection_part] : projection_parts)
{
defined_projections.emplace(projection_name);
backup_projection(projection_part->getDataPartStorage(), *projection_part);
}
/// It is possible that the part has a written but not loaded projection,
/// e.g. it is written to parent part's checksums.txt and exists on disk,
/// but does not exist in table's projections definition.
/// Such a part can appear if the server was restarted after DROP PROJECTION but before the old part was removed.
/// In this case, the old part will load only projections from metadata.
/// See 031145_non_loaded_projection_backup.sh.
for (const auto & [name, _] : part->checksums.files)
{
auto projection_name = fs::path(name).stem().string();
if (endsWith(name, proj_suffix) && !defined_projections.contains(projection_name))
{
auto projection_storage = part->getDataPartStorage().getProjection(projection_name + proj_suffix);
if (projection_storage->exists("checksums.txt"))
{
auto projection_part = const_cast<IMergeTreeDataPart &>(*part).getProjectionPartBuilder(
projection_name, /* is_temp_projection */false).withPartFormatFromDisk().build();
backup_projection(projection_part->getDataPartStorage(), *projection_part);
}
}
}
if (hold_storage_and_part_ptrs)

View File

@ -0,0 +1,7 @@
7
Found unexpected projection directories: pp.proj
BACKUP_CREATED
RESTORED
7
Found unexpected projection directories: pp.proj
0

View File

@ -0,0 +1,49 @@
#!/usr/bin/env bash
# Regression test: BACKUP/RESTORE of a table whose part still has a projection
# directory (pp.proj) on disk although projection pp is no longer present in the
# table definition. Everything printed to stdout by the commands below is the
# test output, so the statement order must not change.
# Resolve the directory containing this script so the shared config can be sourced
# regardless of the current working directory.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# Setup: build a part that contains projection pp, detach its partition, remove pp
# from the table (clear + drop) while the part is detached, then attach the
# partition back. The inline SQL comments describe each step. Prints the row count.
# NOTE(review): CLICKHOUSE_CLIENT is presumably defined by shell_config.sh - confirm.
$CLICKHOUSE_CLIENT -nm -q "
drop table if exists tp_1;
create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100);
insert into tp_1 select number, number from numbers(3);
set mutations_sync = 2;
alter table tp_1 add projection pp (select x, count() group by x);
insert into tp_1 select number, number from numbers(4);
select count() from tp_1;
-- Here we have a part with written projection pp
alter table tp_1 detach partition '0';
-- Move part to detached
alter table tp_1 clear projection pp;
-- Remove projection from table metadata
alter table tp_1 drop projection pp;
-- Now, we don't load projection pp for attached part, but it is written on disk
alter table tp_1 attach partition '0';
"
# Precondition check: run CHECK TABLE in detailed (non single-value) mode and keep
# only the message about the leftover pp.proj directory; grep -o prints just the
# matched text, so unrelated parts of the CHECK output do not reach stdout.
$CLICKHOUSE_CLIENT -nm -q "
set send_logs_level='fatal';
check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj"
# Name the backup after the per-test unique name.
# NOTE(review): CLICKHOUSE_TEST_UNIQUE_NAME is presumably exported by the test
# runner / shell_config.sh so that backups of different runs do not clash - confirm.
backup_id="$CLICKHOUSE_TEST_UNIQUE_NAME"
# Back up the table to the 'backups' disk; only the BACKUP_CREATED marker is printed.
$CLICKHOUSE_CLIENT -q "
backup table tp_1 to Disk('backups', '$backup_id');
" | grep -o "BACKUP_CREATED"
# Drop the table and restore it from the backup just taken; only the RESTORED
# marker is printed.
$CLICKHOUSE_CLIENT -nm -q "
drop table tp_1;
restore table tp_1 from Disk('backups', '$backup_id');
" | grep -o "RESTORED"
# Print the row count of the restored table (same query as in the setup step).
$CLICKHOUSE_CLIENT -q "select count() from tp_1;"
# Repeat the detailed CHECK TABLE on the restored table and again print only the
# message about the pp.proj directory.
$CLICKHOUSE_CLIENT -nm -q "
set send_logs_level='fatal';
check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj"
# Run CHECK TABLE without the settings override and print its result as-is.
# NOTE(review): presumably this is the single-value form of the check result -
# verify against the expected reference output.
$CLICKHOUSE_CLIENT -nm -q "
set send_logs_level='fatal';
check table tp_1"
# Cleanup.
$CLICKHOUSE_CLIENT -q "drop table tp_1 sync"