From ba4f5f9b3fe74d44295633995033c9d484b8a9c6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 6 May 2024 19:46:57 +0200 Subject: [PATCH] Fix backup of parts with projections but without projections in metadata --- src/Storages/MergeTree/MergeTreeData.cpp | 42 +++++++++++++--- ...145_non_loaded_projection_backup.reference | 7 +++ .../03145_non_loaded_projection_backup.sh | 49 +++++++++++++++++++ 3 files changed, 92 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/03145_non_loaded_projection_backup.reference create mode 100755 tests/queries/0_stateless/03145_non_loaded_projection_backup.sh diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9350b24c96a..fbe79a32b8e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5361,20 +5361,50 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( &temp_dirs, false, false); - auto projection_parts = part->getProjectionParts(); - for (const auto & [projection_name, projection_part] : projection_parts) + auto backup_projection = [&](IDataPartStorage & storage, IMergeTreeDataPart & projection_part) { - projection_part->getDataPartStorage().backup( - projection_part->checksums, - projection_part->getFileNamesWithoutChecksums(), + storage.backup( + projection_part.checksums, + projection_part.getFileNamesWithoutChecksums(), fs::path{data_path_in_backup} / part->name, backup_settings, read_settings, make_temporary_hard_links, backup_entries_from_part, &temp_dirs, - projection_part->is_broken, + projection_part.is_broken, backup_settings.allow_backup_broken_projections); + }; + + auto projection_parts = part->getProjectionParts(); + std::string proj_suffix = ".proj"; + std::unordered_set<std::string> defined_projections; + + for (const auto & [projection_name, projection_part] : projection_parts) + { + defined_projections.emplace(projection_name); + backup_projection(projection_part->getDataPartStorage(), 
*projection_part); + } + + /// It is possible that the part has a written but not loaded projection, + /// e.g. it is written to parent part's checksums.txt and exists on disk, + /// but does not exist in table's projections definition. + /// Such a part can appear if the server was restarted after DROP PROJECTION but before the old part was removed. + /// In this case, the old part will load only projections from metadata. + /// See 03145_non_loaded_projection_backup.sh. + for (const auto & [name, _] : part->checksums.files) + { + auto projection_name = fs::path(name).stem().string(); + if (endsWith(name, proj_suffix) && !defined_projections.contains(projection_name)) + { + auto projection_storage = part->getDataPartStorage().getProjection(projection_name + proj_suffix); + if (projection_storage->exists("checksums.txt")) + { + auto projection_part = const_cast<IMergeTreeDataPart &>(*part).getProjectionPartBuilder( + projection_name, /* is_temp_projection */false).withPartFormatFromDisk().build(); + backup_projection(projection_part->getDataPartStorage(), *projection_part); + } + } } if (hold_storage_and_part_ptrs) diff --git a/tests/queries/0_stateless/03145_non_loaded_projection_backup.reference b/tests/queries/0_stateless/03145_non_loaded_projection_backup.reference new file mode 100644 index 00000000000..a11ee210e62 --- /dev/null +++ b/tests/queries/0_stateless/03145_non_loaded_projection_backup.reference @@ -0,0 +1,7 @@ +7 +Found unexpected projection directories: pp.proj +BACKUP_CREATED +RESTORED +7 +Found unexpected projection directories: pp.proj +0 diff --git a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh new file mode 100755 index 00000000000..721ed784fc2 --- /dev/null +++ b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nm -q " +drop table if exists tp_1; +create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100); +insert into tp_1 select number, number from numbers(3); + +set mutations_sync = 2; + +alter table tp_1 add projection pp (select x, count() group by x); +insert into tp_1 select number, number from numbers(4); +select count() from tp_1; + +-- Here we have a part with written projection pp +alter table tp_1 detach partition '0'; +-- Move part to detached +alter table tp_1 clear projection pp; +-- Remove projection from table metadata +alter table tp_1 drop projection pp; +-- Now, we don't load projection pp for attached part, but it is written on disk +alter table tp_1 attach partition '0'; +" + +$CLICKHOUSE_CLIENT -nm -q " +set send_logs_level='fatal'; +check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj" + +backup_id="$CLICKHOUSE_TEST_UNIQUE_NAME" +$CLICKHOUSE_CLIENT -q " +backup table tp_1 to Disk('backups', '$backup_id'); +" | grep -o "BACKUP_CREATED" + +$CLICKHOUSE_CLIENT -nm -q " +drop table tp_1; +restore table tp_1 from Disk('backups', '$backup_id'); +" | grep -o "RESTORED" + +$CLICKHOUSE_CLIENT -q "select count() from tp_1;" +$CLICKHOUSE_CLIENT -nm -q " +set send_logs_level='fatal'; +check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj" +$CLICKHOUSE_CLIENT -nm -q " +set send_logs_level='fatal'; +check table tp_1" +$CLICKHOUSE_CLIENT -q "drop table tp_1 sync"