From 631be9219bf67444411aceda7948c8a1336184fe Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 12 Jul 2022 08:28:22 +0000 Subject: [PATCH 1/5] Properly remove projection from part in case it was removed from table metadata. --- .../MergeTree/DataPartStorageOnDisk.cpp | 41 +++++++++++++++++-- ...clear_projection_and_part_remove.reference | 0 ...01701_clear_projection_and_part_remove.sql | 19 +++++++++ 3 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01701_clear_projection_and_part_remove.reference create mode 100644 tests/queries/0_stateless/01701_clear_projection_and_part_remove.sql diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index 2cc28d020bb..91d24057f72 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -268,9 +268,10 @@ void DataPartStorageOnDisk::remove( // Record existing projection directories so we don't remove them twice std::unordered_set projection_directories; + std::string proj_suffix = ".proj"; for (const auto & projection : projections) { - std::string proj_dir_name = projection.name + ".proj"; + std::string proj_dir_name = projection.name + proj_suffix; projection_directories.emplace(proj_dir_name); clearDirectory( @@ -278,6 +279,40 @@ void DataPartStorageOnDisk::remove( can_remove_shared_data, names_not_to_remove, projection.checksums, {}, log, true); } + /// It is possible that we are removing the part which have a written but not loaded projection. + /// Such a part can appear server was restarted after DROP PROJECTION but before old part was removed. + /// In this case, the old part will load only projections from metadata. + /// See test 01701_clear_projection_and_part. + for (const auto & [name, _] : checksums.files) + { + if (endsWith(name, proj_suffix) && !projection_directories.contains(name) && disk->isDirectory(fs::path(to) / name)) + { + + /// If we have a directory with suffix '.proj' it is likely a projection. + /// Try to load checksums for it (to avoid recusrive removing fallback). + std::string checksum_path = fs::path(to) / name / "checksums.txt"; + if (disk->exists(checksum_path)) + { + try + { + MergeTreeDataPartChecksums tmp_checksums; + auto in = disk->readFile(checksum_path, {}); + tmp_checksums.read(*in); + + projection_directories.emplace(name); + + clearDirectory( + fs::path(to) / name, + can_remove_shared_data, names_not_to_remove, tmp_checksums, {}, log, true); + } + catch (...) + { + LOG_ERROR(log, "Cannot load checksums from {}", checksum_path); + } + } + } + } + clearDirectory(to, can_remove_shared_data, names_not_to_remove, checksums, projection_directories, log, false); } @@ -343,8 +378,8 @@ void DataPartStorageOnDisk::clearDirectory( /// Recursive directory removal does many excessive "stat" syscalls under the hood. LOG_ERROR(log, "Cannot quickly remove directory {} by removing files; fallback to recursive removal. Reason: {}", fullPath(disk, dir), getCurrentExceptionMessage(false)); - - disk->removeSharedRecursive(fs::path(dir) / "", !can_remove_shared_data, names_not_to_remove); + throw; + //disk->removeSharedRecursive(fs::path(dir) / "", !can_remove_shared_data, names_not_to_remove); } } diff --git a/tests/queries/0_stateless/01701_clear_projection_and_part_remove.reference b/tests/queries/0_stateless/01701_clear_projection_and_part_remove.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01701_clear_projection_and_part_remove.sql b/tests/queries/0_stateless/01701_clear_projection_and_part_remove.sql new file mode 100644 index 00000000000..e6f2142cac5 --- /dev/null +++ b/tests/queries/0_stateless/01701_clear_projection_and_part_remove.sql @@ -0,0 +1,19 @@ +drop table if exists tp_1; +-- In this test, we are going to create an old part with written projection which does not exist in table metadata +create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100) settings old_parts_lifetime=1; +insert into tp_1 select number, number from numbers(3); +set mutations_sync = 2; +alter table tp_1 add projection pp (select x, count() group by x); +insert into tp_1 select number, number from numbers(4); +-- Here we have a part with written projection pp +alter table tp_1 detach partition '0'; +-- Move part to detached +alter table tp_1 clear projection pp; +-- Remove projection from table metadata +alter table tp_1 drop projection pp; +-- Now, we don't load projection pp for attached part, bit it is written on disk +alter table tp_1 attach partition '0'; +-- Make this part obsolete +optimize table tp_1 final; +-- Now, DROP TABLE triggers part removal +drop table tp_1; From 03d93014255a1d630d5fe6d77daa04584b031045 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 12 Jul 2022 08:31:09 +0000 Subject: [PATCH 2/5] Fix log. --- src/Storages/MergeTree/DataPartStorageOnDisk.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index 91d24057f72..5df90835ee3 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -378,8 +378,7 @@ void DataPartStorageOnDisk::clearDirectory( /// Recursive directory removal does many excessive "stat" syscalls under the hood. LOG_ERROR(log, "Cannot quickly remove directory {} by removing files; fallback to recursive removal. Reason: {}", fullPath(disk, dir), getCurrentExceptionMessage(false)); - throw; - //disk->removeSharedRecursive(fs::path(dir) / "", !can_remove_shared_data, names_not_to_remove); + disk->removeSharedRecursive(fs::path(dir) / "", !can_remove_shared_data, names_not_to_remove); } } From cd4211298e37ddd11b4c98a6c4cfae4916745343 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 12 Jul 2022 09:37:00 +0000 Subject: [PATCH 3/5] Fix typo. --- src/Storages/MergeTree/DataPartStorageOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index 5df90835ee3..5b9b3eec998 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -289,7 +289,7 @@ void DataPartStorageOnDisk::remove( { /// If we have a directory with suffix '.proj' it is likely a projection. - /// Try to load checksums for it (to avoid recusrive removing fallback). + /// Try to load checksums for it (to avoid recursive removing fallback). std::string checksum_path = fs::path(to) / name / "checksums.txt"; if (disk->exists(checksum_path)) { From 948f15eff317a0ba0462003ad3fc94422a891c44 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 12 Jul 2022 14:01:53 +0300 Subject: [PATCH 4/5] Update run.sh --- docker/test/stress/run.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index b2c6036ba31..fcf99b34064 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -352,7 +352,6 @@ else mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.clean.log # Error messages (we should ignore some errors) - # FIXME https://github.com/ClickHouse/ClickHouse/issues/38629 ("pp.proj, errno: 21") # FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.") echo "Check for Error messages in server log:" zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ @@ -376,7 +375,6 @@ else -e "and a merge is impossible: we didn't find" \ -e "found in queue and some source parts for it was lost" \ -e "is lost forever." \ - -e "pp.proj, errno: 21" \ -e "Unknown index: idx." \ /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ From 582300e08c1c67203bea13636e7bf1fafaf3e428 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 12 Jul 2022 16:22:47 +0200 Subject: [PATCH 5/5] Update 01701_clear_projection_and_part_remove.sql Fix typo --- .../0_stateless/01701_clear_projection_and_part_remove.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01701_clear_projection_and_part_remove.sql b/tests/queries/0_stateless/01701_clear_projection_and_part_remove.sql index e6f2142cac5..e6cc4cbdb10 100644 --- a/tests/queries/0_stateless/01701_clear_projection_and_part_remove.sql +++ b/tests/queries/0_stateless/01701_clear_projection_and_part_remove.sql @@ -11,7 +11,7 @@ alter table tp_1 detach partition '0'; alter table tp_1 clear projection pp; -- Remove projection from table metadata alter table tp_1 drop projection pp; --- Now, we don't load projection pp for attached part, bit it is written on disk +-- Now, we don't load projection pp for attached part, but it is written on disk alter table tp_1 attach partition '0'; -- Make this part obsolete optimize table tp_1 final;