Merge pull request #39119 from ClickHouse/removed-projection-and-old-part

Properly remove projection from part in case it was removed from table metadata.
This commit is contained in:
Nikolai Kochetov 2022-07-12 16:24:30 +02:00 committed by GitHub
commit 8efbe6d44d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 55 additions and 4 deletions

View File

@ -352,7 +352,6 @@ else
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.clean.log
# Error messages (we should ignore some errors)
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38629 ("pp.proj, errno: 21")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
echo "Check for Error messages in server log:"
zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
@ -376,7 +375,6 @@ else
-e "and a merge is impossible: we didn't find" \
-e "found in queue and some source parts for it was lost" \
-e "is lost forever." \
-e "pp.proj, errno: 21" \
-e "Unknown index: idx." \
/var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
&& echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \

View File

@ -268,9 +268,10 @@ void DataPartStorageOnDisk::remove(
// Record existing projection directories so we don't remove them twice
std::unordered_set<String> projection_directories;
std::string proj_suffix = ".proj";
for (const auto & projection : projections)
{
std::string proj_dir_name = projection.name + ".proj";
std::string proj_dir_name = projection.name + proj_suffix;
projection_directories.emplace(proj_dir_name);
clearDirectory(
@ -278,6 +279,40 @@ void DataPartStorageOnDisk::remove(
can_remove_shared_data, names_not_to_remove, projection.checksums, {}, log, true);
}
/// It is possible that we are removing a part which has a written but not loaded projection.
/// Such a part can appear if the server was restarted after DROP PROJECTION but before the old part was removed.
/// In this case, the old part will load only projections from metadata.
/// See test 01701_clear_projection_and_part.
for (const auto & [name, _] : checksums.files)
{
if (endsWith(name, proj_suffix) && !projection_directories.contains(name) && disk->isDirectory(fs::path(to) / name))
{
/// If we have a directory with suffix '.proj' it is likely a projection.
/// Try to load checksums for it (to avoid falling back to recursive removal).
std::string checksum_path = fs::path(to) / name / "checksums.txt";
if (disk->exists(checksum_path))
{
try
{
MergeTreeDataPartChecksums tmp_checksums;
auto in = disk->readFile(checksum_path, {});
tmp_checksums.read(*in);
projection_directories.emplace(name);
clearDirectory(
fs::path(to) / name,
can_remove_shared_data, names_not_to_remove, tmp_checksums, {}, log, true);
}
catch (...)
{
LOG_ERROR(log, "Cannot load checksums from {}", checksum_path);
}
}
}
}
clearDirectory(to, can_remove_shared_data, names_not_to_remove, checksums, projection_directories, log, false);
}
@ -343,7 +378,6 @@ void DataPartStorageOnDisk::clearDirectory(
/// Recursive directory removal does many excessive "stat" syscalls under the hood.
LOG_ERROR(log, "Cannot quickly remove directory {} by removing files; fallback to recursive removal. Reason: {}", fullPath(disk, dir), getCurrentExceptionMessage(false));
disk->removeSharedRecursive(fs::path(dir) / "", !can_remove_shared_data, names_not_to_remove);
}
}

View File

@ -0,0 +1,19 @@
-- Scenario: a part that becomes obsolete and is then removed still has projection `pp` written
-- in its directory, although `pp` has already been dropped from the table metadata.
-- The statements below must run in exactly this order to produce such a part.
drop table if exists tp_1;
-- In this test, we are going to create an old part with a written projection which does not exist in table metadata
-- NOTE(review): old_parts_lifetime=1 presumably shortens how long obsolete parts are kept before cleanup - confirm
create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100) settings old_parts_lifetime=1;
insert into tp_1 select number, number from numbers(3);
-- NOTE(review): mutations_sync = 2 presumably makes the ALTER statements below wait for their mutations to finish - confirm
set mutations_sync = 2;
alter table tp_1 add projection pp (select x, count() group by x);
insert into tp_1 select number, number from numbers(4);
-- Here we have a part with written projection pp
alter table tp_1 detach partition '0';
-- The part has now been moved to detached
-- (presumably this is why the CLEAR PROJECTION below leaves the projection files of that part on disk)
alter table tp_1 clear projection pp;
-- Remove projection from table metadata
alter table tp_1 drop projection pp;
-- Now, we don't load projection pp for the attached part, but it is written on disk
alter table tp_1 attach partition '0';
-- Make this part obsolete
optimize table tp_1 final;
-- Now, DROP TABLE triggers part removal
drop table tp_1;