From c17fa34fa5de8ba01fad45ec059c61be9b125d4d Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 6 Apr 2020 20:00:29 +0300 Subject: [PATCH] Fix bug with uncompressed checksums in CHECK TABLE query --- src/Storages/MergeTree/checkDataPart.cpp | 35 ++++++++++++------- .../01112_check_table_with_index.reference | 1 + .../01112_check_table_with_index.sql | 15 ++++++++ 3 files changed, 38 insertions(+), 13 deletions(-) create mode 100644 tests/queries/0_stateless/01112_check_table_with_index.reference create mode 100644 tests/queries/0_stateless/01112_check_table_with_index.sql diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 6da051d04ac..03728980c69 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -99,19 +99,6 @@ IMergeTreeDataPart::Checksums checkDataPart( throw Exception("Unknown type in part " + path, ErrorCodes::UNKNOWN_PART_TYPE); } - for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) - { - const String & file_name = it->name(); - auto checksum_it = checksums_data.files.find(file_name); - if (checksum_it == checksums_data.files.end() && file_name != "checksums.txt" && file_name != "columns.txt") - { - auto file_buf = disk->readFile(it->path()); - HashingReadBuffer hashing_buf(*file_buf); - hashing_buf.tryIgnore(std::numeric_limits<size_t>::max()); - checksums_data.files[file_name] = IMergeTreeDataPart::Checksums::Checksum(hashing_buf.count(), hashing_buf.getHash()); - } - } - /// Checksums from file checksums.txt. May be absent. If present, they are subsequently compared with the actual data checksums. 
IMergeTreeDataPart::Checksums checksums_txt; @@ -122,6 +109,28 @@ IMergeTreeDataPart::Checksums checkDataPart( assertEOF(*buf); } + const auto & checksum_files_txt = checksums_txt.files; + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) + { + const String & file_name = it->name(); + auto checksum_it = checksums_data.files.find(file_name); + if (checksum_it == checksums_data.files.end() && file_name != "checksums.txt" && file_name != "columns.txt") + { + auto txt_checksum_it = checksum_files_txt.find(file_name); + if (txt_checksum_it == checksum_files_txt.end() || txt_checksum_it->second.uncompressed_size == 0) + { + auto file_buf = disk->readFile(it->path()); + HashingReadBuffer hashing_buf(*file_buf); + hashing_buf.tryIgnore(std::numeric_limits<size_t>::max()); + checksums_data.files[file_name] = IMergeTreeDataPart::Checksums::Checksum(hashing_buf.count(), hashing_buf.getHash()); + } + else /// If we have both compressed and uncompressed in txt, then calculate them + { + checksums_data.files[file_name] = checksum_compressed_file(disk, it->path()); + } + } + } + if (is_cancelled()) return {}; diff --git a/tests/queries/0_stateless/01112_check_table_with_index.reference b/tests/queries/0_stateless/01112_check_table_with_index.reference new file mode 100644 index 00000000000..2027ea099a8 --- /dev/null +++ b/tests/queries/0_stateless/01112_check_table_with_index.reference @@ -0,0 +1 @@ +all_1_1_0 1 diff --git a/tests/queries/0_stateless/01112_check_table_with_index.sql b/tests/queries/0_stateless/01112_check_table_with_index.sql new file mode 100644 index 00000000000..e9613df7d1a --- /dev/null +++ b/tests/queries/0_stateless/01112_check_table_with_index.sql @@ -0,0 +1,15 @@ +SET check_query_single_value_result = 'false'; + +DROP TABLE IF EXISTS check_table_with_indices; + +CREATE TABLE check_table_with_indices ( + id UInt64, + data String, + INDEX a (id) type minmax GRANULARITY 3 +) ENGINE = MergeTree() ORDER BY id; + +INSERT INTO 
check_table_with_indices VALUES (0, 'test'), (1, 'test2'); + +CHECK TABLE check_table_with_indices; + +DROP TABLE check_table_with_indices;