do not hardlink serialization.json in new part

This commit is contained in:
Sema Checherinda 2022-12-08 12:48:34 +01:00
parent d4cd53ccea
commit 46f96064ea
3 changed files with 45 additions and 0 deletions

View File

@@ -488,6 +488,9 @@ static NameSet collectFilesToSkip(
{
NameSet files_to_skip = source_part->getFileNamesWithoutChecksums();
/// Do not hardlink this file because it's always rewritten at the end of mutation.
files_to_skip.insert(IMergeTreeDataPart::SERIALIZATION_FILE_NAME);
auto new_stream_counts = getStreamCounts(new_part, new_part->getColumns().getNames());
auto source_updated_stream_counts = getStreamCounts(source_part, updated_header.getNames());
auto new_updated_stream_counts = getStreamCounts(new_part, updated_header.getNames());

View File

@@ -0,0 +1,5 @@
all_1_1_0 1
1 2000
all_1_1_0 1
all_1_1_0 1
all_1_1_0 1

View File

@@ -0,0 +1,37 @@
-- Regression test: a mutation that is rolled back must leave the source part
-- intact. serialization.json is rewritten at the end of every mutation, so the
-- mutated part needs its own copy of that file rather than a hardlink to the
-- source part's file.

-- Run mutations synchronously, so each ALTER below has completed before the
-- ROLLBACK that follows it.
SET mutations_sync = 1;
-- Make CHECK TABLE report a row per part rather than a single value.
SET check_query_single_value_result = 0;

DROP TABLE IF EXISTS t_source_part_is_intact;

CREATE TABLE t_source_part_is_intact
(
    id UInt64,
    u UInt64
)
ENGINE = MergeTree
ORDER BY id
SETTINGS
    min_bytes_for_wide_part = 1,
    ratio_of_defaults_for_sparse_serialization = 0.5;

-- `u` is non-zero only where `number` is a multiple of 11, so most of its
-- values are defaults (presumably enough to exceed the 0.5 ratio above and
-- have the column written with sparse serialization -- confirm).
INSERT INTO t_source_part_is_intact
SELECT
    number,
    if(number % 11 = 0, number, 0)
FROM numbers(2000);

-- Baseline: the freshly inserted part passes the check and holds every row.
CHECK TABLE t_source_part_is_intact;

SELECT
    1,
    count()
FROM t_source_part_is_intact;

-- Case 1: serialization.json in the new part has the same size as in the
-- source part, but a different checksum.
BEGIN TRANSACTION;
ALTER TABLE t_source_part_is_intact
    UPDATE u = 0 WHERE u != 0;
ROLLBACK;

CHECK TABLE t_source_part_is_intact;

-- Case 2: serialization.json in the new part has a different size.
BEGIN TRANSACTION;
ALTER TABLE t_source_part_is_intact
    UPDATE u = 1 WHERE 1;
ROLLBACK;

CHECK TABLE t_source_part_is_intact;

-- Detach and re-attach the table, then check the part once more.
DETACH TABLE t_source_part_is_intact;
ATTACH TABLE t_source_part_is_intact;

CHECK TABLE t_source_part_is_intact;

DROP TABLE t_source_part_is_intact;