diff --git a/dbms/src/Compression/CompressionCodecDoubleDelta.cpp b/dbms/src/Compression/CompressionCodecDoubleDelta.cpp index 8f306f3f06a..80d363e0ec5 100644 --- a/dbms/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/dbms/src/Compression/CompressionCodecDoubleDelta.cpp @@ -83,6 +83,7 @@ WriteSpec getWriteSpec(const T & value) template UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) { + static_assert(std::is_unsigned_v && std::is_signed_v, "T must be unsigned, while DeltaType must be signed integer type."); using UnsignedDeltaType = typename std::make_unsigned::type; if (source_size % sizeof(T) != 0) @@ -109,7 +110,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) { const T curr_value = unalignedLoad(source); prev_delta = static_cast(curr_value - prev_value); - unalignedStore(dest, prev_delta); + unalignedStore(dest, prev_delta); source += sizeof(curr_value); dest += sizeof(prev_delta); @@ -123,8 +124,8 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) { const T curr_value = unalignedLoad(source); - const auto delta = curr_value - prev_value; - const DeltaType double_delta = static_cast(delta - static_cast(prev_delta)); + const DeltaType delta = static_cast(curr_value - prev_value); + const DeltaType double_delta = delta - prev_delta; prev_delta = delta; prev_value = curr_value; @@ -153,6 +154,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) template void decompressDataForType(const char * source, UInt32 source_size, char * dest) { + static_assert(std::is_unsigned_v && std::is_signed_v, "T must be unsigned, while DeltaType must be signed integer type."); const char * source_end = source + source_size; const UInt32 items_count = unalignedLoad(source); @@ -173,7 +175,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) if (source < source_end) { prev_delta = unalignedLoad(source); - prev_value = static_cast(prev_value + prev_delta); + prev_value = prev_value + static_cast(prev_delta); unalignedStore(dest, prev_value); source += sizeof(prev_delta); @@ -208,11 +210,11 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) } // else if first bit is zero, no need to read more data. - const T curr_value = static_cast(prev_value + prev_delta + double_delta); + const T curr_value = prev_value + static_cast(prev_delta + double_delta); unalignedStore(dest, curr_value); dest += sizeof(curr_value); - prev_delta = curr_value - prev_value; + prev_delta = static_cast(curr_value - prev_value); prev_value = curr_value; } } diff --git a/dbms/tests/queries/0_stateless/00950_test_double_delta_codec.reference b/dbms/tests/queries/0_stateless/00950_test_double_delta_codec.reference index 5fb3bfb3629..eabb2f48147 100644 --- a/dbms/tests/queries/0_stateless/00950_test_double_delta_codec.reference +++ b/dbms/tests/queries/0_stateless/00950_test_double_delta_codec.reference @@ -8,3 +8,4 @@ I16 I8 DT D +Compression: diff --git a/dbms/tests/queries/0_stateless/00950_test_double_delta_codec.sql b/dbms/tests/queries/0_stateless/00950_test_double_delta_codec.sql index a5ae4766ece..93c6b0d749b 100644 --- a/dbms/tests/queries/0_stateless/00950_test_double_delta_codec.sql +++ b/dbms/tests/queries/0_stateless/00950_test_double_delta_codec.sql @@ -29,23 +29,23 @@ CREATE TABLE codecTest ( -- checking for overflow INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueI64, valueI64) - VALUES (101, 18446744073709551615, 18446744073709551615, 9223372036854775807, 9223372036854775807), (202, 0, 0, -9223372036854775808, -9223372036854775808), (203, 18446744073709551615, 18446744073709551615, 9223372036854775807, 9223372036854775807); + VALUES (1, 18446744073709551615, 18446744073709551615, 9223372036854775807, 9223372036854775807), (2, 0, 0, -9223372036854775808, -9223372036854775808), (3, 18446744073709551615, 18446744073709551615, 9223372036854775807, 9223372036854775807); -- n^3 covers all double delta storage cases, from small difference between neighbouref_values (stride) to big. INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueU32, valueU32, ref_valueU16, valueU16, ref_valueU8, valueU8, ref_valueI64, valueI64, ref_valueI32, valueI32, ref_valueI16, valueI16, ref_valueI8, valueI8, ref_valueDT, valueDT, ref_valueD, valueD) SELECT number as n, n * n * n as v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, toDateTime(v), toDateTime(v), toDate(v), toDate(v) - FROM system.numbers LIMIT 101, 100; + FROM system.numbers LIMIT 101, 1000; -- best case - constant stride INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueU32, valueU32, ref_valueU16, valueU16, ref_valueU8, valueU8, ref_valueI64, valueI64, ref_valueI32, valueI32, ref_valueI16, valueI16, ref_valueI8, valueI8, ref_valueDT, valueDT, ref_valueD, valueD) SELECT number as n, n as v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, toDateTime(v), toDateTime(v), toDate(v), toDate(v) - FROM system.numbers LIMIT 201, 100; + FROM system.numbers LIMIT 2001, 1000; -- worst case - random stride INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueU32, valueU32, ref_valueU16, valueU16, ref_valueU8, valueU8, ref_valueI64, valueI64, ref_valueI32, valueI32, ref_valueI16, valueI16, ref_valueI8, valueI8, ref_valueDT, valueDT, ref_valueD, valueD) SELECT number as n, n + (rand64() - 9223372036854775807)/1000 as v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, toDateTime(v), toDateTime(v), toDate(v), toDate(v) - FROM system.numbers LIMIT 301, 100; + FROM system.numbers LIMIT 3001, 1000; SELECT 'U64'; @@ -147,5 +147,21 @@ WHERE dD != 0 LIMIT 10; +SELECT 'Compression:'; +SELECT + table, name, type, + compression_codec, + data_uncompressed_bytes u, + data_compressed_bytes c, + round(u/c,3) ratio +FROM system.columns +WHERE + table == 'codecTest' +AND + compression_codec != '' +AND + ratio <= 1 +ORDER BY + table, name, type; DROP TABLE IF EXISTS codecTest; \ No newline at end of file