From 3711430d9f97401d80dff14eaab68008bb03132c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 15 Jun 2023 16:14:56 +0200 Subject: [PATCH 01/13] Rename member fields of CityHash_v1_0_2::uint128: "first" -> "low64", "second" -> "high64". --- base/base/wide_integer_impl.h | 16 ++++++++++++++++ contrib/cityhash102/include/city.h | 19 ++++++++++++++++--- src/Compression/CompressedReadBufferBase.cpp | 16 ++++++++-------- src/Compression/CompressedWriteBuffer.cpp | 8 ++++---- .../DistributedAsyncInsertHeader.cpp | 4 ++-- .../MergeTree/DataPartStorageOnDiskBase.cpp | 2 +- .../MergeTree/MergeTreeDataPartChecksum.cpp | 8 ++++---- .../PartMetadataManagerWithCache.cpp | 8 ++++---- src/Storages/System/StorageSystemParts.cpp | 6 +++--- .../System/StorageSystemProjectionParts.cpp | 6 +++--- 10 files changed, 61 insertions(+), 32 deletions(-) diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index 4a80c176829..dc6a49694ae 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -15,6 +15,8 @@ #include #include +#include + // NOLINTBEGIN(*) /// Use same extended double for all platforms @@ -281,6 +283,14 @@ struct integer::_impl } } + constexpr static void wide_integer_from_cityhash_uint128(integer & self, const CityHash_v1_0_2::uint128 & value) noexcept + { + if constexpr (std::endian::native == std::endian::little) + wide_integer_from_tuple_like(self, std::make_pair(value.low64, value.high64)); + else + wide_integer_from_tuple_like(self, std::make_pair(value.high64, value.low64)); + } + /** * N.B. t is constructed from double, so max(t) = max(double) ~ 2^310 * the recursive call happens when t / 2^64 > 2^64, so there won't be more than 5 of them. @@ -1036,6 +1046,8 @@ constexpr integer::integer(T rhs) noexcept _impl::wide_integer_from_wide_integer(*this, rhs); else if constexpr (IsTupleLike::value) _impl::wide_integer_from_tuple_like(*this, rhs); + else if constexpr (std::is_same_v, CityHash_v1_0_2::uint128>) + _impl::wide_integer_from_cityhash_uint128(*this, rhs); else _impl::wide_integer_from_builtin(*this, rhs); } @@ -1051,6 +1063,8 @@ constexpr integer::integer(std::initializer_list il) noexcept _impl::wide_integer_from_wide_integer(*this, *il.begin()); else if constexpr (IsTupleLike::value) _impl::wide_integer_from_tuple_like(*this, *il.begin()); + else if constexpr (std::is_same_v, CityHash_v1_0_2::uint128>) + _impl::wide_integer_from_cityhash_uint128(*this, *il.begin()); else _impl::wide_integer_from_builtin(*this, *il.begin()); } @@ -1088,6 +1102,8 @@ constexpr integer & integer::operator=(T rhs) noexce { if constexpr (IsTupleLike::value) _impl::wide_integer_from_tuple_like(*this, rhs); + else if constexpr (std::is_same_v, CityHash_v1_0_2::uint128>) + _impl::wide_integer_from_cityhash_uint128(*this, rhs); else _impl::wide_integer_from_builtin(*this, rhs); return *this; diff --git a/contrib/cityhash102/include/city.h b/contrib/cityhash102/include/city.h index 77d4c988cdd..87363d16444 100644 --- a/contrib/cityhash102/include/city.h +++ b/contrib/cityhash102/include/city.h @@ -61,11 +61,24 @@ namespace CityHash_v1_0_2 typedef uint8_t uint8; typedef uint32_t uint32; typedef uint64_t uint64; -typedef std::pair uint128; +/// Represent an unsigned integer of 128 bits as it's used in CityHash. +/// Originally CityHash used `std::pair` instead of this struct, +/// however the members `first` and `second` could be easily confused so they were renamed to `low64` and `high64`: +/// `first` -> `low64`, `second` -> `high64`. 
+struct uint128 +{ + uint64 low64 = 0; + uint64 high64 = 0; -inline uint64 Uint128Low64(const uint128& x) { return x.first; } -inline uint64 Uint128High64(const uint128& x) { return x.second; } + uint128() = default; + uint128(uint64 low64_, uint64 high64_) : low64(low64_), high64(high64_) {} + friend bool operator ==(const uint128 & x, const uint128 & y) { return (x.low64 == y.low64) && (x.high64 == y.high64); } + friend bool operator !=(const uint128 & x, const uint128 & y) { return !(x == y); } +}; + +inline uint64 Uint128Low64(const uint128 & x) { return x.low64; } +inline uint64 Uint128High64(const uint128 & x) { return x.high64; } // Hash function for a byte array. uint64 CityHash64(const char *buf, size_t len); diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp index 278210d770a..662cd6bf337 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -49,8 +49,8 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c /// TODO mess up of endianness in error message. message << "Checksum doesn't match: corrupted data." - " Reference: " + getHexUIntLowercase(expected_checksum.first) + getHexUIntLowercase(expected_checksum.second) - + ". Actual: " + getHexUIntLowercase(calculated_checksum.first) + getHexUIntLowercase(calculated_checksum.second) + " Reference: " + getHexUIntLowercase(expected_checksum.low64) + getHexUIntLowercase(expected_checksum.high64) + + ". Actual: " + getHexUIntLowercase(calculated_checksum.low64) + getHexUIntLowercase(calculated_checksum.high64) + ". Size of compressed block: " + toString(size); const char * message_hardware_failure = "This is most likely due to hardware failure. " @@ -95,8 +95,8 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c } /// Check if the difference caused by single bit flip in stored checksum. 
- size_t difference = std::popcount(expected_checksum.first ^ calculated_checksum.first) - + std::popcount(expected_checksum.second ^ calculated_checksum.second); + size_t difference = std::popcount(expected_checksum.low64 ^ calculated_checksum.low64) + + std::popcount(expected_checksum.high64 ^ calculated_checksum.high64); if (difference == 1) { @@ -194,8 +194,8 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, { Checksum checksum; ReadBufferFromMemory checksum_in(own_compressed_buffer.data(), sizeof(checksum)); - readBinaryLittleEndian(checksum.first, checksum_in); - readBinaryLittleEndian(checksum.second, checksum_in); + readBinaryLittleEndian(checksum.low64, checksum_in); + readBinaryLittleEndian(checksum.high64, checksum_in); validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum); } @@ -238,8 +238,8 @@ size_t CompressedReadBufferBase::readCompressedDataBlockForAsynchronous(size_t & { Checksum checksum; ReadBufferFromMemory checksum_in(own_compressed_buffer.data(), sizeof(checksum)); - readBinaryLittleEndian(checksum.first, checksum_in); - readBinaryLittleEndian(checksum.second, checksum_in); + readBinaryLittleEndian(checksum.low64, checksum_in); + readBinaryLittleEndian(checksum.high64, checksum_in); validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum); } diff --git a/src/Compression/CompressedWriteBuffer.cpp b/src/Compression/CompressedWriteBuffer.cpp index cb2ee1140d0..f16330332ab 100644 --- a/src/Compression/CompressedWriteBuffer.cpp +++ b/src/Compression/CompressedWriteBuffer.cpp @@ -38,8 +38,8 @@ void CompressedWriteBuffer::nextImpl() CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(out_compressed_ptr, compressed_size); - writeBinaryLittleEndian(checksum.first, out); - writeBinaryLittleEndian(checksum.second, out); + writeBinaryLittleEndian(checksum.low64, out); + writeBinaryLittleEndian(checksum.high64, out); out.position() += compressed_size; } @@ -50,8 +50,8 @@ void CompressedWriteBuffer::nextImpl() CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(compressed_buffer.data(), compressed_size); - writeBinaryLittleEndian(checksum.first, out); - writeBinaryLittleEndian(checksum.second, out); + writeBinaryLittleEndian(checksum.low64, out); + writeBinaryLittleEndian(checksum.high64, out); out.write(compressed_buffer.data(), compressed_size); } diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp index 018c1d863bb..d815f671652 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp @@ -40,8 +40,8 @@ DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFi { throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum of extra info doesn't match: corrupted data. Reference: {}{}. Actual: {}{}.", - getHexUIntLowercase(expected_checksum.first), getHexUIntLowercase(expected_checksum.second), - getHexUIntLowercase(calculated_checksum.first), getHexUIntLowercase(calculated_checksum.second)); + getHexUIntLowercase(expected_checksum.low64), getHexUIntLowercase(expected_checksum.high64), + getHexUIntLowercase(calculated_checksum.low64), getHexUIntLowercase(calculated_checksum.high64)); } /// Read the parts of the header. 
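
Note that the rename is purely cosmetic with respect to the serialized format:
a checksum is still written as low64 followed by high64, both little-endian,
exactly the byte order the old std::pair members "first" and "second"
produced. A standalone sketch of that 16-byte layout (readChecksum is a
hypothetical helper, not the real readBinaryLittleEndian API; the memcpy
shortcut assumes a little-endian host):

    #include <cstdint>
    #include <cstring>

    struct Checksum { uint64_t low64 = 0; uint64_t high64 = 0; };

    // Decode the serialized form: bytes 0..7 -> low64, bytes 8..15 -> high64.
    static Checksum readChecksum(const unsigned char (&buf)[16])
    {
        Checksum c;
        std::memcpy(&c.low64, buf, 8);
        std::memcpy(&c.high64, buf + 8, 8);
        return c;
    }

    int main()
    {
        const unsigned char buf[16] = {1, 0, 0, 0, 0, 0, 0, 0,  2, 0, 0, 0, 0, 0, 0, 0};
        const Checksum c = readChecksum(buf);
        return (c.low64 == 1 && c.high64 == 2) ? 0 : 1;
    }
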
diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 30776a8bc50..92e9005751e 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -400,7 +400,7 @@ void DataPartStorageOnDiskBase::backup( if (it != checksums.files.end()) { file_size = it->second.file_size; - file_hash = {it->second.file_hash.first, it->second.file_hash.second}; + file_hash = it->second.file_hash; } BackupEntryPtr backup_entry = std::make_unique(disk, filepath_on_disk, copy_encrypted, file_size, file_hash); diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 78f68ea72fe..58ba7acb9ba 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -154,9 +154,9 @@ bool MergeTreeDataPartChecksums::readV2(ReadBuffer & in) assertString("\n\tsize: ", in); readText(sum.file_size, in); assertString("\n\thash: ", in); - readText(sum.file_hash.first, in); + readText(sum.file_hash.low64, in); assertString(" ", in); - readText(sum.file_hash.second, in); + readText(sum.file_hash.high64, in); assertString("\n\tcompressed: ", in); readText(sum.is_compressed, in); if (sum.is_compressed) @@ -164,9 +164,9 @@ bool MergeTreeDataPartChecksums::readV2(ReadBuffer & in) assertString("\n\tuncompressed size: ", in); readText(sum.uncompressed_size, in); assertString("\n\tuncompressed hash: ", in); - readText(sum.uncompressed_hash.first, in); + readText(sum.uncompressed_hash.low64, in); assertString(" ", in); - readText(sum.uncompressed_hash.second, in); + readText(sum.uncompressed_hash.high64, in); } assertChar('\n', in); diff --git a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp index 7c0aedf699b..7deae69750f 100644 --- a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp +++ b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp @@ -250,8 +250,8 @@ std::unordered_map PartMetadataManagerWit ErrorCodes::CORRUPTED_DATA, "Checksums doesn't match in part {} for {}. Expected: {}. Found {}.", part->name, file_path, - getHexUIntUppercase(disk_checksum.first) + getHexUIntUppercase(disk_checksum.second), - getHexUIntUppercase(cache_checksums[i].first) + getHexUIntUppercase(cache_checksums[i].second)); + getHexUIntUppercase(disk_checksum.low64) + getHexUIntUppercase(disk_checksum.high64), + getHexUIntUppercase(cache_checksums[i].low64) + getHexUIntUppercase(cache_checksums[i].high64)); disk_checksums.push_back(disk_checksum); continue; @@ -287,8 +287,8 @@ std::unordered_map PartMetadataManagerWit ErrorCodes::CORRUPTED_DATA, "Checksums doesn't match in projection part {} {}. Expected: {}. 
Found {}.", part->name, proj_name, - getHexUIntUppercase(disk_checksum.first) + getHexUIntUppercase(disk_checksum.second), - getHexUIntUppercase(cache_checksums[i].first) + getHexUIntUppercase(cache_checksums[i].second)); + getHexUIntUppercase(disk_checksum.low64) + getHexUIntUppercase(disk_checksum.high64), + getHexUIntUppercase(cache_checksums[i].low64) + getHexUIntUppercase(cache_checksums[i].high64)); disk_checksums.push_back(disk_checksum); } return results; diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 86ecb336b51..95bad0a20fe 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -252,17 +252,17 @@ void StorageSystemParts::processNextStorage( if (columns_mask[src_index++]) { auto checksum = helper.hash_of_all_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); } if (columns_mask[src_index++]) { auto checksum = helper.hash_of_uncompressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); } if (columns_mask[src_index++]) { auto checksum = helper.uncompressed_hash_of_compressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); } } diff --git a/src/Storages/System/StorageSystemProjectionParts.cpp b/src/Storages/System/StorageSystemProjectionParts.cpp index d2c6c3ef287..6508d062d37 100644 --- a/src/Storages/System/StorageSystemProjectionParts.cpp +++ b/src/Storages/System/StorageSystemProjectionParts.cpp @@ -221,17 +221,17 @@ void StorageSystemProjectionParts::processNextStorage( if (columns_mask[src_index++]) { auto checksum = helper.hash_of_all_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); } if (columns_mask[src_index++]) { auto checksum = helper.hash_of_uncompressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); } if (columns_mask[src_index++]) { auto checksum = helper.uncompressed_hash_of_compressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); } } From f1f0daa654755b2d12ec9548262adfe4e87fe9b6 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 15 Jun 2023 17:59:37 +0200 Subject: [PATCH 02/13] Show halves of checksums in "system.parts", "system.projection_parts" and error messages in the correct order. 
--- src/Compression/CompressedReadBufferBase.cpp | 4 ++-- src/Storages/Distributed/DistributedAsyncInsertHeader.cpp | 4 ++-- src/Storages/MergeTree/PartMetadataManagerWithCache.cpp | 8 ++++---- src/Storages/System/StorageSystemParts.cpp | 6 +++--- src/Storages/System/StorageSystemProjectionParts.cpp | 6 +++--- utils/checksum-for-compressed-block/main.cpp | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp index 662cd6bf337..bae52c8bece 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -49,8 +49,8 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c /// TODO mess up of endianness in error message. message << "Checksum doesn't match: corrupted data." - " Reference: " + getHexUIntLowercase(expected_checksum.low64) + getHexUIntLowercase(expected_checksum.high64) - + ". Actual: " + getHexUIntLowercase(calculated_checksum.low64) + getHexUIntLowercase(calculated_checksum.high64) + " Reference: " + getHexUIntLowercase(expected_checksum.high64) + getHexUIntLowercase(expected_checksum.low64) + + ". Actual: " + getHexUIntLowercase(calculated_checksum.high64) + getHexUIntLowercase(calculated_checksum.low64) + ". Size of compressed block: " + toString(size); const char * message_hardware_failure = "This is most likely due to hardware failure. " diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp index d815f671652..e1b54304f23 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp @@ -40,8 +40,8 @@ DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFi { throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum of extra info doesn't match: corrupted data. Reference: {}{}. Actual: {}{}.", - getHexUIntLowercase(expected_checksum.low64), getHexUIntLowercase(expected_checksum.high64), - getHexUIntLowercase(calculated_checksum.low64), getHexUIntLowercase(calculated_checksum.high64)); + getHexUIntLowercase(expected_checksum.high64), getHexUIntLowercase(expected_checksum.low64), + getHexUIntLowercase(calculated_checksum.high64), getHexUIntLowercase(calculated_checksum.low64)); } /// Read the parts of the header. diff --git a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp index 7deae69750f..324bd4bbaee 100644 --- a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp +++ b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp @@ -250,8 +250,8 @@ std::unordered_map PartMetadataManagerWit ErrorCodes::CORRUPTED_DATA, "Checksums doesn't match in part {} for {}. Expected: {}. Found {}.", part->name, file_path, - getHexUIntUppercase(disk_checksum.low64) + getHexUIntUppercase(disk_checksum.high64), - getHexUIntUppercase(cache_checksums[i].low64) + getHexUIntUppercase(cache_checksums[i].high64)); + getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64), + getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64)); disk_checksums.push_back(disk_checksum); continue; @@ -287,8 +287,8 @@ std::unordered_map PartMetadataManagerWit ErrorCodes::CORRUPTED_DATA, "Checksums doesn't match in projection part {} {}. Expected: {}. 
Found {}.", part->name, proj_name, - getHexUIntUppercase(disk_checksum.low64) + getHexUIntUppercase(disk_checksum.high64), - getHexUIntUppercase(cache_checksums[i].low64) + getHexUIntUppercase(cache_checksums[i].high64)); + getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64), + getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64)); disk_checksums.push_back(disk_checksum); } return results; diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 95bad0a20fe..b642f4b5088 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -252,17 +252,17 @@ void StorageSystemParts::processNextStorage( if (columns_mask[src_index++]) { auto checksum = helper.hash_of_all_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); } if (columns_mask[src_index++]) { auto checksum = helper.hash_of_uncompressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); } if (columns_mask[src_index++]) { auto checksum = helper.uncompressed_hash_of_compressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); } } diff --git a/src/Storages/System/StorageSystemProjectionParts.cpp b/src/Storages/System/StorageSystemProjectionParts.cpp index 6508d062d37..05c83747c4d 100644 --- a/src/Storages/System/StorageSystemProjectionParts.cpp +++ b/src/Storages/System/StorageSystemProjectionParts.cpp @@ -221,17 +221,17 @@ void StorageSystemProjectionParts::processNextStorage( if (columns_mask[src_index++]) { auto checksum = helper.hash_of_all_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); } if (columns_mask[src_index++]) { auto checksum = helper.hash_of_uncompressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); } if (columns_mask[src_index++]) { auto checksum = helper.uncompressed_hash_of_compressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); } } diff --git a/utils/checksum-for-compressed-block/main.cpp b/utils/checksum-for-compressed-block/main.cpp index 4f9923e7638..d30a3798820 100644 --- a/utils/checksum-for-compressed-block/main.cpp +++ b/utils/checksum-for-compressed-block/main.cpp @@ -45,7 +45,7 @@ int main(int, char **) { auto flipped = flipBit(str, pos); auto checksum = CityHash_v1_0_2::CityHash128(flipped.data(), flipped.size()); - std::cout << getHexUIntLowercase(checksum.first) << getHexUIntLowercase(checksum.second) << "\t" << pos / 8 << ", " << pos % 8 << "\n"; + std::cout << getHexUIntLowercase(checksum.high64) << 
getHexUIntLowercase(checksum.low64) << "\t" << pos / 8 << ", " << pos % 8 << "\n"; } return 0; From 5eeda0a0d24ae14a78da79273870aec9fa6bd8a0 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 24 Jun 2023 22:17:53 +0200 Subject: [PATCH 03/13] Fix test 00961_checksums_in_system_parts_columns_table --- .../00961_checksums_in_system_parts_columns_table.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.reference b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.reference index 186f2feab79..4bf3cfe65a2 100644 --- a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.reference +++ b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.reference @@ -1 +1 @@ -20000101_1_1_0 test_00961 b5fce9c4ef1ca42ce4ed027389c208d2 fc3b062b646cd23d4c23d7f5920f89ae da96ff1e527a8a1f908ddf2b1d0af239 +20000101_1_1_0 test_00961 e4ed027389c208d2b5fce9c4ef1ca42c 4c23d7f5920f89aefc3b062b646cd23d 908ddf2b1d0af239da96ff1e527a8a1f From 71cded08ff2813f4c4757e71a773ca8cc0a293bf Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 25 Jun 2023 14:51:29 +0200 Subject: [PATCH 04/13] Remove unnecessary include from wide_integer_impl.h --- base/base/wide_integer_impl.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index dc6a49694ae..411841e6d9f 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -15,8 +15,6 @@ #include #include -#include - // NOLINTBEGIN(*) /// Use same extended double for all platforms @@ -29,6 +27,8 @@ using FromDoubleIntermediateType = long double; using FromDoubleIntermediateType = boost::multiprecision::cpp_bin_float_double_extended; #endif +namespace CityHash_v1_0_2 { struct uint128; } + namespace wide { @@ -283,8 +283,11 @@ struct integer::_impl } } - constexpr static void wide_integer_from_cityhash_uint128(integer & self, const CityHash_v1_0_2::uint128 & value) noexcept + template + constexpr static void wide_integer_from_cityhash_uint128(integer & self, const CityHashUInt128 & value) noexcept { + static_assert(sizeof(item_count) >= 2); + if constexpr (std::endian::native == std::endian::little) wide_integer_from_tuple_like(self, std::make_pair(value.low64, value.high64)); else From a029105fd51044d692c0b2895dd9d5ce09b28036 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 28 Jun 2023 10:35:19 +0000 Subject: [PATCH 05/13] Fix duplicate storage set logical error. 
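
When query analysis calls makeSet() more than once for the same Set storage,
the set used to be registered under the same key twice, tripping a logical
error inside PreparedSets. The fix looks the set up first and only registers
it on a miss. A simplified model of the guard (not the real PreparedSets API,
just the check-before-insert shape; duplicate registration is modeled as a
plain map insert rather than the LOGICAL_ERROR the real code raises):

    #include <map>
    #include <memory>
    #include <string>

    struct FutureSet {};
    using FutureSetPtr = std::shared_ptr<FutureSet>;

    struct PreparedSets
    {
        std::map<std::string, FutureSetPtr> storage_sets;

        FutureSetPtr findStorage(const std::string & key) const
        {
            auto it = storage_sets.find(key);
            return it == storage_sets.end() ? nullptr : it->second;
        }

        FutureSetPtr addFromStorage(const std::string & key)
        {
            // In the real code, adding the same key twice is the bug being fixed;
            // callers must check findStorage() first.
            return storage_sets.emplace(key, std::make_shared<FutureSet>()).first->second;
        }
    };

    FutureSetPtr makeSet(PreparedSets & prepared_sets, const std::string & set_key)
    {
        if (auto set = prepared_sets.findStorage(set_key))
            return set; // reuse the set registered by an earlier occurrence
        return prepared_sets.addFromStorage(set_key);
    }

    int main()
    {
        PreparedSets sets;
        auto a = makeSet(sets, "test_set");
        auto b = makeSet(sets, "test_set"); // second reference reuses the first
        return a == b ? 0 : 1;
    }
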
--- src/Interpreters/ActionsVisitor.cpp | 3 +++ .../02809_storage_set_analysis_bug.reference | 0 .../02809_storage_set_analysis_bug.sql | 15 +++++++++++++++ 3 files changed, 18 insertions(+) create mode 100644 tests/queries/0_stateless/02809_storage_set_analysis_bug.reference create mode 100644 tests/queries/0_stateless/02809_storage_set_analysis_bug.sql diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index cfbe53b5e4d..10502b7e66d 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1435,6 +1435,9 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool if (table) { + if (auto set = data.prepared_sets->findStorage(set_key)) + return set; + if (StorageSet * storage_set = dynamic_cast(table.get())) return data.prepared_sets->addFromStorage(set_key, storage_set->getSet()); } diff --git a/tests/queries/0_stateless/02809_storage_set_analysis_bug.reference b/tests/queries/0_stateless/02809_storage_set_analysis_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql b/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql new file mode 100644 index 00000000000..212170571f7 --- /dev/null +++ b/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS test_set; +DROP TABLE IF EXISTS null_in__fuzz_6; + +set allow_suspicious_low_cardinality_types = 1; + + +CREATE TABLE null_in__fuzz_6 (`dt` LowCardinality(UInt16), `idx` Int32, `i` Nullable(Int256), `s` Int32) ENGINE = MergeTree PARTITION BY dt ORDER BY idx; +insert into null_in__fuzz_6 select * from generateRandom() limit 1; + +SET transform_null_in = 0; + +CREATE TABLE test_set (i Nullable(int)) ENGINE = Set(); +INSERT INTO test_set VALUES (1), (NULL); + +SELECT count() = 1 FROM null_in__fuzz_6 PREWHERE 71 WHERE i IN (test_set); -- { serverError CANNOT_CONVERT_TYPE } From 82fd863ad4eb1209ca66e660d7cf3b646f88fff7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Jun 2023 13:39:47 +0300 Subject: [PATCH 06/13] Update 02809_storage_set_analysis_bug.sql --- tests/queries/0_stateless/02809_storage_set_analysis_bug.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql b/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql index 212170571f7..531946acda0 100644 --- a/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql +++ b/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql @@ -3,7 +3,6 @@ DROP TABLE IF EXISTS null_in__fuzz_6; set allow_suspicious_low_cardinality_types = 1; - CREATE TABLE null_in__fuzz_6 (`dt` LowCardinality(UInt16), `idx` Int32, `i` Nullable(Int256), `s` Int32) ENGINE = MergeTree PARTITION BY dt ORDER BY idx; insert into null_in__fuzz_6 select * from generateRandom() limit 1; @@ -13,3 +12,6 @@ CREATE TABLE test_set (i Nullable(int)) ENGINE = Set(); INSERT INTO test_set VALUES (1), (NULL); SELECT count() = 1 FROM null_in__fuzz_6 PREWHERE 71 WHERE i IN (test_set); -- { serverError CANNOT_CONVERT_TYPE } + +DROP TABLE test_set; +DROP TABLE null_in__fuzz_6; From 45cd7f35cdfb3c1ed17cd2451468761aa25a6bfb Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 22 Jun 2023 02:16:56 +0200 Subject: [PATCH 07/13] Use clickhouse/integration-helper from changed images --- docker/test/integration/runner/dockerd-entrypoint.sh | 2 ++ tests/integration/helpers/network.py | 11 +++++++---- tests/integration/runner | 2 ++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index fe47fc90951..347d904d5c0 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -52,6 +52,8 @@ export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/clickhouse-config export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge export CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH=/clickhouse-library-bridge +export DOCKER_BASE_TAG=${DOCKER_BASE_TAG:=latest} +export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest} export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest} export DOCKER_DOTNET_CLIENT_TAG=${DOCKER_DOTNET_CLIENT_TAG:=latest} export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest} diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 471aa2bdc2e..4859a8c5946 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -231,6 +231,9 @@ class _NetworkManager: def _ensure_container(self): if self._container is None or self._container_expire_time <= time.time(): + image_name = "clickhouse/integration-helper:" + os.getenv( + "DOCKER_HELPER_TAG", "latest" + ) for i in range(5): if self._container is not None: try: @@ -247,7 +250,7 @@ class _NetworkManager: time.sleep(i) image = subprocess.check_output( - "docker images -q clickhouse/integration-helper 2>/dev/null", shell=True + f"docker images -q {image_name} 2>/dev/null", shell=True ) if not image.strip(): print("No network image helper, will try download") @@ -256,16 +259,16 @@ class _NetworkManager: for i in range(5): try: subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - "docker pull clickhouse/integration-helper", shell=True + f"docker pull {image_name}", shell=True ) break except: time.sleep(i) else: - raise Exception("Cannot pull clickhouse/integration-helper image") + raise Exception(f"Cannot pull {image_name} image") self._container = self._docker_client.containers.run( - "clickhouse/integration-helper", + image_name, auto_remove=True, command=("sleep %s" % self.container_exit_timeout), # /run/xtables.lock passed inside for correct iptables --wait diff --git a/tests/integration/runner b/tests/integration/runner index f658bac412b..301a707a78d 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -336,6 +336,8 @@ if __name__ == "__main__": env_tags += "-e {}={} ".format("DOCKER_MYSQL_PHP_CLIENT_TAG", tag) elif image == "clickhouse/postgresql-java-client": env_tags += "-e {}={} ".format("DOCKER_POSTGRESQL_JAVA_CLIENT_TAG", tag) + elif image == "clickhouse/integration-helper": + env_tags += "-e {}={} ".format("DOCKER_HELPER_TAG", tag) elif image == "clickhouse/integration-test": env_tags += "-e {}={} ".format("DOCKER_BASE_TAG", tag) elif image == "clickhouse/kerberized-hadoop": From eeb8cdbc19e69aab64b0da2de0b569d6e31f438f Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 22 Jun 2023 02:36:30 +0200 Subject: [PATCH 08/13] Add way to define additional urls in test reports --- tests/ci/upload_result_helper.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index 150af7aff4a..fbb89ef8078 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, List +from typing import Dict, List, Optional import os import logging @@ -58,14 +58,19 @@ def upload_results( test_results: TestResults, additional_files: List[str], check_name: str, + additional_urls: Optional[List[str]] = None, ) -> str: normalized_check_name = check_name.lower() for r in ((" ", "_"), ("(", "_"), (")", "_"), (",", "_"), ("/", "_")): normalized_check_name = normalized_check_name.replace(*r) + + # Preserve additional_urls to not modify the original one + original_additional_urls = additional_urls or [] s3_path_prefix = f"{pr_number}/{commit_sha}/{normalized_check_name}" additional_urls = process_logs( s3_client, additional_files, s3_path_prefix, test_results ) + additional_urls.extend(original_additional_urls) branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master" branch_name = "master" From f23bf9c5acdd885d61ec7f68bdf8be1b1fa79dee Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 29 Jun 2023 10:04:36 +0800 Subject: [PATCH 09/13] Update redis.md --- .../engines/table-engines/integrations/redis.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/redis.md b/docs/en/engines/table-engines/integrations/redis.md index 568179eb690..7ef87927bfd 100644 --- a/docs/en/engines/table-engines/integrations/redis.md +++ b/docs/en/engines/table-engines/integrations/redis.md @@ -44,9 +44,10 @@ Create a table in ClickHouse which allows to read data from Redis: ``` sql CREATE TABLE redis_table ( - `k` String, - `m` String, - `n` UInt32 + `key` String, + `v1` UInt32, + `v2` String, + `v3` Float32 ) ENGINE = Redis('redis1:6379') PRIMARY KEY(k); ``` @@ -111,9 +112,16 @@ Flush Redis db asynchronously. Also `Truncate` support SYNC mode. TRUNCATE TABLE redis_table SYNC; ``` +Join: + +Join with other tables. + +``` +SELECT * FROM redis_table JOIN merge_tree_table ON redis_table.key=merge_tree_table.key; +``` ## Limitations {#limitations} Redis engine also supports scanning queries, such as `where k > xx`, but it has some limitations: -1. Scanning query may produce some duplicated keys in a very rare case when it is rehashing. See details in [Redis Scan](https://github.com/redis/redis/blob/e4d183afd33e0b2e6e8d1c79a832f678a04a7886/src/dict.c#L1186-L1269) +1. Scanning query may produce some duplicated keys in a very rare case when it is rehashing. See details in [Redis Scan](https://github.com/redis/redis/blob/e4d183afd33e0b2e6e8d1c79a832f678a04a7886/src/dict.c#L1186-L1269). 2. During the scanning, keys could be created and deleted, so the resulting dataset can not represent a valid point in time. 
From f8f0b7d086d23f70a49be88233b19d152e99e3f4 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 29 Jun 2023 10:09:27 +0800 Subject: [PATCH 10/13] fix typo --- docs/en/engines/table-engines/integrations/redis.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/redis.md b/docs/en/engines/table-engines/integrations/redis.md index 7ef87927bfd..2697abcf30e 100644 --- a/docs/en/engines/table-engines/integrations/redis.md +++ b/docs/en/engines/table-engines/integrations/redis.md @@ -49,7 +49,7 @@ CREATE TABLE redis_table `v2` String, `v3` Float32 ) -ENGINE = Redis('redis1:6379') PRIMARY KEY(k); +ENGINE = Redis('redis1:6379') PRIMARY KEY(key); ``` Insert: From 635ab9f9af3894c42b69ef093cc34e64cefce219 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 29 Jun 2023 12:53:23 +0800 Subject: [PATCH 11/13] move redis_table to right --- docs/en/engines/table-engines/integrations/redis.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/redis.md b/docs/en/engines/table-engines/integrations/redis.md index 2697abcf30e..8086a6503b8 100644 --- a/docs/en/engines/table-engines/integrations/redis.md +++ b/docs/en/engines/table-engines/integrations/redis.md @@ -117,7 +117,7 @@ Join: Join with other tables. ``` -SELECT * FROM redis_table JOIN merge_tree_table ON redis_table.key=merge_tree_table.key; +SELECT * FROM redis_table JOIN merge_tree_table ON merge_tree_table.key=redis_table.key; ``` ## Limitations {#limitations} From 2392dda4126f00ed968f1496e6b61ab8089832f9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Jun 2023 09:38:59 +0200 Subject: [PATCH 12/13] Changelog for 23.6 --- CHANGELOG.md | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72372c8fac4..a2e7b021081 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v23.6, 2023-06-30](#236)**
 **[ClickHouse release v23.5, 2023-06-08](#235)**<br/>
 **[ClickHouse release v23.4, 2023-04-26](#234)**<br/>
 **[ClickHouse release v23.3 LTS, 2023-03-30](#233)**<br/>
@@ -8,6 +9,107 @@ # 2023 Changelog +### ClickHouse release 23.6, 2023-06-29 + +#### Backward Incompatible Change +* Delete feature `do_not_evict_index_and_mark_files` in the fs cache. This feature was only making things worse. [#51253](https://github.com/ClickHouse/ClickHouse/pull/51253) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove ALTER support for experimental LIVE VIEW. [#51287](https://github.com/ClickHouse/ClickHouse/pull/51287) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Decrease the default values for `http_max_field_value_size` and `http_max_field_name_size` to 128 KiB. [#51163](https://github.com/ClickHouse/ClickHouse/pull/51163) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CGroups metrics related to CPU are replaced with one metric, `CGroupMaxCPU` for better usability. The `Normalized` CPU usage metrics will be normalized to CGroups limits instead of the total number of CPUs when they are set. This closes [#50836](https://github.com/ClickHouse/ClickHouse/issues/50836). [#50835](https://github.com/ClickHouse/ClickHouse/pull/50835) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows to read-only interact with s3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows to interact with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)). +* The function `transform` as well as `CASE` with value matching started to support all data types. This closes [#29730](https://github.com/ClickHouse/ClickHouse/issues/29730). This closes [#32387](https://github.com/ClickHouse/ClickHouse/issues/32387). This closes [#50827](https://github.com/ClickHouse/ClickHouse/issues/50827). This closes [#31336](https://github.com/ClickHouse/ClickHouse/issues/31336). This closes [#40493](https://github.com/ClickHouse/ClickHouse/issues/40493). [#51351](https://github.com/ClickHouse/ClickHouse/pull/51351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added option `--rename_files_after_processing `. This closes [#34207](https://github.com/ClickHouse/ClickHouse/issues/34207). [#49626](https://github.com/ClickHouse/ClickHouse/pull/49626) ([alekseygolub](https://github.com/alekseygolub)). +* Add support for `APPEND` modifier in `INTO OUTFILE` clause. Suggest using `APPEND` or `TRUNCATE` for `INTO OUTFILE` when file exists. [#50950](https://github.com/ClickHouse/ClickHouse/pull/50950) ([alekar](https://github.com/alekar)). +* Add table engine `Redis` and table function `redis`. It allows querying external Redis servers. [#50150](https://github.com/ClickHouse/ClickHouse/pull/50150) ([JackyWoo](https://github.com/JackyWoo)). +* Allow to skip empty files in file/s3/url/hdfs table functions using settings `s3_skip_empty_files`, `hdfs_skip_empty_files`, `engine_file_skip_empty_files`, `engine_url_skip_empty_files`. [#50364](https://github.com/ClickHouse/ClickHouse/pull/50364) ([Kruglov Pavel](https://github.com/Avogar)). +* Add a new setting named `use_mysql_types_in_show_columns` to alter the `SHOW COLUMNS` SQL statement to display MySQL equivalent types when a client is connected via the MySQL compatibility port. 
[#49577](https://github.com/ClickHouse/ClickHouse/pull/49577) ([Thomas Panetti](https://github.com/tpanetti)). +* Clickhouse-client can now be called with a connection string instead of "--host", "--port", "--user" etc. [#50689](https://github.com/ClickHouse/ClickHouse/pull/50689) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Add setting `session_timezone`, it is used as default timezone for session when not explicitly specified. [#44149](https://github.com/ClickHouse/ClickHouse/pull/44149) ([Andrey Zvonov](https://github.com/zvonand)). +* Codec DEFLATE_QPL is now controlled via server setting "enable_deflate_qpl_codec" (default: false) instead of setting "allow_experimental_codecs". This marks DEFLATE_QPL non-experimental. [#50775](https://github.com/ClickHouse/ClickHouse/pull/50775) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Performance Improvement +* Improved scheduling of merge selecting and cleanup tasks in `ReplicatedMergeTree`. The tasks will not be executed too frequently when there's nothing to merge or cleanup. Added settings `max_merge_selecting_sleep_ms`, `merge_selecting_sleep_slowdown_factor`, `max_cleanup_delay_period` and `cleanup_thread_preferred_points_per_iteration`. It should close [#31919](https://github.com/ClickHouse/ClickHouse/issues/31919). [#50107](https://github.com/ClickHouse/ClickHouse/pull/50107) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Make filter push down through cross join. [#50605](https://github.com/ClickHouse/ClickHouse/pull/50605) ([Han Fei](https://github.com/hanfei1991)). +* Improve performance with enabled QueryProfiler using thread-local timer_id instead of global object. [#48778](https://github.com/ClickHouse/ClickHouse/pull/48778) ([Jiebin Sun](https://github.com/jiebinn)). +* Rewrite CapnProto input/output format to improve its performance. Map column names and CapnProto fields case insensitive, fix reading/writing of nested structure fields. [#49752](https://github.com/ClickHouse/ClickHouse/pull/49752) ([Kruglov Pavel](https://github.com/Avogar)). +* Optimize parquet write performance for parallel threads. [#50102](https://github.com/ClickHouse/ClickHouse/pull/50102) ([Hongbin Ma](https://github.com/binmahone)). +* Disable `parallelize_output_from_storages` for processing MATERIALIZED VIEWs and storages with one block only. [#50214](https://github.com/ClickHouse/ClickHouse/pull/50214) ([Azat Khuzhin](https://github.com/azat)). +* Merge PR [#46558](https://github.com/ClickHouse/ClickHouse/pull/46558). Avoid block permutation during sort if the block is already sorted. [#50697](https://github.com/ClickHouse/ClickHouse/pull/50697) ([Alexey Milovidov](https://github.com/alexey-milovidov), [Maksim Kita](https://github.com/kitaisreal)). +* Make multiple list requests to ZooKeeper in parallel to speed up reading from system.zookeeper table. [#51042](https://github.com/ClickHouse/ClickHouse/pull/51042) ([Alexander Gololobov](https://github.com/davenger)). +* Speedup initialization of DateTime lookup tables for time zones. This should reduce startup/connect time of clickhouse-client especially in debug build as it is rather heavy. [#51347](https://github.com/ClickHouse/ClickHouse/pull/51347) ([Alexander Gololobov](https://github.com/davenger)). +* Fix data lakes slowness because of synchronous head requests. (Related to Iceberg/Deltalake/Hudi being slow with a lot of files). [#50976](https://github.com/ClickHouse/ClickHouse/pull/50976) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Do not replicate `ALTER PARTITION` queries and mutations through `Replicated` database if it has only one shard and the underlying table is `ReplicatedMergeTree`. [#51049](https://github.com/ClickHouse/ClickHouse/pull/51049) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not read all the columns from right GLOBAL JOIN table. [#50721](https://github.com/ClickHouse/ClickHouse/pull/50721) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Experimental Feature +* Support parallel replicas with the analyzer. [#50441](https://github.com/ClickHouse/ClickHouse/pull/50441) ([Raúl Marín](https://github.com/Algunenano)). +* Add random sleep before large merges/mutations execution to split load more evenly between replicas in case of zero-copy replication. [#51282](https://github.com/ClickHouse/ClickHouse/pull/51282) ([alesapin](https://github.com/alesapin)). + +#### Improvement +* Relax the thresholds for "too many parts" to be more modern. Return the backpressure during long-running insert queries. [#50856](https://github.com/ClickHouse/ClickHouse/pull/50856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to cast IPv6 to IPv4 address for CIDR ::ffff:0:0/96 (IPv4-mapped addresses). [#49759](https://github.com/ClickHouse/ClickHouse/pull/49759) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Update MongoDB protocol to support MongoDB 5.1 version and newer. Support for the versions with the old protocol (<3.6) is preserved. Closes [#45621](https://github.com/ClickHouse/ClickHouse/issues/45621), [#49879](https://github.com/ClickHouse/ClickHouse/issues/49879). [#50061](https://github.com/ClickHouse/ClickHouse/pull/50061) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add setting `input_format_max_bytes_to_read_for_schema_inference` to limit the number of bytes to read in schema inference. Closes [#50577](https://github.com/ClickHouse/ClickHouse/issues/50577). [#50592](https://github.com/ClickHouse/ClickHouse/pull/50592) ([Kruglov Pavel](https://github.com/Avogar)). +* Respect setting `input_format_null_as_default` in schema inference. [#50602](https://github.com/ClickHouse/ClickHouse/pull/50602) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow to skip trailing empty lines in CSV/TSV/CustomSeparated formats via settings `input_format_csv_skip_trailing_empty_lines`, `input_format_tsv_skip_trailing_empty_lines` and `input_format_custom_skip_trailing_empty_lines` (disabled by default). Closes [#49315](https://github.com/ClickHouse/ClickHouse/issues/49315). [#50635](https://github.com/ClickHouse/ClickHouse/pull/50635) ([Kruglov Pavel](https://github.com/Avogar)). +* Functions "toDateOrDefault|OrNull" and "accuateCast[OrDefault|OrNull]" now correctly parse numeric arguments. [#50709](https://github.com/ClickHouse/ClickHouse/pull/50709) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Support CSV with whitespace or `\t` field delimiters, and these delimiters are supported in Spark. [#50712](https://github.com/ClickHouse/ClickHouse/pull/50712) ([KevinyhZou](https://github.com/KevinyhZou)). +* Settings `number_of_mutations_to_delay` and `number_of_mutations_to_throw` are enabled by default now with values 500 and 1000 respectively. [#50726](https://github.com/ClickHouse/ClickHouse/pull/50726) ([Anton Popov](https://github.com/CurtizJ)). +* The dashboard correctly shows missing values. This closes [#50831](https://github.com/ClickHouse/ClickHouse/issues/50831). 
[#50832](https://github.com/ClickHouse/ClickHouse/pull/50832) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added the possibility to use date and time arguments in the syslog timestamp format in functions `parseDateTimeBestEffort*` and `parseDateTime64BestEffort*`. [#50925](https://github.com/ClickHouse/ClickHouse/pull/50925) ([Victor Krasnov](https://github.com/sirvickr)). +* Command line parameter "--password" in clickhouse-client can now be specified only once. [#50966](https://github.com/ClickHouse/ClickHouse/pull/50966) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Use `hash_of_all_files` from `system.parts` to check identity of parts during on-cluster backups. [#50997](https://github.com/ClickHouse/ClickHouse/pull/50997) ([Vitaly Baranov](https://github.com/vitlibar)). +* The system table zookeeper_connection connected_time identifies the time when the connection is established (standard format), and session_uptime_elapsed_seconds is added, which labels the duration of the established connection session (in seconds). [#51026](https://github.com/ClickHouse/ClickHouse/pull/51026) ([郭小龙](https://github.com/guoxiaolongzte)). +* Improve the progress bar for file/s3/hdfs/url table functions by using chunk size from source data and using incremental total size counting in each thread. Fix the progress bar for *Cluster functions. This closes [#47250](https://github.com/ClickHouse/ClickHouse/issues/47250). [#51088](https://github.com/ClickHouse/ClickHouse/pull/51088) ([Kruglov Pavel](https://github.com/Avogar)). +* Add total_bytes_to_read to the Progress packet in TCP protocol for better Progress bar. [#51158](https://github.com/ClickHouse/ClickHouse/pull/51158) ([Kruglov Pavel](https://github.com/Avogar)). +* Better checking of data parts on disks with filesystem cache. [#51164](https://github.com/ClickHouse/ClickHouse/pull/51164) ([Anton Popov](https://github.com/CurtizJ)). +* Fix sometimes not correct current_elements_num in fs cache. [#51242](https://github.com/ClickHouse/ClickHouse/pull/51242) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Build/Testing/Packaging Improvement +* Add embedded keeper-client to standalone keeper binary. [#50964](https://github.com/ClickHouse/ClickHouse/pull/50964) ([pufit](https://github.com/pufit)). +* Actual LZ4 version is used now. [#50621](https://github.com/ClickHouse/ClickHouse/pull/50621) ([Nikita Taranov](https://github.com/nickitat)). +* ClickHouse server will print the list of changed settings on fatal errors. This closes [#51137](https://github.com/ClickHouse/ClickHouse/issues/51137). [#51138](https://github.com/ClickHouse/ClickHouse/pull/51138) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow building ClickHouse with clang-17. [#51300](https://github.com/ClickHouse/ClickHouse/pull/51300) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* [SQLancer](https://github.com/sqlancer/sqlancer) check is considered stable as bugs that were triggered by it are fixed. Now failures of SQLancer check will be reported as failed check status. [#51340](https://github.com/ClickHouse/ClickHouse/pull/51340) ([Ilya Yatsishin](https://github.com/qoega)). +* Split huge `RUN` in Dockerfile into smaller conditional. Install the necessary tools on demand in the same `RUN` layer, and remove them after that. Upgrade the OS only once at the beginning. Use a modern way to check the signed repository. Downgrade the base repo to ubuntu:20.04 to address the issues on older docker versions. 
Upgrade golang version to address golang vulnerabilities. [#51504](https://github.com/ClickHouse/ClickHouse/pull/51504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Report loading status for executable dictionaries correctly [#48775](https://github.com/ClickHouse/ClickHouse/pull/48775) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Proper mutation of skip indices and projections [#50104](https://github.com/ClickHouse/ClickHouse/pull/50104) ([Amos Bird](https://github.com/amosbird)). +* Cleanup moving parts [#50489](https://github.com/ClickHouse/ClickHouse/pull/50489) ([vdimir](https://github.com/vdimir)). +* Fix backward compatibility for IP types hashing in aggregate functions [#50551](https://github.com/ClickHouse/ClickHouse/pull/50551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix Log family table return wrong rows count after truncate [#50585](https://github.com/ClickHouse/ClickHouse/pull/50585) ([flynn](https://github.com/ucasfl)). +* Fix bug in `uniqExact` parallel merging [#50590](https://github.com/ClickHouse/ClickHouse/pull/50590) ([Nikita Taranov](https://github.com/nickitat)). +* Revert recent grace hash join changes [#50699](https://github.com/ClickHouse/ClickHouse/pull/50699) ([vdimir](https://github.com/vdimir)). +* Query Cache: Try to fix bad cast from `ColumnConst` to `ColumnVector` [#50704](https://github.com/ClickHouse/ClickHouse/pull/50704) ([Robert Schulze](https://github.com/rschu1ze)). +* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)). +* SummingMergeTree support for DateTime64 [#50797](https://github.com/ClickHouse/ClickHouse/pull/50797) ([Jordi Villar](https://github.com/jrdi)). +* Add compatibility setting for non-const timezones [#50834](https://github.com/ClickHouse/ClickHouse/pull/50834) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix hashing of LDAP params in the cache entries [#50865](https://github.com/ClickHouse/ClickHouse/pull/50865) ([Julian Maicher](https://github.com/jmaicher)). +* Fallback to parsing big integer from String instead of exception in Parquet format [#50873](https://github.com/ClickHouse/ClickHouse/pull/50873) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix checking the lock file too often while writing a backup [#50889](https://github.com/ClickHouse/ClickHouse/pull/50889) ([Vitaly Baranov](https://github.com/vitlibar)). +* Do not apply projection if read-in-order was enabled. [#50923](https://github.com/ClickHouse/ClickHouse/pull/50923) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix race in the Azure blob storage iterator [#50936](https://github.com/ClickHouse/ClickHouse/pull/50936) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix erroneous `sort_description` propagation in `CreatingSets` [#50955](https://github.com/ClickHouse/ClickHouse/pull/50955) ([Nikita Taranov](https://github.com/nickitat)). +* Fix Iceberg v2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)). +* MaterializedMySQL: Keep parentheses for empty table overrides [#50977](https://github.com/ClickHouse/ClickHouse/pull/50977) ([Val Doroshchuk](https://github.com/valbok)). 
+* Fix crash in BackupCoordinationStageSync::setError() [#51012](https://github.com/ClickHouse/ClickHouse/pull/51012) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)). +* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fix ineffective query cache for SELECTs with subqueries [#51132](https://github.com/ClickHouse/ClickHouse/pull/51132) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix Set index with constant nullable comparison. [#51205](https://github.com/ClickHouse/ClickHouse/pull/51205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a crash in s3 and s3Cluster functions [#51209](https://github.com/ClickHouse/ClickHouse/pull/51209) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix a crash with compiled expressions [#51231](https://github.com/ClickHouse/ClickHouse/pull/51231) ([LiuNeng](https://github.com/liuneng1994)). +* Fix use-after-free in StorageURL when switching URLs [#51260](https://github.com/ClickHouse/ClickHouse/pull/51260) ([Michael Kolupaev](https://github.com/al13n321)). +* Updated check for parameterized view [#51272](https://github.com/ClickHouse/ClickHouse/pull/51272) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix multiple writing of same file to backup [#51299](https://github.com/ClickHouse/ClickHouse/pull/51299) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove garbage from function `transform` [#51350](https://github.com/ClickHouse/ClickHouse/pull/51350) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + + ### ClickHouse release 23.5, 2023-06-08 #### Upgrade Notes From 5656d18690bd00b0db48c1e12e48e0446e263465 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 29 Jun 2023 13:36:55 +0000 Subject: [PATCH 13/13] Update version_date.tsv and changelogs after v23.5.4.25-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.5.4.25-stable.md | 31 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 5 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v23.5.4.25-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 367f6043b90..6d53a6f4c51 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.5.3.24" +ARG VERSION="23.5.4.25" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index e7e879fa95f..91b22346f13 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.5.3.24" +ARG VERSION="23.5.4.25" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 42ae81655d2..0ed0e4e1168 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.5.3.24" +ARG VERSION="23.5.4.25" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.5.4.25-stable.md b/docs/changelogs/v23.5.4.25-stable.md new file mode 100644 index 00000000000..53d3a7c9c0a --- /dev/null +++ b/docs/changelogs/v23.5.4.25-stable.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.5.4.25-stable (190f962abcf) FIXME as compared to v23.5.3.24-stable (76f54616d3b) + +#### Improvement +* Backported in [#51235](https://github.com/ClickHouse/ClickHouse/issues/51235): Improve the progress bar for file/s3/hdfs/url table functions by using chunk size from source data and using incremental total size counting in each thread. Fix the progress bar for *Cluster functions. This closes [#47250](https://github.com/ClickHouse/ClickHouse/issues/47250). [#51088](https://github.com/ClickHouse/ClickHouse/pull/51088) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#51255](https://github.com/ClickHouse/ClickHouse/issues/51255): Disable cache setting `do_not_evict_index_and_mark_files` (Was enabled in `23.5`). [#51222](https://github.com/ClickHouse/ClickHouse/pull/51222) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Build/Testing/Packaging Improvement +* Backported in [#51531](https://github.com/ClickHouse/ClickHouse/issues/51531): Split huge `RUN` in Dockerfile into smaller conditional. Install the necessary tools on demand in the same `RUN` layer, and remove them after that. Upgrade the OS only once at the beginning. Use a modern way to check the signed repository. Downgrade the base repo to ubuntu:20.04 to address the issues on older docker versions. Upgrade golang version to address golang vulnerabilities. [#51504](https://github.com/ClickHouse/ClickHouse/pull/51504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#51572](https://github.com/ClickHouse/ClickHouse/issues/51572): This a follow-up for [#51504](https://github.com/ClickHouse/ClickHouse/issues/51504), the cleanup was lost during refactoring. [#51564](https://github.com/ClickHouse/ClickHouse/pull/51564) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Query Cache: Try to fix bad cast from ColumnConst to ColumnVector [#50704](https://github.com/ClickHouse/ClickHouse/pull/50704) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix type of LDAP server params hash in cache entry [#50865](https://github.com/ClickHouse/ClickHouse/pull/50865) ([Julian Maicher](https://github.com/jmaicher)). +* Fallback to parsing big integer from String instead of exception in Parquet format [#50873](https://github.com/ClickHouse/ClickHouse/pull/50873) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not apply projection if read-in-order was enabled. [#50923](https://github.com/ClickHouse/ClickHouse/pull/50923) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix race azure blob storage iterator [#50936](https://github.com/ClickHouse/ClickHouse/pull/50936) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix ineffective query cache for SELECTs with subqueries [#51132](https://github.com/ClickHouse/ClickHouse/pull/51132) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix ParallelReadBuffer seek [#50820](https://github.com/ClickHouse/ClickHouse/pull/50820) ([Michael Kolupaev](https://github.com/al13n321)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 307ed97068f..5c8dd0d2481 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.5.4.25-stable 2023-06-29 v23.5.3.24-stable 2023-06-17 v23.5.2.7-stable 2023-06-10 v23.5.1.3174-stable 2023-06-09