diff --git a/src/Storages/MergeTree/GinIndexStore.cpp b/src/Storages/MergeTree/GinIndexStore.cpp index 091a0adf5ed..f05e8288719 100644 --- a/src/Storages/MergeTree/GinIndexStore.cpp +++ b/src/Storages/MergeTree/GinIndexStore.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -81,14 +82,22 @@ UInt64 GinIndexPostingsBuilder::serialize(WriteBuffer & buffer) { rowid_bitmap.runOptimize(); auto size = rowid_bitmap.getSizeInBytes(); + auto buf = std::make_unique(size); + rowid_bitmap.write(buf.get()); + + auto codec = CompressionCodecFactory::instance().get(GIN_COMPRESSION_CODEC, GIN_COMPRESSION_LEVEL); + Memory<> memory; + memory.resize(codec->getCompressedReserveSize(static_cast(size))); + auto compressed_size = codec->compress(buf.get(), static_cast(size), memory.data()); writeVarUInt(size, buffer); written_bytes += getLengthOfVarUInt(size); - auto buf = std::make_unique(size); - rowid_bitmap.write(buf.get()); - buffer.write(buf.get(), size); - written_bytes += size; + writeVarUInt(compressed_size, buffer); + written_bytes += getLengthOfVarUInt(compressed_size); + + buffer.write(memory.data(), compressed_size); + written_bytes += compressed_size; } else { @@ -110,11 +119,18 @@ GinIndexPostingsListPtr GinIndexPostingsBuilder::deserialize(ReadBuffer & buffer if (postings_list_size == USES_BIT_MAP) { size_t size = 0; + size_t compressed_size = 0; readVarUInt(size, buffer); - auto buf = std::make_unique(size); - buffer.readStrict(reinterpret_cast(buf.get()), size); + readVarUInt(compressed_size, buffer); + auto buf = std::make_unique(compressed_size); + buffer.readStrict(reinterpret_cast(buf.get()), compressed_size); - GinIndexPostingsListPtr postings_list = std::make_shared(GinIndexPostingsList::read(buf.get())); + Memory<> memory; + memory.resize(size); + auto codec = CompressionCodecFactory::instance().get(GIN_COMPRESSION_CODEC, GIN_COMPRESSION_LEVEL); + codec->decompress(buf.get(), static_cast(compressed_size), memory.data()); + + GinIndexPostingsListPtr postings_list = std::make_shared(GinIndexPostingsList::read(memory.data())); return postings_list; } diff --git a/src/Storages/MergeTree/GinIndexStore.h b/src/Storages/MergeTree/GinIndexStore.h index b86de546ecb..3ed624995e5 100644 --- a/src/Storages/MergeTree/GinIndexStore.h +++ b/src/Storages/MergeTree/GinIndexStore.h @@ -61,6 +61,9 @@ public: private: constexpr static int MIN_SIZE_FOR_ROARING_ENCODING = 16; + static constexpr auto GIN_COMPRESSION_CODEC = "ZSTD"; + static constexpr auto GIN_COMPRESSION_LEVEL = 1; + /// When the list length is no greater than MIN_SIZE_FOR_ROARING_ENCODING, array 'rowid_lst' is used /// As a special case, rowid_lst[0] == CONTAINS_ALL encodes that all rowids are set. std::array rowid_lst; @@ -211,7 +214,7 @@ private: v1 = 1, /// Initial version }; - static constexpr auto CURRENT_GIN_FILE_FORMAT_VERSION = Format::v0; + static constexpr auto CURRENT_GIN_FILE_FORMAT_VERSION = Format::v1; }; using GinIndexStorePtr = std::shared_ptr;