From a0cc54457e9e0f5d82fe01592d3c6cb17b9b3d63 Mon Sep 17 00:00:00 2001 From: Pawel Rog Date: Fri, 28 Jul 2017 16:14:07 +0200 Subject: [PATCH 01/10] Added compression NONE --- dbms/src/IO/CompressedReadBufferBase.cpp | 12 +++++++++-- dbms/src/IO/CompressedStream.h | 4 +++- dbms/src/IO/CompressedWriteBuffer.cpp | 20 +++++++++++++++++++ dbms/src/Storages/CompressionMethodSelector.h | 2 ++ dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../Storages/MergeTree/MergeTreeDataPart.cpp | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- utils/compressor/main.cpp | 4 ++++ 8 files changed, 42 insertions(+), 6 deletions(-) diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp index 77b7907e403..487c4b6d77d 100644 --- a/dbms/src/IO/CompressedReadBufferBase.cpp +++ b/dbms/src/IO/CompressedReadBufferBase.cpp @@ -2,6 +2,7 @@ #include +#include #include #ifdef USE_QUICKLZ @@ -57,7 +58,11 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, size_t & size_compressed = size_compressed_without_checksum; - if (method < 0x80) + if (method == static_cast (CompressionMethodByte::NONE)) { + size_compressed = unalignedLoad(&own_compressed_buffer[1]); + size_decompressed = unalignedLoad(&own_compressed_buffer[5]); + } + else if (method < 0x80) { #ifdef USE_QUICKLZ size_compressed = qlz_size_compressed(&own_compressed_buffer[0]); @@ -108,7 +113,10 @@ void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, s UInt8 method = compressed_buffer[0]; /// See CompressedWriteBuffer.h - if (method < 0x80) + if (method == static_cast (CompressionMethodByte::NONE)) { + memcpy(to, &compressed_buffer[COMPRESSED_BLOCK_HEADER_SIZE], size_decompressed); + } + else if (method < 0x80) { #ifdef USE_QUICKLZ if (!qlz_state) diff --git a/dbms/src/IO/CompressedStream.h b/dbms/src/IO/CompressedStream.h index 45b7a0b308c..585a6cb34ad 100644 --- a/dbms/src/IO/CompressedStream.h +++ b/dbms/src/IO/CompressedStream.h @@ -20,6 +20,7 @@ enum class CompressionMethod LZ4 = 1, LZ4HC = 2, /// The format is the same as for LZ4. The difference is only in compression. ZSTD = 3, /// Experimental algorithm: https://github.com/Cyan4973/zstd + NONE = 4, /// No compression }; /** The compressed block format is as follows: @@ -53,7 +54,8 @@ enum class CompressionMethod enum class CompressionMethodByte : uint8_t { - LZ4 = 0x82, + NONE = 0x02, + LZ4 = 0x82, ZSTD = 0x90, }; diff --git a/dbms/src/IO/CompressedWriteBuffer.cpp b/dbms/src/IO/CompressedWriteBuffer.cpp index eb00b400196..429cf825d0d 100644 --- a/dbms/src/IO/CompressedWriteBuffer.cpp +++ b/dbms/src/IO/CompressedWriteBuffer.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -121,6 +122,25 @@ void CompressedWriteBuffer::nextImpl() compressed_buffer_ptr = &compressed_buffer[0]; break; } + case CompressionMethod::NONE: + { + static constexpr size_t header_size = 1 + sizeof (UInt32) + sizeof (UInt32); + + compressed_size = header_size + uncompressed_size; + UInt32 uncompressed_size_32 = uncompressed_size; + UInt32 compressed_size_32 = compressed_size; + + compressed_buffer.resize(compressed_size); + + compressed_buffer[0] = static_cast (CompressionMethodByte::NONE); + + unalignedStore(&compressed_buffer[1], compressed_size_32); + unalignedStore(&compressed_buffer[5], uncompressed_size_32); + memcpy(&compressed_buffer[9], working_buffer.begin(), uncompressed_size); + + compressed_buffer_ptr = &compressed_buffer[0]; + break; + } default: throw Exception("Unknown compression method", ErrorCodes::UNKNOWN_COMPRESSION_METHOD); } diff --git a/dbms/src/Storages/CompressionMethodSelector.h b/dbms/src/Storages/CompressionMethodSelector.h index 5113a662b16..c762bae1513 100644 --- a/dbms/src/Storages/CompressionMethodSelector.h +++ b/dbms/src/Storages/CompressionMethodSelector.h @@ -51,6 +51,8 @@ private: method = CompressionMethod::LZ4; else if (name == "zstd") method = CompressionMethod::ZSTD; + else if (name == "none") + method = CompressionMethod::NONE; else throw Exception("Unknown compression method " + name, ErrorCodes::UNKNOWN_COMPRESSION_METHOD); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index af1b8256e7f..71001947913 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1027,7 +1027,7 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart( false, nullptr, "", false, 0, DBMS_DEFAULT_BUFFER_SIZE, false); ExpressionBlockInputStream in(part_in, expression); - MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, CompressionMethod::LZ4, false); + MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, CompressionMethod::NONE, false); in.readPrefix(); out.writePrefix(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 1baebb13c86..863cd209632 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -200,7 +200,7 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const { writeString("checksums format version: 4\n", to); - CompressedWriteBuffer out{to, CompressionMethod::LZ4, 1 << 16}; + CompressedWriteBuffer out{to, CompressionMethod::NONE, 1 << 16}; writeVarUInt(files.size(), out); for (const auto & it : files) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index c548ee560ee..f2b1b2228cc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -138,7 +138,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa } NamesAndTypesList columns = data.getColumnsList().filter(block.getColumnsList().getNames()); - MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, CompressionMethod::LZ4); + MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, CompressionMethod::NONE); out.writePrefix(); out.writeWithPermutation(block, perm_ptr); diff --git a/utils/compressor/main.cpp b/utils/compressor/main.cpp index e244561ebec..18263741d64 100644 --- a/utils/compressor/main.cpp +++ b/utils/compressor/main.cpp @@ -59,6 +59,7 @@ int main(int argc, char ** argv) ("qlz", "use QuickLZ (level 1) instead of LZ4") #endif ("zstd", "use ZSTD instead of LZ4") + ("none", "use no compression instead of LZ4") ("stat", "print block statistics of compressed data") ; @@ -85,6 +86,7 @@ int main(int argc, char ** argv) bool use_lz4hc = options.count("hc"); bool use_zstd = options.count("zstd"); bool stat_mode = options.count("stat"); + bool use_none = options.count("none"); unsigned block_size = options["block-size"].as(); DB::CompressionMethod method = DB::CompressionMethod::LZ4; @@ -95,6 +97,8 @@ int main(int argc, char ** argv) method = DB::CompressionMethod::LZ4HC; else if (use_zstd) method = DB::CompressionMethod::ZSTD; + else if (use_none) + method = DB::CompressionMethod::NONE; DB::ReadBufferFromFileDescriptor rb(STDIN_FILENO); DB::WriteBufferFromFileDescriptor wb(STDOUT_FILENO); From e1ab721d3db6fe6887e20052651dd444328e2c22 Mon Sep 17 00:00:00 2001 From: Pawel Rog Date: Mon, 31 Jul 2017 13:05:49 +0200 Subject: [PATCH 02/10] Use compression method from configs --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 5 ++++- dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 6 +++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 71001947913..8b2c006c642 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1026,8 +1026,11 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart( *this, part, DEFAULT_MERGE_BLOCK_SIZE, 0, expression->getRequiredColumns(), ranges, false, nullptr, "", false, 0, DBMS_DEFAULT_BUFFER_SIZE, false); + auto compression_method = this->context.chooseCompressionMethod( + this->getTotalActiveSizeInBytes(), + static_cast (this->getTotalCompressedSize()) / this->getTotalActiveSizeInBytes()); ExpressionBlockInputStream in(part_in, expression); - MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, CompressionMethod::NONE, false); + MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, compression_method, false); in.readPrefix(); out.writePrefix(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 863cd209632..1baebb13c86 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -200,7 +200,7 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const { writeString("checksums format version: 4\n", to); - CompressedWriteBuffer out{to, CompressionMethod::NONE, 1 << 16}; + CompressedWriteBuffer out{to, CompressionMethod::LZ4, 1 << 16}; writeVarUInt(files.size(), out); for (const auto & it : files) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index f2b1b2228cc..62b50062252 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -137,8 +137,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted); } + auto compression_method = data.context.chooseCompressionMethod( + data.getTotalActiveSizeInBytes(), + static_cast (data.getTotalCompressedSize()) / data.getTotalActiveSizeInBytes()); + NamesAndTypesList columns = data.getColumnsList().filter(block.getColumnsList().getNames()); - MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, CompressionMethod::NONE); + MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_method); out.writePrefix(); out.writeWithPermutation(block, perm_ptr); From 3d2ec763ed650ceccb75641fbb1aaca98b89db50 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 31 Jul 2017 20:40:56 +0300 Subject: [PATCH 03/10] Update CompressedReadBufferBase.cpp --- dbms/src/IO/CompressedReadBufferBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp index 346b844e0dc..33208f95175 100644 --- a/dbms/src/IO/CompressedReadBufferBase.cpp +++ b/dbms/src/IO/CompressedReadBufferBase.cpp @@ -59,7 +59,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, size_compressed = unalignedLoad(&own_compressed_buffer[1]); size_decompressed = unalignedLoad(&own_compressed_buffer[5]); } - else if (method == static_cast (CompressionMethodByte::NONE)) + else if (method == static_cast(CompressionMethodByte::NONE)) { size_compressed = unalignedLoad(&own_compressed_buffer[1]); size_decompressed = unalignedLoad(&own_compressed_buffer[5]); From 50244880ccf2028a734c26e614fcf80dda967e8b Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 31 Jul 2017 20:42:23 +0300 Subject: [PATCH 04/10] Update CompressedReadBufferBase.cpp --- dbms/src/IO/CompressedReadBufferBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp index 33208f95175..abcfc35702b 100644 --- a/dbms/src/IO/CompressedReadBufferBase.cpp +++ b/dbms/src/IO/CompressedReadBufferBase.cpp @@ -115,7 +115,7 @@ void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, s if (ZSTD_isError(res)) throw Exception("Cannot ZSTD_decompress: " + std::string(ZSTD_getErrorName(res)), ErrorCodes::CANNOT_DECOMPRESS); } - else if (method == static_cast (CompressionMethodByte::NONE)) + else if (method == static_cast(CompressionMethodByte::NONE)) { memcpy(to, &compressed_buffer[COMPRESSED_BLOCK_HEADER_SIZE], size_decompressed); } From fdb7e080bed2ddd0f1a092c37843e4cd0d1947ec Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 31 Jul 2017 20:43:33 +0300 Subject: [PATCH 05/10] Update CompressedWriteBuffer.cpp --- dbms/src/IO/CompressedWriteBuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/IO/CompressedWriteBuffer.cpp b/dbms/src/IO/CompressedWriteBuffer.cpp index 93eee532754..4575278b12c 100644 --- a/dbms/src/IO/CompressedWriteBuffer.cpp +++ b/dbms/src/IO/CompressedWriteBuffer.cpp @@ -109,7 +109,7 @@ void CompressedWriteBuffer::nextImpl() compressed_buffer.resize(compressed_size); - compressed_buffer[0] = static_cast (CompressionMethodByte::NONE); + compressed_buffer[0] = static_cast(CompressionMethodByte::NONE); unalignedStore(&compressed_buffer[1], compressed_size_32); unalignedStore(&compressed_buffer[5], uncompressed_size_32); From 4877aa394c30932114dc370759096f8bd1d85d87 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 31 Jul 2017 20:44:41 +0300 Subject: [PATCH 06/10] Update MergeTreeData.cpp --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 5dbf55236ed..0c22298dd1d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1032,7 +1032,7 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart( auto compression_method = this->context.chooseCompressionMethod( this->getTotalActiveSizeInBytes(), - static_cast (this->getTotalCompressedSize()) / this->getTotalActiveSizeInBytes()); + static_cast(this->getTotalCompressedSize()) / this->getTotalActiveSizeInBytes()); ExpressionBlockInputStream in(part_in, expression); MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, compression_method, false); in.readPrefix(); From d174ebc5c902f14634271963ba62a71de9369f54 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 31 Jul 2017 20:44:58 +0300 Subject: [PATCH 07/10] Update MergeTreeDataWriter.cpp --- dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 2ae318468c2..2b3d869967e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -148,7 +148,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa auto compression_method = data.context.chooseCompressionMethod( data.getTotalActiveSizeInBytes(), - static_cast (data.getTotalCompressedSize()) / data.getTotalActiveSizeInBytes()); + static_cast(data.getTotalCompressedSize()) / data.getTotalActiveSizeInBytes()); NamesAndTypesList columns = data.getColumnsList().filter(block.getColumnsList().getNames()); MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_method); From 9f799820d9e62c73792cf4907d0b6a7640e3d752 Mon Sep 17 00:00:00 2001 From: Pawel Rog Date: Tue, 1 Aug 2017 10:12:15 +0200 Subject: [PATCH 08/10] Applied changes requested by Alexey --- dbms/src/IO/CompressedReadBufferBase.cpp | 9 +++------ dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp index abcfc35702b..55c5b94f13d 100644 --- a/dbms/src/IO/CompressedReadBufferBase.cpp +++ b/dbms/src/IO/CompressedReadBufferBase.cpp @@ -54,12 +54,9 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, size_t & size_compressed = size_compressed_without_checksum; - if (method == static_cast(CompressionMethodByte::LZ4) || method == static_cast(CompressionMethodByte::ZSTD)) - { - size_compressed = unalignedLoad(&own_compressed_buffer[1]); - size_decompressed = unalignedLoad(&own_compressed_buffer[5]); - } - else if (method == static_cast(CompressionMethodByte::NONE)) + if (method == static_cast(CompressionMethodByte::LZ4) || + method == static_cast(CompressionMethodByte::ZSTD) || + method == static_cast(CompressionMethodByte::NONE)) { size_compressed = unalignedLoad(&own_compressed_buffer[1]); size_decompressed = unalignedLoad(&own_compressed_buffer[5]); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 0c22298dd1d..70e763c9dd3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1032,7 +1032,7 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart( auto compression_method = this->context.chooseCompressionMethod( this->getTotalActiveSizeInBytes(), - static_cast(this->getTotalCompressedSize()) / this->getTotalActiveSizeInBytes()); + static_cast(this->getTotalActiveSizeInBytes()) / this->getTotalCompressedSize()); ExpressionBlockInputStream in(part_in, expression); MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, compression_method, false); in.readPrefix(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 2b3d869967e..67de3bcf30e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -148,7 +148,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa auto compression_method = data.context.chooseCompressionMethod( data.getTotalActiveSizeInBytes(), - static_cast(data.getTotalCompressedSize()) / data.getTotalActiveSizeInBytes()); + static_cast(data.getTotalActiveSizeInBytes()) / data.getTotalCompressedSize()); NamesAndTypesList columns = data.getColumnsList().filter(block.getColumnsList().getNames()); MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_method); From d6a2056851bc89dfbf273537b58bce9b37c01a10 Mon Sep 17 00:00:00 2001 From: Pawel Rog Date: Tue, 1 Aug 2017 16:48:24 +0200 Subject: [PATCH 09/10] Applied changes requested by Alexey --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 70e763c9dd3..c9f1bac787b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1031,8 +1031,8 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart( false, nullptr, "", false, 0, DBMS_DEFAULT_BUFFER_SIZE, false); auto compression_method = this->context.chooseCompressionMethod( - this->getTotalActiveSizeInBytes(), - static_cast(this->getTotalActiveSizeInBytes()) / this->getTotalCompressedSize()); + part->size_in_bytes, + static_cast(part->size_in_bytes) / this->getTotalActiveSizeInBytes()); ExpressionBlockInputStream in(part_in, expression); MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true, compression_method, false); in.readPrefix(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp index 82076becfbe..e7b9ad90c5f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp @@ -620,8 +620,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPart merged_stream = std::make_shared(merged_stream, Limits(), 0 /*limit_hint*/, Names()); auto compression_method = data.context.chooseCompressionMethod( - merge_entry->total_size_bytes_compressed, - static_cast(merge_entry->total_size_bytes_compressed) / data.getTotalActiveSizeInBytes()); + merge_entry->total_size_bytes_compressed, + static_cast (merge_entry->total_size_bytes_compressed) / data.getTotalActiveSizeInBytes()); MergedBlockOutputStream to{ data, new_part_tmp_path, merging_columns, compression_method, merged_column_to_size, aio_threshold}; From 2ae6f1ebdb9007c8664a0bc25f04405e17fe7011 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 1 Aug 2017 23:07:16 +0300 Subject: [PATCH 10/10] Update MergeTreeDataWriter.cpp --- dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 67de3bcf30e..dff3d22f168 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -146,9 +146,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted); } - auto compression_method = data.context.chooseCompressionMethod( - data.getTotalActiveSizeInBytes(), - static_cast(data.getTotalActiveSizeInBytes()) / data.getTotalCompressedSize()); + /// This effectively chooses minimal compression method: + /// either default lz4 or compression method with zero thresholds on absolute and relative part size. + auto compression_method = data.context.chooseCompressionMethod(0, 0); NamesAndTypesList columns = data.getColumnsList().filter(block.getColumnsList().getNames()); MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_method);