diff --git a/docs/en/engines/table-engines/special/memory.md b/docs/en/engines/table-engines/special/memory.md index f28157ebde2..3eb3e617ff9 100644 --- a/docs/en/engines/table-engines/special/memory.md +++ b/docs/en/engines/table-engines/special/memory.md @@ -36,6 +36,8 @@ Upper and lower bounds can be specified to limit Memory engine table size, effec - Requires `max_rows_to_keep` - `max_rows_to_keep` — Maximum rows to keep within memory table where oldest rows are deleted on each insertion (i.e circular buffer). Max rows can exceed the stated limit if the oldest batch of rows to remove falls under the `min_rows_to_keep` limit when adding a large block. - Default value: `0` +- `compress` — Whether to compress data in memory. + - Default value: `false` ## Usage {#usage} diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 3f88ca93a97..013821db2c9 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -1024,10 +1024,10 @@ void ColumnArray::updatePermutationWithCollation(const Collator & collator, Perm DefaultPartialSort()); } -ColumnPtr ColumnArray::compress() const +ColumnPtr ColumnArray::compress(bool force_compression) const { - ColumnPtr data_compressed = data->compress(); - ColumnPtr offsets_compressed = offsets->compress(); + ColumnPtr data_compressed = data->compress(force_compression); + ColumnPtr offsets_compressed = offsets->compress(force_compression); size_t byte_size = data_compressed->byteSize() + offsets_compressed->byteSize(); diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index a66f9041213..dee6ae931f2 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -159,7 +159,7 @@ public: /// For example, `getDataInRange(0, size())` is the same as `getDataPtr()->clone()`.
MutableColumnPtr getDataInRange(size_t start, size_t length) const; - ColumnPtr compress() const override; + ColumnPtr compress(bool force_compression) const override; ColumnCheckpointPtr getCheckpoint() const override; void updateCheckpoint(ColumnCheckpoint & checkpoint) const override; diff --git a/src/Columns/ColumnCompressed.cpp b/src/Columns/ColumnCompressed.cpp index 3bdc514d6d8..adb2a5f391d 100644 --- a/src/Columns/ColumnCompressed.cpp +++ b/src/Columns/ColumnCompressed.cpp @@ -16,7 +16,7 @@ namespace ErrorCodes } -std::shared_ptr> ColumnCompressed::compressBuffer(const void * data, size_t data_size, bool always_compress) +std::shared_ptr> ColumnCompressed::compressBuffer(const void * data, size_t data_size, bool force_compression) { size_t max_dest_size = LZ4_COMPRESSBOUND(data_size); @@ -35,7 +35,8 @@ std::shared_ptr> ColumnCompressed::compressBuffer(const void * data, si throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column"); /// If compression is inefficient. - if (!always_compress && static_cast(compressed_size) * 2 > data_size) + const size_t threshold = force_compression ? 1 : 2; + if (static_cast(compressed_size) * threshold > data_size) return {}; /// Shrink to fit. diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index c4270e8216b..b030e762acd 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -72,7 +72,7 @@ public: /// If data is not worth to be compressed and not 'always_compress' - returns nullptr. /// Note: shared_ptr is to allow to be captured by std::function. 
- static std::shared_ptr> compressBuffer(const void * data, size_t data_size, bool always_compress); + static std::shared_ptr> compressBuffer(const void * data, size_t data_size, bool force_compression); static void decompressBuffer( const void * compressed_data, void * decompressed_data, size_t compressed_size, size_t decompressed_size); diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index 73366150e7d..c286c54198a 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -478,7 +478,7 @@ ColumnPtr ColumnDecimal::replicate(const IColumn::Offsets & offsets) const } template -ColumnPtr ColumnDecimal::compress() const +ColumnPtr ColumnDecimal::compress(bool force_compression) const { const size_t data_size = data.size(); const size_t source_size = data_size * sizeof(T); @@ -487,7 +487,7 @@ ColumnPtr ColumnDecimal::compress() const if (source_size < 4096) /// A wild guess. return ColumnCompressed::wrap(this->getPtr()); - auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size, false); + auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size, force_compression); if (!compressed) return ColumnCompressed::wrap(this->getPtr()); diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 690549e4a56..3e5c189b731 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -140,7 +140,7 @@ public: return false; } - ColumnPtr compress() const override; + ColumnPtr compress(bool force_compression) const override; void insertValue(const T value) { data.push_back(value); } Container & getData() { return data; } diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index a4c932eafdd..2d05701c57b 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -991,9 +991,9 @@ void ColumnDynamic::updatePermutation(IColumn::PermutationSortDirection directio updatePermutationImpl(limit, res, equal_ranges, 
ComparatorDescendingStable(*this, nan_direction_hint), comparator_equal, DefaultSort(), DefaultPartialSort()); } -ColumnPtr ColumnDynamic::compress() const +ColumnPtr ColumnDynamic::compress(bool force_compression) const { - ColumnPtr variant_compressed = variant_column_ptr->compress(); + ColumnPtr variant_compressed = variant_column_ptr->compress(force_compression); size_t byte_size = variant_compressed->byteSize(); return ColumnCompressed::create(size(), byte_size, [my_variant_compressed = std::move(variant_compressed), my_variant_info = variant_info, my_max_dynamic_types = max_dynamic_types, my_global_max_dynamic_types = global_max_dynamic_types, my_statistics = statistics]() mutable diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index bdbad99519f..093aaaf2793 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -335,7 +335,7 @@ public: return false; } - ColumnPtr compress() const override; + ColumnPtr compress(bool force_compression) const override; double getRatioOfDefaultRows(double sample_ratio) const override { diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 04e894ee5ab..f076f904768 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -419,7 +419,7 @@ void ColumnFixedString::getExtremes(Field & min, Field & max) const get(max_idx, max); } -ColumnPtr ColumnFixedString::compress() const +ColumnPtr ColumnFixedString::compress(bool force_compression) const { size_t source_size = chars.size(); @@ -427,7 +427,7 @@ ColumnPtr ColumnFixedString::compress() const if (source_size < 4096) /// A wild guess. 
return ColumnCompressed::wrap(this->getPtr()); - auto compressed = ColumnCompressed::compressBuffer(chars.data(), source_size, false); + auto compressed = ColumnCompressed::compressBuffer(chars.data(), source_size, force_compression); if (!compressed) return ColumnCompressed::wrap(this->getPtr()); diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 8cf0a6a57da..f55fb60a976 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -175,7 +175,7 @@ public: ColumnPtr replicate(const Offsets & offsets) const override; - ColumnPtr compress() const override; + ColumnPtr compress(bool force_compression) const override; void reserve(size_t size) override { diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index a5511dfeeb4..fb9c8c9fbaf 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -352,9 +352,9 @@ bool ColumnMap::dynamicStructureEquals(const IColumn & rhs) const return false; } -ColumnPtr ColumnMap::compress() const +ColumnPtr ColumnMap::compress(bool force_compression) const { - auto compressed = nested->compress(); + auto compressed = nested->compress(force_compression); const auto byte_size = compressed->byteSize(); /// The order of evaluation of function arguments is unspecified /// and could cause interacting with object in moved-from state diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 8dfa5bb5845..31404a3e152 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -120,7 +120,7 @@ public: const ColumnTuple & getNestedData() const { return assert_cast(getNestedColumn().getData()); } ColumnTuple & getNestedData() { return assert_cast(getNestedColumn().getData()); } - ColumnPtr compress() const override; + ColumnPtr compress(bool force_compression) const override; bool hasDynamicStructure() const override { return nested->hasDynamicStructure(); } bool dynamicStructureEquals(const IColumn & rhs) const override; diff --git 
a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 6e8bd3fc70c..640550fcf9a 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -773,10 +773,10 @@ void ColumnNullable::protect() getNullMapColumn().protect(); } -ColumnPtr ColumnNullable::compress() const +ColumnPtr ColumnNullable::compress(bool force_compression) const { - ColumnPtr nested_compressed = nested_column->compress(); - ColumnPtr null_map_compressed = null_map->compress(); + ColumnPtr nested_compressed = nested_column->compress(force_compression); + ColumnPtr null_map_compressed = null_map->compress(force_compression); size_t byte_size = nested_column->byteSize() + null_map->byteSize(); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 32ce66c5965..3a0be008cc2 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -141,7 +141,7 @@ public: // Special function for nullable minmax index void getExtremesNullLast(Field & min, Field & max) const; - ColumnPtr compress() const override; + ColumnPtr compress(bool force_compression) const override; ColumnCheckpointPtr getCheckpoint() const override; void updateCheckpoint(ColumnCheckpoint & checkpoint) const override; diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 064145c5d4f..17a90cc9b50 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -1224,14 +1224,14 @@ bool ColumnObject::structureEquals(const IColumn & rhs) const return true; } -ColumnPtr ColumnObject::compress() const +ColumnPtr ColumnObject::compress(bool force_compression) const { std::unordered_map compressed_typed_paths; compressed_typed_paths.reserve(typed_paths.size()); size_t byte_size = 0; for (const auto & [path, column] : typed_paths) { - auto compressed_column = column->compress(); + auto compressed_column = column->compress(force_compression); byte_size += compressed_column->byteSize(); compressed_typed_paths[path] = 
std::move(compressed_column); } @@ -1240,12 +1240,12 @@ ColumnPtr ColumnObject::compress() const compressed_dynamic_paths.reserve(dynamic_paths_ptrs.size()); for (const auto & [path, column] : dynamic_paths_ptrs) { - auto compressed_column = column->compress(); + auto compressed_column = column->compress(force_compression); byte_size += compressed_column->byteSize(); compressed_dynamic_paths[path] = std::move(compressed_column); } - auto compressed_shared_data = shared_data->compress(); + auto compressed_shared_data = shared_data->compress(force_compression); byte_size += compressed_shared_data->byteSize(); auto decompress = diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index 7b8a381d571..3160b66cd20 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -171,7 +171,7 @@ public: bool structureEquals(const IColumn & rhs) const override; - ColumnPtr compress() const override; + ColumnPtr compress(bool force_compression) const override; void finalize() override; bool isFinalized() const override; diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index a0e47e65fc6..b7d82ed8a09 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -774,10 +774,10 @@ UInt64 ColumnSparse::getNumberOfDefaultRows() const return _size - offsets->size(); } -ColumnPtr ColumnSparse::compress() const +ColumnPtr ColumnSparse::compress(bool force_compression) const { - auto values_compressed = values->compress(); - auto offsets_compressed = offsets->compress(); + auto values_compressed = values->compress(force_compression); + auto offsets_compressed = offsets->compress(force_compression); size_t byte_size = values_compressed->byteSize() + offsets_compressed->byteSize(); diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 619dce63c1e..f95752cd546 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -147,7 +147,7 @@ public: double getRatioOfDefaultRows(double 
sample_ratio) const override; UInt64 getNumberOfDefaultRows() const override; - ColumnPtr compress() const override; + ColumnPtr compress(bool force_compression) const override; ColumnCheckpointPtr getCheckpoint() const override; void updateCheckpoint(ColumnCheckpoint & checkpoint) const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 269c20397b4..0ed4f5f432d 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -627,7 +627,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const get(max_idx, max); } -ColumnPtr ColumnString::compress() const +ColumnPtr ColumnString::compress(bool force_compression) const { const size_t source_chars_size = chars.size(); const size_t source_offsets_elements = offsets.size(); @@ -637,13 +637,13 @@ ColumnPtr ColumnString::compress() const if (source_chars_size < 4096) /// A wild guess. return ColumnCompressed::wrap(this->getPtr()); - auto chars_compressed = ColumnCompressed::compressBuffer(chars.data(), source_chars_size, false); + auto chars_compressed = ColumnCompressed::compressBuffer(chars.data(), source_chars_size, force_compression); /// Return original column if not compressible. 
if (!chars_compressed) return ColumnCompressed::wrap(this->getPtr()); - auto offsets_compressed = ColumnCompressed::compressBuffer(offsets.data(), source_offsets_size, true); + auto offsets_compressed = ColumnCompressed::compressBuffer(offsets.data(), source_offsets_size, /*force_compression=*/true); const size_t chars_compressed_size = chars_compressed->size(); const size_t offsets_compressed_size = offsets_compressed->size(); diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index c2371412437..b2e340be61b 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -284,7 +284,7 @@ public: ColumnPtr replicate(const Offsets & replicate_offsets) const override; - ColumnPtr compress() const override; + ColumnPtr compress(bool force_compression) const override; void reserve(size_t n) override; size_t capacity() const override; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 28e5f03cc3c..9bb377f56ae 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -796,7 +796,7 @@ void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_c } -ColumnPtr ColumnTuple::compress() const +ColumnPtr ColumnTuple::compress(bool force_compression) const { if (columns.empty()) { @@ -812,7 +812,7 @@ ColumnPtr ColumnTuple::compress() const compressed.reserve(columns.size()); for (const auto & column : columns) { - auto compressed_column = column->compress(); + auto compressed_column = column->compress(force_compression); byte_size += compressed_column->byteSize(); compressed.emplace_back(std::move(compressed_column)); } diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index d5eee911edc..b8b3697b84d 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -125,7 +125,7 @@ public: void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override; bool structureEquals(const IColumn & rhs) const override; bool isCollationSupported() const 
override; - ColumnPtr compress() const override; + ColumnPtr compress(bool force_compression) const override; void finalize() override; bool isFinalized() const override; diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index 2fa59b8e33c..38d3bac3c10 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -1426,16 +1426,16 @@ bool ColumnVariant::dynamicStructureEquals(const IColumn & rhs) const return true; } -ColumnPtr ColumnVariant::compress() const +ColumnPtr ColumnVariant::compress(bool force_compression) const { - ColumnPtr local_discriminators_compressed = local_discriminators->compress(); - ColumnPtr offsets_compressed = offsets->compress(); + ColumnPtr local_discriminators_compressed = local_discriminators->compress(force_compression); + ColumnPtr offsets_compressed = offsets->compress(force_compression); size_t byte_size = local_discriminators_compressed->byteSize() + offsets_compressed->byteSize(); Columns compressed; compressed.reserve(variants.size()); for (const auto & variant : variants) { - auto compressed_variant = variant->compress(); + auto compressed_variant = variant->compress(force_compression); byte_size += compressed_variant->byteSize(); compressed.emplace_back(std::move(compressed_variant)); } diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index a68a961169c..c7e37517004 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -254,7 +254,7 @@ public: void forEachSubcolumn(MutableColumnCallback callback) override; void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override; bool structureEquals(const IColumn & rhs) const override; - ColumnPtr compress() const override; + ColumnPtr compress(bool force_compression) const override; double getRatioOfDefaultRows(double sample_ratio) const override; UInt64 getNumberOfDefaultRows() const override; void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const 
override; diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 3c7727f37c4..62f6c23c4f8 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -951,7 +951,7 @@ void ColumnVector::getExtremes(Field & min, Field & max) const } template -ColumnPtr ColumnVector::compress() const +ColumnPtr ColumnVector::compress(bool force_compression) const { const size_t data_size = data.size(); const size_t source_size = data_size * sizeof(T); @@ -960,7 +960,7 @@ ColumnPtr ColumnVector::compress() const if (source_size < 4096) /// A wild guess. return ColumnCompressed::wrap(this->getPtr()); - auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size, false); + auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size, force_compression); if (!compressed) return ColumnCompressed::wrap(this->getPtr()); diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 1387cca1ece..22b064ae053 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -286,7 +286,7 @@ public: ColumnPtr createWithOffsets(const IColumn::Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const override; - ColumnPtr compress() const override; + ColumnPtr compress(bool force_compression) const override; /// Replace elements that match the filter with zeroes. If inverted replaces not matched elements. void applyZeroMap(const IColumn::Filter & filt, bool inverted = false); diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 9d1b42d2bc1..e2099ac34b9 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -601,7 +601,8 @@ public: /// Compress column in memory to some representation that allows to decompress it back. /// Return itself if compression is not applicable for this column type. 
- [[nodiscard]] virtual Ptr compress() const + /// The flag `force_compression` requests compression even when the gain is small, i.e. whenever the compressed data is not larger than the original. + [[nodiscard]] virtual Ptr compress([[maybe_unused]] bool force_compression) const { /// No compression by default. return getPtr(); diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 02176a6b77a..0efb4596dcd 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -608,7 +608,7 @@ Block Block::compress() const size_t num_columns = data.size(); Columns new_columns(num_columns); for (size_t i = 0; i < num_columns; ++i) - new_columns[i] = data[i].column->compress(); + new_columns[i] = data[i].column->compress(/*force_compression=*/false); return cloneWithColumns(new_columns); } diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 7dbee567c5b..de3d720fc35 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -469,7 +469,7 @@ void QueryCache::Writer::finalizeWrite() Columns compressed_columns; for (const auto & column : columns) { - auto compressed_column = column->compress(); + auto compressed_column = column->compress(/*force_compression=*/false); compressed_columns.push_back(compressed_column); } Chunk compressed_chunk(compressed_columns, chunk.getNumRows()); diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index d798e1b4fb5..55574e7b1db 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -91,8 +91,7 @@ public: { Block compressed_block; for (const auto & elem : block) - compressed_block.insert({ elem.column->compress(), elem.type, elem.name }); - + compressed_block.insert({elem.column->compress(/*force_compression=*/true), elem.type, elem.name}); new_blocks.push_back(std::move(compressed_block)); } else @@ -259,7 +258,7 @@ void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context { if
((*memory_settings)[MemorySetting::compress]) for (auto & elem : block) - elem.column = elem.column->compress(); + elem.column = elem.column->compress(/*force_compression=*/true); out.push_back(block); } @@ -574,7 +573,7 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat { Block compressed_block; for (const auto & elem : block) - compressed_block.insert({ elem.column->compress(), elem.type, elem.name }); + compressed_block.insert({elem.column->compress(/*force_compression=*/true), elem.type, elem.name}); new_blocks.push_back(std::move(compressed_block)); }