mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-18 12:22:12 +00:00
fix
This commit is contained in:
parent
f2c328aa7a
commit
82da99cfd3
@ -70,7 +70,7 @@ public:
|
||||
|
||||
/// Helper methods for compression.
|
||||
|
||||
/// If data is not worth to be compressed and not 'always_compress' - returns nullptr.
|
||||
/// If data is not worth to be compressed and not `force_compression` - returns nullptr.
|
||||
/// Note: shared_ptr is to allow to be captured by std::function.
|
||||
static std::shared_ptr<Memory<>> compressBuffer(const void * data, size_t data_size, bool force_compression);
|
||||
|
||||
|
@ -635,26 +635,39 @@ ColumnPtr ColumnString::compress(bool force_compression) const
|
||||
const size_t source_offsets_size = source_offsets_elements * sizeof(Offset);
|
||||
|
||||
/// Don't compress small blocks.
|
||||
if (source_chars_size < 4096) /// A wild guess.
|
||||
if (source_chars_size < min_size_to_compress)
|
||||
{
|
||||
return ColumnCompressed::wrap(this->getPtr());
|
||||
}
|
||||
|
||||
auto chars_compressed = ColumnCompressed::compressBuffer(chars.data(), source_chars_size, force_compression);
|
||||
|
||||
/// Return original column if not compressible.
|
||||
if (!chars_compressed)
|
||||
{
|
||||
return ColumnCompressed::wrap(this->getPtr());
|
||||
}
|
||||
|
||||
auto offsets_compressed = ColumnCompressed::compressBuffer(offsets.data(), source_offsets_size, /*force_compression=*/true);
|
||||
const bool offsets_were_compressed = !!offsets_compressed;
|
||||
|
||||
/// Offsets are not compressible. Use the source data.
|
||||
if (!offsets_compressed)
|
||||
{
|
||||
offsets_compressed = std::make_shared<Memory<>>(source_offsets_size);
|
||||
memcpy(offsets_compressed->data(), offsets.data(), source_offsets_size);
|
||||
}
|
||||
|
||||
const size_t chars_compressed_size = chars_compressed->size();
|
||||
const size_t offsets_compressed_size = offsets_compressed->size();
|
||||
return ColumnCompressed::create(source_offsets_elements, chars_compressed_size + offsets_compressed_size,
|
||||
[
|
||||
my_chars_compressed = std::move(chars_compressed),
|
||||
my_offsets_compressed = std::move(offsets_compressed),
|
||||
source_chars_size,
|
||||
source_offsets_elements
|
||||
]
|
||||
return ColumnCompressed::create(
|
||||
source_offsets_elements,
|
||||
chars_compressed_size + offsets_compressed_size,
|
||||
[my_chars_compressed = std::move(chars_compressed),
|
||||
my_offsets_compressed = std::move(offsets_compressed),
|
||||
source_chars_size,
|
||||
source_offsets_elements,
|
||||
offsets_were_compressed]
|
||||
{
|
||||
auto res = ColumnString::create();
|
||||
|
||||
@ -664,8 +677,18 @@ ColumnPtr ColumnString::compress(bool force_compression) const
|
||||
ColumnCompressed::decompressBuffer(
|
||||
my_chars_compressed->data(), res->getChars().data(), my_chars_compressed->size(), source_chars_size);
|
||||
|
||||
ColumnCompressed::decompressBuffer(
|
||||
my_offsets_compressed->data(), res->getOffsets().data(), my_offsets_compressed->size(), source_offsets_elements * sizeof(Offset));
|
||||
if (offsets_were_compressed)
|
||||
{
|
||||
ColumnCompressed::decompressBuffer(
|
||||
my_offsets_compressed->data(),
|
||||
res->getOffsets().data(),
|
||||
my_offsets_compressed->size(),
|
||||
source_offsets_elements * sizeof(Offset));
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(res->getOffsets().data(), my_offsets_compressed->data(), my_offsets_compressed->size());
|
||||
}
|
||||
|
||||
return res;
|
||||
});
|
||||
|
@ -29,6 +29,8 @@ public:
|
||||
using Char = UInt8;
|
||||
using Chars = PaddedPODArray<UInt8>;
|
||||
|
||||
static constexpr size_t min_size_to_compress = 4096;
|
||||
|
||||
private:
|
||||
friend class COWHelper<IColumnHelper<ColumnString>, ColumnString>;
|
||||
|
||||
|
71
src/Columns/tests/gtest_column_string.cpp
Normal file
71
src/Columns/tests/gtest_column_string.cpp
Normal file
@ -0,0 +1,71 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Columns/ColumnString.h>
|
||||
|
||||
#include <Common/randomSeed.h>
|
||||
#include <Common/thread_local_rng.h>
|
||||
|
||||
using namespace DB;
|
||||
|
||||
static pcg64 rng(randomSeed());
|
||||
|
||||
constexpr size_t bytes_per_string = sizeof(size_t) + 1;
|
||||
/// Column should have enough bytes to be compressed
|
||||
constexpr size_t column_size = ColumnString::min_size_to_compress / bytes_per_string + 42;
|
||||
|
||||
TEST(ColumnString, Incompressible)
|
||||
{
|
||||
auto col = ColumnString::create();
|
||||
auto & chars = col->getChars();
|
||||
auto & offsets = col->getOffsets();
|
||||
chars.resize(column_size * bytes_per_string);
|
||||
for (size_t i = 0; i < column_size; ++i)
|
||||
{
|
||||
auto value = rng();
|
||||
memcpy(&chars[i * bytes_per_string], &value, sizeof(size_t));
|
||||
chars[i * bytes_per_string + sizeof(size_t)] = '\0';
|
||||
offsets.push_back((i + 1) * bytes_per_string);
|
||||
}
|
||||
|
||||
auto compressed = col->compress(true);
|
||||
auto decompressed = compressed->decompress();
|
||||
ASSERT_EQ(decompressed.get(), col.get());
|
||||
}
|
||||
|
||||
TEST(ColumnString, CompressibleCharsAndIncompressibleOffsets)
|
||||
{
|
||||
auto col = ColumnString::create();
|
||||
auto & chars = col->getChars();
|
||||
auto & offsets = col->getOffsets();
|
||||
chars.resize(column_size * bytes_per_string);
|
||||
for (size_t i = 0; i < column_size; ++i)
|
||||
{
|
||||
static const size_t value = 42;
|
||||
memcpy(&chars[i * bytes_per_string], &value, sizeof(size_t));
|
||||
chars[i * bytes_per_string + sizeof(size_t)] = '\0';
|
||||
}
|
||||
offsets.push_back(chars.size());
|
||||
|
||||
auto compressed = col->compress(true);
|
||||
auto decompressed = compressed->decompress();
|
||||
ASSERT_NE(decompressed.get(), col.get());
|
||||
}
|
||||
|
||||
TEST(ColumnString, CompressibleCharsAndCompressibleOffsets)
|
||||
{
|
||||
auto col = ColumnString::create();
|
||||
auto & chars = col->getChars();
|
||||
auto & offsets = col->getOffsets();
|
||||
chars.resize(column_size * bytes_per_string);
|
||||
for (size_t i = 0; i < column_size; ++i)
|
||||
{
|
||||
static const size_t value = 42;
|
||||
memcpy(&chars[i * bytes_per_string], &value, sizeof(size_t));
|
||||
chars[i * bytes_per_string + sizeof(size_t)] = '\0';
|
||||
offsets.push_back((i + 1) * bytes_per_string);
|
||||
}
|
||||
|
||||
auto compressed = col->compress(true);
|
||||
auto decompressed = compressed->decompress();
|
||||
ASSERT_NE(decompressed.get(), col.get());
|
||||
}
|
Loading…
Reference in New Issue
Block a user