mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Updated LZ4 performance testing tool #1890
This commit is contained in:
parent
0baa62b0da
commit
e3d5a2860e
@ -71,7 +71,7 @@ struct PerformanceStatistics
|
|||||||
{
|
{
|
||||||
++count;
|
++count;
|
||||||
|
|
||||||
if (count > PerformanceStatistics::NUM_INVOCATIONS_TO_THROW_OFF)
|
if (count > NUM_INVOCATIONS_TO_THROW_OFF)
|
||||||
sum += seconds / bytes;
|
sum += seconds / bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -93,6 +93,12 @@ struct PerformanceStatistics
|
|||||||
/// Cold invocations may be affected by additional memory latencies. Don't take first invocations into account.
|
/// Cold invocations may be affected by additional memory latencies. Don't take first invocations into account.
|
||||||
static constexpr double NUM_INVOCATIONS_TO_THROW_OFF = 2;
|
static constexpr double NUM_INVOCATIONS_TO_THROW_OFF = 2;
|
||||||
|
|
||||||
|
/// How to select method to run.
|
||||||
|
/// -1 - automatically, based on statistics (default);
|
||||||
|
/// 0..3 - always choose specified method (for performance testing);
|
||||||
|
/// -2 - choose methods in round robin fashion (for performance testing).
|
||||||
|
ssize_t choose_method = -1;
|
||||||
|
|
||||||
Element data[NUM_ELEMENTS];
|
Element data[NUM_ELEMENTS];
|
||||||
|
|
||||||
pcg64 rng;
|
pcg64 rng;
|
||||||
@ -101,12 +107,22 @@ struct PerformanceStatistics
|
|||||||
/// Sample random values from estimated normal distributions and choose the minimal.
|
/// Sample random values from estimated normal distributions and choose the minimal.
|
||||||
size_t select()
|
size_t select()
|
||||||
{
|
{
|
||||||
double samples[NUM_ELEMENTS];
|
if (choose_method < 0)
|
||||||
for (size_t i = 0; i < NUM_ELEMENTS; ++i)
|
{
|
||||||
samples[i] = data[i].sample(rng);
|
double samples[NUM_ELEMENTS];
|
||||||
|
for (size_t i = 0; i < NUM_ELEMENTS; ++i)
|
||||||
|
samples[i] = choose_method == -1
|
||||||
|
? data[i].sample(rng)
|
||||||
|
: data[i].adjustedCount();
|
||||||
|
|
||||||
return std::min_element(samples, samples + NUM_ELEMENTS) - samples;
|
return std::min_element(samples, samples + NUM_ELEMENTS) - samples;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return choose_method;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PerformanceStatistics() {}
|
||||||
|
PerformanceStatistics(ssize_t choose_method) : choose_method(choose_method) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -42,6 +42,11 @@ protected:
|
|||||||
/// Points to memory, holding compressed block.
|
/// Points to memory, holding compressed block.
|
||||||
char * compressed_buffer = nullptr;
|
char * compressed_buffer = nullptr;
|
||||||
|
|
||||||
|
ssize_t variant;
|
||||||
|
|
||||||
|
/// Variant for reference implementation of LZ4.
|
||||||
|
static constexpr ssize_t LZ4_REFERENCE = -3;
|
||||||
|
|
||||||
LZ4::StreamStatistics stream_stat;
|
LZ4::StreamStatistics stream_stat;
|
||||||
LZ4::PerformanceStatistics perf_stat;
|
LZ4::PerformanceStatistics perf_stat;
|
||||||
|
|
||||||
@ -83,7 +88,7 @@ protected:
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
own_compressed_buffer.resize(size_compressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER);
|
/// Size the buffer to hold the whole compressed block, plus LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER
/// unless the reference LZ4 variant is selected (in which case no extra bytes are added).
/// The conditional must be parenthesized: '+' and '==' bind tighter than '?:', so without the
/// parentheses the expression is ((size_compressed + variant) == LZ4_REFERENCE) ? 0 : ADDITIONAL_BYTES,
/// and the buffer ends up far smaller than the size_compressed bytes read into it below.
own_compressed_buffer.resize(size_compressed + (variant == LZ4_REFERENCE ? 0 : LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER));
|
||||||
compressed_buffer = &own_compressed_buffer[0];
|
compressed_buffer = &own_compressed_buffer[0];
|
||||||
compressed_in->readStrict(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, size_compressed - COMPRESSED_BLOCK_HEADER_SIZE);
|
compressed_in->readStrict(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, size_compressed - COMPRESSED_BLOCK_HEADER_SIZE);
|
||||||
}
|
}
|
||||||
@ -98,7 +103,14 @@ protected:
|
|||||||
if (method == static_cast<UInt8>(CompressionMethodByte::LZ4))
|
if (method == static_cast<UInt8>(CompressionMethodByte::LZ4))
|
||||||
{
|
{
|
||||||
//LZ4::statistics(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed, stat);
|
//LZ4::statistics(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed, stat);
|
||||||
LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed, perf_stat);
|
|
||||||
|
if (variant == LZ4_REFERENCE)
|
||||||
|
{
|
||||||
|
if (LZ4_decompress_fast(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed) < 0)
|
||||||
|
throw Exception("Cannot LZ4_decompress_fast", ErrorCodes::CANNOT_DECOMPRESS);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed, perf_stat);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
||||||
@ -106,8 +118,8 @@ protected:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
/// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'.
|
/// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'.
|
||||||
FasterCompressedReadBufferBase(ReadBuffer * in = nullptr)
|
FasterCompressedReadBufferBase(ReadBuffer * in, ssize_t variant)
|
||||||
: compressed_in(in), own_compressed_buffer(COMPRESSED_BLOCK_HEADER_SIZE)
|
: compressed_in(in), own_compressed_buffer(COMPRESSED_BLOCK_HEADER_SIZE), variant(variant), perf_stat(variant)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -138,8 +150,8 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
FasterCompressedReadBuffer(ReadBuffer & in_)
|
FasterCompressedReadBuffer(ReadBuffer & in_, ssize_t method)
|
||||||
: FasterCompressedReadBufferBase(&in_), BufferWithOwnMemory<ReadBuffer>(0)
|
: FasterCompressedReadBufferBase(&in_, method), BufferWithOwnMemory<ReadBuffer>(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -147,21 +159,28 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int main(int, char **)
|
int main(int argc, char ** argv)
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
using namespace DB;
|
using namespace DB;
|
||||||
|
|
||||||
|
/** -3 - use reference implementation of LZ4
|
||||||
|
* -2 - run all algorithms in round robin fashion
|
||||||
|
* -1 - automatically detect best algorithm based on statistics
|
||||||
|
* 0..3 - run specified algorithm
|
||||||
|
*/
|
||||||
|
ssize_t variant = argc < 2 ? -1 : parse<ssize_t>(argv[1]);
|
||||||
|
|
||||||
ReadBufferFromFileDescriptor in(STDIN_FILENO);
|
ReadBufferFromFileDescriptor in(STDIN_FILENO);
|
||||||
FasterCompressedReadBuffer decompressing_in(in);
|
FasterCompressedReadBuffer decompressing_in(in, variant);
|
||||||
WriteBufferFromFileDescriptor out(STDOUT_FILENO);
|
WriteBufferFromFileDescriptor out(STDOUT_FILENO);
|
||||||
HashingWriteBuffer hashing_out(out);
|
// HashingWriteBuffer hashing_out(out);
|
||||||
|
|
||||||
Stopwatch watch;
|
Stopwatch watch;
|
||||||
copyData(decompressing_in, hashing_out);
|
copyData(decompressing_in, /*hashing_*/out);
|
||||||
watch.stop();
|
watch.stop();
|
||||||
|
|
||||||
auto hash = hashing_out.getHash();
|
// auto hash = hashing_out.getHash();
|
||||||
|
|
||||||
double seconds = watch.elapsedSeconds();
|
double seconds = watch.elapsedSeconds();
|
||||||
std::cerr << std::fixed << std::setprecision(3)
|
std::cerr << std::fixed << std::setprecision(3)
|
||||||
@ -171,7 +190,7 @@ try
|
|||||||
<< ", ratio: " << static_cast<double>(decompressing_in.count()) / in.count()
|
<< ", ratio: " << static_cast<double>(decompressing_in.count()) / in.count()
|
||||||
<< ", " << formatReadableSizeWithBinarySuffix(in.count() / seconds) << "/sec. compressed"
|
<< ", " << formatReadableSizeWithBinarySuffix(in.count() / seconds) << "/sec. compressed"
|
||||||
<< ", " << formatReadableSizeWithBinarySuffix(decompressing_in.count() / seconds) << "/sec. decompressed"
|
<< ", " << formatReadableSizeWithBinarySuffix(decompressing_in.count() / seconds) << "/sec. decompressed"
|
||||||
<< ", checksum: " << hash.first << "_" << hash.second
|
// << ", checksum: " << hash.first << "_" << hash.second
|
||||||
<< "\n";
|
<< "\n";
|
||||||
|
|
||||||
// decompressing_in.getStatistics().print();
|
// decompressing_in.getStatistics().print();
|
||||||
|
Loading…
Reference in New Issue
Block a user