Skip empty literals in lz4 decompression (#40142)

This commit is contained in:
Nikita Taranov 2022-09-06 13:58:26 +02:00 committed by GitHub
parent f77809ddbc
commit 7c4f42d014
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 80 additions and 9 deletions

View File

@ -478,11 +478,7 @@ template <> void inline copyOverlap<32, true>(UInt8 * op, const UInt8 *& match,
/// See also https://stackoverflow.com/a/30669632
template <size_t copy_amount, bool use_shuffle>
bool NO_INLINE decompressImpl(
const char * const source,
char * const dest,
size_t source_size,
size_t dest_size)
bool NO_INLINE decompressImpl(const char * const source, char * const dest, size_t source_size, size_t dest_size)
{
const UInt8 * ip = reinterpret_cast<const UInt8 *>(source);
UInt8 * op = reinterpret_cast<UInt8 *>(dest);
@ -515,6 +511,18 @@ bool NO_INLINE decompressImpl(
const unsigned token = *ip++;
length = token >> 4;
UInt8 * copy_end;
size_t real_length;
/// The literal length is zero fairly often for well-compressed columns, so it is worth skipping the literal copy entirely.
/// At the same time, it may hurt performance in other cases because this condition is hard to predict (especially if the share of zeros is ~50%).
/// In such cases this `if` will significantly increase the number of branch mispredictions. But it seems to result in a
/// noticeable slowdown only for implementations with `copy_amount` > 8, probably because they use heavier instructions.
if constexpr (copy_amount == 8)
if (length == 0)
goto decompress_match;
if (length == 0x0F)
{
if (unlikely(ip + 1 >= input_end))
@ -524,7 +532,7 @@ bool NO_INLINE decompressImpl(
/// Copy literals.
UInt8 * copy_end = op + length;
copy_end = op + length;
/// input: Hello, world
/// ^-ip
@ -541,7 +549,7 @@ bool NO_INLINE decompressImpl(
return false;
// Due to implementation specifics the copy length is always a multiple of copy_amount
size_t real_length = 0;
real_length = 0;
static_assert(copy_amount == 8 || copy_amount == 16 || copy_amount == 32);
if constexpr (copy_amount == 8)
@ -552,9 +560,9 @@ bool NO_INLINE decompressImpl(
real_length = (((length >> 5) + 1) * 32);
if (unlikely(ip + real_length >= input_end + ADDITIONAL_BYTES_AT_END_OF_BUFFER))
return false;
return false;
wildCopy<copy_amount>(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer.
wildCopy<copy_amount>(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer.
if (copy_end == output_end)
return true;
@ -562,6 +570,8 @@ bool NO_INLINE decompressImpl(
ip += length;
op = copy_end;
decompress_match:
if (unlikely(ip + 1 >= input_end))
return false;

22
tests/performance/lz4.xml Normal file
View File

@ -0,0 +1,22 @@
<test>
<!-- Performance test that reads a single column in full from three MergeTree tables,
     each filled with 5e7 rows of differently shaped data. -->

<!-- t_lz4: UInt64 column, no sorting key (order by tuple()). -->
<create_query>create table t_lz4(a UInt64) engine=MergeTree order by tuple()</create_query>
<!-- t_lz4_norm: same schema as t_lz4, filled with different data below. -->
<create_query>create table t_lz4_norm(a UInt64) engine=MergeTree order by tuple()</create_query>
<!-- t_lz4_uncomp: UInt32 column, sorted by the column itself. -->
<create_query>create table t_lz4_uncomp(a UInt32) engine=MergeTree order by a</create_query>

<!-- t_lz4 data: only 100 distinct values (number % 100), inserted in random order. -->
<fill_query>insert into t_lz4 select number % 100 from numbers_mt(5e7) order by rand()</fill_query>
<!-- Each insert is followed by `optimize ... final`; presumably to merge the table into a single part before measuring (confirm). -->
<fill_query>optimize table t_lz4 final</fill_query>
<!-- t_lz4_norm data: the raw `number` values (no modulo), inserted in random order. -->
<fill_query>insert into t_lz4_norm select number from numbers_mt(5e7) order by rand()</fill_query>
<fill_query>optimize table t_lz4_norm final</fill_query>
<!-- t_lz4_uncomp data: the raw `number` values with no explicit shuffle.
     NOTE(review): the "uncomp" suffix suggests this is meant as the poorly compressible case; confirm. -->
<fill_query>insert into t_lz4_uncomp select number from numbers_mt(5e7)</fill_query>
<fill_query>optimize table t_lz4_uncomp final</fill_query>

<!-- Measured queries: a full scan of column `a` from each table.
     `format Null` presumably discards the result so that output formatting is not measured (confirm). -->
<query>select a from t_lz4 format Null</query>
<query>select a from t_lz4_norm format Null</query>
<query>select a from t_lz4_uncomp format Null</query>

<!-- Cleanup: drop every table created above. -->
<drop_query>drop table t_lz4</drop_query>
<drop_query>drop table t_lz4_norm</drop_query>
<drop_query>drop table t_lz4_uncomp</drop_query>
</test>

View File

@ -0,0 +1,39 @@
<test>
<!-- Performance test that reads one column at a time, in full, from the table `hits_100m_single`.
     The table is not created in this file; it is assumed to exist already in the test environment (confirm). -->
<substitutions>
<substitution>
<!-- `column` is the placeholder name used as {column} in the query below;
     presumably the query is run once for each value listed here (confirm against the test runner). -->
<name>column</name>
<values>
<value>ClientIP</value>
<value>ClientTimeZone</value>
<value>CookieEnable</value>
<value>CounterClass</value>
<value>CounterID</value>
<value>EventDate</value>
<value>EventTime</value>
<value>GoodEvent</value>
<value>HitColor</value>
<value>JavaEnable</value>
<value>OpenerName</value>
<value>PageCharset</value>
<value>ParamCurrency</value>
<value>ParamPrice</value>
<value>Referer</value>
<value>RefererCategoryID</value>
<value>RefererHash</value>
<value>RegionID</value>
<value>SearchPhrase</value>
<value>SilverlightVersion4</value>
<value>Title</value>
<value>TraficSourceID</value>
<value>URLCategoryID</value>
<value>UserAgent</value>
<value>UserAgentMinor</value>
<value>UserID</value>
<value>WatchID</value>
<value>WindowName</value>
</values>
</substitution>
</substitutions>
<!-- Full scan of the selected column; `format Null` presumably discards the result so output formatting is not measured (confirm). -->
<query>select {column} from hits_100m_single format Null</query>
</test>