Merge pull request #29881 from vesslanjin/optimize-column-string-filter-performance

This commit is contained in:
Vladimir C 2021-10-09 19:44:15 +03:00 committed by GitHub
commit 8ca3c7be3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 22 additions and 32 deletions

View File

@@ -248,31 +248,23 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = ~mask;
if (0 == mask)
{
/// Nothing is inserted.
data_pos += chars_per_simd_elements;
}
else if (0xFFFF == mask)
if (0xFFFF == mask)
{
res->chars.insert(data_pos, data_pos + chars_per_simd_elements);
data_pos += chars_per_simd_elements;
}
else
{
size_t res_chars_size = res->chars.size();
for (size_t i = 0; i < SIMD_BYTES; ++i)
while (mask)
{
if (filt_pos[i])
{
res->chars.resize(res_chars_size + n);
memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos, n);
res_chars_size += n;
}
data_pos += n;
size_t index = __builtin_ctz(mask);
res->chars.resize(res_chars_size + n);
memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos + index * n, n);
res_chars_size += n;
mask = mask & (mask - 1);
}
}
data_pos += chars_per_simd_elements;
filt_pos += SIMD_BYTES;
}
#endif

View File

@@ -327,19 +327,18 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = ~mask;
if (0 == mask)
{
/// Nothing is inserted.
}
else if (0xFFFF == mask)
if (0xFFFF == mask)
{
res_data.insert(data_pos, data_pos + SIMD_BYTES);
}
else
{
for (size_t i = 0; i < SIMD_BYTES; ++i)
if (filt_pos[i])
res_data.push_back(data_pos[i]);
while (mask)
{
size_t index = __builtin_ctz(mask);
res_data.push_back(data_pos[index]);
mask = mask & (mask - 1);
}
}
filt_pos += SIMD_BYTES;

View File

@@ -241,11 +241,7 @@ namespace
zero_vec));
mask = ~mask;
if (mask == 0)
{
/// SIMD_BYTES consecutive rows do not pass the filter
}
else if (mask == 0xffff)
if (mask == 0xffff)
{
/// SIMD_BYTES consecutive rows pass the filter
const auto first = offsets_pos == offsets_begin;
@@ -262,9 +258,12 @@ namespace
}
else
{
for (size_t i = 0; i < SIMD_BYTES; ++i)
if (filt_pos[i])
copy_array(offsets_pos + i);
while (mask)
{
size_t index = __builtin_ctz(mask);
copy_array(offsets_pos + index);
mask = mask & (mask - 1);
}
}
filt_pos += SIMD_BYTES;