mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-12 01:12:12 +00:00
ISSUES-1885 use movemask & popcnt
This commit is contained in:
parent
6033096fd1
commit
eeae014a42
@ -7,10 +7,6 @@
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#if __SSE4_1__
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -58,40 +54,22 @@ inline size_t countCodePoints(const UInt8 * data, size_t size)
|
||||
size_t res = 0;
|
||||
const auto end = data + size;
|
||||
|
||||
#if __SSE2__ && __SSE4_1__
|
||||
#if __SSE2__
|
||||
const auto bytes_sse = sizeof(__m128i);
|
||||
const auto src_end_sse = (data + size) - (size % bytes_sse);
|
||||
|
||||
const auto one_sse = _mm_set1_epi8(1);
|
||||
const auto zero_sse = _mm_setzero_si128();
|
||||
|
||||
const auto align_sse = _mm_set1_epi8(0x40);
|
||||
const auto upper_bound = _mm_set1_epi8(0xBF);
|
||||
|
||||
for (; data < src_end_sse;)
|
||||
for (; data < src_end_sse; data += bytes_sse)
|
||||
{
|
||||
auto sse_res = _mm_set1_epi8(0);
|
||||
const auto chars = _mm_loadu_si128(reinterpret_cast<const __m128i *>(data));
|
||||
|
||||
for (int i = 0, limit = 0xFF; i < limit && data < src_end_sse; ++i, data += bytes_sse)
|
||||
{
|
||||
const auto chars = _mm_loadu_si128(reinterpret_cast<const __m128i *>(data));
|
||||
///Align to zero for the solve two case
|
||||
const auto align_res = _mm_adds_epu8(chars, align_sse);
|
||||
const auto less_than_and_equals = _mm_cmpeq_epi8(_mm_min_epu8(align_res, upper_bound), align_res);
|
||||
|
||||
///Align to zero for the solve two case
|
||||
const auto align_res = _mm_adds_epu8(chars, align_sse);
|
||||
const auto less_than_and_equals = _mm_cmpeq_epi8(_mm_min_epu8(align_res, upper_bound), align_res);
|
||||
///Because _mm_cmpeq_epi8 return 0xFF if true
|
||||
const auto choose_res = _mm_blendv_epi8(zero_sse, one_sse, less_than_and_equals);
|
||||
|
||||
sse_res = _mm_adds_epu8(sse_res, choose_res);
|
||||
}
|
||||
|
||||
UInt16 mem_res[8] = {0};
|
||||
auto horizontal_add = _mm_sad_epu8(sse_res, zero_sse);
|
||||
_mm_store_si128(reinterpret_cast<__m128i *>(mem_res), horizontal_add);
|
||||
|
||||
/// Hack,Because only bytes MSB to LSB
|
||||
res += mem_res[0];
|
||||
res += mem_res[4];
|
||||
res += __builtin_popcount(_mm_movemask_epi8(less_than_and_equals));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user