mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-13 01:41:59 +00:00
ISSUES-1885 use movemask & popcnt
This commit is contained in:
parent
6033096fd1
commit
eeae014a42
@ -7,10 +7,6 @@
|
|||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if __SSE4_1__
|
|
||||||
#include <smmintrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -58,40 +54,22 @@ inline size_t countCodePoints(const UInt8 * data, size_t size)
|
|||||||
size_t res = 0;
|
size_t res = 0;
|
||||||
const auto end = data + size;
|
const auto end = data + size;
|
||||||
|
|
||||||
#if __SSE2__ && __SSE4_1__
|
#if __SSE2__
|
||||||
const auto bytes_sse = sizeof(__m128i);
|
const auto bytes_sse = sizeof(__m128i);
|
||||||
const auto src_end_sse = (data + size) - (size % bytes_sse);
|
const auto src_end_sse = (data + size) - (size % bytes_sse);
|
||||||
|
|
||||||
const auto one_sse = _mm_set1_epi8(1);
|
|
||||||
const auto zero_sse = _mm_setzero_si128();
|
|
||||||
|
|
||||||
const auto align_sse = _mm_set1_epi8(0x40);
|
const auto align_sse = _mm_set1_epi8(0x40);
|
||||||
const auto upper_bound = _mm_set1_epi8(0xBF);
|
const auto upper_bound = _mm_set1_epi8(0xBF);
|
||||||
|
|
||||||
for (; data < src_end_sse;)
|
for (; data < src_end_sse; data += bytes_sse)
|
||||||
{
|
{
|
||||||
auto sse_res = _mm_set1_epi8(0);
|
const auto chars = _mm_loadu_si128(reinterpret_cast<const __m128i *>(data));
|
||||||
|
|
||||||
for (int i = 0, limit = 0xFF; i < limit && data < src_end_sse; ++i, data += bytes_sse)
|
///Align to zero for the solve two case
|
||||||
{
|
const auto align_res = _mm_adds_epu8(chars, align_sse);
|
||||||
const auto chars = _mm_loadu_si128(reinterpret_cast<const __m128i *>(data));
|
const auto less_than_and_equals = _mm_cmpeq_epi8(_mm_min_epu8(align_res, upper_bound), align_res);
|
||||||
|
|
||||||
///Align to zero for the solve two case
|
res += __builtin_popcount(_mm_movemask_epi8(less_than_and_equals));
|
||||||
const auto align_res = _mm_adds_epu8(chars, align_sse);
|
|
||||||
const auto less_than_and_equals = _mm_cmpeq_epi8(_mm_min_epu8(align_res, upper_bound), align_res);
|
|
||||||
///Because _mm_cmpeq_epi8 return 0xFF if true
|
|
||||||
const auto choose_res = _mm_blendv_epi8(zero_sse, one_sse, less_than_and_equals);
|
|
||||||
|
|
||||||
sse_res = _mm_adds_epu8(sse_res, choose_res);
|
|
||||||
}
|
|
||||||
|
|
||||||
UInt16 mem_res[8] = {0};
|
|
||||||
auto horizontal_add = _mm_sad_epu8(sse_res, zero_sse);
|
|
||||||
_mm_store_si128(reinterpret_cast<__m128i *>(mem_res), horizontal_add);
|
|
||||||
|
|
||||||
/// Hack,Because only bytes MSB to LSB
|
|
||||||
res += mem_res[0];
|
|
||||||
res += mem_res[4];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user