ISSUES-1885 use movemask & popcnt

This commit is contained in:
zhang2014 2018-02-14 08:11:12 +08:00
parent 6033096fd1
commit eeae014a42

View File

@ -7,10 +7,6 @@
#include <emmintrin.h> #include <emmintrin.h>
#endif #endif
#if __SSE4_1__
#include <smmintrin.h>
#endif
namespace DB namespace DB
{ {
@ -58,40 +54,22 @@ inline size_t countCodePoints(const UInt8 * data, size_t size)
size_t res = 0; size_t res = 0;
const auto end = data + size; const auto end = data + size;
#if __SSE2__ && __SSE4_1__ #if __SSE2__
const auto bytes_sse = sizeof(__m128i); const auto bytes_sse = sizeof(__m128i);
const auto src_end_sse = (data + size) - (size % bytes_sse); const auto src_end_sse = (data + size) - (size % bytes_sse);
const auto one_sse = _mm_set1_epi8(1);
const auto zero_sse = _mm_setzero_si128();
const auto align_sse = _mm_set1_epi8(0x40); const auto align_sse = _mm_set1_epi8(0x40);
const auto upper_bound = _mm_set1_epi8(0xBF); const auto upper_bound = _mm_set1_epi8(0xBF);
for (; data < src_end_sse;) for (; data < src_end_sse; data += bytes_sse)
{ {
auto sse_res = _mm_set1_epi8(0); const auto chars = _mm_loadu_si128(reinterpret_cast<const __m128i *>(data));
for (int i = 0, limit = 0xFF; i < limit && data < src_end_sse; ++i, data += bytes_sse) ///Align to zero for the solve two case
{ const auto align_res = _mm_adds_epu8(chars, align_sse);
const auto chars = _mm_loadu_si128(reinterpret_cast<const __m128i *>(data)); const auto less_than_and_equals = _mm_cmpeq_epi8(_mm_min_epu8(align_res, upper_bound), align_res);
///Align to zero for the solve two case res += __builtin_popcount(_mm_movemask_epi8(less_than_and_equals));
const auto align_res = _mm_adds_epu8(chars, align_sse);
const auto less_than_and_equals = _mm_cmpeq_epi8(_mm_min_epu8(align_res, upper_bound), align_res);
///Because _mm_cmpeq_epi8 return 0xFF if true
const auto choose_res = _mm_blendv_epi8(zero_sse, one_sse, less_than_and_equals);
sse_res = _mm_adds_epu8(sse_res, choose_res);
}
UInt16 mem_res[8] = {0};
auto horizontal_add = _mm_sad_epu8(sse_res, zero_sse);
_mm_store_si128(reinterpret_cast<__m128i *>(mem_res), horizontal_add);
/// Hack,Because only bytes MSB to LSB
res += mem_res[0];
res += mem_res[4];
} }
#endif #endif