diff --git a/dbms/src/Common/UTF8Helpers.h b/dbms/src/Common/UTF8Helpers.h index 698ff369684..0237b6f036c 100644 --- a/dbms/src/Common/UTF8Helpers.h +++ b/dbms/src/Common/UTF8Helpers.h @@ -7,10 +7,6 @@ #include #endif -#if __SSE4_1__ -#include -#endif - namespace DB { @@ -58,40 +54,22 @@ inline size_t countCodePoints(const UInt8 * data, size_t size) size_t res = 0; const auto end = data + size; -#if __SSE2__ && __SSE4_1__ +#if __SSE2__ const auto bytes_sse = sizeof(__m128i); const auto src_end_sse = (data + size) - (size % bytes_sse); - const auto one_sse = _mm_set1_epi8(1); - const auto zero_sse = _mm_setzero_si128(); - const auto align_sse = _mm_set1_epi8(0x40); const auto upper_bound = _mm_set1_epi8(0xBF); - for (; data < src_end_sse;) + for (; data < src_end_sse; data += bytes_sse) { - auto sse_res = _mm_set1_epi8(0); + const auto chars = _mm_loadu_si128(reinterpret_cast(data)); - for (int i = 0, limit = 0xFF; i < limit && data < src_end_sse; ++i, data += bytes_sse) - { - const auto chars = _mm_loadu_si128(reinterpret_cast(data)); + ///Align to zero for the solve two case + const auto align_res = _mm_adds_epu8(chars, align_sse); + const auto less_than_and_equals = _mm_cmpeq_epi8(_mm_min_epu8(align_res, upper_bound), align_res); - ///Align to zero for the solve two case - const auto align_res = _mm_adds_epu8(chars, align_sse); - const auto less_than_and_equals = _mm_cmpeq_epi8(_mm_min_epu8(align_res, upper_bound), align_res); - ///Because _mm_cmpeq_epi8 return 0xFF if true - const auto choose_res = _mm_blendv_epi8(zero_sse, one_sse, less_than_and_equals); - - sse_res = _mm_adds_epu8(sse_res, choose_res); - } - - UInt16 mem_res[8] = {0}; - auto horizontal_add = _mm_sad_epu8(sse_res, zero_sse); - _mm_store_si128(reinterpret_cast<__m128i *>(mem_res), horizontal_add); - - /// Hack,Because only bytes MSB to LSB - res += mem_res[0]; - res += mem_res[4]; + res += __builtin_popcount(_mm_movemask_epi8(less_than_and_equals)); } #endif