From 0f20952f2ce98329ed64c567484272905bcfc6d9 Mon Sep 17 00:00:00 2001
From: zhang2014 <coswde@gmail.com>
Date: Sat, 10 Feb 2018 17:21:54 +0800
Subject: [PATCH] ISSUES-1885 UTF8 countCodePoints use simd

---
 dbms/src/Common/UTF8Helpers.h | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)
diff --git a/dbms/src/Common/UTF8Helpers.h b/dbms/src/Common/UTF8Helpers.h
index 1ce31426e85..ba1685c89f3 100644
--- a/dbms/src/Common/UTF8Helpers.h
+++ b/dbms/src/Common/UTF8Helpers.h
@@ -3,6 +3,9 @@
 #include <Core/Types.h>
 #include <Common/BitHelpers.h>
 
+#if __SSE2__
+#include <emmintrin.h>
+#endif
 
 namespace DB
 {
@@ -49,9 +52,37 @@ inline size_t seqLength(const UInt8 first_octet)
 inline size_t countCodePoints(const UInt8 * data, size_t size)
 {
     size_t res = 0;
+    const auto end = data + size;
 
-    /// TODO SIMD implementation looks quite simple.
-    for (auto end = data + size; data < end; ++data) /// Skip UTF-8 continuation bytes.
+#if __SSE2__
+    constexpr auto bytes_sse = sizeof(__m128i);
+    const auto src_end_sse = end - (size % bytes_sse);
+
+    /// Continuation bytes are 0x80..0xBF, i.e. -128..-65 as signed chars, so a single
+    /// signed "greater than 0xBF" comparison selects both ASCII and leading bytes.
+    const auto threshold = _mm_set1_epi8(static_cast<char>(0xBF));
+
+    while (data < src_end_sse)
+    {
+        /// Per-lane 8-bit counters; at most 255 iterations per batch so they cannot overflow.
+        auto sse_res = _mm_setzero_si128();
+        for (int i = 0; i < 0xFF && data < src_end_sse; ++i, data += bytes_sse)
+        {
+            const auto chars = _mm_loadu_si128(reinterpret_cast<const __m128i *>(data));
+            /// The comparison yields 0xFF (-1) per matching lane: subtract it to count up by one.
+            sse_res = _mm_sub_epi8(sse_res, _mm_cmpgt_epi8(chars, threshold));
+        }
+
+        /// Unaligned store: a plain UInt8 array is not guaranteed to be 16-byte aligned.
+        UInt8 mem_res[bytes_sse];
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(mem_res), sse_res);
+
+        for (const auto count : mem_res)
+            res += count;
+    }
+#endif
+
+    for (; data < end; ++data) /// Skip UTF-8 continuation bytes.
         res += (*data <= 0x7F || *data >= 0xC0);
 
     return res;