dbms: finally fix out-of-bounds access in SSE lower/upper UTF8 [#METR-14764]

This commit is contained in:
Andrey Mironov 2015-06-03 19:08:32 +03:00
parent b176f53f4a
commit 1037433f3d

View File

@ -342,7 +342,6 @@ private:
const auto v_zero = _mm_setzero_si128(); const auto v_zero = _mm_setzero_si128();
const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1); const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1);
const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1); const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1);
// const auto v_not_case_range = _mm_set1_epi16((not_case_upper_bound << 8) | not_case_lower_bound);
const auto v_flip_case_mask = _mm_set1_epi8(flip_case_mask); const auto v_flip_case_mask = _mm_set1_epi8(flip_case_mask);
while (src < src_end_sse) while (src < src_end_sse)
@ -360,10 +359,6 @@ private:
_mm_cmplt_epi8(chars, v_not_case_upper_bound)); _mm_cmplt_epi8(chars, v_not_case_upper_bound));
const auto mask_is_not_case = _mm_movemask_epi8(is_not_case); const auto mask_is_not_case = _mm_movemask_epi8(is_not_case);
/// check for case
// const auto is_case_result = _mm_cmpestra(v_not_case_range, 2, chars, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES);
// if (is_case_result == 0)
/// everything in correct case ASCII /// everything in correct case ASCII
if (mask_is_not_case == 0) if (mask_is_not_case == 0)
_mm_storeu_si128(reinterpret_cast<__m128i *>(dst), chars); _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), chars);
@ -385,9 +380,9 @@ private:
else else
{ {
/// UTF-8 /// UTF-8
const auto end = src + bytes_sse; const auto expected_end = src + bytes_sse;
while (src < end) while (src < expected_end)
{ {
if (src[0] <= ascii_upper_bound) if (src[0] <= ascii_upper_bound)
{ {
@ -434,22 +429,20 @@ private:
// res_pos += 3; // res_pos += 3;
// } // }
else else
{
if (const auto chars = utf8.convert(to_case(utf8.convert(src)), dst, src_end - src)) if (const auto chars = utf8.convert(to_case(utf8.convert(src)), dst, src_end - src))
{ src += chars, dst += chars;
src += chars;
dst += chars;
}
else else
{ ++src, ++dst;
++src;
++dst;
}
}
} }
const auto diff = src - end; const auto diff = src - expected_end;
src_end_sse += diff; if (diff != 0)
{
if (src_end_sse + diff < src_end)
src_end_sse += diff;
else
src_end_sse -= bytes_sse - diff;
}
} }
} }