mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Accurate tail handling
This commit is contained in:
parent
304c3fcc9b
commit
3519598ae9
@ -252,13 +252,13 @@ struct ValidUTF8Impl
|
||||
while (len >= 16)
|
||||
checkPacked(_mm_loadu_si128(reinterpret_cast<const __m128i *>(data)));
|
||||
|
||||
if (len)
|
||||
{
|
||||
alignas(16) char buf[32];
|
||||
_mm_store_si128(reinterpret_cast<__m128i *>(buf), _mm_loadu_si128(reinterpret_cast<const __m128i *>(data)));
|
||||
memset(reinterpret_cast<char *>(&buf) + len, 0, 16);
|
||||
checkPacked(_mm_load_si128(reinterpret_cast<__m128i *>(buf)));
|
||||
}
|
||||
/// At this point 0 <= len <= 15. Read 16 bytes starting at data - 1; this relies on the input having 15 bytes of right padding as well as left padding.
|
||||
/// Then zero the bytes that lie past the end of the data (they come from unknown memory) and check the resulting 16-byte block.
|
||||
alignas(16) char buf[32];
|
||||
_mm_store_si128(reinterpret_cast<__m128i *>(buf), _mm_loadu_si128(reinterpret_cast<const __m128i *>(data - 1)));
|
||||
memset(buf + len + 1, 0, 16);
|
||||
checkPacked(_mm_loadu_si128(reinterpret_cast<__m128i *>(buf + 1)));
|
||||
|
||||
/* Reduce error vector, error_reduced = 0xFFFF if error == 0 */
|
||||
return _mm_movemask_epi8(_mm_cmpeq_epi8(error, _mm_set1_epi8(0))) == 0xFFFF;
|
||||
}
|
||||
|
@ -1178,3 +1178,23 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
|
@ -33,7 +33,9 @@ select 0 = isValidUTF8('\xc0\x9f') from system.numbers limit 10;
|
||||
select 0 = isValidUTF8('\xf5\xff\xff\xff') from system.numbers limit 10;
|
||||
select 0 = isValidUTF8('\xed\xa0\x81') from system.numbers limit 10;
|
||||
select 0 = isValidUTF8('\xf8\x90\x80\x80\x80') from system.numbers limit 10;
|
||||
select 0 = isValidUTF8('12345678901234\xed') from system.numbers limit 10;
|
||||
select 0 = isValidUTF8('123456789012345\xed') from system.numbers limit 10;
|
||||
select 0 = isValidUTF8('123456789012345\xed123456789012345\xed') from system.numbers limit 10;
|
||||
select 0 = isValidUTF8('123456789012345\xf1') from system.numbers limit 10;
|
||||
select 0 = isValidUTF8('123456789012345\xc2') from system.numbers limit 10;
|
||||
select 0 = isValidUTF8('\xC2\x7F') from system.numbers limit 10;
|
||||
|
Loading…
Reference in New Issue
Block a user