last commit

This commit is contained in:
taiyang-li 2024-04-10 16:19:34 +08:00
parent c580ae0853
commit 4f4fa79ce7
3 changed files with 50 additions and 2 deletions

46
src/Common/memchrSmall.h Normal file
View File

@ -0,0 +1,46 @@
#pragma once
#include <bit>
#include <cstring>
#include <Common/MemorySanitizer.h>
#include <base/types.h>
#if defined(__SSE2__)
# include <emmintrin.h>
namespace detail
{
inline const char * memchrSmallAllowOverflow15Impl(const char * s, int c, ssize_t n)
{
__msan_unpoison_overflow_15(s, n);
__m128i c16 = _mm_set1_epi8(c);
while (n > 0)
{
__m128i block = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s));
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(block, c16));
if (mask)
{
auto offset = std::countl_zero(mask);
return offset < n ? s + offset : nullptr;
}
s += 16;
n -= 16;
}
return nullptr;
}
}
/// Searches the first `n` bytes at `s` for the byte `c` using the SSE2 implementation.
/// Precondition: it must be possible to read up to 15 excessive bytes after the end of the `s` region.
inline const void * memchrSmallAllowOverflow15(const void * s, int c, size_t n)
{
    const auto * bytes = reinterpret_cast<const char *>(s);
    const char * found = detail::memchrSmallAllowOverflow15Impl(bytes, c, n);
    return found;
}
#else
/// Portable fallback for targets without SSE2: simply defers to std::memchr.
///
/// @param s  start of the region to search
/// @param c  byte value to look for
/// @param n  number of bytes to examine
/// @return pointer to the first matching byte, or nullptr if there is none
inline const void * memchrSmallAllowOverflow15(const void * s, int c, size_t n)
{
    return std::memchr(s, c, n);
}
#endif

View File

@ -4,6 +4,7 @@
#include <Functions/FunctionFactory.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
#include <Common/memchrSmall.h>
namespace DB
@ -93,7 +94,7 @@ public:
bool get(Pos & token_begin, Pos & token_end)
{
if (!pos)
if (!pos) [[unlikely]]
return false;
token_begin = pos;
@ -114,7 +115,7 @@ public:
return false;
}
pos = reinterpret_cast<Pos>(memchr(pos, separator, end - pos));
pos = reinterpret_cast<Pos>(memchrSmallAllowOverflow15(pos, separator, end - pos));
if (pos)
{
token_end = pos;

View File

@ -1,4 +1,5 @@
<test>
<!-- Performance queries: split one fixed sentence on a single space with splitByChar and with splitByRegexp, and on the regexp '\s+', applied to materialize(s) for every row of numbers(N). -->
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByChar(' ', materialize(s)) as w from numbers(1000000)</query>
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp(' ', materialize(s)) as w from numbers(1000000)</query>
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp('\s+', materialize(s)) as w from numbers(100000)</query>
</test>