mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 09:02:00 +00:00
last commit
This commit is contained in:
parent
c580ae0853
commit
4f4fa79ce7
46
src/Common/memchrSmall.h
Normal file
46
src/Common/memchrSmall.h
Normal file
@ -0,0 +1,46 @@
|
||||
#pragma once
|
||||
|
||||
#include <bit>
|
||||
#include <Common/MemorySanitizer.h>
|
||||
#include <base/types.h>
|
||||
|
||||
#if defined(__SSE2__)
|
||||
# include <emmintrin.h>
|
||||
|
||||
namespace detail
|
||||
{
|
||||
inline const char * memchrSmallAllowOverflow15Impl(const char * s, int c, ssize_t n)
|
||||
{
|
||||
__msan_unpoison_overflow_15(s, n);
|
||||
|
||||
__m128i c16 = _mm_set1_epi8(c);
|
||||
while (n > 0)
|
||||
{
|
||||
__m128i block = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s));
|
||||
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(block, c16));
|
||||
if (mask)
|
||||
{
|
||||
auto offset = std::countl_zero(mask);
|
||||
return offset < n ? s + offset : nullptr;
|
||||
}
|
||||
|
||||
s += 16;
|
||||
n -= 16;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
/// Works under assumption, that it's possible to read up to 15 excessive bytes after end of 's' region
|
||||
inline const void * memchrSmallAllowOverflow15(const void * s, int c, size_t n)
|
||||
{
|
||||
return detail::memchrSmallAllowOverflow15Impl(reinterpret_cast<const char *>(s), c, n);
|
||||
}
|
||||
|
||||
#else
|
||||
/// Fallback used when SSE2 is not available: delegates directly to memchr,
/// returning a pointer to the first byte equal to `c` among the first `n` bytes of `s`, or nullptr.
inline const void * memchrSmallAllowOverflow15(const void * s, int c, size_t n)
{
    const void * found = memchr(s, c, n);
    return found;
}
|
||||
#endif
|
@ -4,6 +4,7 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/memchrSmall.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -93,7 +94,7 @@ public:
|
||||
|
||||
bool get(Pos & token_begin, Pos & token_end)
|
||||
{
|
||||
if (!pos)
|
||||
if (!pos) [[unlikely]]
|
||||
return false;
|
||||
|
||||
token_begin = pos;
|
||||
@ -114,7 +115,7 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
pos = reinterpret_cast<Pos>(memchr(pos, separator, end - pos));
|
||||
pos = reinterpret_cast<Pos>(memchrSmallAllowOverflow15(pos, separator, end - pos));
|
||||
if (pos)
|
||||
{
|
||||
token_end = pos;
|
||||
|
@ -1,4 +1,5 @@
|
||||
<test>
|
||||
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByChar(' ', materialize(s)) as w from numbers(1000000)</query>
|
||||
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp(' ', materialize(s)) as w from numbers(1000000)</query>
|
||||
<query>with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp('\s+', materialize(s)) as w from numbers(100000)</query>
|
||||
</test>
|
||||
|
Loading…
Reference in New Issue
Block a user