mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
Merge pull request #10363 from ClickHouse/fixed-string-compare-as-zero-padded
Compare with fixed string of different size as if strings are zero padded
This commit is contained in:
commit
587e16e23b
@ -64,6 +64,80 @@ inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char
|
||||
}
|
||||
|
||||
|
||||
/** Variant when memory regions may have different sizes.
|
||||
* But compare the regions as the smaller one is padded with zero bytes up to the size of the larger.
|
||||
* It's needed to hold that: toFixedString('abc', 5) = 'abc'
|
||||
* for compatibility with SQL standard.
|
||||
*/
|
||||
template <typename Char>
|
||||
inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size)
|
||||
{
|
||||
size_t min_size = std::min(a_size, b_size);
|
||||
|
||||
for (size_t offset = 0; offset < min_size; offset += 16)
|
||||
{
|
||||
uint16_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(a + offset)),
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset))));
|
||||
mask = ~mask;
|
||||
|
||||
if (mask)
|
||||
{
|
||||
offset += __builtin_ctz(mask);
|
||||
|
||||
if (offset >= min_size)
|
||||
break;
|
||||
|
||||
return detail::cmp(a[offset], b[offset]);
|
||||
}
|
||||
}
|
||||
|
||||
/// The strings are equal up to min_size.
|
||||
/// If the rest of the larger string is zero bytes then the strings are considered equal.
|
||||
|
||||
size_t max_size;
|
||||
const Char * longest;
|
||||
int cmp;
|
||||
|
||||
if (a_size == b_size)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
else if (a_size > b_size)
|
||||
{
|
||||
max_size = a_size;
|
||||
longest = a;
|
||||
cmp = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
max_size = b_size;
|
||||
longest = b;
|
||||
cmp = -1;
|
||||
}
|
||||
|
||||
const __m128i zero16 = _mm_setzero_si128();
|
||||
|
||||
for (size_t offset = min_size; offset < max_size; offset += 16)
|
||||
{
|
||||
uint16_t mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(longest + offset)),
|
||||
zero16));
|
||||
|
||||
if (mask)
|
||||
{
|
||||
offset += __builtin_ctz(mask);
|
||||
|
||||
if (offset >= max_size)
|
||||
return 0;
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/** Variant when memory regions have same size.
|
||||
* TODO Check if the compiler can optimize the previous function when the caller passes identical sizes.
|
||||
*/
|
||||
@ -206,6 +280,46 @@ inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char
|
||||
return detail::cmp(a_size, b_size);
|
||||
}
|
||||
|
||||
/** Portable fallback: compare two memory regions as if the shorter one
  * were padded with zero bytes up to the size of the longer one.
  *
  * Returns the memcmp result for the common prefix if it is non-zero;
  * otherwise 1 (a is longer) or -1 (b is longer) if the remaining tail
  * of the longer region contains a non-zero element; otherwise 0.
  */
template <typename Char>
inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size)
{
    const bool a_is_longer = a_size > b_size;
    const size_t common_size = a_is_longer ? b_size : a_size;

    /// A difference inside the common prefix decides the result right away.
    const int prefix_res = memcmp(a, b, common_size);
    if (prefix_res != 0)
        return prefix_res;

    /// Identical sizes and identical contents: nothing left to inspect.
    if (a_size == b_size)
        return 0;

    const size_t longer_size = a_is_longer ? a_size : b_size;
    const Char * const longer = a_is_longer ? a : b;
    const int verdict = a_is_longer ? 1 : -1;

    /// The longer region is greater only if its tail is not entirely zero.
    for (const Char * pos = longer + common_size; pos != longer + longer_size; ++pos)
        if (*pos != 0)
            return verdict;

    return 0;
}
|
||||
|
||||
template <typename Char>
|
||||
inline int memcmpSmallAllowOverflow15(const Char * a, const Char * b, size_t size)
|
||||
{
|
||||
@ -248,3 +362,13 @@ inline bool memoryIsZeroSmallAllowOverflow15(const void * data, size_t size)
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/** Check two memory regions for equality.
  * If the sizes differ, the regions are compared as if the shorter one
  * were padded with zero bytes up to the size of the longer one.
  * Returns true exactly when memcmpSmallLikeZeroPaddedAllowOverflow15 reports 0 for the same arguments.
  */
template <typename Char>
inline bool memequalSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size)
{
    const int order = memcmpSmallLikeZeroPaddedAllowOverflow15(a, a_size, b, b_size);
    return order == 0;
}
|
||||
|
@ -160,7 +160,7 @@ struct StringComparisonImpl
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
c[i] = Op::apply(memcmpSmallAllowOverflow15(
|
||||
c[i] = Op::apply(memcmpSmallLikeZeroPaddedAllowOverflow15(
|
||||
a_data.data() + prev_a_offset, a_offsets[i] - prev_a_offset - 1,
|
||||
b_data.data() + i * b_n, b_n), 0);
|
||||
|
||||
@ -168,7 +168,7 @@ struct StringComparisonImpl
|
||||
}
|
||||
}
|
||||
|
||||
static void NO_INLINE string_vectorConstant(
|
||||
static void NO_INLINE string_vector_constant(
|
||||
const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets,
|
||||
const ColumnString::Chars & b_data, ColumnString::Offset b_size,
|
||||
PaddedPODArray<UInt8> & c)
|
||||
@ -239,11 +239,11 @@ struct StringComparisonImpl
|
||||
size_t size = a_data.size() / a_n;
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
c[i] = Op::apply(memcmpSmallAllowOverflow15(a_data.data() + i * a_n, a_n, b_data.data() + i * b_n, b_n), 0);
|
||||
c[i] = Op::apply(memcmpSmallLikeZeroPaddedAllowOverflow15(a_data.data() + i * a_n, a_n, b_data.data() + i * b_n, b_n), 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void NO_INLINE fixed_string_vectorConstant(
|
||||
static void NO_INLINE fixed_string_vector_constant(
|
||||
const ColumnString::Chars & a_data, ColumnString::Offset a_n,
|
||||
const ColumnString::Chars & b_data, ColumnString::Offset b_size,
|
||||
PaddedPODArray<UInt8> & c)
|
||||
@ -262,7 +262,7 @@ struct StringComparisonImpl
|
||||
{
|
||||
size_t size = a_data.size();
|
||||
for (size_t i = 0, j = 0; i < size; i += a_n, ++j)
|
||||
c[j] = Op::apply(memcmpSmallAllowOverflow15(a_data.data() + i, a_n, b_data.data(), b_size), 0);
|
||||
c[j] = Op::apply(0, memcmpSmallLikeZeroPaddedAllowOverflow15(a_data.data() + i, a_n, b_data.data(), b_size));
|
||||
}
|
||||
}
|
||||
|
||||
@ -271,7 +271,7 @@ struct StringComparisonImpl
|
||||
const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets,
|
||||
PaddedPODArray<UInt8> & c)
|
||||
{
|
||||
StringComparisonImpl<typename Op::SymmetricOp>::string_vectorConstant(b_data, b_offsets, a_data, a_size, c);
|
||||
StringComparisonImpl<typename Op::SymmetricOp>::string_vector_constant(b_data, b_offsets, a_data, a_size, c);
|
||||
}
|
||||
|
||||
static void constant_fixed_string_vector(
|
||||
@ -279,15 +279,7 @@ struct StringComparisonImpl
|
||||
const ColumnString::Chars & b_data, ColumnString::Offset b_n,
|
||||
PaddedPODArray<UInt8> & c)
|
||||
{
|
||||
StringComparisonImpl<typename Op::SymmetricOp>::fixed_string_vectorConstant(b_data, b_n, a_data, a_size, c);
|
||||
}
|
||||
|
||||
static void constantConstant(
|
||||
const ColumnString::Chars & a_data, ColumnString::Offset a_size,
|
||||
const ColumnString::Chars & b_data, ColumnString::Offset b_size,
|
||||
UInt8 & c)
|
||||
{
|
||||
c = Op::apply(memcmpSmallAllowOverflow15(a_data.data(), a_size, b_data.data(), b_size), 0);
|
||||
StringComparisonImpl<typename Op::SymmetricOp>::fixed_string_vector_constant(b_data, b_n, a_data, a_size, c);
|
||||
}
|
||||
};
|
||||
|
||||
@ -331,7 +323,7 @@ struct StringEqualsImpl
|
||||
{
|
||||
auto a_size = a_offsets[i] - prev_a_offset - 1;
|
||||
|
||||
c[i] = positive == memequalSmallAllowOverflow15(
|
||||
c[i] = positive == memequalSmallLikeZeroPaddedAllowOverflow15(
|
||||
a_data.data() + prev_a_offset, a_size,
|
||||
b_data.data() + b_n * i, b_n);
|
||||
|
||||
@ -339,7 +331,7 @@ struct StringEqualsImpl
|
||||
}
|
||||
}
|
||||
|
||||
static void NO_INLINE string_vectorConstant(
|
||||
static void NO_INLINE string_vector_constant(
|
||||
const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets,
|
||||
const ColumnString::Chars & b_data, ColumnString::Offset b_size,
|
||||
PaddedPODArray<UInt8> & c)
|
||||
@ -397,15 +389,21 @@ struct StringEqualsImpl
|
||||
{
|
||||
fixed_string_vector_fixed_string_vector_16(a_data, b_data, c);
|
||||
}
|
||||
else if (a_n == b_n)
|
||||
{
|
||||
size_t size = a_data.size() / a_n;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
c[i] = positive == memequalSmallAllowOverflow15(a_data.data() + i * a_n, a_n, b_data.data() + i * a_n, a_n);
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t size = a_data.size() / a_n;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
c[i] = positive == memequalSmallAllowOverflow15(a_data.data() + i * a_n, a_n, b_data.data() + i * b_n, b_n);
|
||||
c[i] = positive == memequalSmallLikeZeroPaddedAllowOverflow15(a_data.data() + i * a_n, a_n, b_data.data() + i * b_n, b_n);
|
||||
}
|
||||
}
|
||||
|
||||
static void NO_INLINE fixed_string_vectorConstant(
|
||||
static void NO_INLINE fixed_string_vector_constant(
|
||||
const ColumnString::Chars & a_data, ColumnString::Offset a_n,
|
||||
const ColumnString::Chars & b_data, ColumnString::Offset b_size,
|
||||
PaddedPODArray<UInt8> & c)
|
||||
@ -418,7 +416,7 @@ struct StringEqualsImpl
|
||||
{
|
||||
size_t size = a_data.size() / a_n;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
c[i] = positive == memequalSmallAllowOverflow15(a_data.data() + i * a_n, a_n, b_data.data(), b_size);
|
||||
c[i] = positive == memequalSmallLikeZeroPaddedAllowOverflow15(a_data.data() + i * a_n, a_n, b_data.data(), b_size);
|
||||
}
|
||||
}
|
||||
|
||||
@ -435,7 +433,7 @@ struct StringEqualsImpl
|
||||
const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets,
|
||||
PaddedPODArray<UInt8> & c)
|
||||
{
|
||||
string_vectorConstant(b_data, b_offsets, a_data, a_size, c);
|
||||
string_vector_constant(b_data, b_offsets, a_data, a_size, c);
|
||||
}
|
||||
|
||||
static void constant_fixed_string_vector(
|
||||
@ -443,15 +441,7 @@ struct StringEqualsImpl
|
||||
const ColumnString::Chars & b_data, ColumnString::Offset b_n,
|
||||
PaddedPODArray<UInt8> & c)
|
||||
{
|
||||
fixed_string_vectorConstant(b_data, b_n, a_data, a_size, c);
|
||||
}
|
||||
|
||||
static void constantConstant(
|
||||
const ColumnString::Chars & a_data, ColumnString::Offset a_size,
|
||||
const ColumnString::Chars & b_data, ColumnString::Offset b_size,
|
||||
UInt8 & c)
|
||||
{
|
||||
c = positive == memequalSmallAllowOverflow15(a_data.data(), a_size, b_data.data(), b_size);
|
||||
fixed_string_vector_constant(b_data, b_n, a_data, a_size, c);
|
||||
}
|
||||
};
|
||||
|
||||
@ -758,9 +748,11 @@ private:
|
||||
|
||||
if (c0_const && c1_const)
|
||||
{
|
||||
UInt8 res = 0;
|
||||
StringImpl::constantConstant(*c0_const_chars, c0_const_size, *c1_const_chars, c1_const_size, res);
|
||||
block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(c0_const->size(), toField(res));
|
||||
auto res = executeString(block, result, &c0_const->getDataColumn(), &c1_const->getDataColumn());
|
||||
if (!res)
|
||||
return false;
|
||||
|
||||
block.getByPosition(result).column = ColumnConst::create(block.getByPosition(result).column, c0_const->size());
|
||||
return true;
|
||||
}
|
||||
else
|
||||
@ -780,7 +772,7 @@ private:
|
||||
c1_fixed_string->getChars(), c1_fixed_string->getN(),
|
||||
c_res->getData());
|
||||
else if (c0_string && c1_const)
|
||||
StringImpl::string_vectorConstant(
|
||||
StringImpl::string_vector_constant(
|
||||
c0_string->getChars(), c0_string->getOffsets(),
|
||||
*c1_const_chars, c1_const_size,
|
||||
c_res->getData());
|
||||
@ -795,7 +787,7 @@ private:
|
||||
c1_fixed_string->getChars(), c1_fixed_string->getN(),
|
||||
c_res->getData());
|
||||
else if (c0_fixed_string && c1_const)
|
||||
StringImpl::fixed_string_vectorConstant(
|
||||
StringImpl::fixed_string_vector_constant(
|
||||
c0_fixed_string->getChars(), c0_fixed_string->getN(),
|
||||
*c1_const_chars, c1_const_size,
|
||||
c_res->getData());
|
||||
|
@ -0,0 +1,110 @@
|
||||
Row 1:
|
||||
──────
|
||||
equals(b, b): 1
|
||||
greater(b, b): 0
|
||||
less(b, b): 0
|
||||
equals(b, c): 0
|
||||
greater(b, c): 0
|
||||
less(b, c): 1
|
||||
equals(b, d): 0
|
||||
greater(b, d): 0
|
||||
less(b, d): 1
|
||||
equals(b, bf): 1
|
||||
greater(b, bf): 0
|
||||
less(b, bf): 0
|
||||
equals(b, cf): 0
|
||||
greater(b, cf): 0
|
||||
less(b, cf): 1
|
||||
equals(b, df): 0
|
||||
greater(b, df): 0
|
||||
less(b, df): 1
|
||||
equals(c, b): 0
|
||||
greater(c, b): 1
|
||||
less(c, b): 0
|
||||
equals(c, c): 1
|
||||
greater(c, c): 0
|
||||
less(c, c): 0
|
||||
equals(c, d): 0
|
||||
greater(c, d): 0
|
||||
less(c, d): 1
|
||||
equals(c, bf): 0
|
||||
greater(c, bf): 1
|
||||
less(c, bf): 0
|
||||
equals(c, cf): 1
|
||||
greater(c, cf): 0
|
||||
less(c, cf): 0
|
||||
equals(c, df): 0
|
||||
greater(c, df): 0
|
||||
less(c, df): 1
|
||||
equals(d, b): 0
|
||||
greater(d, b): 1
|
||||
less(d, b): 0
|
||||
equals(d, c): 0
|
||||
greater(d, c): 1
|
||||
less(d, c): 0
|
||||
equals(d, d): 1
|
||||
greater(d, d): 0
|
||||
less(d, d): 0
|
||||
equals(d, bf): 0
|
||||
greater(d, bf): 1
|
||||
less(d, bf): 0
|
||||
equals(d, cf): 0
|
||||
greater(d, cf): 1
|
||||
less(d, cf): 0
|
||||
equals(d, df): 1
|
||||
greater(d, df): 0
|
||||
less(d, df): 0
|
||||
equals(bf, b): 1
|
||||
greater(bf, b): 0
|
||||
less(bf, b): 0
|
||||
equals(bf, c): 0
|
||||
greater(bf, c): 0
|
||||
less(bf, c): 1
|
||||
equals(bf, d): 0
|
||||
greater(bf, d): 0
|
||||
less(bf, d): 1
|
||||
equals(bf, bf): 1
|
||||
greater(bf, bf): 0
|
||||
less(bf, bf): 0
|
||||
equals(bf, cf): 0
|
||||
greater(bf, cf): 0
|
||||
less(bf, cf): 1
|
||||
equals(bf, df): 0
|
||||
greater(bf, df): 0
|
||||
less(bf, df): 1
|
||||
equals(cf, b): 0
|
||||
greater(cf, b): 1
|
||||
less(cf, b): 0
|
||||
equals(cf, c): 1
|
||||
greater(cf, c): 0
|
||||
less(cf, c): 0
|
||||
equals(cf, d): 0
|
||||
greater(cf, d): 0
|
||||
less(cf, d): 1
|
||||
equals(cf, bf): 0
|
||||
greater(cf, bf): 1
|
||||
less(cf, bf): 0
|
||||
equals(cf, cf): 1
|
||||
greater(cf, cf): 0
|
||||
less(cf, cf): 0
|
||||
equals(cf, df): 0
|
||||
greater(cf, df): 0
|
||||
less(cf, df): 1
|
||||
equals(df, b): 0
|
||||
greater(df, b): 1
|
||||
less(df, b): 0
|
||||
equals(df, c): 0
|
||||
greater(df, c): 1
|
||||
less(df, c): 0
|
||||
equals(df, d): 1
|
||||
greater(df, d): 0
|
||||
less(df, d): 0
|
||||
equals(df, bf): 0
|
||||
greater(df, bf): 1
|
||||
less(df, bf): 0
|
||||
equals(df, cf): 0
|
||||
greater(df, cf): 1
|
||||
less(df, cf): 0
|
||||
equals(df, df): 1
|
||||
greater(df, df): 0
|
||||
less(df, df): 0
|
45
tests/queries/0_stateless/01250_fixed_string_comparison.sql
Normal file
45
tests/queries/0_stateless/01250_fixed_string_comparison.sql
Normal file
@ -0,0 +1,45 @@
|
||||
WITH 'abb' AS b, 'abc' AS c, 'abd' AS d, toFixedString(b, 5) AS bf, toFixedString(c, 5) AS cf, toFixedString(d, 5) AS df
|
||||
SELECT
|
||||
b = b, b > b, b < b,
|
||||
b = c, b > c, b < c,
|
||||
b = d, b > d, b < d,
|
||||
b = bf, b > bf, b < bf,
|
||||
b = cf, b > cf, b < cf,
|
||||
b = df, b > df, b < df,
|
||||
|
||||
c = b, c > b, c < b,
|
||||
c = c, c > c, c < c,
|
||||
c = d, c > d, c < d,
|
||||
c = bf, c > bf, c < bf,
|
||||
c = cf, c > cf, c < cf,
|
||||
c = df, c > df, c < df,
|
||||
|
||||
d = b, d > b, d < b,
|
||||
d = c, d > c, d < c,
|
||||
d = d, d > d, d < d,
|
||||
d = bf, d > bf, d < bf,
|
||||
d = cf, d > cf, d < cf,
|
||||
d = df, d > df, d < df,
|
||||
|
||||
bf = b, bf > b, bf < b,
|
||||
bf = c, bf > c, bf < c,
|
||||
bf = d, bf > d, bf < d,
|
||||
bf = bf, bf > bf, bf < bf,
|
||||
bf = cf, bf > cf, bf < cf,
|
||||
bf = df, bf > df, bf < df,
|
||||
|
||||
cf = b, cf > b, cf < b,
|
||||
cf = c, cf > c, cf < c,
|
||||
cf = d, cf > d, cf < d,
|
||||
cf = bf, cf > bf, cf < bf,
|
||||
cf = cf, cf > cf, cf < cf,
|
||||
cf = df, cf > df, cf < df,
|
||||
|
||||
df = b, df > b, df < b,
|
||||
df = c, df > c, df < c,
|
||||
df = d, df > d, df < d,
|
||||
df = bf, df > bf, df < bf,
|
||||
df = cf, df > cf, df < cf,
|
||||
df = df, df > df, df < df
|
||||
|
||||
FORMAT Vertical;
|
@ -0,0 +1 @@
|
||||
1
|
1
tests/queries/0_stateless/01251_string_comparison.sql
Normal file
1
tests/queries/0_stateless/01251_string_comparison.sql
Normal file
@ -0,0 +1 @@
|
||||
SELECT isConstant('a' = 'b');
|
Loading…
Reference in New Issue
Block a user