mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Fix Bug 37114 - ilike on FixedString(N) columns produces wrong results
The main fix is in MatchImpl.h where the "case_insensitive" parameter is added to Regexps::get(). Also made "case_insensitive" a non-default template parameter to reduce the risk of future bugs. The remainder of this commit are minor random code improvements. resoves #37114
This commit is contained in:
parent
48c165b360
commit
7232f47c68
@ -94,7 +94,7 @@ public:
|
||||
template <typename Factory>
|
||||
Pointer get(const Key & key, Factory && f)
|
||||
{
|
||||
std::unique_lock lock(mutex);
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
auto it = container.find(key);
|
||||
if (container.end() == it)
|
||||
|
@ -477,7 +477,7 @@ public:
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
if (!col->getValue<String>().empty())
|
||||
re = Regexps::get<false, false>(col->getValue<String>());
|
||||
re = Regexps::get<false, false, false>(col->getValue<String>());
|
||||
|
||||
}
|
||||
|
||||
@ -560,7 +560,7 @@ public:
|
||||
+ " of first argument of function " + getName() + ". Must be constant string.",
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
re = Regexps::get<false, false>(col->getValue<String>());
|
||||
re = Regexps::get<false, false, false>(col->getValue<String>());
|
||||
capture = re->getNumberOfSubpatterns() > 0 ? 1 : 0;
|
||||
|
||||
matches.resize(capture + 1);
|
||||
|
@ -24,12 +24,11 @@ namespace ErrorCodes
|
||||
/// Is the [I]LIKE expression reduced to finding a substring in a string?
|
||||
static inline bool likePatternIsStrstr(const String & pattern, String & res)
|
||||
{
|
||||
res = "";
|
||||
|
||||
if (pattern.size() < 2 || pattern.front() != '%' || pattern.back() != '%')
|
||||
return false;
|
||||
|
||||
res.reserve(pattern.size() * 2);
|
||||
res = "";
|
||||
res.reserve(pattern.size() - 2);
|
||||
|
||||
const char * pos = pattern.data();
|
||||
const char * end = pos + pattern.size();
|
||||
@ -81,7 +80,7 @@ struct MatchImpl
|
||||
static void vectorConstant(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
const std::string & pattern,
|
||||
const String & pattern,
|
||||
const ColumnPtr & start_pos,
|
||||
PaddedPODArray<UInt8> & res)
|
||||
{
|
||||
@ -92,14 +91,13 @@ struct MatchImpl
|
||||
if (offsets.empty())
|
||||
return;
|
||||
|
||||
String strstr_pattern;
|
||||
|
||||
/// A simple case where the [I]LIKE expression reduces to finding a substring in a string
|
||||
String strstr_pattern;
|
||||
if (like && likePatternIsStrstr(pattern, strstr_pattern))
|
||||
{
|
||||
const UInt8 * begin = data.data();
|
||||
const UInt8 * const begin = data.data();
|
||||
const UInt8 * const end = data.data() + data.size();
|
||||
const UInt8 * pos = begin;
|
||||
const UInt8 * end = pos + data.size();
|
||||
|
||||
/// The current index in the array of strings.
|
||||
size_t i = 0;
|
||||
@ -137,7 +135,7 @@ struct MatchImpl
|
||||
|
||||
auto regexp = Regexps::get<like, true, case_insensitive>(pattern);
|
||||
|
||||
std::string required_substring;
|
||||
String required_substring;
|
||||
bool is_trivial;
|
||||
bool required_substring_is_prefix; /// for `anchored` execution of the regexp.
|
||||
|
||||
@ -172,9 +170,9 @@ struct MatchImpl
|
||||
{
|
||||
/// NOTE This almost matches with the case of LikePatternIsStrstr.
|
||||
|
||||
const UInt8 * begin = data.data();
|
||||
const UInt8 * const begin = data.data();
|
||||
const UInt8 * const end = data.begin() + data.size();
|
||||
const UInt8 * pos = begin;
|
||||
const UInt8 * end = pos + data.size();
|
||||
|
||||
/// The current index in the array of strings.
|
||||
size_t i = 0;
|
||||
@ -230,6 +228,7 @@ struct MatchImpl
|
||||
++i;
|
||||
}
|
||||
|
||||
/// Tail, in which there can be no substring.
|
||||
if (i < res.size())
|
||||
memset(&res[i], revert, (res.size() - i) * sizeof(res[0]));
|
||||
}
|
||||
@ -238,14 +237,14 @@ struct MatchImpl
|
||||
|
||||
/// Very carefully crafted copy-paste.
|
||||
static void vectorFixedConstant(
|
||||
const ColumnString::Chars & data, size_t n, const std::string & pattern,
|
||||
const ColumnString::Chars & data, size_t n, const String & pattern,
|
||||
PaddedPODArray<UInt8> & res)
|
||||
{
|
||||
if (data.empty())
|
||||
return;
|
||||
|
||||
String strstr_pattern;
|
||||
/// A simple case where the LIKE expression reduces to finding a substring in a string
|
||||
String strstr_pattern;
|
||||
if (like && likePatternIsStrstr(pattern, strstr_pattern))
|
||||
{
|
||||
const UInt8 * begin = data.data();
|
||||
@ -291,9 +290,9 @@ struct MatchImpl
|
||||
{
|
||||
size_t size = data.size() / n;
|
||||
|
||||
auto regexp = Regexps::get<like, true>(pattern);
|
||||
auto regexp = Regexps::get<like, true, case_insensitive>(pattern);
|
||||
|
||||
std::string required_substring;
|
||||
String required_substring;
|
||||
bool is_trivial;
|
||||
bool required_substring_is_prefix; /// for `anchored` execution of the regexp.
|
||||
|
||||
|
@ -44,23 +44,20 @@ namespace Regexps
|
||||
template <bool like>
|
||||
inline Regexp createRegexp(const std::string & pattern, int flags)
|
||||
{
|
||||
return {pattern, flags};
|
||||
}
|
||||
|
||||
template <>
|
||||
inline Regexp createRegexp<true>(const std::string & pattern, int flags)
|
||||
{
|
||||
return {likePatternToRegexp(pattern), flags};
|
||||
if constexpr (like)
|
||||
return {likePatternToRegexp(pattern), flags};
|
||||
else
|
||||
return {pattern, flags};
|
||||
}
|
||||
|
||||
/** Returns holder of an object from Pool.
|
||||
* You must hold the ownership while using the object.
|
||||
* In destructor, it returns the object back to the Pool for further reuse.
|
||||
*/
|
||||
template <bool like, bool no_capture, bool case_insensitive = false>
|
||||
template <bool like, bool no_capture, bool case_insensitive>
|
||||
inline Pool::Pointer get(const std::string & pattern)
|
||||
{
|
||||
/// C++11 has thread-safe function-local static on most modern compilers.
|
||||
/// the Singleton is thread-safe in C++11
|
||||
static Pool known_regexps; /// Different variables for different pattern parameters.
|
||||
|
||||
return known_regexps.get(pattern, [&pattern]
|
||||
|
@ -21,7 +21,7 @@ struct ExtractImpl
|
||||
res_data.reserve(data.size() / 5);
|
||||
res_offsets.resize(offsets.size());
|
||||
|
||||
const auto & regexp = Regexps::get<false, false>(pattern);
|
||||
const auto & regexp = Regexps::get<false, false, false>(pattern);
|
||||
|
||||
unsigned capture = regexp->getNumberOfSubpatterns() > 0 ? 1 : 0;
|
||||
OptimizedRegularExpression::MatchVec matches;
|
||||
|
@ -95,7 +95,7 @@ public:
|
||||
throw Exception("Length of 'needle' argument must be greater than 0.", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
using StringPiece = typename Regexps::Regexp::StringPieceType;
|
||||
auto holder = Regexps::get<false, false>(needle);
|
||||
auto holder = Regexps::get<false, false, false>(needle);
|
||||
const auto & regexp = holder->getRE2();
|
||||
|
||||
if (!regexp)
|
||||
|
@ -63,7 +63,7 @@ public:
|
||||
if (needle.empty())
|
||||
throw Exception(getName() + " length of 'needle' argument must be greater than 0.", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
auto regexp = Regexps::get<false, false>(needle);
|
||||
auto regexp = Regexps::get<false, false, false>(needle);
|
||||
const auto & re2 = regexp->getRE2();
|
||||
|
||||
if (!re2)
|
||||
|
@ -0,0 +1,2 @@
|
||||
AA 0 1
|
||||
Aa 1 1
|
10
tests/queries/0_stateless/02293_ilike_on_fixed_strings.sql
Normal file
10
tests/queries/0_stateless/02293_ilike_on_fixed_strings.sql
Normal file
@ -0,0 +1,10 @@
|
||||
DROP TABLE IF EXISTS tab;
|
||||
|
||||
CREATE TABLE tab (col FixedString(2)) engine = MergeTree() ORDER BY col;
|
||||
|
||||
INSERT INTO tab VALUES ('AA') ('Aa');
|
||||
|
||||
SELECT col, col LIKE '%a', col ILIKE '%a' FROM tab WHERE col = 'AA';
|
||||
SELECT col, col LIKE '%a', col ILIKE '%a' FROM tab WHERE col = 'Aa';
|
||||
|
||||
DROP TABLE IF EXISTS tab;
|
Loading…
Reference in New Issue
Block a user