mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
Merge pull request #11649 from ClickHouse/case-insensitive-regexp
Allow case-insensitive regexps; added a test
This commit is contained in:
commit
b9b725a39f
@ -38,6 +38,7 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
required_substring_is_prefix = false;
|
||||
required_substring.clear();
|
||||
bool has_alternative_on_depth_0 = false;
|
||||
bool has_case_insensitive_flag = false;
|
||||
|
||||
/// Substring with a position.
|
||||
using Substring = std::pair<std::string, size_t>;
|
||||
@ -65,7 +66,17 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
|
||||
switch (*pos)
|
||||
{
|
||||
case '|': case '(': case ')': case '^': case '$': case '.': case '[': case '?': case '*': case '+': case '{':
|
||||
case '|':
|
||||
case '(':
|
||||
case ')':
|
||||
case '^':
|
||||
case '$':
|
||||
case '.':
|
||||
case '[':
|
||||
case '?':
|
||||
case '*':
|
||||
case '+':
|
||||
case '{':
|
||||
if (depth == 0 && !in_curly_braces && !in_square_braces)
|
||||
{
|
||||
if (last_substring->first.empty())
|
||||
@ -110,6 +121,28 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
|
||||
/// Check for case-insensitive flag.
|
||||
if (pos + 1 < end && pos[1] == '?')
|
||||
{
|
||||
for (size_t offset = 2; pos + offset < end; ++offset)
|
||||
{
|
||||
if (pos[offset] == '-' /// it means flag negation
|
||||
/// various possible flags, actually only imsU are supported by re2
|
||||
|| (pos[offset] >= 'a' && pos[offset] <= 'z')
|
||||
|| (pos[offset] >= 'A' && pos[offset] <= 'Z'))
|
||||
{
|
||||
if (pos[offset] == 'i')
|
||||
{
|
||||
/// Actually, it can be a negated case-insensitive flag, but we don't care.
|
||||
has_case_insensitive_flag = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
@ -209,7 +242,7 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
|
||||
if (!is_trivial)
|
||||
{
|
||||
if (!has_alternative_on_depth_0)
|
||||
if (!has_alternative_on_depth_0 && !has_case_insensitive_flag)
|
||||
{
|
||||
/// We choose the non-alternative substring of the maximum length for first search.
|
||||
|
||||
|
@ -0,0 +1,8 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
@ -0,0 +1,8 @@
|
||||
SELECT match('Too late', 'Too late');
|
||||
select match('Too late', '(?i)Too late');
|
||||
select match('Too late', '(?i)too late');
|
||||
select match('Too late', '(?i:too late)');
|
||||
select match('Too late', '(?i)to{2} late');
|
||||
select match('Too late', '(?i)to(?)o late');
|
||||
select match('Too late', '(?i)to+ late');
|
||||
select match('Too late', '(?i)to(?:o|o) late');
|
Loading…
Reference in New Issue
Block a user