mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Merge pull request #11649 from ClickHouse/case-insensitive-regexp
Allow case-insensitive regexps; added a test
This commit is contained in:
commit
b9b725a39f
@ -38,6 +38,7 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
|||||||
required_substring_is_prefix = false;
|
required_substring_is_prefix = false;
|
||||||
required_substring.clear();
|
required_substring.clear();
|
||||||
bool has_alternative_on_depth_0 = false;
|
bool has_alternative_on_depth_0 = false;
|
||||||
|
bool has_case_insensitive_flag = false;
|
||||||
|
|
||||||
/// Substring with a position.
|
/// Substring with a position.
|
||||||
using Substring = std::pair<std::string, size_t>;
|
using Substring = std::pair<std::string, size_t>;
|
||||||
@ -65,7 +66,17 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
|||||||
|
|
||||||
switch (*pos)
|
switch (*pos)
|
||||||
{
|
{
|
||||||
case '|': case '(': case ')': case '^': case '$': case '.': case '[': case '?': case '*': case '+': case '{':
|
case '|':
|
||||||
|
case '(':
|
||||||
|
case ')':
|
||||||
|
case '^':
|
||||||
|
case '$':
|
||||||
|
case '.':
|
||||||
|
case '[':
|
||||||
|
case '?':
|
||||||
|
case '*':
|
||||||
|
case '+':
|
||||||
|
case '{':
|
||||||
if (depth == 0 && !in_curly_braces && !in_square_braces)
|
if (depth == 0 && !in_curly_braces && !in_square_braces)
|
||||||
{
|
{
|
||||||
if (last_substring->first.empty())
|
if (last_substring->first.empty())
|
||||||
@ -110,6 +121,28 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
|||||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||||
last_substring = &trivial_substrings.back();
|
last_substring = &trivial_substrings.back();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check for case-insensitive flag.
|
||||||
|
if (pos + 1 < end && pos[1] == '?')
|
||||||
|
{
|
||||||
|
for (size_t offset = 2; pos + offset < end; ++offset)
|
||||||
|
{
|
||||||
|
if (pos[offset] == '-' /// it means flag negation
|
||||||
|
/// various possible flags, actually only imsU are supported by re2
|
||||||
|
|| (pos[offset] >= 'a' && pos[offset] <= 'z')
|
||||||
|
|| (pos[offset] >= 'A' && pos[offset] <= 'Z'))
|
||||||
|
{
|
||||||
|
if (pos[offset] == 'i')
|
||||||
|
{
|
||||||
|
/// The flag may actually be negated (e.g. `(?-i)`), but we do not distinguish that case and treat the regexp as case-insensitive anyway.
|
||||||
|
has_case_insensitive_flag = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
++pos;
|
++pos;
|
||||||
break;
|
break;
|
||||||
@ -209,7 +242,7 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
|||||||
|
|
||||||
if (!is_trivial)
|
if (!is_trivial)
|
||||||
{
|
{
|
||||||
if (!has_alternative_on_depth_0)
|
if (!has_alternative_on_depth_0 && !has_case_insensitive_flag)
|
||||||
{
|
{
|
||||||
/// We choose the non-alternative substring of the maximum length for first search.
|
/// We choose the non-alternative substring of the maximum length for first search.
|
||||||
|
|
||||||
|
@ -0,0 +1,8 @@
|
|||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
@ -0,0 +1,8 @@
|
|||||||
|
SELECT match('Too late', 'Too late');
|
||||||
|
select match('Too late', '(?i)Too late');
|
||||||
|
select match('Too late', '(?i)too late');
|
||||||
|
select match('Too late', '(?i:too late)');
|
||||||
|
select match('Too late', '(?i)to{2} late');
|
||||||
|
select match('Too late', '(?i)to(?)o late');
|
||||||
|
select match('Too late', '(?i)to+ late');
|
||||||
|
select match('Too late', '(?i)to(?:o|o) late');
|
Loading…
Reference in New Issue
Block a user