mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 01:22:04 +00:00
process regexp flags correctly
This commit is contained in:
parent
008d02880b
commit
9f6e472b0c
@ -244,33 +244,41 @@ const char * analyzeImpl(
|
|||||||
is_trivial = false;
|
is_trivial = false;
|
||||||
if (!in_square_braces)
|
if (!in_square_braces)
|
||||||
{
|
{
|
||||||
/// Check for case-insensitive flag.
|
/// it means flag negation
|
||||||
if (pos + 1 < end && pos[1] == '?')
|
/// there are various possible flags
|
||||||
|
/// actually only imsU are supported by re2
|
||||||
|
auto is_flag_char = [](char x)
|
||||||
{
|
{
|
||||||
for (size_t offset = 2; pos + offset < end; ++offset)
|
return x == '-' || x == 'i' || x == 'm' || x == 's' || x == 'U' || x == 'u';
|
||||||
|
};
|
||||||
|
/// Check for case-insensitive flag.
|
||||||
|
if (pos + 2 < end && pos[1] == '?' && is_flag_char(pos[2]))
|
||||||
|
{
|
||||||
|
size_t offset = 2;
|
||||||
|
for (; pos + offset < end; ++offset)
|
||||||
{
|
{
|
||||||
if (pos[offset] == '-' /// it means flag negation
|
if (pos[offset] == 'i')
|
||||||
/// various possible flags, actually only imsU are supported by re2
|
|
||||||
|| (pos[offset] >= 'a' && pos[offset] <= 'z')
|
|
||||||
|| (pos[offset] >= 'A' && pos[offset] <= 'Z'))
|
|
||||||
{
|
{
|
||||||
if (pos[offset] == 'i')
|
/// Actually it can be negated case-insensitive flag. But we don't care.
|
||||||
{
|
has_case_insensitive_flag = true;
|
||||||
/// Actually it can be negated case-insensitive flag. But we don't care.
|
|
||||||
has_case_insensitive_flag = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else if (!is_flag_char(pos[offset]))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
pos += offset;
|
||||||
|
/// if this group only contains flags, we have nothing to do.
|
||||||
|
if (*pos == ')')
|
||||||
|
{
|
||||||
|
++pos;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/// (?:regex) means non-capturing parentheses group
|
/// (?:regex) means non-capturing parentheses group
|
||||||
if (pos + 2 < end && pos[1] == '?' && pos[2] == ':')
|
else if (pos + 2 < end && pos[1] == '?' && pos[2] == ':')
|
||||||
{
|
{
|
||||||
pos += 2;
|
pos += 2;
|
||||||
}
|
}
|
||||||
if (pos + 3 < end && pos[1] == '?' && (pos[2] == '<' || pos[2] == '\'' || (pos[2] == 'P' && pos[3] == '<')))
|
else if (pos + 3 < end && pos[1] == '?' && (pos[2] == '<' || pos[2] == '\'' || (pos[2] == 'P' && pos[3] == '<')))
|
||||||
{
|
{
|
||||||
pos = skipNameCapturingGroup(pos, pos[2] == 'P' ? 3: 2, end);
|
pos = skipNameCapturingGroup(pos, pos[2] == 'P' ? 3: 2, end);
|
||||||
}
|
}
|
||||||
|
@ -19,6 +19,8 @@ TEST(OptimizeRE, analyze)
|
|||||||
};
|
};
|
||||||
test_f("abc", "abc", {}, true, true);
|
test_f("abc", "abc", {}, true, true);
|
||||||
test_f("c([^k]*)de", "");
|
test_f("c([^k]*)de", "");
|
||||||
|
test_f("(?-s)bob", "bob", {}, false, true);
|
||||||
|
test_f("(?s)bob", "bob", {}, false, true);
|
||||||
test_f("abc(de)fg", "abcdefg", {}, false, true);
|
test_f("abc(de)fg", "abcdefg", {}, false, true);
|
||||||
test_f("abc(de|xyz)fg", "abc", {"abcdefg", "abcxyzfg"}, false, true);
|
test_f("abc(de|xyz)fg", "abc", {"abcdefg", "abcxyzfg"}, false, true);
|
||||||
test_f("abc(de?f|xyz)fg", "abc", {"abcd", "abcxyzfg"}, false, true);
|
test_f("abc(de?f|xyz)fg", "abc", {"abcd", "abcxyzfg"}, false, true);
|
||||||
|
Loading…
Reference in New Issue
Block a user