ClickHouse/dbms/src/Common/parseGlobs.cpp

76 lines
3.0 KiB
C++
Raw Normal View History

2019-07-21 13:15:04 +00:00
#include <Common/parseGlobs.h>
#include <re2/re2.h>
#include <re2/stringpiece.h>
#include <algorithm>
namespace DB
{
2019-08-27 15:20:31 +00:00
/* Transforms a string from grep-wildcard syntax ("{N..M}", "{a,b,c}" as in the remote table function, and "*", "?") to a Perl-style regexp for matching with the re2 library,
* in these steps:
* 1) find ranges and enumerations in {} and replace them with a regexp alternation (expr1|expr2|expr3),
* 2) find and replace "*" and "?".
* Before each search, the symbols that must not be searched for have to be escaped.
*/
2019-08-27 15:20:31 +00:00
/// Returns true for characters that are special in a regexp but have no meaning in the glob syntax,
/// so they have to be matched literally. The backslash is included: a backslash typed by the user must
/// stay distinguishable from the escaping backslashes inserted by makeRegexpPatternFromGlobs.
static bool isRegexpOnlySpecialSymbol(char letter)
{
    switch (letter)
    {
        case '[':
        case ']':
        case '|':
        case '+':
        case '-':
        case '(':
        case ')':
        case '\\':
            return true;
        default:
            return false;
    }
}

/// Returns true if text[from, to) is a non-empty sequence of ASCII decimal digits.
static bool isDigitSequence(const std::string & text, size_t from, size_t to)
{
    if (from >= to)
        return false;
    for (size_t i = from; i < to; ++i)
        if (text[i] < '0' || text[i] > '9')
            return false;
    return true;
}

/// Returns true if `body` (the text between '{' and '}') has the form "M..N" with non-negative integers M and N.
static bool isNumericRange(const std::string & body)
{
    const size_t dots = body.find("..");
    if (dots == std::string::npos)
        return false;
    return isDigitSequence(body, 0, dots) && isDigitSequence(body, dots + 2, body.size());
}

/// Returns true if `body` (the text between '{' and '}', known to contain none of '{', '}', '*')
/// has the form "expr1,expr2,...": it contains a comma and neither starts nor ends with one.
static bool isEnumeration(const std::string & body)
{
    return !body.empty() && body.front() != ',' && body.back() != ',' && body.find(',') != std::string::npos;
}

/// Expands "M..N" into "M|M+1|...|N". A descending range such as "3..1" produces the same set as "1..3".
/// NOTE: leading zeros are not preserved ("01..03" gives "1|2|3").
/// std::stoull throws std::out_of_range if a bound does not fit into unsigned long long.
static std::string expandNumericRange(const std::string & body)
{
    const size_t dots = body.find("..");
    const unsigned long long first = std::stoull(body.substr(0, dots));
    const unsigned long long second = std::stoull(body.substr(dots + 2));
    const unsigned long long range_begin = std::min(first, second);
    const unsigned long long range_end = std::max(first, second);

    std::string alternatives = std::to_string(range_begin);
    /// Incrementing inside the body keeps the loop finite even when range_end is the maximal value.
    for (unsigned long long i = range_begin; i < range_end;)
    {
        ++i;
        alternatives += '|';
        alternatives += std::to_string(i);
    }
    return alternatives;
}

/// Converts a glob pattern into a regexp pattern suitable for re2.
///
/// Supported glob syntax:
///  - "{N..M}"   - any integer from the range, becomes (N|N+1|...|M);
///  - "{a,b,c}"  - any of the listed expressions, becomes (a|b|c); expressions must not contain '{', '}' or '*';
///  - "*"        - any number of symbols except '/', becomes [^/]*;
///  - "?"        - exactly one symbol except '/', becomes [^/].
/// Everything else is matched literally; braces that do not form a valid group are escaped.
///
/// Returns the regexp pattern as a string. May throw std::out_of_range (from std::stoull) on a range
/// whose bound is too large.
std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs)
{
    /// Step 0: escape the symbols that are not part of the glob syntax. From here on, every backslash
    /// in the string belongs to an escape sequence and must reach the result unchanged.
    std::string escaped_with_globs;
    escaped_with_globs.reserve(initial_str_with_globs.size());
    for (const char letter : initial_str_with_globs)
    {
        if (isRegexpOnlySpecialSymbol(letter))
            escaped_with_globs.push_back('\\');
        escaped_with_globs.push_back(letter);
    }

    /// Step 1: replace every "{M..N}" and "{expr1,expr2,...}" group with "(alt1|alt2|...)".
    std::string almost_regexp;
    almost_regexp.reserve(escaped_with_globs.size());
    size_t current_index = 0; /// start of the part of escaped_with_globs that is not copied yet
    size_t open_brace = escaped_with_globs.find('{');
    while (open_brace != std::string::npos)
    {
        /// The group body ends at the first of '{', '}', '*'; only '}' makes the group valid.
        const size_t body_end = escaped_with_globs.find_first_of("{}*", open_brace + 1);
        if (body_end == std::string::npos)
            break; /// neither a closing brace nor another opening brace is left

        if (escaped_with_globs[body_end] != '}')
        {
            /// The candidate is interrupted by '{' or '*'. If it was '{', that brace is the next candidate.
            open_brace = escaped_with_globs.find('{', body_end);
            continue;
        }

        const std::string body = escaped_with_globs.substr(open_brace + 1, body_end - open_brace - 1);
        const bool is_range = isNumericRange(body);
        if (!is_range && !isEnumeration(body))
        {
            /// Something like "{abc}": not a glob group, the braces will be escaped on the last step.
            open_brace = escaped_with_globs.find('{', body_end + 1);
            continue;
        }

        almost_regexp.append(escaped_with_globs, current_index, open_brace - current_index);
        almost_regexp += '(';
        if (is_range)
        {
            almost_regexp += expandNumericRange(body);
        }
        else
        {
            std::string alternatives = body;
            std::replace(alternatives.begin(), alternatives.end(), ',', '|');
            almost_regexp += alternatives;
        }
        almost_regexp += ')';

        current_index = body_end + 1;
        open_brace = escaped_with_globs.find('{', current_index);
    }
    almost_regexp.append(escaped_with_globs, current_index, std::string::npos);

    /// Step 2: translate "*" and "?", escape the remaining dots and braces.
    /// Backslashes are deliberately left alone: escaping them again would turn the escape sequences
    /// produced on step 0 into literal backslashes and the pattern would stop matching.
    std::string result;
    result.reserve(almost_regexp.size());
    for (const char letter : almost_regexp)
    {
        switch (letter)
        {
            case '?':
                result += "[^/]";
                break;
            case '*':
                result += "[^/]*";
                break;
            case '.':
            case '{':
            case '}':
                result.push_back('\\');
                result.push_back(letter);
                break;
            default:
                result.push_back(letter);
                break;
        }
    }
    return result;
}
}