2019-07-21 13:15:04 +00:00
# include <Common/parseGlobs.h>
# include <re2/re2.h>
# include <re2/stringpiece.h>
# include <algorithm>
2019-09-03 14:23:51 +00:00
# include <sstream>
2020-03-20 02:15:28 +00:00
# include <cassert>
2020-04-01 15:06:20 +00:00
# include <iomanip>
2020-03-20 02:15:28 +00:00
2019-07-21 13:15:04 +00:00
namespace DB
{
2020-08-08 01:01:47 +00:00
/* Transforms string from grep-wildcard-syntax ("{N..M}", "{a,b,c}" as in remote table function and "*", "?") to perl-regexp for using re2 library for matching
2019-08-27 15:20:31 +00:00
* with such steps :
2019-09-03 14:23:51 +00:00
* 1 ) search intervals like { 0. .9 } and enums like { abc , xyz , qwe } in { } , replace them by regexp with pipe ( expr1 | expr2 | expr3 ) ,
2019-08-30 13:27:05 +00:00
* 2 ) search and replace " * " and " ? " .
2019-08-27 15:20:31 +00:00
* Before each search need to escape symbols that we would not search .
2019-08-29 15:38:33 +00:00
*
* There are few examples in unit tests .
2019-08-09 17:06:29 +00:00
*/
2019-08-27 15:20:31 +00:00
std : : string makeRegexpPatternFromGlobs ( const std : : string & initial_str_with_globs )
2019-07-21 13:15:04 +00:00
{
2019-09-04 19:55:56 +00:00
std : : ostringstream oss_for_escaping ;
2019-08-27 15:20:31 +00:00
/// Escaping only characters that not used in glob syntax
for ( const auto & letter : initial_str_with_globs )
2019-07-21 13:15:04 +00:00
{
2019-08-27 15:20:31 +00:00
if ( ( letter = = ' [ ' ) | | ( letter = = ' ] ' ) | | ( letter = = ' | ' ) | | ( letter = = ' + ' ) | | ( letter = = ' - ' ) | | ( letter = = ' ( ' ) | | ( letter = = ' ) ' ) )
2019-09-04 19:55:56 +00:00
oss_for_escaping < < ' \\ ' ;
oss_for_escaping < < letter ;
2019-07-21 13:15:04 +00:00
}
2019-09-04 19:55:56 +00:00
std : : string escaped_with_globs = oss_for_escaping . str ( ) ;
2019-09-03 14:23:51 +00:00
static const re2 : : RE2 enum_or_range ( R " ({([ \ d]+ \ . \ .[ \ d]+|[^{}*,]+,[^{}*]*[^{}*,])}) " ) ; /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without {}*,
2019-08-27 15:20:31 +00:00
re2 : : StringPiece input ( escaped_with_globs ) ;
2019-09-03 14:23:51 +00:00
re2 : : StringPiece matched ;
2019-09-04 19:55:56 +00:00
std : : ostringstream oss_for_replacing ;
2019-07-21 13:15:04 +00:00
size_t current_index = 0 ;
2019-08-27 15:20:31 +00:00
while ( RE2 : : FindAndConsume ( & input , enum_or_range , & matched ) )
2019-07-21 13:15:04 +00:00
{
2019-08-27 15:20:31 +00:00
std : : string buffer = matched . ToString ( ) ;
2019-09-04 19:55:56 +00:00
oss_for_replacing < < escaped_with_globs . substr ( current_index , matched . data ( ) - escaped_with_globs . data ( ) - current_index - 1 ) < < ' ( ' ;
2019-08-27 15:20:31 +00:00
if ( buffer . find ( ' , ' ) = = std : : string : : npos )
{
2020-03-20 02:15:28 +00:00
size_t range_begin = 0 ;
size_t range_end = 0 ;
2019-09-03 14:23:51 +00:00
char point ;
2019-09-04 19:55:56 +00:00
std : : istringstream iss_range ( buffer ) ;
iss_range > > range_begin > > point > > point > > range_end ;
2020-04-08 01:09:01 +00:00
assert ( ! iss_range . fail ( ) ) ;
2020-04-01 15:06:20 +00:00
bool leading_zeros = buffer [ 0 ] = = ' 0 ' ;
size_t num_len = std : : to_string ( range_end ) . size ( ) ;
if ( leading_zeros )
oss_for_replacing < < std : : setfill ( ' 0 ' ) < < std : : setw ( num_len ) ;
2019-09-04 19:55:56 +00:00
oss_for_replacing < < range_begin ;
2019-08-27 15:20:31 +00:00
for ( size_t i = range_begin + 1 ; i < = range_end ; + + i )
{
2020-04-01 15:06:20 +00:00
oss_for_replacing < < ' | ' ;
if ( leading_zeros )
oss_for_replacing < < std : : setfill ( ' 0 ' ) < < std : : setw ( num_len ) ;
oss_for_replacing < < i ;
2019-08-27 15:20:31 +00:00
}
}
else
{
std : : replace ( buffer . begin ( ) , buffer . end ( ) , ' , ' , ' | ' ) ;
2019-09-04 19:55:56 +00:00
oss_for_replacing < < buffer ;
2019-08-27 15:20:31 +00:00
}
2019-09-04 19:55:56 +00:00
oss_for_replacing < < " ) " ;
2019-08-27 15:20:31 +00:00
current_index = input . data ( ) - escaped_with_globs . data ( ) ;
2019-07-21 13:15:04 +00:00
}
2019-09-04 19:55:56 +00:00
oss_for_replacing < < escaped_with_globs . substr ( current_index ) ;
std : : string almost_res = oss_for_replacing . str ( ) ;
std : : ostringstream oss_final_processing ;
2019-09-03 14:23:51 +00:00
for ( const auto & letter : almost_res )
2019-07-21 13:15:04 +00:00
{
if ( ( letter = = ' ? ' ) | | ( letter = = ' * ' ) )
{
2019-09-04 19:55:56 +00:00
oss_final_processing < < " [^/] " ; /// '?' is any symbol except '/'
2019-07-21 13:15:04 +00:00
if ( letter = = ' ? ' )
continue ;
}
2019-08-29 15:38:33 +00:00
if ( ( letter = = ' . ' ) | | ( letter = = ' { ' ) | | ( letter = = ' } ' ) )
2019-09-04 19:55:56 +00:00
oss_final_processing < < ' \\ ' ;
oss_final_processing < < letter ;
2019-07-21 13:15:04 +00:00
}
2019-09-04 19:55:56 +00:00
return oss_final_processing . str ( ) ;
2019-07-21 13:15:04 +00:00
}
}