2019-07-21 13:15:04 +00:00
# include <Common/parseGlobs.h>
2020-11-09 19:07:38 +00:00
# include <IO/WriteBufferFromString.h>
# include <IO/ReadBufferFromString.h>
# include <IO/Operators.h>
2019-07-21 13:15:04 +00:00
# include <re2/re2.h>
# include <re2/stringpiece.h>
# include <algorithm>
2019-09-03 14:23:51 +00:00
# include <sstream>
2020-04-01 15:06:20 +00:00
# include <iomanip>
2020-03-20 02:15:28 +00:00
2019-07-21 13:15:04 +00:00
namespace DB
{
2020-08-08 01:01:47 +00:00
/* Transforms string from grep-wildcard-syntax ("{N..M}", "{a,b,c}" as in remote table function and "*", "?") to perl-regexp for using re2 library for matching
2019-08-27 15:20:31 +00:00
* with such steps :
2019-09-03 14:23:51 +00:00
* 1 ) search intervals like { 0. .9 } and enums like { abc , xyz , qwe } in { } , replace them by regexp with pipe ( expr1 | expr2 | expr3 ) ,
2019-08-30 13:27:05 +00:00
* 2 ) search and replace " * " and " ? " .
2019-08-27 15:20:31 +00:00
* Before each search need to escape symbols that we would not search .
2019-08-29 15:38:33 +00:00
*
* There are few examples in unit tests .
2019-08-09 17:06:29 +00:00
*/
2019-08-27 15:20:31 +00:00
std : : string makeRegexpPatternFromGlobs ( const std : : string & initial_str_with_globs )
2019-07-21 13:15:04 +00:00
{
2020-11-09 19:07:38 +00:00
/// FIXME make it better
WriteBufferFromOwnString buf_for_escaping ;
2019-08-27 15:20:31 +00:00
/// Escaping only characters that not used in glob syntax
for ( const auto & letter : initial_str_with_globs )
2019-07-21 13:15:04 +00:00
{
2020-11-09 19:07:38 +00:00
if ( ( letter = = ' [ ' ) | | ( letter = = ' ] ' ) | | ( letter = = ' | ' ) | | ( letter = = ' + ' ) | | ( letter = = ' - ' ) | | ( letter = = ' ( ' ) | | ( letter = = ' ) ' ) | | ( letter = = ' \\ ' ) )
buf_for_escaping < < ' \\ ' ;
buf_for_escaping < < letter ;
2019-07-21 13:15:04 +00:00
}
2020-11-09 19:07:38 +00:00
std : : string escaped_with_globs = buf_for_escaping . str ( ) ;
2019-09-04 19:55:56 +00:00
2022-07-06 04:18:39 +00:00
static const re2 : : RE2 enum_or_range ( R " ({([ \ d]+ \ . \ .[ \ d]+|[^{}*,]+,[^{}*]*[^{}*,])}) " ) ; /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without "{", "}", "*" and ","
2019-08-27 15:20:31 +00:00
re2 : : StringPiece input ( escaped_with_globs ) ;
2019-09-03 14:23:51 +00:00
re2 : : StringPiece matched ;
2020-11-09 19:07:38 +00:00
std : : ostringstream oss_for_replacing ; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
2020-11-07 00:14:53 +00:00
oss_for_replacing . exceptions ( std : : ios : : failbit ) ;
2019-07-21 13:15:04 +00:00
size_t current_index = 0 ;
2019-08-27 15:20:31 +00:00
while ( RE2 : : FindAndConsume ( & input , enum_or_range , & matched ) )
2019-07-21 13:15:04 +00:00
{
2019-08-27 15:20:31 +00:00
std : : string buffer = matched . ToString ( ) ;
2019-09-04 19:55:56 +00:00
oss_for_replacing < < escaped_with_globs . substr ( current_index , matched . data ( ) - escaped_with_globs . data ( ) - current_index - 1 ) < < ' ( ' ;
2019-08-27 15:20:31 +00:00
if ( buffer . find ( ' , ' ) = = std : : string : : npos )
{
2020-03-20 02:15:28 +00:00
size_t range_begin = 0 ;
size_t range_end = 0 ;
2019-09-03 14:23:51 +00:00
char point ;
2020-11-09 19:07:38 +00:00
ReadBufferFromString buf_range ( buffer ) ;
buf_range > > range_begin > > point > > point > > range_end ;
2022-06-28 19:49:31 +00:00
size_t range_begin_width = buffer . find ( ' . ' ) ;
size_t range_end_width = buffer . size ( ) - buffer . find_last_of ( ' . ' ) - 1 ;
2022-07-06 04:18:39 +00:00
bool leading_zeros = buffer [ 0 ] = = ' 0 ' ;
size_t output_width = 0 ;
if ( range_begin > range_end ) //Descending Sequence {20..15} {9..01}
{
std : : swap ( range_begin , range_end ) ;
leading_zeros = buffer [ buffer . find_last_of ( ' . ' ) + 1 ] = = ' 0 ' ;
std : : swap ( range_begin_width , range_end_width ) ;
}
2022-06-28 19:49:31 +00:00
if ( range_begin_width = = 1 & & leading_zeros )
2022-07-06 04:18:39 +00:00
output_width = 1 ; ///Special Case: {0..10} {0..999}
2022-06-28 19:49:31 +00:00
else
2022-07-06 04:18:39 +00:00
output_width = std : : max ( range_begin_width , range_end_width ) ;
2020-04-01 15:06:20 +00:00
if ( leading_zeros )
2022-10-07 10:46:45 +00:00
oss_for_replacing < < std : : setfill ( ' 0 ' ) < < std : : setw ( static_cast < int > ( output_width ) ) ;
2019-09-04 19:55:56 +00:00
oss_for_replacing < < range_begin ;
2022-07-06 04:18:39 +00:00
2019-08-27 15:20:31 +00:00
for ( size_t i = range_begin + 1 ; i < = range_end ; + + i )
{
2020-04-01 15:06:20 +00:00
oss_for_replacing < < ' | ' ;
if ( leading_zeros )
2022-10-07 10:46:45 +00:00
oss_for_replacing < < std : : setfill ( ' 0 ' ) < < std : : setw ( static_cast < int > ( output_width ) ) ;
2020-04-01 15:06:20 +00:00
oss_for_replacing < < i ;
2019-08-27 15:20:31 +00:00
}
}
else
{
std : : replace ( buffer . begin ( ) , buffer . end ( ) , ' , ' , ' | ' ) ;
2019-09-04 19:55:56 +00:00
oss_for_replacing < < buffer ;
2019-08-27 15:20:31 +00:00
}
2019-09-04 19:55:56 +00:00
oss_for_replacing < < " ) " ;
2019-08-27 15:20:31 +00:00
current_index = input . data ( ) - escaped_with_globs . data ( ) ;
2019-07-21 13:15:04 +00:00
}
2019-09-04 19:55:56 +00:00
oss_for_replacing < < escaped_with_globs . substr ( current_index ) ;
std : : string almost_res = oss_for_replacing . str ( ) ;
2020-11-09 19:07:38 +00:00
WriteBufferFromOwnString buf_final_processing ;
2022-10-17 07:04:25 +00:00
char previous = ' ' ;
2019-09-03 14:23:51 +00:00
for ( const auto & letter : almost_res )
2019-07-21 13:15:04 +00:00
{
2022-10-17 07:04:25 +00:00
if ( previous = = ' * ' & & letter = = ' * ' )
{
buf_final_processing < < " [^{}] " ;
}
else if ( ( letter = = ' ? ' ) | | ( letter = = ' * ' ) )
2019-07-21 13:15:04 +00:00
{
2020-11-09 19:07:38 +00:00
buf_final_processing < < " [^/] " ; /// '?' is any symbol except '/'
2019-07-21 13:15:04 +00:00
if ( letter = = ' ? ' )
continue ;
}
2022-10-17 07:04:25 +00:00
else if ( ( letter = = ' . ' ) | | ( letter = = ' { ' ) | | ( letter = = ' } ' ) )
2020-11-09 19:07:38 +00:00
buf_final_processing < < ' \\ ' ;
buf_final_processing < < letter ;
2022-10-17 07:04:25 +00:00
previous = letter ;
2019-07-21 13:15:04 +00:00
}
2020-11-09 19:07:38 +00:00
return buf_final_processing . str ( ) ;
2019-07-21 13:15:04 +00:00
}
}