mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 10:02:01 +00:00
Review Comments
This commit is contained in:
parent
608bfb8453
commit
d89ba2e5d9
@ -32,7 +32,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
|
||||
}
|
||||
std::string escaped_with_globs = buf_for_escaping.str();
|
||||
|
||||
static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})"); /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without {}*,
|
||||
static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})"); /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without "{", "}", "*" and ","
|
||||
re2::StringPiece input(escaped_with_globs);
|
||||
re2::StringPiece matched;
|
||||
std::ostringstream oss_for_replacing; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
@ -50,25 +50,32 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
|
||||
char point;
|
||||
ReadBufferFromString buf_range(buffer);
|
||||
buf_range >> range_begin >> point >> point >> range_end;
|
||||
bool leading_zeros = buffer[0] == '0';
|
||||
|
||||
size_t range_begin_width = buffer.find('.');
|
||||
size_t range_end_width = buffer.size() - buffer.find_last_of('.') - 1;
|
||||
//Scenarios {0..10} {0..999}
|
||||
size_t num_len = 0;
|
||||
bool leading_zeros = buffer[0] == '0';
|
||||
size_t output_width = 0;
|
||||
|
||||
if (range_begin > range_end) //Descending Sequence {20..15} {9..01}
|
||||
{
|
||||
std::swap(range_begin,range_end);
|
||||
leading_zeros = buffer[buffer.find_last_of('.')+1]=='0';
|
||||
std::swap(range_begin_width,range_end_width);
|
||||
}
|
||||
if (range_begin_width == 1 && leading_zeros)
|
||||
num_len = 1;
|
||||
//Scenarios {00..99} {00..099}
|
||||
output_width = 1; ///Special Case: {0..10} {0..999}
|
||||
else
|
||||
num_len = range_begin_width < range_end_width ? range_end_width : range_begin_width;
|
||||
output_width = std::max(range_begin_width, range_end_width);
|
||||
|
||||
if (leading_zeros)
|
||||
oss_for_replacing << std::setfill('0') << std::setw(num_len);
|
||||
oss_for_replacing << std::setfill('0') << std::setw(output_width);
|
||||
oss_for_replacing << range_begin;
|
||||
|
||||
for (size_t i = range_begin + 1; i <= range_end; ++i)
|
||||
{
|
||||
oss_for_replacing << '|';
|
||||
if (leading_zeros)
|
||||
oss_for_replacing << std::setfill('0') << std::setw(num_len);
|
||||
oss_for_replacing << std::setfill('0') << std::setw(output_width);
|
||||
oss_for_replacing << i;
|
||||
}
|
||||
}
|
||||
|
@ -8,17 +8,36 @@ using namespace DB;
|
||||
|
||||
TEST(Common, makeRegexpPatternFromGlobs)
|
||||
{
|
||||
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("?"), "[^/]");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("*"), "[^/]*");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("/?"), "/[^/]");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("/*"), "/[^/]*");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("*_{{a,b,c,d}}/?.csv"), "[^/]*_\\{(a|b|c|d)\\}/[^/]\\.csv");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{01..9}"), "f(01|02|03|04|05|06|07|08|09)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{001..0009}"), "f(0001|0002|0003|0004|0005|0006|0007|0008|0009)");
|
||||
/* Parsing a numeric range glob {M..N} has three possible cases, depending on the widths of its bounds:
|
||||
1) The left range width == the right range width
|
||||
2) The left range width > the right range width
|
||||
3) The left range width < the right range width
|
||||
*/
|
||||
// Ascending Sequences
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{1..9}"), "f(1|2|3|4|5|6|7|8|9)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{0..10}"), "f(0|1|2|3|4|5|6|7|8|9|10)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{10..20}"), "f(10|11|12|13|14|15|16|17|18|19|20)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{00..10}"), "f(00|01|02|03|04|05|06|07|08|09|10)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{000..9}"), "f(000|001|002|003|004|005|006|007|008|009)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{0001..0009}"), "f(0001|0002|0003|0004|0005|0006|0007|0008|0009)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{01..9}"), "f(01|02|03|04|05|06|07|08|09)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{000..9}"), "f(000|001|002|003|004|005|006|007|008|009)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{95..103}"), "f(95|96|97|98|99|100|101|102|103)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{99..109}"), "f(99|100|101|102|103|104|105|106|107|108|109)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{001..0009}"), "f(0001|0002|0003|0004|0005|0006|0007|0008|0009)");
|
||||
// Descending Sequences
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{20..15}"), "f(15|16|17|18|19|20)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{200..199}"), "f(199|200)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{0009..0001}"), "f(0001|0002|0003|0004|0005|0006|0007|0008|0009)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{100..90}"), "f(90|91|92|93|94|95|96|97|98|99|100)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{103..95}"), "f(95|96|97|98|99|100|101|102|103)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{9..01}"), "f(01|02|03|04|05|06|07|08|09)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{9..000}"), "f(000|001|002|003|004|005|006|007|008|009)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{1..2}{1..2}"), "f(1|2)(1|2)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{1..1}{1..1}"), "f(1)(1)");
|
||||
EXPECT_EQ(makeRegexpPatternFromGlobs("f{0..0}{0..0}"), "f(0)(0)");
|
||||
|
@ -0,0 +1 @@
|
||||
11
|
38
tests/queries/0_stateless/02297_regex_parsing_file_names.sh
Executable file
38
tests/queries/0_stateless/02297_regex_parsing_file_names.sh
Executable file
@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-fasttest
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
# Data preparation.
|
||||
|
||||
# Obtain user_files_path with a trick: query a non-existent file through the file() table function and extract the path from the exception message. It can also be obtained with a query such as:
|
||||
# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')"
|
||||
CLICKHOUSE_USER_FILES_PATH=$(clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
|
||||
|
||||
mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/
|
||||
|
||||
echo '{"obj": "aaa", "id": 1, "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_0.json
|
||||
echo '{"id": 2, "obj": "bbb", "s": "bar"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_1.json
|
||||
echo '{"id": 3, "obj": "ccc", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_2.json
|
||||
echo '{"id": 4, "obj": "ddd", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_3.json
|
||||
echo '{"id": 5, "obj": "eee", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_4.json
|
||||
echo '{"id": 6, "obj": "fff", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_5.json
|
||||
echo '{"id": 7, "obj": "ggg", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_6.json
|
||||
echo '{"id": 8, "obj": "hhh", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_7.json
|
||||
echo '{"id": 9, "obj": "iii", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_8.json
|
||||
echo '{"id": 10, "obj":"jjj", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_9.json
|
||||
echo '{"id": 11, "obj": "kkk", "s": "foo"}'>> ${CLICKHOUSE_USER_FILES_PATH}/file_10.json
|
||||
|
||||
|
||||
${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_regex"
|
||||
|
||||
${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_regex (id UInt64, obj String, s String) ENGINE =File(JSONEachRow)" ;
|
||||
|
||||
|
||||
${CLICKHOUSE_CLIENT} -q "INSERT INTO t_regex SELECT * FROM file('file_{0..10}.json','JSONEachRow')";
|
||||
${CLICKHOUSE_CLIENT} -q "SELECT count() from t_regex"
|
||||
|
||||
rm -rf ${CLICKHOUSE_USER_FILES_PATH}/file_*.json;
|
||||
${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_regex"
|
Loading…
Reference in New Issue
Block a user