From d89ba2e5d98600e1bf682b987339f132b8a6b6cb Mon Sep 17 00:00:00 2001
From: HeenaBansal2009 <heena.bansal@ibm.com>
Date: Tue, 5 Jul 2022 21:18:39 -0700
Subject: [PATCH] Address review comments: handle descending {M..N} glob ranges and add tests

---
 src/Common/parseGlobs.cpp                     | 25 +++++++-----
 .../gtest_makeRegexpPatternFromGlobs.cpp      | 27 +++++++++++--
 .../02297_regex_parsing_file_names.reference  |  1 +
 .../02297_regex_parsing_file_names.sh         | 38 +++++++++++++++++++
 4 files changed, 78 insertions(+), 13 deletions(-)
 create mode 100644 tests/queries/0_stateless/02297_regex_parsing_file_names.reference
 create mode 100755 tests/queries/0_stateless/02297_regex_parsing_file_names.sh

diff --git a/src/Common/parseGlobs.cpp b/src/Common/parseGlobs.cpp
index 9e18a9c0780..8e9195f9842 100644
--- a/src/Common/parseGlobs.cpp
+++ b/src/Common/parseGlobs.cpp
@@ -32,7 +32,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
     }
     std::string escaped_with_globs = buf_for_escaping.str();
 
-    static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})");    /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without {}*,
+    static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})");    /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without "{", "}", "*" and ","
     re2::StringPiece input(escaped_with_globs);
     re2::StringPiece matched;
     std::ostringstream oss_for_replacing;       // STYLE_CHECK_ALLOW_STD_STRING_STREAM
@@ -50,25 +50,32 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
             char point;
             ReadBufferFromString buf_range(buffer);
             buf_range >> range_begin >> point >> point >> range_end;
-            bool leading_zeros = buffer[0] == '0';
 
             size_t range_begin_width = buffer.find('.');
             size_t range_end_width = buffer.size() - buffer.find_last_of('.') - 1;
-            //Scenarios {0..10} {0..999}  
-            size_t num_len = 0;
+            bool leading_zeros = buffer[0] == '0';
+            size_t output_width = 0;
+
+            if (range_begin > range_end)    //Descending Sequence {20..15} {9..01}
+            {
+                std::swap(range_begin,range_end);
+                leading_zeros = buffer[buffer.find_last_of('.')+1]=='0';
+                std::swap(range_begin_width,range_end_width);
+            }
             if (range_begin_width == 1 && leading_zeros)
-                num_len = 1;
-            //Scenarios {00..99} {00..099} 
+                output_width = 1;   ///Special Case: {0..10} {0..999}
             else
-                num_len = range_begin_width < range_end_width ? range_end_width : range_begin_width;
+                output_width = std::max(range_begin_width, range_end_width);
+
             if (leading_zeros)
-                oss_for_replacing << std::setfill('0') << std::setw(num_len);
+                oss_for_replacing << std::setfill('0') << std::setw(output_width);
             oss_for_replacing << range_begin;
+
             for (size_t i = range_begin + 1; i <= range_end; ++i)
             {
                 oss_for_replacing << '|';
                 if (leading_zeros)
-                    oss_for_replacing << std::setfill('0') << std::setw(num_len);
+                    oss_for_replacing << std::setfill('0') << std::setw(output_width);
                 oss_for_replacing << i;
             }
         }
diff --git a/src/Common/tests/gtest_makeRegexpPatternFromGlobs.cpp b/src/Common/tests/gtest_makeRegexpPatternFromGlobs.cpp
index 833a0a6ba73..fda3a6ee1c8 100644
--- a/src/Common/tests/gtest_makeRegexpPatternFromGlobs.cpp
+++ b/src/Common/tests/gtest_makeRegexpPatternFromGlobs.cpp
@@ -8,21 +8,40 @@ using namespace DB;
 
 TEST(Common, makeRegexpPatternFromGlobs)
 {
+    // Verifies the regexp pattern generated for glob wildcards (?, *), {a,b,c} enumerations and {M..N} numeric ranges.
     EXPECT_EQ(makeRegexpPatternFromGlobs("?"), "[^/]");
     EXPECT_EQ(makeRegexpPatternFromGlobs("*"), "[^/]*");
     EXPECT_EQ(makeRegexpPatternFromGlobs("/?"), "/[^/]");
     EXPECT_EQ(makeRegexpPatternFromGlobs("/*"), "/[^/]*");
     EXPECT_EQ(makeRegexpPatternFromGlobs("*_{{a,b,c,d}}/?.csv"), "[^/]*_\\{(a|b|c|d)\\}/[^/]\\.csv");
-    EXPECT_EQ(makeRegexpPatternFromGlobs("f{01..9}"), "f(01|02|03|04|05|06|07|08|09)");
-    EXPECT_EQ(makeRegexpPatternFromGlobs("f{001..0009}"), "f(0001|0002|0003|0004|0005|0006|0007|0008|0009)");
+    /* Range globs {M..N} are exercised for three width relationships between the two bounds:
+       1) the left bound is as wide as the right bound, e.g. {10..20}
+       2) the left bound is wider than the right bound, e.g. {000..9}
+       3) the left bound is narrower than the right bound, e.g. {0..10}
+    */
+    // Ascending sequences (M < N)
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{1..9}"), "f(1|2|3|4|5|6|7|8|9)");
     EXPECT_EQ(makeRegexpPatternFromGlobs("f{0..10}"), "f(0|1|2|3|4|5|6|7|8|9|10)");
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{10..20}"), "f(10|11|12|13|14|15|16|17|18|19|20)");
     EXPECT_EQ(makeRegexpPatternFromGlobs("f{00..10}"), "f(00|01|02|03|04|05|06|07|08|09|10)");
-    EXPECT_EQ(makeRegexpPatternFromGlobs("f{000..9}"), "f(000|001|002|003|004|005|006|007|008|009)");
     EXPECT_EQ(makeRegexpPatternFromGlobs("f{0001..0009}"), "f(0001|0002|0003|0004|0005|0006|0007|0008|0009)");
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{01..9}"), "f(01|02|03|04|05|06|07|08|09)");
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{000..9}"), "f(000|001|002|003|004|005|006|007|008|009)");
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{95..103}"), "f(95|96|97|98|99|100|101|102|103)");
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{99..109}"), "f(99|100|101|102|103|104|105|106|107|108|109)");
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{001..0009}"), "f(0001|0002|0003|0004|0005|0006|0007|0008|0009)");
+    // Descending sequences (M > N): the expected pattern still lists the values in ascending order
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{20..15}"), "f(15|16|17|18|19|20)");
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{200..199}"), "f(199|200)");
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{0009..0001}"), "f(0001|0002|0003|0004|0005|0006|0007|0008|0009)");
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{100..90}"), "f(90|91|92|93|94|95|96|97|98|99|100)");
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{103..95}"), "f(95|96|97|98|99|100|101|102|103)");
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{9..01}"), "f(01|02|03|04|05|06|07|08|09)");
+    EXPECT_EQ(makeRegexpPatternFromGlobs("f{9..000}"), "f(000|001|002|003|004|005|006|007|008|009)");
     EXPECT_EQ(makeRegexpPatternFromGlobs("f{1..2}{1..2}"), "f(1|2)(1|2)");
     EXPECT_EQ(makeRegexpPatternFromGlobs("f{1..1}{1..1}"), "f(1)(1)");
     EXPECT_EQ(makeRegexpPatternFromGlobs("f{0..0}{0..0}"), "f(0)(0)");
     EXPECT_EQ(makeRegexpPatternFromGlobs("file{1..5}"),"file(1|2|3|4|5)");
     EXPECT_EQ(makeRegexpPatternFromGlobs("file{1,2,3}"),"file(1|2|3)");
     EXPECT_EQ(makeRegexpPatternFromGlobs("{1,2,3}blabla{a.x,b.x,c.x}smth[]_else{aa,bb}?*"), "(1|2|3)blabla(a\\.x|b\\.x|c\\.x)smth\\[\\]_else(aa|bb)[^/][^/]*");
-}
\ No newline at end of file
+}
diff --git a/tests/queries/0_stateless/02297_regex_parsing_file_names.reference b/tests/queries/0_stateless/02297_regex_parsing_file_names.reference
new file mode 100644
index 00000000000..b4de3947675
--- /dev/null
+++ b/tests/queries/0_stateless/02297_regex_parsing_file_names.reference
@@ -0,0 +1 @@
+11
diff --git a/tests/queries/0_stateless/02297_regex_parsing_file_names.sh b/tests/queries/0_stateless/02297_regex_parsing_file_names.sh
new file mode 100755
index 00000000000..2db4ae8044c
--- /dev/null
+++ b/tests/queries/0_stateless/02297_regex_parsing_file_names.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# Data preparation.
+
+# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as:
+#  "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')"
+CLICKHOUSE_USER_FILES_PATH=$(clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+
+mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/
+
+echo '{"obj": "aaa", "id": 1, "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_0.json
+echo '{"id": 2, "obj": "bbb", "s": "bar"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_1.json
+echo '{"id": 3, "obj": "ccc", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_2.json
+echo '{"id": 4, "obj": "ddd", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_3.json
+echo '{"id": 5, "obj": "eee", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_4.json
+echo '{"id": 6, "obj": "fff", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_5.json
+echo '{"id": 7, "obj": "ggg", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_6.json
+echo '{"id": 8, "obj": "hhh", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_7.json
+echo '{"id": 9, "obj": "iii", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_8.json
+echo '{"id": 10, "obj":"jjj", "s": "foo"}' >> ${CLICKHOUSE_USER_FILES_PATH}/file_9.json
+echo '{"id": 11, "obj": "kkk", "s": "foo"}'>> ${CLICKHOUSE_USER_FILES_PATH}/file_10.json
+
+
+${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_regex"
+
+${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_regex (id UInt64, obj String, s String) ENGINE =File(JSONEachRow)" ;
+
+
+${CLICKHOUSE_CLIENT} -q "INSERT INTO t_regex SELECT * FROM file('file_{0..10}.json','JSONEachRow')";
+${CLICKHOUSE_CLIENT} -q "SELECT count() from t_regex"
+
+rm -rf ${CLICKHOUSE_USER_FILES_PATH}/file_*.json;
+${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_regex"