From 2eb63649bdd884293cc604eb3534e6ea1aea1ad4 Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 22 Sep 2023 00:30:46 +0200 Subject: [PATCH] fix old workflows --- src/Storages/HDFS/StorageHDFS.cpp | 42 ++++++++------------------ src/Storages/StorageFile.cpp | 49 ++++++++++--------------------- 2 files changed, 29 insertions(+), 62 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 8563a777bb2..ac2635aafaf 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -137,41 +137,25 @@ namespace const HDFSFSPtr & fs, const String & for_match) { + /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without "{", "}", "*" and "," + static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})"); + + std::string_view for_match_view(for_match); + std::string_view matched; + if (RE2::FindAndConsume(&for_match_view, enum_or_range, &matched)) + { + std::string buffer(matched); + if (buffer.find(',') != std::string::npos) + return expandSelector(path_for_ls, fs, for_match); + } + const size_t first_glob_pos = for_match.find_first_of("*?{"); - const bool has_glob = first_glob_pos != std::string::npos; const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' - bool has_generator = false; - bool range_generator = false; - - const size_t next_slash_after_glob_pos = [&]() - { - if (!has_glob) - return suffix_with_globs.find('/', 1); - - bool prev_is_dot = false; - - for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++) - { - if (*it == '{') - has_generator = true; - else if (*it == '/') - return size_t(std::distance(suffix_with_globs.begin(), 
it)); - else if (*it == '.') - { - if (prev_is_dot) - range_generator = true; - prev_is_dot = true; - } - } - return std::string::npos; - }(); - - if (has_generator && !range_generator) - return expandSelector(path_for_ls, fs, for_match); + const size_t next_slash_after_glob_pos = suffix_with_globs.find('/', 1); const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index eb18842fdaa..5b05dfa687f 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -106,7 +106,6 @@ namespace ErrorCodes namespace { - /// Forward-declare to use in expandSelector() void listFilesWithRegexpMatchingImpl( const std::string & path_for_ls, @@ -171,43 +170,27 @@ void listFilesWithRegexpMatchingImpl( std::vector<std::string> & result, bool recursive) { + /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without "{", "}", "*" and "," + static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})"); + + std::string_view for_match_view(for_match); + std::string_view matched; + if (RE2::FindAndConsume(&for_match_view, enum_or_range, &matched)) + { + std::string buffer(matched); + if (buffer.find(',') != std::string::npos) + { + expandSelector(path_for_ls, for_match, total_bytes_to_read, result, recursive); + return; + } + } + const size_t first_glob_pos = for_match.find_first_of("*?{"); - const bool has_glob = first_glob_pos != std::string::npos; const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); const std::string suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' - bool has_generator = false; - bool range_generator = false; - - const size_t next_slash_after_glob_pos = [&]() - { - if (!has_glob) - return suffix_with_globs.find('/', 1); - - bool prev_is_dot = false; - - for (std::string::const_iterator it = ++suffix_with_globs.begin(); 
it != suffix_with_globs.end(); it++) - { - if (*it == '{') - has_generator = true; - else if (*it == '/') - return size_t(std::distance(suffix_with_globs.begin(), it)); - else if (*it == '.') - { - if (prev_is_dot) - range_generator = true; - prev_is_dot = true; - } - } - return std::string::npos; - }(); - - if (has_generator && !range_generator) - { - expandSelector(path_for_ls, for_match, total_bytes_to_read, result, recursive); - return; - } + const size_t next_slash_after_glob_pos = suffix_with_globs.find('/', 1); const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos);