Update after review

expandSelector is replaced by expandSelectionGlob, which returns std::vector<std::string> directly
minor changes
This commit is contained in:
zvonand 2023-11-15 16:35:25 +01:00
parent 20163e6aba
commit a8aa206cfd
4 changed files with 22 additions and 13 deletions

View File

@ -122,17 +122,18 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
return buf_final_processing.str();
}
void expandSelector(const std::string & path, Strings & for_match_paths_expanded)
namespace
{
/// regexp for {expr1,expr2,expr3}, expr.. should be without "{", "}", "*" and ","
void expandSelectorGlobImpl(const std::string & path, std::vector<std::string> & for_match_paths_expanded)
{
/// regexp for {expr1,expr2,....};
/// expr1, expr2,... cannot contain any of these: '{', '}', '*', ','
static const re2::RE2 selector_regex(R"({([^{}*,]+,[^{}*]*[^{}*,])})");
std::string_view path_view(path);
std::string_view matched;
if (RE2::FindAndConsume(&path_view, selector_regex, &matched))
std::string buffer(matched);
else
if (!RE2::FindAndConsume(&path_view, selector_regex, &matched))
{
for_match_paths_expanded.push_back(path);
return;
@ -140,7 +141,7 @@ void expandSelector(const std::string & path, Strings & for_match_paths_expanded
Strings expanded_paths;
std::vector<size_t> anchor_positions = {};
std::vector<size_t> anchor_positions;
bool opened = false, closed = false;
for (std::string::const_iterator it = path.begin(); it != path.end(); it++)
@ -181,7 +182,15 @@ void expandSelector(const std::string & path, Strings & for_match_paths_expanded
std::string expanded_matcher = common_prefix
+ path.substr(anchor_positions[i-1] + 1, (anchor_positions[i] - anchor_positions[i-1] - 1))
+ common_suffix;
expandSelector(expanded_matcher, for_match_paths_expanded);
expandSelectorGlobImpl(expanded_matcher, for_match_paths_expanded);
}
}
}
/// Expand the {a,b,c} selectors found in `path` and return the resulting paths.
/// The work is delegated to expandSelectorGlobImpl, which appends its output
/// to the vector it is given; this wrapper only owns that vector and returns it.
std::vector<std::string> expandSelectionGlob(const std::string & path)
{
    std::vector<std::string> expanded_paths;
    expandSelectorGlobImpl(path, expanded_paths);
    return expanded_paths;
}
}

View File

@ -7,6 +7,8 @@ namespace DB
/// Parse globs in string and make a regexp for it.
std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs);
/// Process {a,b,c...} globs separately: don't match it against regex, but generate a,b,c strings instead.
void expandSelector(const std::string & path, std::vector<std::string> & for_match_paths_expanded);
/// Process {a,b,c...} globs:
/// Don't match it against regex, but generate a,b,c strings instead and process each of them separately.
/// E.g. for a string like `file{1,2,3}.csv` return vector of strings: {`file1.csv`,`file2.csv`,`file3.csv`}
std::vector<std::string> expandSelectionGlob(const std::string & path);
}

View File

@ -173,8 +173,7 @@ namespace
HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef());
HDFSFSPtr fs = createHDFSFS(builder.get());
Strings paths;
expandSelector(path_from_uri, paths);
Strings paths = expandSelectionGlob(path_from_uri);
std::vector<StorageHDFS::PathWithInfo> res;

View File

@ -196,8 +196,7 @@ std::vector<std::string> listFilesWithRegexpMatching(
{
std::vector<std::string> result;
Strings for_match_paths_expanded;
expandSelector(for_match, for_match_paths_expanded);
Strings for_match_paths_expanded = expandSelectionGlob(for_match);
for (const auto & for_match_expanded : for_match_paths_expanded)
listFilesWithRegexpMatchingImpl("/", for_match_expanded, total_bytes_to_read, result);