mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Update after review
expandSelector now returns vector<string> directly; minor changes.
This commit is contained in:
parent
20163e6aba
commit
a8aa206cfd
@ -122,17 +122,18 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
|
||||
return buf_final_processing.str();
|
||||
}
|
||||
|
||||
void expandSelector(const std::string & path, Strings & for_match_paths_expanded)
|
||||
namespace
|
||||
{
|
||||
/// regexp for {expr1,expr2,expr3}, expr.. should be without "{", "}", "*" and ","
|
||||
void expandSelectorGlobImpl(const std::string & path, std::vector<std::string> & for_match_paths_expanded)
|
||||
{
|
||||
/// regexp for {expr1,expr2,....};
|
||||
/// expr1, expr2,... cannot contain any of these: '{', '}', '*', ','
|
||||
static const re2::RE2 selector_regex(R"({([^{}*,]+,[^{}*]*[^{}*,])})");
|
||||
|
||||
std::string_view path_view(path);
|
||||
std::string_view matched;
|
||||
|
||||
if (RE2::FindAndConsume(&path_view, selector_regex, &matched))
|
||||
std::string buffer(matched);
|
||||
else
|
||||
if (!RE2::FindAndConsume(&path_view, selector_regex, &matched))
|
||||
{
|
||||
for_match_paths_expanded.push_back(path);
|
||||
return;
|
||||
@ -140,7 +141,7 @@ void expandSelector(const std::string & path, Strings & for_match_paths_expanded
|
||||
|
||||
Strings expanded_paths;
|
||||
|
||||
std::vector<size_t> anchor_positions = {};
|
||||
std::vector<size_t> anchor_positions;
|
||||
bool opened = false, closed = false;
|
||||
|
||||
for (std::string::const_iterator it = path.begin(); it != path.end(); it++)
|
||||
@ -181,7 +182,15 @@ void expandSelector(const std::string & path, Strings & for_match_paths_expanded
|
||||
std::string expanded_matcher = common_prefix
|
||||
+ path.substr(anchor_positions[i-1] + 1, (anchor_positions[i] - anchor_positions[i-1] - 1))
|
||||
+ common_suffix;
|
||||
expandSelector(expanded_matcher, for_match_paths_expanded);
|
||||
expandSelectorGlobImpl(expanded_matcher, for_match_paths_expanded);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> expandSelectionGlob(const std::string & path)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
expandSelectorGlobImpl(path, result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
@ -7,6 +7,8 @@ namespace DB
|
||||
/// Parse globs in string and make a regexp for it.
|
||||
std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs);
|
||||
|
||||
/// Process {a,b,c...} globs separately: don't match it against regex, but generate a,b,c strings instead.
|
||||
void expandSelector(const std::string & path, std::vector<std::string> & for_match_paths_expanded);
|
||||
/// Process {a,b,c...} globs:
|
||||
/// Don't match it against regex, but generate a,b,c strings instead and process each of them separately.
|
||||
/// E.g. for a string like `file{1,2,3}.csv` return vector of strings: {`file1.csv`,`file2.csv`,`file3.csv`}
|
||||
std::vector<std::string> expandSelectionGlob(const std::string & path);
|
||||
}
|
||||
|
@ -173,8 +173,7 @@ namespace
|
||||
HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef());
|
||||
HDFSFSPtr fs = createHDFSFS(builder.get());
|
||||
|
||||
Strings paths;
|
||||
expandSelector(path_from_uri, paths);
|
||||
Strings paths = expandSelectionGlob(path_from_uri);
|
||||
|
||||
std::vector<StorageHDFS::PathWithInfo> res;
|
||||
|
||||
|
@ -196,8 +196,7 @@ std::vector<std::string> listFilesWithRegexpMatching(
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
|
||||
Strings for_match_paths_expanded;
|
||||
expandSelector(for_match, for_match_paths_expanded);
|
||||
Strings for_match_paths_expanded = expandSelectionGlob(for_match);
|
||||
|
||||
for (const auto & for_match_expanded : for_match_paths_expanded)
|
||||
listFilesWithRegexpMatchingImpl("/", for_match_expanded, total_bytes_to_read, result);
|
||||
|
Loading…
Reference in New Issue
Block a user