2023-03-03 16:57:10 +00:00
|
|
|
#include <gtest/gtest.h>
|
|
|
|
|
|
|
|
#include <Common/OptimizedRegularExpression.h>
|
|
|
|
|
|
|
|
/// Feeds a series of regular expressions to OptimizedRegularExpression::analyze
/// and compares the outputs it fills in against hand-written expectations.
TEST(OptimizeRE, analyze)
{
    /// Runs analyze() on `regexp` and checks three of its outputs:
    ///  - `answer`              is the expected value of the `required` output string;
    ///  - `expect_alternatives` is the expected `alternatives` output (empty by default);
    ///  - `trivial_expected`    is the expected `is_trivial` flag (false by default).
    /// The `is_prefix` output is received but not verified by this helper.
    auto test_f = [](const std::string & regexp,
                     const std::string & answer,
                     const std::vector<std::string> & expect_alternatives = {},
                     bool trivial_expected = false)
    {
        /// All assertions share the same source line inside this lambda, so attach
        /// the pattern to every failure message to tell the cases apart.
        SCOPED_TRACE(regexp);

        std::string required;
        /// Out-parameters get a defined initial value so that the comparisons below
        /// never read an indeterminate bool.
        bool is_trivial = false;
        bool is_prefix = false;
        std::vector<std::string> alternatives;
        OptimizedRegularExpression::analyze(regexp, required, is_trivial, is_prefix, alternatives);

        std::cerr << regexp << std::endl;
        EXPECT_EQ(required, answer);
        EXPECT_EQ(alternatives, expect_alternatives);
        EXPECT_EQ(is_trivial, trivial_expected);
    };

    test_f("abc", "abc", {}, true);
    test_f("c([^k]*)de", "");
    test_f("abc(de)fg", "abcdefg");
    test_f("abc(de|xyz)fg", "abc", {"abcdefg", "abcxyzfg"});
    test_f("abc(de?f|xyz)fg", "abc", {"abcd", "abcxyzfg"});
    test_f("abc|fgk|xyz", "", {"abc", "fgk", "xyz"});
    test_f("(abc)", "abc");
    test_f("(abc|fgk)", "", {"abc", "fgk"});
    test_f("(abc|fgk)(e|f|zkh|)", "", {"abc", "fgk"});
    test_f("abc(abc|fg)xyzz", "xyzz", {"abcabcxyzz", "abcfgxyzz"});
    test_f("abc[k]xyzz", "xyzz");
    test_f("(abc[k]xyzz)", "xyzz");
    test_f("abc((de)fg(hi))jk", "abcdefghijk");
    test_f("abc((?:de)fg(?:hi))jk", "abcdefghijk");
    test_f("abc((de)fghi+zzz)jk", "abcdefghi");
    test_f("abc((de)fg(hi))?jk", "abc");
    test_f("abc((de)fghi?zzz)jk", "abcdefgh");
    test_f("abc(*cd)jk", "cdjk");
    test_f(R"(abc(de|xyz|(\{xx\}))fg)", "abc", {"abcdefg", "abcxyzfg", "abc{xx}fg"});
    test_f("abc(abc|fg)?xyzz", "xyzz");
    test_f("abc(abc|fg){0,1}xyzz", "xyzz");
    test_f("abc(abc|fg)xyzz|bcdd?k|bc(f|g|h?)z", "", {"abcabcxyzz", "abcfgxyzz", "bcd", "bcfz", "bcgz", ""});
    test_f("abc(abc|fg)xyzz|bc(dd?x|kk?y|(f))k|bc(f|g|h?)z", "", {"abcabcxyzz", "abcfgxyzz", "bcd", "bck", "bcfk", "bcfz", "bcgz", ""});
    test_f("((?:abc|efg|xyz)/[a-zA-Z0-9]{1-50})(/?[^ ]*|)", "", {"abc/", "efg/", "xyz/"});
    test_f(R"([Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]{1,30})(?:-[A-Za-z]{1,30}|)|bingbot|\bYeti(?:-[a-z]{1,30}|)|Catchpoint(?: bot|)|[Cc]harlotte|Daumoa(?:-feedfetcher|)|(?:[a-zA-Z]{1,30}-|)Googlebot(?:-[a-zA-Z]{1,30}|))", "", {"pider-", "bingbot", "Yeti-", "Yeti", "Catchpoint bot", "Catchpoint", "harlotte", "Daumoa-feedfetcher", "Daumoa", "Googlebot-", "Googlebot"});
}
|