Add more URL checks to the domain and topLevelDomain functions

This commit is contained in:
Guillaume Tassery 2019-04-23 12:23:59 +07:00
parent 5964646971
commit b567127f49

View File

@ -8,6 +8,42 @@
namespace DB namespace DB
{ {
/// Tells whether `c` belongs to the set of characters treated as unsafe in a URL:
/// space, tab, '<', '>', '#', '%', '{', '}', '|', backslash, '^', '~', '[' and ']'.
static inline bool isUnsafeCharUrl(char c)
{
    /// Whitespace.
    if (c == ' ' || c == '\t')
        return true;

    /// Angle, curly and square brackets.
    if (c == '<' || c == '>' || c == '{' || c == '}' || c == '[' || c == ']')
        return true;

    /// Remaining punctuation from the unsafe set.
    return c == '#' || c == '%' || c == '|' || c == '\\' || c == '^' || c == '~';
}
/// Tells whether `c` is one of ':', '/', '?' or '#'.
/// getURLHost stops scanning the host part as soon as it meets such a character.
static inline bool isEndOfUrl(char c)
{
    return c == ':' || c == '/' || c == '?' || c == '#';
}
/// Extracts host from given url. /// Extracts host from given url.
inline StringRef getURLHost(const char * data, size_t size) inline StringRef getURLHost(const char * data, size_t size)
{ {
@ -39,14 +75,26 @@ inline StringRef getURLHost(const char * data, size_t size)
pos += 2; pos += 2;
const char * start_of_host = pos; const char * start_of_host = pos;
bool has_dot_delimiter = false;
for (; pos < end; ++pos) for (; pos < end; ++pos)
{ {
if (*pos == '@') if (*pos == '@')
start_of_host = pos + 1; start_of_host = pos + 1;
else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') else if (*pos == '.')
{
if (pos + 1 == end || isEndOfUrl(*(pos + 1)))
return StringRef{};
has_dot_delimiter = true;
}
else if (isEndOfUrl(*pos))
break; break;
else if (isUnsafeCharUrl(*pos))
return StringRef{};
} }
if (!has_dot_delimiter)
return StringRef{};
return (pos == start_of_host) ? StringRef{} : StringRef(start_of_host, pos - start_of_host); return (pos == start_of_host) ? StringRef{} : StringRef(start_of_host, pos - start_of_host);
} }