Fix wrong behavior of the domain function for URLs that contain a UserInfo part and '@'

When a UserInfo part and '@' appear in the URL, the host after the '@' should
be returned. For example, when the URL is "https://user:pass@clickhouse.com/",
start_of_host should be the char 'c' after '@', and end_of_host should be '/'
rather than ':'.
This commit is contained in:
Quanfa Fu 2022-10-12 22:54:16 +08:00
parent 2834143bd4
commit dbe68ab0a8
4 changed files with 39 additions and 7 deletions

View File

@ -74,20 +74,30 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
}
Pos dot_pos = nullptr;
Pos colon_pos = nullptr;
bool has_at_symbol = false;
bool has_terminator_after_colon = false;
const auto * start_of_host = pos;
for (; pos < end; ++pos)
{
switch (*pos)
{
case '.':
dot_pos = pos;
if (has_at_symbol || colon_pos == nullptr)
dot_pos = pos;
break;
case ':': /// end symbols
case '/':
case ':':
if (has_at_symbol || colon_pos) goto done;
colon_pos = pos;
break;
case '/': /// end symbols
case '?':
case '#':
return checkAndReturnHost(pos, dot_pos, start_of_host);
goto done;
case '@': /// myemail@gmail.com
if (has_terminator_after_colon) return std::string_view{};
if (has_at_symbol) goto done;
has_at_symbol = true;
start_of_host = pos + 1;
break;
case ' ': /// restricted symbols in whole URL
@ -106,10 +116,16 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
case ';':
case '=':
case '&':
return std::string_view{};
if (colon_pos == nullptr)
return std::string_view{};
else
has_terminator_after_colon = true;
}
}
done:
if (!has_at_symbol)
pos = colon_pos ? colon_pos : pos;
return checkAndReturnHost(pos, dot_pos, start_of_host);
}

View File

@ -7,6 +7,14 @@ http
====HOST====
www.example.com
example.com
example.com
example.com
example.com
example.com
example.com
www.example.com
127.0.0.1

View File

@ -8,6 +8,14 @@ SELECT protocol('//127.0.0.1:443/') AS Scheme;
SELECT '====HOST====';
SELECT domain('http://paul@www.example.com:80/') AS Host;
SELECT domain('user:password@example.com:8080') AS Host;
SELECT domain('http://user:password@example.com:8080') AS Host;
SELECT domain('http://user:password@example.com:8080/path?query=value#fragment') AS Host;
SELECT domain('newuser:@example.com') AS Host;
SELECT domain('http://:pass@example.com') AS Host;
SELECT domain(':newpass@example.com') AS Host;
SELECT domain('http://user:pass@example@.com') AS Host;
SELECT domain('http://user:pass:example.com') AS Host;
SELECT domain('http:/paul/example/com') AS Host;
SELECT domain('http://www.example.com?q=4') AS Host;
SELECT domain('http://127.0.0.1:443/') AS Host;

View File

@ -1,5 +1,5 @@
ru 262914 69218
91872 89417
ru 262915 69218
91871 89417
com 63298 30285
ua 29037 17475
html 25079 15039