mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
fix extraction of TLD from fqdn
This commit is contained in:
parent
4cf4fa68e1
commit
396ccccfce
@ -44,6 +44,11 @@ public:
|
||||
return str[pos];
|
||||
}
|
||||
|
||||
/// Returns the last character of the view.
/// Precondition: the view must not be empty; with len == 0 the index
/// `len - 1` would refer to a position outside the viewed range.
inline TChar back() const noexcept
{
    assert(len > 0);
    return str[len - 1];
}
|
||||
|
||||
inline const TChar* data() const noexcept
|
||||
{
|
||||
return str;
|
||||
@ -54,6 +59,11 @@ public:
|
||||
return len == 0;
|
||||
}
|
||||
|
||||
/// Returns the first character of the view.
/// Precondition: the view must not be empty; there is no first character
/// to read when len == 0.
inline TChar front() const noexcept
{
    assert(len > 0);
    return str[0];
}
|
||||
|
||||
inline bool null() const noexcept
|
||||
{
|
||||
assert(len == 0);
|
||||
|
@ -197,39 +197,27 @@ struct ExtractTopLevelDomain
|
||||
|
||||
/// Extracts the top-level domain of the URL given by [data, data + size).
///
/// The host part is obtained from getUrlHost(); a single trailing dot
/// (fully qualified form, e.g. "example.ru.") is dropped before searching.
/// The result is the part of the host after its last dot.
///
/// On return, res_data / res_size describe the result. The result is empty
/// (res_data == data, res_size == 0) when the host is empty, when it contains
/// no dot, or when the character after the last dot compares <= '9'
/// (used to skip IPv4 addresses).
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
{
    StringView host = getUrlHost(StringView(data, size));

    /// Default result: an empty range anchored at the beginning of the input.
    res_data = data;
    res_size = 0;

    if (host.empty())
        return;

    /// Ignore a trailing dot so that "example.ru." yields "ru" rather than an empty string.
    if (host.back() == '.')
        host = StringView(host.data(), host.size() - 1);

    Pos last_dot = reinterpret_cast<Pos>(memrchr(host.data(), '.', host.size()));
    if (!last_dot)
        return;

    /// Extract nothing for IPv4 addresses.
    /// NOTE(review): `<= '9'` also matches every character that sorts below '0',
    /// and, if the character type behind Pos is signed, bytes >= 0x80 as well -
    /// confirm whether hosts with a non-ASCII last label are meant to be rejected here.
    if (last_dot[1] <= '9')
        return;

    res_data = last_dot + 1;
    res_size = (host.data() + host.size()) - res_data;
}
|
||||
};
|
||||
|
||||
|
@ -2,7 +2,13 @@ http
|
||||
https
|
||||
svn+ssh
|
||||
|
||||
http
|
||||
www.example.com
|
||||
|
||||
www.example.com
|
||||
127.0.0.1
|
||||
example.com
|
||||
com
|
||||
|
||||
ru
|
||||
ru
|
||||
|
@ -2,8 +2,15 @@ SELECT protocol('http://example.com') AS Scheme;
|
||||
SELECT protocol('https://example.com/') AS Scheme;
|
||||
SELECT protocol('svn+ssh://example.com?q=hello%20world') AS Scheme;
|
||||
SELECT protocol('ftp!://example.com/') AS Scheme;
|
||||
SELECT protocol('http://127.0.0.1:443/') AS Scheme;
|
||||
|
||||
SELECT domain('http://paul@www.example.com:80/') AS Host;
|
||||
SELECT domain('http:/paul/example/com') AS Host;
|
||||
SELECT domain('http://www.example.com?q=4') AS Host;
|
||||
SELECT domain('http://127.0.0.1:443/') AS Host;
|
||||
SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host;
|
||||
|
||||
SELECT topLevelDomain('http://paul@www.example.com:80/') AS Domain;
|
||||
SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain;
|
||||
SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain;
|
||||
SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain;
|
||||
|
Loading…
Reference in New Issue
Block a user