Fix extraction of TLD from FQDN

This commit is contained in:
artpaul 2016-12-10 03:49:21 +05:00
parent 4cf4fa68e1
commit 396ccccfce
4 changed files with 38 additions and 27 deletions

View File

@ -44,6 +44,11 @@ public:
return str[pos];
}
/// Returns the last character of the view.
/// Precondition: the view must not be empty; with `len == 0` the index
/// `len - 1` does not refer to a character of the view.
inline TChar back() const noexcept
{
    assert(len > 0);
    return str[len - 1];
}
inline const TChar* data() const noexcept
{
return str;
@ -54,6 +59,11 @@ public:
return len == 0;
}
/// Returns the first character of the view.
/// Precondition: the view must not be empty; an empty view has no
/// character at index 0 to return.
inline TChar front() const noexcept
{
    assert(len > 0);
    return str[0];
}
inline bool null() const noexcept
{
assert(len == 0);

View File

@ -197,39 +197,27 @@ struct ExtractTopLevelDomain
/// Extracts the top-level domain (the text after the last dot of the host) from a URL.
///
/// @param data      pointer to the first character of the URL
/// @param size      length of the URL in bytes
/// @param res_data  out: start of the extracted top-level domain; points at `data`
///                  when nothing is extracted
/// @param res_size  out: length of the extracted top-level domain; 0 when nothing
///                  is extracted
///
/// Nothing is extracted when the URL has no host, when the host contains no dot,
/// or when the character after the last dot is not greater than '9'.
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
{
    StringView host = getUrlHost(StringView(data, size));

    /// Default result: an empty string located at the beginning of the input.
    res_data = data;
    res_size = 0;

    if (host.empty())
        return;

    /// A fully qualified domain name may end with a dot ("example.ru.");
    /// drop it so that the search below finds the dot that precedes the TLD.
    if (host.back() == '.')
        host = StringView(host.data(), host.size() - 1);

    Pos last_dot = reinterpret_cast<Pos>(memrchr(host.data(), '.', host.size()));

    if (!last_dot)
        return;

    /// For IPv4 addresses, extract nothing.
    if (last_dot[1] <= '9')
        return;

    res_data = last_dot + 1;
    res_size = (host.data() + host.size()) - res_data;
}
};

View File

@ -2,7 +2,13 @@ http
https
svn+ssh
http
www.example.com
www.example.com
127.0.0.1
example.com
com
ru
ru

View File

@ -2,8 +2,15 @@ SELECT protocol('http://example.com') AS Scheme;
-- protocol(): URL with a trailing slash, a scheme containing '+', a scheme
-- containing '!', and an IPv4 host with an explicit port.
SELECT protocol('https://example.com/') AS Scheme;
SELECT protocol('svn+ssh://example.com?q=hello%20world') AS Scheme;
SELECT protocol('ftp!://example.com/') AS Scheme;
SELECT protocol('http://127.0.0.1:443/') AS Scheme;
-- domain(): user info and port around the host, a single slash after the scheme,
-- a query string directly after the host, and an IPv4 host.
SELECT domain('http://paul@www.example.com:80/') AS Host;
SELECT domain('http:/paul/example/com') AS Host;
SELECT domain('http://www.example.com?q=4') AS Host;
SELECT domain('http://127.0.0.1:443/') AS Host;
-- domainWithoutWWW(): host that starts with "www.".
SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host;
-- topLevelDomain(): ordinary host, IPv4 host, host followed by a query string,
-- and a host written with a trailing dot.
SELECT topLevelDomain('http://paul@www.example.com:80/') AS Domain;
SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain;
SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain;
SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain;