Handle URLs without "www." and without a scheme in the domain and topLevelDomain functions

This commit is contained in:
Guillaume Tassery 2019-04-18 10:32:42 +07:00
parent 6df315a985
commit 593dcbb33f
3 changed files with 27 additions and 17 deletions

View File

@ -15,10 +15,13 @@ inline StringRef getURLHost(const char * data, size_t size)
Pos pos = data; Pos pos = data;
Pos end = data + size; Pos end = data + size;
if (!ignore_scheme || strncmp("www.", data, 4))
{
if (end == (pos = find_first_symbols<'/'>(pos, end))) if (end == (pos = find_first_symbols<'/'>(pos, end)))
{
if (ignore_scheme)
pos = data;
else
return {}; return {};
}
if (pos != data) if (pos != data)
{ {
@ -31,9 +34,12 @@ inline StringRef getURLHost(const char * data, size_t size)
} }
if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/')
{
if (!ignore_scheme)
return {}; return {};
pos += 2;
} }
else
pos += 2;
const char * start_of_host = pos; const char * start_of_host = pos;
for (; pos < end; ++pos) for (; pos < end; ++pos)

View File

@ -15,6 +15,7 @@ www.example.com
www.example.com www.example.com
example.com example.com
example.com example.com
example.com
====DOMAIN==== ====DOMAIN====
com com
@ -22,6 +23,7 @@ ru
ru ru
com com
com com
com
====PATH==== ====PATH====
П П
%D%9 %D%9

View File

@ -14,6 +14,7 @@ SELECT domain('http://127.0.0.1:443/') AS Host;
SELECT domain('//www.example.com') AS Host; SELECT domain('//www.example.com') AS Host;
SELECT domain('//paul@www.example.com') AS Host; SELECT domain('//paul@www.example.com') AS Host;
SELECT domain('www.example.com') as Host; SELECT domain('www.example.com') as Host;
SELECT domain('example.com') as Host;
SELECT domainWithoutWWW('//paul@www.example.com') AS Host; SELECT domainWithoutWWW('//paul@www.example.com') AS Host;
SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host; SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host;
@ -24,7 +25,8 @@ SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain;
SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain;
SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain;
SELECT topLevelDomain('//www.example.com') AS Domain; SELECT topLevelDomain('//www.example.com') AS Domain;
SELECT topLevelDomain('www.google.com') as Domain; SELECT topLevelDomain('www.example.com') as Domain;
SELECT topLevelDomain('example.com') as Domain;
SELECT '====PATH===='; SELECT '====PATH====';
SELECT decodeURLComponent('%D0%9F'); SELECT decodeURLComponent('%D0%9F');