mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
Handle URLs without www and scheme in the domain and topLevelDomain functions
This commit is contained in:
parent
6df315a985
commit
593dcbb33f
@ -15,26 +15,32 @@ inline StringRef getURLHost(const char * data, size_t size)
|
|||||||
Pos pos = data;
|
Pos pos = data;
|
||||||
Pos end = data + size;
|
Pos end = data + size;
|
||||||
|
|
||||||
if (!ignore_scheme || strncmp("www.", data, 4))
|
if (end == (pos = find_first_symbols<'/'>(pos, end)))
|
||||||
{
|
{
|
||||||
if (end == (pos = find_first_symbols<'/'>(pos, end)))
|
if (ignore_scheme)
|
||||||
|
pos = data;
|
||||||
|
else
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
if (pos != data)
|
|
||||||
{
|
|
||||||
StringRef scheme = getURLScheme(data, size);
|
|
||||||
Pos scheme_end = data + scheme.size;
|
|
||||||
|
|
||||||
// A colon must follow the scheme.
|
|
||||||
if (pos - scheme_end != 1 || *scheme_end != ':')
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/')
|
|
||||||
return {};
|
|
||||||
pos += 2;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (pos != data)
|
||||||
|
{
|
||||||
|
StringRef scheme = getURLScheme(data, size);
|
||||||
|
Pos scheme_end = data + scheme.size;
|
||||||
|
|
||||||
|
// A colon must follow the scheme.
|
||||||
|
if (pos - scheme_end != 1 || *scheme_end != ':')
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/')
|
||||||
|
{
|
||||||
|
if (!ignore_scheme)
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
pos += 2;
|
||||||
|
|
||||||
const char * start_of_host = pos;
|
const char * start_of_host = pos;
|
||||||
for (; pos < end; ++pos)
|
for (; pos < end; ++pos)
|
||||||
{
|
{
|
||||||
|
@ -15,6 +15,7 @@ www.example.com
|
|||||||
www.example.com
|
www.example.com
|
||||||
example.com
|
example.com
|
||||||
example.com
|
example.com
|
||||||
|
example.com
|
||||||
====DOMAIN====
|
====DOMAIN====
|
||||||
com
|
com
|
||||||
|
|
||||||
@ -22,6 +23,7 @@ ru
|
|||||||
ru
|
ru
|
||||||
com
|
com
|
||||||
com
|
com
|
||||||
|
com
|
||||||
====PATH====
|
====PATH====
|
||||||
П
|
П
|
||||||
%D%9
|
%D%9
|
||||||
|
@ -14,6 +14,7 @@ SELECT domain('http://127.0.0.1:443/') AS Host;
|
|||||||
SELECT domain('//www.example.com') AS Host;
|
SELECT domain('//www.example.com') AS Host;
|
||||||
SELECT domain('//paul@www.example.com') AS Host;
|
SELECT domain('//paul@www.example.com') AS Host;
|
||||||
SELECT domain('www.example.com') as Host;
|
SELECT domain('www.example.com') as Host;
|
||||||
|
SELECT domain('example.com') as Host;
|
||||||
SELECT domainWithoutWWW('//paul@www.example.com') AS Host;
|
SELECT domainWithoutWWW('//paul@www.example.com') AS Host;
|
||||||
SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host;
|
SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host;
|
||||||
|
|
||||||
@ -24,7 +25,8 @@ SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain;
|
|||||||
SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain;
|
SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain;
|
||||||
SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain;
|
SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain;
|
||||||
SELECT topLevelDomain('//www.example.com') AS Domain;
|
SELECT topLevelDomain('//www.example.com') AS Domain;
|
||||||
SELECT topLevelDomain('www.google.com') as Domain;
|
SELECT topLevelDomain('www.example.com') as Domain;
|
||||||
|
SELECT topLevelDomain('example.com') as Domain;
|
||||||
|
|
||||||
SELECT '====PATH====';
|
SELECT '====PATH====';
|
||||||
SELECT decodeURLComponent('%D0%9F');
|
SELECT decodeURLComponent('%D0%9F');
|
||||||
|
Loading…
Reference in New Issue
Block a user