mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Add a way to accept URL without scheme for domain and topLevelDomain
This commit is contained in:
parent
cdc65eca23
commit
6df315a985
@ -9,27 +9,31 @@ namespace DB
|
||||
{
|
||||
|
||||
/// Extracts host from given url.
|
||||
template <bool ignore_scheme = true>
|
||||
inline StringRef getURLHost(const char * data, size_t size)
|
||||
{
|
||||
Pos pos = data;
|
||||
Pos end = data + size;
|
||||
|
||||
if (end == (pos = find_first_symbols<'/'>(pos, end)))
|
||||
return {};
|
||||
|
||||
if (pos != data)
|
||||
if (!ignore_scheme || strncmp("www.", data, 4))
|
||||
{
|
||||
StringRef scheme = getURLScheme(data, size);
|
||||
Pos scheme_end = data + scheme.size;
|
||||
|
||||
// A colon must immediately follow the scheme.
|
||||
if (pos - scheme_end != 1 || *scheme_end != ':')
|
||||
if (end == (pos = find_first_symbols<'/'>(pos, end)))
|
||||
return {};
|
||||
}
|
||||
|
||||
if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/')
|
||||
return {};
|
||||
pos += 2;
|
||||
if (pos != data)
|
||||
{
|
||||
StringRef scheme = getURLScheme(data, size);
|
||||
Pos scheme_end = data + scheme.size;
|
||||
|
||||
// A colon must immediately follow the scheme.
|
||||
if (pos - scheme_end != 1 || *scheme_end != ':')
|
||||
return {};
|
||||
}
|
||||
|
||||
if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/')
|
||||
return {};
|
||||
pos += 2;
|
||||
}
|
||||
|
||||
const char * start_of_host = pos;
|
||||
for (; pos < end; ++pos)
|
||||
|
@ -12,6 +12,7 @@ www.example.com
|
||||
127.0.0.1
|
||||
www.example.com
|
||||
www.example.com
|
||||
www.example.com
|
||||
example.com
|
||||
example.com
|
||||
====DOMAIN====
|
||||
@ -20,6 +21,7 @@ com
|
||||
ru
|
||||
ru
|
||||
com
|
||||
com
|
||||
====PATH====
|
||||
П
|
||||
%D%9
|
||||
|
@ -13,6 +13,7 @@ SELECT domain('http://www.example.com?q=4') AS Host;
|
||||
SELECT domain('http://127.0.0.1:443/') AS Host;
|
||||
SELECT domain('//www.example.com') AS Host;
|
||||
SELECT domain('//paul@www.example.com') AS Host;
|
||||
SELECT domain('www.example.com') as Host;
|
||||
SELECT domainWithoutWWW('//paul@www.example.com') AS Host;
|
||||
SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host;
|
||||
|
||||
@ -23,6 +24,7 @@ SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain;
|
||||
SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain;
|
||||
SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain;
|
||||
SELECT topLevelDomain('//www.example.com') AS Domain;
|
||||
SELECT topLevelDomain('www.google.com') as Domain;
|
||||
|
||||
SELECT '====PATH====';
|
||||
SELECT decodeURLComponent('%D0%9F');
|
||||
|
Loading…
Reference in New Issue
Block a user