mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
host extraction functionality was moved to UrlUtils
This commit is contained in:
parent
544143f5c9
commit
dfb466d156
@ -1,11 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
/// A lightweight non-owning read-only view into a subsequence of a string.
|
||||
class StringView
|
||||
{
|
||||
public:
|
||||
using size_type = size_t;
|
||||
|
||||
static constexpr size_type npos = size_type(-1);
|
||||
|
||||
public:
|
||||
inline StringView() noexcept
|
||||
: str(nullptr)
|
||||
@ -41,11 +47,27 @@ public:
|
||||
return str == nullptr;
|
||||
}
|
||||
|
||||
inline size_t size() const noexcept
|
||||
inline size_type size() const noexcept
|
||||
{
|
||||
return len;
|
||||
}
|
||||
|
||||
public:
|
||||
/**
|
||||
* Returns a substring [pos, pos + count).
|
||||
* If the requested substring extends past the end of the string,
|
||||
* or if count == npos, the returned substring is [pos, size()).
|
||||
*/
|
||||
StringView substr(size_type pos, size_type count = npos) const
|
||||
{
|
||||
if (pos >= len)
|
||||
throw std::out_of_range("pos must be less than len");
|
||||
if (pos + count >= len || count == npos)
|
||||
return StringView(str + pos, len - pos);
|
||||
else
|
||||
return StringView(str + pos, count);
|
||||
}
|
||||
|
||||
public:
|
||||
inline operator bool () const noexcept
|
||||
{
|
||||
@ -54,7 +76,7 @@ public:
|
||||
|
||||
private:
|
||||
const char* str;
|
||||
const size_t len;
|
||||
size_t len;
|
||||
};
|
||||
|
||||
|
||||
|
@ -2,9 +2,9 @@
|
||||
|
||||
#include <DB/Common/StringView.h>
|
||||
|
||||
/** Extracts scheme from given url.
|
||||
*
|
||||
* If there is no valid scheme then
|
||||
* empty StringView will be returned.
|
||||
*/
|
||||
/// Extracts scheme from given url.
|
||||
StringView getUrlScheme(const StringView& url);
|
||||
|
||||
|
||||
/// Extracts host from given url.
|
||||
StringView getUrlHost(const StringView& url);
|
||||
|
@ -85,33 +85,21 @@ struct ExtractDomain
|
||||
|
||||
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
|
||||
{
|
||||
res_data = data;
|
||||
res_size = 0;
|
||||
StringView host = getUrlHost(StringView(data, size));
|
||||
|
||||
Pos pos = data;
|
||||
Pos end = pos + size;
|
||||
if (host.empty())
|
||||
{
|
||||
res_data = data;
|
||||
res_size = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (without_www && host.size() > 4 && !strncmp(host.data(), "www.", 4))
|
||||
host = host.substr(4);
|
||||
|
||||
Pos tmp;
|
||||
size_t protocol_length;
|
||||
ExtractProtocol::execute(data, size, tmp, protocol_length);
|
||||
pos += protocol_length + 3;
|
||||
|
||||
if (pos >= end || pos[-1] != '/' || pos[-2] != '/')
|
||||
return;
|
||||
|
||||
if (without_www && pos + 4 < end && !strncmp(pos, "www.", 4))
|
||||
pos += 4;
|
||||
|
||||
Pos domain_begin = pos;
|
||||
|
||||
while (pos < end && *pos != '/' && *pos != ':' && *pos != '?' && *pos != '#')
|
||||
++pos;
|
||||
|
||||
if (pos == domain_begin)
|
||||
return;
|
||||
|
||||
res_data = domain_begin;
|
||||
res_size = pos - domain_begin;
|
||||
res_data = host.data();
|
||||
res_size = host.size();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -22,3 +22,36 @@ StringView getUrlScheme(const StringView& url)
|
||||
|
||||
return StringView();
|
||||
}
|
||||
|
||||
|
||||
/// Extracts the host from the given url.
///
/// The url must have the form `<scheme>://<authority>...`; the scheme is
/// obtained from getUrlScheme(). An empty StringView is returned when the
/// scheme is not followed by "://" or when the host part is empty.
/// The returned view points into the buffer referenced by `url`.
StringView getUrlHost(const StringView& url)
{
    StringView scheme = getUrlScheme(url);
    // NOTE(review): assumes the scheme returned by getUrlScheme() starts at
    // url.data(), so that skipping scheme.size() bytes lands right after it.
    const char* p = url.data() + scheme.size();
    const char* end = url.data() + url.size();

    // The scheme must be followed by a colon, and the authority component
    // must start with "//". All three characters have to be inside the
    // buffer before any of them is inspected, otherwise p[2] would be read
    // past the end for inputs such as "http:/".
    if (end - p < 3 || p[0] != ':' || p[1] != '/' || p[2] != '/')
        return StringView();

    p += 3;

    const char* st = p;

    for (; p < end; ++p)
    {
        if (*p == '@')
        {
            // Everything up to '@' was userinfo; the host starts after it.
            st = p + 1;
        }
        else if (*p == ':' || *p == '/' || *p == '?' || *p == '#')
        {
            // Port, path, query or fragment delimiter terminates the host.
            // NOTE: a ':' inside userinfo ("user:password@host") also stops
            // the scan here, before the '@' is seen.
            break;
        }
    }

    return (p == st) ? StringView() : StringView(st, p - st);
}
|
||||
|
@ -2,3 +2,7 @@ http
|
||||
https
|
||||
svn+ssh
|
||||
|
||||
www.example.com
|
||||
|
||||
www.example.com
|
||||
example.com
|
||||
|
@ -2,3 +2,8 @@ SELECT protocol('http://example.com') AS Scheme;
|
||||
SELECT protocol('https://example.com/') AS Scheme;
|
||||
SELECT protocol('svn+ssh://example.com?q=hello%20world') AS Scheme;
|
||||
SELECT protocol('ftp!://example.com/') AS Scheme;
|
||||
|
||||
SELECT domain('http://paul@www.example.com:80/') AS Host;
|
||||
SELECT domain('http:/paul/example/com') AS Host;
|
||||
SELECT domain('http://www.example.com?q=4') AS Host;
|
||||
SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host;
|
||||
|
Loading…
Reference in New Issue
Block a user