mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Add port() function
It uses domain() (getURLHost()) internally, so it has the same limitations: no IPv6 support, and no support for hosts without dots (e.g. "foobar").
This commit is contained in:
parent
36c7a486a1
commit
409cff11df
@ -117,6 +117,10 @@ Returns the part of the domain that includes top-level subdomains up to the “f
|
||||
|
||||
For example, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
|
||||
|
||||
### port(URL) {#port}
|
||||
|
||||
Returns the port, or zero if the URL contains no port or fails validation.
|
||||
|
||||
### path {#path}
|
||||
|
||||
Returns the path. Example: `/top/news.html`. The path does not include the query string.
|
||||
|
95
src/Functions/URL/port.cpp
Normal file
95
src/Functions/URL/port.cpp
Normal file
@ -0,0 +1,95 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringOrArrayToT.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include "domain.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
struct ExtractPort
|
||||
{
|
||||
static constexpr auto name = "port";
|
||||
static constexpr auto is_fixed_to_constant = true;
|
||||
|
||||
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt16> & res)
|
||||
{
|
||||
size_t size = offsets.size();
|
||||
|
||||
ColumnString::Offset prev_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
res[i] = parse(data, prev_offset, offsets[i] - prev_offset - 1);
|
||||
prev_offset = offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorFixedToConstant(const ColumnString::Chars & data, size_t n, UInt16 & res) { res = parse(data, 0, n); }
|
||||
|
||||
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt16> & res)
|
||||
{
|
||||
size_t size = data.size() / n;
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
res[i] = parse(data, i * n, n);
|
||||
}
|
||||
}
|
||||
|
||||
[[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray<UInt16> &)
|
||||
{
|
||||
throw Exception("Cannot apply function port to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
private:
|
||||
static UInt16 parse(const ColumnString::Chars & buf, size_t offset, size_t size)
|
||||
{
|
||||
const char * p = reinterpret_cast<const char *>(&buf[0]) + offset;
|
||||
const char * end = p + size;
|
||||
|
||||
StringRef host = getURLHost(p, size);
|
||||
if (!host.size)
|
||||
return 0;
|
||||
if (host.size == size)
|
||||
return 0;
|
||||
|
||||
p = host.data + host.size;
|
||||
if (*p++ != ':')
|
||||
return 0;
|
||||
|
||||
Int64 port = 0;
|
||||
while (p < end)
|
||||
{
|
||||
if (*p == '/')
|
||||
break;
|
||||
if (!isNumericASCII(*p))
|
||||
return 0;
|
||||
|
||||
port = (port * 10) + (*p - '0');
|
||||
if (port < 0 || port > UInt16(-1))
|
||||
return 0;
|
||||
++p;
|
||||
}
|
||||
return port;
|
||||
}
|
||||
};
|
||||
|
||||
/// Name tag for the function: exposes the SQL-visible name through the static `name` member.
struct NamePort
{
    static constexpr const char * name = "port";
};
|
||||
|
||||
using FunctionPort = FunctionStringOrArrayToT<ExtractPort, NamePort, UInt16>;
|
||||
|
||||
/// Registers FunctionPort (the `port` URL function) in the supplied function factory.
void registerFunctionPort(FunctionFactory & factory)
{
    factory.registerFunction<FunctionPort>();
}
|
||||
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ void registerFunctionDomain(FunctionFactory & factory);
|
||||
void registerFunctionDomainWithoutWWW(FunctionFactory & factory);
|
||||
void registerFunctionFirstSignificantSubdomain(FunctionFactory & factory);
|
||||
void registerFunctionTopLevelDomain(FunctionFactory & factory);
|
||||
void registerFunctionPort(FunctionFactory & factory);
|
||||
void registerFunctionPath(FunctionFactory & factory);
|
||||
void registerFunctionPathFull(FunctionFactory & factory);
|
||||
void registerFunctionQueryString(FunctionFactory & factory);
|
||||
@ -33,6 +34,7 @@ void registerFunctionsURL(FunctionFactory & factory)
|
||||
registerFunctionDomainWithoutWWW(factory);
|
||||
registerFunctionFirstSignificantSubdomain(factory);
|
||||
registerFunctionTopLevelDomain(factory);
|
||||
registerFunctionPort(factory);
|
||||
registerFunctionPath(factory);
|
||||
registerFunctionPathFull(factory);
|
||||
registerFunctionQueryString(factory);
|
||||
|
21
tests/queries/0_stateless/01284_port.reference
Normal file
21
tests/queries/0_stateless/01284_port.reference
Normal file
@ -0,0 +1,21 @@
|
||||
ipv4
|
||||
0
|
||||
80
|
||||
80
|
||||
80
|
||||
80
|
||||
hostname
|
||||
0
|
||||
80
|
||||
80
|
||||
80
|
||||
80
|
||||
ipv6
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
host-no-dot
|
||||
0
|
25
tests/queries/0_stateless/01284_port.sql
Normal file
25
tests/queries/0_stateless/01284_port.sql
Normal file
@ -0,0 +1,25 @@
|
||||
select 'ipv4';
|
||||
select port('http://127.0.0.1/');
|
||||
select port('http://127.0.0.1:80');
|
||||
select port('http://127.0.0.1:80/');
|
||||
select port('//127.0.0.1:80/');
|
||||
select port('127.0.0.1:80');
|
||||
select 'hostname';
|
||||
select port('http://foobar.com/');
|
||||
select port('http://foobar.com:80');
|
||||
select port('http://foobar.com:80/');
|
||||
select port('//foobar.com:80/');
|
||||
select port('foobar.com:80');
|
||||
|
||||
--
|
||||
-- Known limitations of domain() (getURLHost())
|
||||
--
|
||||
select 'ipv6';
|
||||
select port('http://[2001:db8::8a2e:370:7334]/');
|
||||
select port('http://[2001:db8::8a2e:370:7334]:80');
|
||||
select port('http://[2001:db8::8a2e:370:7334]:80/');
|
||||
select port('//[2001:db8::8a2e:370:7334]:80/');
|
||||
select port('[2001:db8::8a2e:370:7334]:80');
|
||||
select port('2001:db8::8a2e:370:7334:80');
|
||||
select 'host-no-dot';
|
||||
select port('//foobar:80/');
|
Loading…
Reference in New Issue
Block a user