create netloc function

This commit is contained in:
Guillaume Tassery 2020-06-02 15:34:08 +07:00
parent 92ac608447
commit 500a8d22fa
5 changed files with 83 additions and 0 deletions

View File

@ -0,0 +1,17 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "netloc.h"
namespace DB
{

/// Tag type that supplies the SQL-visible name of the function.
struct NameNetloc
{
    static constexpr auto name = "netloc";
};

/// String -> String function: each input value is replaced by the substring
/// selected by ExtractNetloc.
using FunctionNetloc = FunctionStringToString<ExtractSubstringImpl<ExtractNetloc>, NameNetloc>;

/// Makes `netloc` available through the given function factory.
void registerFunctionNetloc(FunctionFactory & factory)
{
    factory.registerFunction<FunctionNetloc>();
}

}

View File

@ -0,0 +1,42 @@
#pragma once
#include "FunctionsURL.h"
#include <common/find_symbols.h>
namespace DB
{
/// Selects the network location (authority part, `[userinfo@]host[:port]`) of a URL.
///
/// The result is always a sub-range of the input; nothing is copied.
///  - If the input has no '/' at all, or its first '/' is not immediately followed
///    by another '/', the whole input is returned unchanged.
///  - Otherwise the result starts right after that "//" and runs up to (not including)
///    the first '/', '?' or '#', or up to the end of the input if none of them occurs.
struct ExtractNetloc
{
    /// NOTE(review): presumably a per-row size hint used by the caller when reserving
    /// the output buffer - confirm against ExtractSubstringImpl.
    static size_t getReserveLengthForElement() { return 10; }

    /// @param data      start of the input string
    /// @param size      length of the input string in bytes
    /// @param res_data  set to the start of the selected substring
    /// @param res_size  set to the length of the selected substring in bytes
    static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
    {
        /// Default answer: no "//" marker, so hand the input back as is.
        res_data = data;
        res_size = size;

        Pos pos = data;
        Pos end = pos + size;

        pos = find_first_symbols<'/'>(pos, end);
        if (end == pos)
            return;

        /// Check the bound explicitly instead of relying on the input being zero-terminated.
        bool has_subsequent_slash = pos + 1 < end && pos[1] == '/';
        if (!has_subsequent_slash)
            return;

        /// The network location starts right after "//"; unless a terminator is found
        /// below, it extends to the end of the input.
        res_data = pos + 2;
        res_size = end - res_data;

        /// The authority is terminated by the start of the path ('/'), of the query ('?')
        /// or of the fragment ('#'). Without '#', "http://host#frag" would yield "host#frag".
        pos = find_first_symbols<'/', '?', '#'>(res_data, end);
        if (end == pos)
            return;

        res_size = pos - res_data;
    }
};
}

View File

@ -26,6 +26,7 @@ void registerFunctionCutFragment(FunctionFactory & factory);
void registerFunctionCutQueryStringAndFragment(FunctionFactory & factory);
void registerFunctionCutURLParameter(FunctionFactory & factory);
void registerFunctionDecodeURLComponent(FunctionFactory & factory);
void registerFunctionNetloc(FunctionFactory & factory);
void registerFunctionsURL(FunctionFactory & factory)
{
@ -52,6 +53,7 @@ void registerFunctionsURL(FunctionFactory & factory)
registerFunctionCutQueryStringAndFragment(factory);
registerFunctionCutURLParameter(factory);
registerFunctionDecodeURLComponent(factory);
registerFunctionNetloc(factory);
}
}

View File

@ -16,6 +16,17 @@ www.example.com
example.com
example.com
example.com
====NETLOC====
paul@www.example.com:80
127.0.0.1:443
127.0.0.1:443
example.ru
example.ru
paul:zozo@example.ru
paul:zozo@example.ru
www.example.com
www.example.com
example.com
====DOMAIN====
com

View File

@ -18,6 +18,17 @@ SELECT domain('example.com') as Host;
SELECT domainWithoutWWW('//paul@www.example.com') AS Host;
SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host;
SELECT '====NETLOC====';
-- User info and port are both kept in the network location.
SELECT netloc('http://paul@www.example.com:80/') AS Netloc;
-- The result is the same with and without a trailing path.
SELECT netloc('http://127.0.0.1:443/') AS Netloc;
SELECT netloc('http://127.0.0.1:443') AS Netloc;
-- The network location ends at the path ('/') or, when there is no path, at the query ('?').
SELECT netloc('svn+ssh://example.ru/?q=hello%20world') AS Netloc;
SELECT netloc('svn+ssh://example.ru?q=hello%20world') AS Netloc;
SELECT netloc('svn+ssh://paul:zozo@example.ru/?q=hello%20world') AS Netloc;
SELECT netloc('svn+ssh://paul:zozo@example.ru?q=hello%20world') AS Netloc;
-- Scheme-less input: a leading '//' is stripped; with no '//' at all the input comes back unchanged.
SELECT netloc('//www.example.com') AS Netloc;
SELECT netloc('www.example.com') AS Netloc;
SELECT netloc('example.com') AS Netloc;
SELECT topLevelDomain('http://paul@www.example.com:80/') AS Domain;