Continuation of #6256

This commit is contained in:
Alexey Milovidov 2020-04-15 03:12:47 +03:00
parent 596202b4d6
commit db9760af41
5 changed files with 65 additions and 44 deletions

View File

@ -1,35 +1,15 @@
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h> #include <Functions/FunctionStringToString.h>
#include "FunctionsURL.h" #include "FunctionsURL.h"
#include "path.h"
#include <common/find_symbols.h> #include <common/find_symbols.h>
namespace DB namespace DB
{ {
/// Extractor that selects the path part of a URL, without the query string and fragment.
/// The result is a sub-range [res_data, res_data + res_size) of the input; nothing is copied.
struct ExtractPath
{
/// NOTE(review): presumably a per-element size hint used by the caller to reserve the result buffer - confirm in ExtractSubstringImpl.
static size_t getReserveLengthForElement() { return 25; }
/// data, size - the input URL; res_data, res_size - output range.
/// If no path is found, the output is the empty range at the beginning of the input.
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
{
/// Default result: empty range at the start of the input.
res_data = data;
res_size = 0;
Pos pos = data;
Pos end = pos + size;
/// Three conditions must hold: a slash exists, it is immediately followed by another slash ("//"),
/// and one more slash exists after that pair. So an input whose first slash is not part of "//"
/// (e.g. host/path without a schema) yields an empty result.
/// pos[1] is read without comparing against end - NOTE(review): assumes the byte after the string is readable (zero-terminated input); confirm.
if (end != (pos = find_first_symbols<'/'>(pos, end)) && pos[1] == '/' && end != (pos = find_first_symbols<'/'>(pos + 2, end)))
{
/// The path ends at the first '?' or '#'; the `end != ...` checks above imply find_first_symbols returns end when nothing matches, so the path then runs to the end of the input.
Pos query_string_or_fragment = find_first_symbols<'?', '#'>(pos, end);
res_data = pos;
res_size = query_string_or_fragment - res_data;
}
}
};
struct NamePath { static constexpr auto name = "path"; }; struct NamePath { static constexpr auto name = "path"; };
using FunctionPath = FunctionStringToString<ExtractSubstringImpl<ExtractPath>, NamePath>; using FunctionPath = FunctionStringToString<ExtractSubstringImpl<ExtractPath<false>>, NamePath>;
void registerFunctionPath(FunctionFactory & factory) void registerFunctionPath(FunctionFactory & factory)
{ {

56
src/Functions/URL/path.h Normal file
View File

@ -0,0 +1,56 @@
#pragma once
#include <common/find_symbols.h>
#include <Functions/URL/FunctionsURL.h>
namespace DB
{
/** Extractor that selects the path part of a URL as a sub-range of the input (nothing is copied).
  *
  * Template parameter with_query_string:
  *  - true:  the result starts at the path and runs to the end of the input,
  *           so it also contains whatever follows the path (query string, fragment);
  *  - false: the result stops before the first '?' or '#'.
  *
  * If no path is found, the result is the empty range at the beginning of the input.
  */
template <bool with_query_string>
struct ExtractPath
{
    /// NOTE(review): presumably a per-element size hint used by the caller to reserve the result buffer - confirm in ExtractSubstringImpl.
    static size_t getReserveLengthForElement() { return 25; }

    /** data, size - the input URL.
      * res_data, res_size - output: beginning and length of the selected range inside the input.
      */
    static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
    {
        /// Default result: empty range at the start of the input.
        res_data = data;
        res_size = 0;

        Pos pos = data;
        Pos end = pos + size;

        /// We support URLs with and without schema:
        /// 1. http://host/path
        /// 2. host/path
        /// We search for first slash and if there is subsequent slash, then skip and repeat search for the next slash.

        pos = find_first_symbols<'/'>(pos, end);
        if (end == pos)
            return;

        /// Compare against the end explicitly instead of relying on the input being zero-terminated:
        /// the slash may be the very last byte of the range, and then pos[1] is outside of it.
        const bool has_subsequent_slash = pos + 1 < end && pos[1] == '/';
        if (has_subsequent_slash)
        {
            /// Search for next slash (the one that starts the path after "//host").
            pos = find_first_symbols<'/'>(pos + 2, end);
            if (end == pos)
                return;
        }

        res_data = pos;

        if constexpr (with_query_string)
        {
            /// Everything up to the end of the input.
            res_size = end - res_data;
        }
        else
        {
            /// Cut at the first '?' or '#'; if there is none, the search yields end and the path runs to the end of the input.
            Pos query_string_or_fragment = find_first_symbols<'?', '#'>(pos, end);
            res_size = query_string_or_fragment - res_data;
        }
    }
};
}

View File

@ -1,33 +1,14 @@
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h> #include <Functions/FunctionStringToString.h>
#include "FunctionsURL.h" #include "FunctionsURL.h"
#include "path.h"
#include <common/find_symbols.h> #include <common/find_symbols.h>
namespace DB namespace DB
{ {
/// Extractor that selects everything from the beginning of the path to the end of the input.
/// The result is a sub-range [res_data, res_data + res_size) of the input; nothing is copied.
struct ExtractPathFull
{
/// NOTE(review): presumably a per-element size hint used by the caller to reserve the result buffer - confirm in ExtractSubstringImpl.
static size_t getReserveLengthForElement() { return 30; }
/// data, size - the input URL; res_data, res_size - output range.
/// If no path is found, the output is the empty range at the beginning of the input.
static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size)
{
/// Default result: empty range at the start of the input.
res_data = data;
res_size = 0;
Pos pos = data;
Pos end = pos + size;
/// Three conditions must hold: a slash exists, it is immediately followed by another slash ("//"),
/// and one more slash exists after that pair. So an input whose first slash is not part of "//"
/// (e.g. host/path without a schema) yields an empty result.
/// pos[1] is read without comparing against end - NOTE(review): assumes the byte after the string is readable (zero-terminated input); confirm.
if (end != (pos = find_first_symbols<'/'>(pos, end)) && pos[1] == '/' && end != (pos = find_first_symbols<'/'>(pos + 2, end)))
{
/// Unlike the plain path extractor, nothing is cut off: the range extends to the end of the input.
res_data = pos;
res_size = end - res_data;
}
}
};
struct NamePathFull { static constexpr auto name = "pathFull"; }; struct NamePathFull { static constexpr auto name = "pathFull"; };
using FunctionPathFull = FunctionStringToString<ExtractSubstringImpl<ExtractPathFull>, NamePathFull>; using FunctionPathFull = FunctionStringToString<ExtractSubstringImpl<ExtractPath<true>>, NamePathFull>;
void registerFunctionPathFull(FunctionFactory & factory) void registerFunctionPathFull(FunctionFactory & factory)
{ {

View File

@ -0,0 +1,2 @@
/a/b/c
/?query=hello world+foo+bar

View File

@ -0,0 +1,2 @@
-- path() on a URL without a schema (host:port/path). The matching reference line appears to be '/a/b/c' - confirm against the .reference file.
SELECT path('www.example.com:443/a/b/c') AS Path;
-- pathFull() on a URL without a schema; its result is wrapped in materialize() and then percent-decoded by decodeURLComponent().
-- NOTE(review): materialize() presumably forces a non-constant column so the decoding runs on the regular (non-constant) code path - confirm.
-- The matching reference line appears to be '/?query=hello world+foo+bar'.
SELECT decodeURLComponent(materialize(pathFull('www.example.com/?query=hello%20world+foo%2Bbar'))) AS Path;