mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Continuation of #6256
This commit is contained in:
parent
596202b4d6
commit
db9760af41
@ -1,35 +1,15 @@
|
|||||||
#include <Functions/FunctionFactory.h>
|
#include <Functions/FunctionFactory.h>
|
||||||
#include <Functions/FunctionStringToString.h>
|
#include <Functions/FunctionStringToString.h>
|
||||||
#include "FunctionsURL.h"
|
#include "FunctionsURL.h"
|
||||||
|
#include "path.h"
|
||||||
#include <common/find_symbols.h>
|
#include <common/find_symbols.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
struct ExtractPath
|
|
||||||
{
|
|
||||||
static size_t getReserveLengthForElement() { return 25; }
|
|
||||||
|
|
||||||
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
|
|
||||||
{
|
|
||||||
res_data = data;
|
|
||||||
res_size = 0;
|
|
||||||
|
|
||||||
Pos pos = data;
|
|
||||||
Pos end = pos + size;
|
|
||||||
|
|
||||||
if (end != (pos = find_first_symbols<'/'>(pos, end)) && pos[1] == '/' && end != (pos = find_first_symbols<'/'>(pos + 2, end)))
|
|
||||||
{
|
|
||||||
Pos query_string_or_fragment = find_first_symbols<'?', '#'>(pos, end);
|
|
||||||
|
|
||||||
res_data = pos;
|
|
||||||
res_size = query_string_or_fragment - res_data;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Name tag: exposes the string "path" through its `name` constant.
/// FunctionPath wires an ExtractPath implementation and this name tag into FunctionStringToString.
/// With ExtractPath<false> the extracted path stops before the first '?' or '#'.
struct NamePath { static constexpr auto name = "path"; };
|
struct NamePath { static constexpr auto name = "path"; };
|
||||||
using FunctionPath = FunctionStringToString<ExtractSubstringImpl<ExtractPath>, NamePath>;
|
using FunctionPath = FunctionStringToString<ExtractSubstringImpl<ExtractPath<false>>, NamePath>;
|
||||||
|
|
||||||
void registerFunctionPath(FunctionFactory & factory)
|
void registerFunctionPath(FunctionFactory & factory)
|
||||||
{
|
{
|
||||||
|
56
src/Functions/URL/path.h
Normal file
56
src/Functions/URL/path.h
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <common/find_symbols.h>
|
||||||
|
#include <Functions/URL/FunctionsURL.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
template <bool with_query_string>
|
||||||
|
struct ExtractPath
|
||||||
|
{
|
||||||
|
static size_t getReserveLengthForElement() { return 25; }
|
||||||
|
|
||||||
|
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
|
||||||
|
{
|
||||||
|
res_data = data;
|
||||||
|
res_size = 0;
|
||||||
|
|
||||||
|
Pos pos = data;
|
||||||
|
Pos end = pos + size;
|
||||||
|
|
||||||
|
/// We support URLs with and without schema:
|
||||||
|
/// 1. http://host/path
|
||||||
|
/// 2. host/path
|
||||||
|
/// We search for first slash and if there is subsequent slash, then skip and repeat search for the next slash.
|
||||||
|
|
||||||
|
pos = find_first_symbols<'/'>(pos, end);
|
||||||
|
if (end == pos)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/// Note that strings are zero-terminated.
|
||||||
|
bool has_subsequent_slash = pos[1] == '/';
|
||||||
|
if (has_subsequent_slash)
|
||||||
|
{
|
||||||
|
/// Search for next slash.
|
||||||
|
pos = find_first_symbols<'/'>(pos + 2, end);
|
||||||
|
if (end == pos)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
res_data = pos;
|
||||||
|
|
||||||
|
if constexpr (with_query_string)
|
||||||
|
{
|
||||||
|
res_size = end - res_data;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Pos query_string_or_fragment = find_first_symbols<'?', '#'>(pos, end);
|
||||||
|
res_size = query_string_or_fragment - res_data;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -1,33 +1,14 @@
|
|||||||
#include <Functions/FunctionFactory.h>
|
#include <Functions/FunctionFactory.h>
|
||||||
#include <Functions/FunctionStringToString.h>
|
#include <Functions/FunctionStringToString.h>
|
||||||
#include "FunctionsURL.h"
|
#include "FunctionsURL.h"
|
||||||
|
#include "path.h"
|
||||||
#include <common/find_symbols.h>
|
#include <common/find_symbols.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
struct ExtractPathFull
|
|
||||||
{
|
|
||||||
static size_t getReserveLengthForElement() { return 30; }
|
|
||||||
|
|
||||||
static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size)
|
|
||||||
{
|
|
||||||
res_data = data;
|
|
||||||
res_size = 0;
|
|
||||||
|
|
||||||
Pos pos = data;
|
|
||||||
Pos end = pos + size;
|
|
||||||
|
|
||||||
if (end != (pos = find_first_symbols<'/'>(pos, end)) && pos[1] == '/' && end != (pos = find_first_symbols<'/'>(pos + 2, end)))
|
|
||||||
{
|
|
||||||
res_data = pos;
|
|
||||||
res_size = end - res_data;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Name tag: exposes the string "pathFull" through its `name` constant.
/// FunctionPathFull wires a path extractor and this name tag into FunctionStringToString.
/// With ExtractPath<true> the extracted path runs to the end of the input.
struct NamePathFull { static constexpr auto name = "pathFull"; };
|
struct NamePathFull { static constexpr auto name = "pathFull"; };
|
||||||
using FunctionPathFull = FunctionStringToString<ExtractSubstringImpl<ExtractPathFull>, NamePathFull>;
|
using FunctionPathFull = FunctionStringToString<ExtractSubstringImpl<ExtractPath<true>>, NamePathFull>;
|
||||||
|
|
||||||
void registerFunctionPathFull(FunctionFactory & factory)
|
void registerFunctionPathFull(FunctionFactory & factory)
|
||||||
{
|
{
|
||||||
|
@ -0,0 +1,2 @@
|
|||||||
|
/a/b/c
|
||||||
|
/?query=hello world+foo+bar
|
@ -0,0 +1,2 @@
|
|||||||
|
SELECT path('www.example.com:443/a/b/c') AS Path;
|
||||||
|
SELECT decodeURLComponent(materialize(pathFull('www.example.com/?query=hello%20world+foo%2Bbar'))) AS Path;
|
Loading…
Reference in New Issue
Block a user