diff --git a/src/Functions/URL/path.cpp b/src/Functions/URL/path.cpp index 43e21ece00f..2260604c1fc 100644 --- a/src/Functions/URL/path.cpp +++ b/src/Functions/URL/path.cpp @@ -1,35 +1,15 @@ #include #include #include "FunctionsURL.h" +#include "path.h" #include + namespace DB { -struct ExtractPath -{ - static size_t getReserveLengthForElement() { return 25; } - - static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size) - { - res_data = data; - res_size = 0; - - Pos pos = data; - Pos end = pos + size; - - if (end != (pos = find_first_symbols<'/'>(pos, end)) && pos[1] == '/' && end != (pos = find_first_symbols<'/'>(pos + 2, end))) - { - Pos query_string_or_fragment = find_first_symbols<'?', '#'>(pos, end); - - res_data = pos; - res_size = query_string_or_fragment - res_data; - } - } -}; - struct NamePath { static constexpr auto name = "path"; }; -using FunctionPath = FunctionStringToString, NamePath>; +using FunctionPath = FunctionStringToString>, NamePath>; void registerFunctionPath(FunctionFactory & factory) { diff --git a/src/Functions/URL/path.h b/src/Functions/URL/path.h new file mode 100644 index 00000000000..f2c5d31a0b0 --- /dev/null +++ b/src/Functions/URL/path.h @@ -0,0 +1,56 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +template +struct ExtractPath +{ + static size_t getReserveLengthForElement() { return 25; } + + static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size) + { + res_data = data; + res_size = 0; + + Pos pos = data; + Pos end = pos + size; + + /// We support URLs with and without schema: + /// 1. http://host/path + /// 2. host/path + /// We search for first slash and if there is subsequent slash, then skip and repeat search for the next slash. + + pos = find_first_symbols<'/'>(pos, end); + if (end == pos) + return; + + /// Note that strings are zero-terminated. + bool has_subsequent_slash = pos[1] == '/'; + if (has_subsequent_slash) + { + /// Search for next slash. + pos = find_first_symbols<'/'>(pos + 2, end); + if (end == pos) + return; + } + + res_data = pos; + + if constexpr (with_query_string) + { + res_size = end - res_data; + } + else + { + Pos query_string_or_fragment = find_first_symbols<'?', '#'>(pos, end); + res_size = query_string_or_fragment - res_data; + } + } +}; + +} diff --git a/src/Functions/URL/pathFull.cpp b/src/Functions/URL/pathFull.cpp index da31737c0f9..661fb298c04 100644 --- a/src/Functions/URL/pathFull.cpp +++ b/src/Functions/URL/pathFull.cpp @@ -1,33 +1,14 @@ #include #include #include "FunctionsURL.h" +#include "path.h" #include namespace DB { -struct ExtractPathFull -{ - static size_t getReserveLengthForElement() { return 30; } - - static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size) - { - res_data = data; - res_size = 0; - - Pos pos = data; - Pos end = pos + size; - - if (end != (pos = find_first_symbols<'/'>(pos, end)) && pos[1] == '/' && end != (pos = find_first_symbols<'/'>(pos + 2, end))) - { - res_data = pos; - res_size = end - res_data; - } - } -}; - struct NamePathFull { static constexpr auto name = "pathFull"; }; -using FunctionPathFull = FunctionStringToString, NamePathFull>; +using FunctionPathFull = FunctionStringToString>, NamePathFull>; void registerFunctionPathFull(FunctionFactory & factory) { diff --git a/tests/queries/0_stateless/01199_url_functions_path_without_schema_yiurule.reference b/tests/queries/0_stateless/01199_url_functions_path_without_schema_yiurule.reference new file mode 100644 index 00000000000..9d75f9c90df --- /dev/null +++ b/tests/queries/0_stateless/01199_url_functions_path_without_schema_yiurule.reference @@ -0,0 +1,2 @@ +/a/b/c +/?query=hello world+foo+bar diff --git a/tests/queries/0_stateless/01199_url_functions_path_without_schema_yiurule.sql b/tests/queries/0_stateless/01199_url_functions_path_without_schema_yiurule.sql new file mode 100644 index 00000000000..14b0f4fd8d5 --- /dev/null +++ b/tests/queries/0_stateless/01199_url_functions_path_without_schema_yiurule.sql @@ -0,0 +1,2 @@ +SELECT path('www.example.com:443/a/b/c') AS Path; +SELECT decodeURLComponent(materialize(pathFull('www.example.com/?query=hello%20world+foo%2Bbar'))) AS Path;