// dbms/include/DB/Common/StringView.h

#include <cassert>
#include <cstddef>
#include <string>

/// A lightweight non-owning read-only view into a subsequence of a string.
///
/// The view stores only a pointer and a length; it never copies or frees the
/// characters, so the underlying buffer must outlive the view.
class StringView
{
public:
    /// Constructs a null view: data() == nullptr and size() == 0.
    constexpr StringView() noexcept
        : str(nullptr)
        , len(0)
    {
    }

    /// Constructs a view over `len_` characters starting at `data_`.
    constexpr StringView(const char* data_, size_t len_) noexcept
        : str(data_)
        , len(len_)
    {
    }

    /// Constructs a view over the whole contents of `s`.
    /// Intentionally implicit so a std::string can be passed where a view is expected.
    StringView(const std::string& s) noexcept
        : str(s.data())
        , len(s.size())
    {
    }

    /// Pointer to the first character (nullptr for a null view).
    const char* data() const noexcept
    {
        return str;
    }

    /// True when the view contains no characters.
    bool empty() const noexcept
    {
        return len == 0;
    }

    /// True when the view does not point at any buffer at all.
    bool null() const noexcept
    {
        /// Invariant: a view without a buffer cannot have a length.
        /// (Previously this asserted `len == 0` unconditionally, which aborted
        /// debug builds whenever null() was called on a non-empty view.)
        assert(str != nullptr || len == 0);
        return str == nullptr;
    }

    /// Number of characters in the view.
    size_t size() const noexcept
    {
        return len;
    }

    /// True when the view is non-empty.
    /// NOTE(review): kept implicit for backward compatibility; consider making it
    /// `explicit` so views cannot silently take part in arithmetic or comparisons.
    operator bool () const noexcept
    {
        return !empty();
    }

private:
    const char* str;
    /// Not const, so that views stay copy-assignable.
    size_t len;
};
+template +constexpr inline StringView MakeStringView(const char (&str)[size]) +{ + return StringView(str, size - 1); +} diff --git a/dbms/include/DB/Common/UrlUtils.h b/dbms/include/DB/Common/UrlUtils.h new file mode 100644 index 00000000000..e5a0844a66c --- /dev/null +++ b/dbms/include/DB/Common/UrlUtils.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +/** Extracts scheme from given url. + * + * If there is no valid scheme then + * empty StringView will be returned. + */ +StringView getUrlScheme(const StringView& url); diff --git a/dbms/include/DB/Functions/FunctionsURL.h b/dbms/include/DB/Functions/FunctionsURL.h index 40c2c2bee81..304d67769a3 100644 --- a/dbms/include/DB/Functions/FunctionsURL.h +++ b/dbms/include/DB/Functions/FunctionsURL.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -66,12 +67,10 @@ struct ExtractProtocol res_data = data; res_size = 0; - Pos pos = data; + StringView scheme = getUrlScheme(StringView(data, size)); + Pos pos = data + scheme.size(); - while (isAlphaNumericASCII(*pos)) - ++pos; - - if (pos == data || pos + 3 >= data + size) + if (scheme.empty() || (data + size) - pos < 4) return; if (pos[0] == ':') diff --git a/dbms/src/Common/UrlUtils.cpp b/dbms/src/Common/UrlUtils.cpp new file mode 100644 index 00000000000..8212b94f9b3 --- /dev/null +++ b/dbms/src/Common/UrlUtils.cpp @@ -0,0 +1,24 @@ +#include +#include + +StringView getUrlScheme(const StringView& url) +{ + // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) + const char* p = url.data(); + const char* end = url.data() + url.size(); + + if (isAlphaASCII(*p)) + { + for (++p; p < end; ++p) + { + if (!(isAlphaNumericASCII(*p) || *p == '+' || *p == '-' || *p == '.')) + { + break; + } + } + + return StringView(url.data(), p - url.data()); + } + + return StringView(); +} diff --git a/dbms/tests/queries/0_stateless/00395_url_functions.reference b/dbms/tests/queries/0_stateless/00395_url_functions.reference new file mode 100644 index 00000000000..fb130c42c78 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00395_url_functions.reference @@ -0,0 +1,4 @@ +http +https +svn+ssh + diff --git a/dbms/tests/queries/0_stateless/00395_url_functions.sql b/dbms/tests/queries/0_stateless/00395_url_functions.sql new file mode 100644 index 00000000000..4a66973d1c1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00395_url_functions.sql @@ -0,0 +1,4 @@ +SELECT protocol('http://example.com') AS Scheme; +SELECT protocol('https://example.com/') AS Scheme; +SELECT protocol('svn+ssh://example.com?q=hello%20world') AS Scheme; +SELECT protocol('ftp!://example.com/') AS Scheme;