mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
url's scheme extraction was fixed according to rfc3986.
This commit is contained in:
parent
ca7d0a4742
commit
544143f5c9
@ -453,6 +453,7 @@ add_library (dbms
|
||||
include/DB/Common/randomSeed.h
|
||||
include/DB/Common/unaligned.h
|
||||
include/DB/Common/ThreadPool.h
|
||||
include/DB/Common/StringView.h
|
||||
include/DB/IO/CompressedStream.h
|
||||
include/DB/IO/ReadBufferFromFileDescriptor.h
|
||||
include/DB/IO/CompressedWriteBuffer.h
|
||||
@ -627,6 +628,7 @@ add_library (dbms
|
||||
src/Common/getNumberOfPhysicalCPUCores.cpp
|
||||
src/Common/randomSeed.cpp
|
||||
src/Common/ThreadPool.cpp
|
||||
src/Common/UrlUtils.cpp
|
||||
|
||||
src/Core/Field.cpp
|
||||
src/Core/FieldVisitors.cpp
|
||||
|
66
dbms/include/DB/Common/StringView.h
Normal file
66
dbms/include/DB/Common/StringView.h
Normal file
@ -0,0 +1,66 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
|
||||
/** A lightweight, non-owning, read-only view of a contiguous character sequence.
  *
  * The view stores only a pointer and a length; it never copies or frees the
  * characters, so the referenced buffer has to stay alive for as long as the
  * view is used.
  */
class StringView
{
public:
    /// Creates a null view: data() == nullptr and size() == 0.
    constexpr StringView() noexcept
        : str(nullptr)
        , len(0)
    {
    }

    /// Creates a view of len_ characters starting at data_.
    constexpr StringView(const char * data_, size_t len_) noexcept
        : str(data_)
        , len(len_)
    {
    }

    /// Creates a view of the whole contents of s.
    /// Deliberately non-explicit so a std::string can be passed where a StringView is expected.
    StringView(const std::string & s) noexcept
        : str(s.data())
        , len(s.size())
    {
    }

    /// Pointer to the first character of the view (nullptr for a null view).
    const char * data() const noexcept
    {
        return str;
    }

    /// True when the view contains no characters.
    bool empty() const noexcept
    {
        return len == 0;
    }

    /// True when the view does not point to any buffer.
    /// It is a plain query and can be called on any view, empty or not.
    bool null() const noexcept
    {
        return str == nullptr;
    }

    /// Number of characters in the view.
    size_t size() const noexcept
    {
        return len;
    }

public:
    /// A view converts to true when it is not empty.
    operator bool () const noexcept
    {
        return !empty();
    }

private:
    const char * str;
    /// Not const, so that a view can be re-assigned like any other value type.
    size_t len;
};
|
||||
|
||||
|
||||
/// It creates StringView from literal constant at compile time.
|
||||
template <size_t size>
|
||||
constexpr inline StringView MakeStringView(const char (&str)[size])
|
||||
{
|
||||
return StringView(str, size - 1);
|
||||
}
|
10
dbms/include/DB/Common/UrlUtils.h
Normal file
10
dbms/include/DB/Common/UrlUtils.h
Normal file
@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <DB/Common/StringView.h>
|
||||
|
||||
/** Extracts the scheme from the given URL.
|
||||
*
|
||||
* If there is no valid scheme then
|
||||
* empty StringView will be returned.
|
||||
*/
|
||||
StringView getUrlScheme(const StringView& url);
|
@ -3,6 +3,7 @@
|
||||
#include <DB/DataTypes/DataTypeString.h>
|
||||
#include <DB/Columns/ColumnString.h>
|
||||
#include <DB/Columns/ColumnConst.h>
|
||||
#include <DB/Common/UrlUtils.h>
|
||||
#include <DB/Functions/FunctionsString.h>
|
||||
#include <DB/Functions/FunctionsStringSearch.h>
|
||||
#include <DB/Functions/FunctionsStringArray.h>
|
||||
@ -66,12 +67,10 @@ struct ExtractProtocol
|
||||
res_data = data;
|
||||
res_size = 0;
|
||||
|
||||
Pos pos = data;
|
||||
StringView scheme = getUrlScheme(StringView(data, size));
|
||||
Pos pos = data + scheme.size();
|
||||
|
||||
while (isAlphaNumericASCII(*pos))
|
||||
++pos;
|
||||
|
||||
if (pos == data || pos + 3 >= data + size)
|
||||
if (scheme.empty() || (data + size) - pos < 4)
|
||||
return;
|
||||
|
||||
if (pos[0] == ':')
|
||||
|
24
dbms/src/Common/UrlUtils.cpp
Normal file
24
dbms/src/Common/UrlUtils.cpp
Normal file
@ -0,0 +1,24 @@
|
||||
#include <DB/Common/StringUtils.h>
|
||||
#include <DB/Common/UrlUtils.h>
|
||||
|
||||
/** Returns the longest prefix of url that matches the RFC 3986 scheme grammar:
  *
  *     scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  *
  * Only the prefix itself is examined; the character that follows it
  * (e.g. the ':' delimiter) is not checked here and is left to the caller.
  *
  * Returns a default-constructed (null, empty) StringView when url is empty
  * or its first character does not satisfy isAlphaASCII.
  */
StringView getUrlScheme(const StringView& url)
{
    const char * const begin = url.data();
    const char * const end = begin + url.size();

    /// An empty view has no first character, so check the bounds before dereferencing.
    if (begin == end || !isAlphaASCII(*begin))
        return StringView();

    /// Consume every following character allowed inside a scheme.
    const char * p = begin + 1;
    while (p < end && (isAlphaNumericASCII(*p) || *p == '+' || *p == '-' || *p == '.'))
        ++p;

    return StringView(begin, p - begin);
}
|
@ -0,0 +1,4 @@
|
||||
http
|
||||
https
|
||||
svn+ssh
|
||||
|
4
dbms/tests/queries/0_stateless/00395_url_functions.sql
Normal file
4
dbms/tests/queries/0_stateless/00395_url_functions.sql
Normal file
@ -0,0 +1,4 @@
|
||||
SELECT protocol('http://example.com') AS Scheme;
|
||||
SELECT protocol('https://example.com/') AS Scheme;
|
||||
SELECT protocol('svn+ssh://example.com?q=hello%20world') AS Scheme;
|
||||
SELECT protocol('ftp!://example.com/') AS Scheme;
|
Loading…
Reference in New Issue
Block a user