URL scheme extraction was fixed to follow RFC 3986.

This commit is contained in:
artpaul 2016-12-09 22:38:15 +05:00
parent ca7d0a4742
commit 544143f5c9
7 changed files with 114 additions and 5 deletions

View File

@ -453,6 +453,7 @@ add_library (dbms
include/DB/Common/randomSeed.h
include/DB/Common/unaligned.h
include/DB/Common/ThreadPool.h
include/DB/Common/StringView.h
include/DB/IO/CompressedStream.h
include/DB/IO/ReadBufferFromFileDescriptor.h
include/DB/IO/CompressedWriteBuffer.h
@ -627,6 +628,7 @@ add_library (dbms
src/Common/getNumberOfPhysicalCPUCores.cpp
src/Common/randomSeed.cpp
src/Common/ThreadPool.cpp
src/Common/UrlUtils.cpp
src/Core/Field.cpp
src/Core/FieldVisitors.cpp

View File

@ -0,0 +1,66 @@
#pragma once
#include <cassert>
#include <string>
/// A lightweight non-owning read-only view into a subsequence of a string.
///
/// The view stores only a pointer and a length. It never copies or frees the
/// characters, so the viewed buffer must outlive every use of the view.
class StringView
{
public:
    /// Creates a null view: data() == nullptr and size() == 0.
    inline StringView() noexcept
        : str(nullptr)
        , len(0)
    {
    }

    /// Creates a view over len_ characters starting at data_.
    constexpr inline StringView(const char* data_, size_t len_) noexcept
        : str(data_)
        , len(len_)
    {
    }

    /// Creates a view over the whole contents of the given std::string.
    inline StringView(const std::string& source) noexcept
        : str(source.data())
        , len(source.size())
    {
    }

    /// Pointer to the first viewed character (nullptr for a null view).
    inline const char* data() const noexcept
    {
        return str;
    }

    /// True when the view contains no characters.
    inline bool empty() const noexcept
    {
        return len == 0;
    }

    /// True when the view does not refer to any buffer at all.
    inline bool null() const noexcept
    {
        // Invariant: a view without a buffer cannot claim any characters.
        // (The check must not reject non-empty views: asking a non-empty
        // view whether it is null is a perfectly valid question.)
        assert(str != nullptr || len == 0);
        return str == nullptr;
    }

    /// Number of viewed characters.
    inline size_t size() const noexcept
    {
        return len;
    }

public:
    /// A view converts to true when it is not empty.
    inline operator bool () const noexcept
    {
        return !empty();
    }

private:
    const char* str;
    const size_t len;
};
/// Builds a StringView over a character array (typically a string literal)
/// at compile time. The last array element - the terminating '\0' of a
/// literal - is left out of the resulting view.
template <size_t N>
constexpr inline StringView MakeStringView(const char (&literal)[N])
{
    return StringView(literal, N - 1);
}

View File

@ -0,0 +1,10 @@
#pragma once
#include <DB/Common/StringView.h>
/** Extracts the scheme from the given url.
 *
 * A scheme is recognised by the RFC 3986 grammar
 *     scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 * i.e. the longest prefix of the url that starts with a letter and
 * continues with letters, digits, '+', '-' or '.'. The function does not
 * require the prefix to be followed by ':'; callers that need the
 * delimiter have to check for it themselves.
 *
 * The returned view points into the same buffer as `url`; nothing is copied.
 *
 * If there is no valid scheme (the url does not start with a letter)
 * then an empty StringView will be returned.
 */
StringView getUrlScheme(const StringView& url);

View File

@ -3,6 +3,7 @@
#include <DB/DataTypes/DataTypeString.h>
#include <DB/Columns/ColumnString.h>
#include <DB/Columns/ColumnConst.h>
#include <DB/Common/UrlUtils.h>
#include <DB/Functions/FunctionsString.h>
#include <DB/Functions/FunctionsStringSearch.h>
#include <DB/Functions/FunctionsStringArray.h>
@ -66,12 +67,10 @@ struct ExtractProtocol
res_data = data;
res_size = 0;
Pos pos = data;
StringView scheme = getUrlScheme(StringView(data, size));
Pos pos = data + scheme.size();
while (isAlphaNumericASCII(*pos))
++pos;
if (pos == data || pos + 3 >= data + size)
if (scheme.empty() || (data + size) - pos < 4)
return;
if (pos[0] == ':')

View File

@ -0,0 +1,24 @@
#include <DB/Common/StringUtils.h>
#include <DB/Common/UrlUtils.h>
/// Returns the longest prefix of `url` that matches the RFC 3986 scheme grammar:
///
///     scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
///
/// The result is a view into the buffer of `url` (no copy is made). The
/// delimiter ':' after the scheme is NOT checked here. An empty StringView is
/// returned when `url` is empty or does not start with an ASCII letter.
StringView getUrlScheme(const StringView& url)
{
    // An empty (possibly null) view has no first character to inspect;
    // bail out before dereferencing anything.
    if (url.empty())
        return StringView();

    const char* const begin = url.data();
    const char* const end = begin + url.size();

    // The first character of a scheme must be a letter.
    if (!isAlphaASCII(*begin))
        return StringView();

    // Consume the remaining scheme characters, never stepping past the end.
    const char* p = begin + 1;
    while (p < end && (isAlphaNumericASCII(*p) || *p == '+' || *p == '-' || *p == '.'))
        ++p;

    return StringView(begin, p - begin);
}

View File

@ -0,0 +1,4 @@
http
https
svn+ssh

View File

@ -0,0 +1,4 @@
-- Checks scheme extraction by protocol() against the RFC 3986 scheme grammar.

-- Plain alphabetic schemes.
SELECT protocol('http://example.com') AS Scheme;
SELECT protocol('https://example.com/') AS Scheme;
-- A scheme may contain '+' (as well as '-' and '.') after the first letter.
SELECT protocol('svn+ssh://example.com?q=hello%20world') AS Scheme;
-- '!' is not a valid scheme character; presumably this yields an empty result - verify against the reference file.
SELECT protocol('ftp!://example.com/') AS Scheme;