- use std::experimental::string_view

- rename unquoteUrl to decodeURLComponent
- fix code-style
This commit is contained in:
artpaul 2016-12-12 11:09:00 +05:00
parent bea52b8631
commit 529f75c8b4
10 changed files with 42 additions and 163 deletions

View File

@ -628,7 +628,7 @@ add_library (dbms
src/Common/getNumberOfPhysicalCPUCores.cpp
src/Common/randomSeed.cpp
src/Common/ThreadPool.cpp
src/Common/UrlUtils.cpp
src/Common/URLUtils.cpp
src/Common/hex.cpp
src/Core/Field.cpp

View File

@ -1,135 +1,12 @@
#pragma once
#include <cassert>
#include <stdexcept>
#include <string>
/// A lightweight non-owning read-only view into a subsequence of a string.
/// Only a pointer and a length are stored; the viewed characters are never copied,
/// so the underlying buffer must stay alive for as long as the view is used.
template <
    typename TChar,
    typename TTraits = std::char_traits<TChar>
>
class StringViewImpl
{
public:
    using size_type = size_t;
    using traits_type = TTraits;
    using value_type = typename TTraits::char_type;

    /// Special value of `count` meaning "up to the end of the view".
    static constexpr size_type npos = size_type(-1);

public:
    /// Constructs a null view: data() == nullptr and size() == 0.
    constexpr inline StringViewImpl() noexcept
        : str(nullptr)
        , len(0)
    {
    }

    /// Constructs a view over the len_ characters starting at data_.
    constexpr inline StringViewImpl(const TChar* data_, size_t len_) noexcept
        : str(data_)
        , len(len_)
    {
    }

    /// Constructs a view over the whole contents of a std::basic_string (implicit on purpose).
    inline StringViewImpl(const std::basic_string<TChar>& s) noexcept
        : str(s.data())
        , len(s.size())
    {
    }

    /// Bounds-checked element access; throws std::out_of_range if pos >= size().
    inline TChar at(size_type pos) const
    {
        if (pos >= len)
            throw std::out_of_range("pos must be less than len");
        return str[pos];
    }

    /// Last character. Not bounds-checked: the view must be non-empty.
    inline TChar back() const noexcept
    {
        return str[len - 1];
    }

    /// Pointer to the first viewed character (nullptr for a default-constructed view).
    inline const TChar* data() const noexcept
    {
        return str;
    }

    /// True if the view contains no characters.
    inline bool empty() const noexcept
    {
        return len == 0;
    }

    /// First character. Not bounds-checked: the view must be non-empty.
    inline TChar front() const noexcept
    {
        return str[0];
    }

    /// True if the view does not point at any buffer at all.
    inline bool null() const noexcept
    {
        /// A null pointer may only be paired with a zero length. The previous check
        /// (len == 0) aborted debug builds for every non-empty view.
        assert(str != nullptr || len == 0);
        return str == nullptr;
    }

    /// Number of characters in the view.
    inline size_type size() const noexcept
    {
        return len;
    }

public:
    /**
    * Returns a substring [pos, pos + count).
    * If the requested substring extends past the end of the string,
    * or if count == npos, the returned substring is [pos, size()).
    * Throws std::out_of_range if pos >= size().
    */
    StringViewImpl substr(size_type pos, size_type count = npos) const
    {
        if (pos >= len)
            throw std::out_of_range("pos must be less than len");

        /// Clamp against the remaining length rather than evaluating pos + count:
        /// that sum wraps around for large counts and used to yield a view whose
        /// length ran far past the end of the data.
        const size_type remaining = len - pos;
        return StringViewImpl(str + pos, count < remaining ? count : remaining);
    }

public:
    /// True if the view is non-empty.
    inline operator bool () const noexcept
    {
        return !empty();
    }

    /// Unchecked element access.
    inline TChar operator [] (size_type pos) const noexcept
    {
        return str[pos];
    }

    /// Strict ordering by length first; views of equal length are ordered by
    /// TTraits::compare. Note that this is not a lexicographic order.
    inline bool operator < (const StringViewImpl& other) const noexcept
    {
        if (len < other.len)
            return true;
        if (len > other.len)
            return false;
        return TTraits::compare(str, other.str, len) < 0;
    }

    /// Views are equal when they have the same length and the same characters.
    inline bool operator == (const StringViewImpl& other) const noexcept
    {
        if (len == other.len)
            return TTraits::compare(str, other.str, len) == 0;
        return false;
    }

private:
    const TChar* str;
    size_t len;
};
#include <experimental/string_view>
using StringView = std::experimental::string_view;
/// Creates a string view from a string literal at compile time (the terminating null character is excluded).
template <typename TChar, size_t size>
constexpr inline StringViewImpl<TChar> MakeStringView(const TChar (&str)[size])
constexpr inline std::experimental::basic_string_view<TChar> makeStringView(const TChar (&str)[size])
{
return StringViewImpl<TChar>(str, size - 1);
return std::experimental::basic_string_view<TChar>(str, size - 1);
}
using StringView = StringViewImpl<char>;

View File

@ -3,12 +3,12 @@
#include <DB/Common/StringView.h>
/// Decodes percent-encoded (%XX) sequences in the given URL data.
std::string decodeUrl(const StringView& url);
std::string decodeUrl(const StringView & url);
/// Extracts scheme from given url.
StringView getUrlScheme(const StringView& url);
StringView getUrlScheme(const StringView & url);
/// Extracts host from given url.
StringView getUrlHost(const StringView& url);
StringView getUrlHost(const StringView & url);

View File

@ -1,3 +1,3 @@
#pragma once
extern const char* const char2DigitTable;
extern const char* const char_to_digit_table;

View File

@ -3,7 +3,7 @@
#include <DB/DataTypes/DataTypeString.h>
#include <DB/Columns/ColumnString.h>
#include <DB/Columns/ColumnConst.h>
#include <DB/Common/UrlUtils.h>
#include <DB/Common/URLUtils.h>
#include <DB/Functions/FunctionsString.h>
#include <DB/Functions/FunctionsStringSearch.h>
#include <DB/Functions/FunctionsStringArray.h>
@ -60,7 +60,7 @@ using Pos = const char *;
struct ExtractProtocol
{
static size_t getReserveLengthForElement() { return MakeStringView("https").size() + 1; }
static size_t getReserveLengthForElement() { return makeStringView("https").size() + 1; }
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
{
@ -971,7 +971,7 @@ struct CutSubstringImpl
};
struct UrlDecodeImpl
struct DecodeURLComponentImpl
{
static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets,
ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets)
@ -986,7 +986,7 @@ struct UrlDecodeImpl
for (size_t i = 0; i < size; ++i)
{
const char * current = reinterpret_cast<const char *>(&data[prev_offset]);
std::string url = decodeUrl(StringView(current, offsets[i] - prev_offset - 1));
std::string url(decodeUrl(StringView(current, offsets[i] - prev_offset - 1)));
res_data.resize(res_data.size() + url.size() + 1);
memcpy(&res_data[res_offset], url.data(), url.size());
@ -1022,7 +1022,7 @@ struct NamePathFull { static constexpr auto name = "pathFull"; };
struct NameQueryString { static constexpr auto name = "queryString"; };
struct NameFragment { static constexpr auto name = "fragment"; };
struct NameQueryStringAndFragment { static constexpr auto name = "queryStringAndFragment"; };
struct NameUnquoteUrl { static constexpr auto name = "unquoteUrl"; };
struct NameDecodeURLComponent { static constexpr auto name = "decodeURLComponent"; };
struct NameCutToFirstSignificantSubdomain { static constexpr auto name = "cutToFirstSignificantSubdomain"; };
@ -1044,7 +1044,7 @@ using FunctionPathFull = FunctionStringToString<ExtractSubstringImpl<ExtractPath
using FunctionQueryString = FunctionStringToString<ExtractSubstringImpl<ExtractQueryString<true> >, NameQueryString> ;
using FunctionFragment = FunctionStringToString<ExtractSubstringImpl<ExtractFragment<true> >, NameFragment> ;
using FunctionQueryStringAndFragment = FunctionStringToString<ExtractSubstringImpl<ExtractQueryStringAndFragment<true> >, NameQueryStringAndFragment>;
using FunctionUnquoteUrl = FunctionStringToString<UrlDecodeImpl, NameUnquoteUrl>;
using FunctionDecodeURLComponent = FunctionStringToString<DecodeURLComponentImpl, NameDecodeURLComponent>;
using FunctionCutToFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<CutToFirstSignificantSubdomain>, NameCutToFirstSignificantSubdomain>;

View File

@ -1,8 +1,8 @@
#include <DB/Common/hex.h>
#include <DB/Common/StringUtils.h>
#include <DB/Common/UrlUtils.h>
#include <DB/Common/URLUtils.h>
std::string decodeUrl(const StringView& url)
std::string decodeUrl(const StringView & url)
{
const char* p = url.data();
const char* st = url.data();
@ -14,8 +14,8 @@ std::string decodeUrl(const StringView& url)
if (*p != '%' || end - p < 3)
continue;
unsigned char h = char2DigitTable[static_cast<unsigned char>(p[1])];
unsigned char l = char2DigitTable[static_cast<unsigned char>(p[2])];
unsigned char h = char_to_digit_table[static_cast<unsigned char>(p[1])];
unsigned char l = char_to_digit_table[static_cast<unsigned char>(p[2])];
if (h != 0xFF && l != 0xFF)
{
@ -38,7 +38,7 @@ std::string decodeUrl(const StringView& url)
return result;
}
StringView getUrlScheme(const StringView& url)
StringView getUrlScheme(const StringView & url)
{
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
const char* p = url.data();
@ -61,7 +61,7 @@ StringView getUrlScheme(const StringView& url)
}
StringView getUrlHost(const StringView& url)
StringView getUrlHost(const StringView & url)
{
StringView scheme = getUrlScheme(url);
const char* p = url.data() + scheme.size();

View File

@ -1,18 +1,20 @@
#include <DB/Common/hex.h>
/// Lookup table indexed by an unsigned byte value (256 entries, 16 per row).
/// Hexadecimal digit characters map to their numeric value 0..15;
/// every other byte maps to 0xFF, which marks "not a hex digit".
const char* const char2DigitTable = ("\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" // '0'-'9' -> 0..9
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" // 'A'-'F' -> 10..15 (rest of 0x40-0x4F is invalid)
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" // 'a'-'f' -> 10..15 (rest of 0x60-0x6F is invalid)
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff");
/// Lookup table indexed by an unsigned byte value (256 entries, 16 per row).
/// Hexadecimal digit characters map to their numeric value 0..15;
/// every other byte maps to 0xFF, which marks "not a hex digit".
const char* const char_to_digit_table = (
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" // '0'-'9' -> 0..9
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" // 'A'-'F' -> 10..15 (rest of 0x40-0x4F is invalid)
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" // 'a'-'f' -> 10..15 (rest of 0x60-0x6F is invalid)
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
);

View File

@ -27,7 +27,7 @@ void registerFunctionsURL(FunctionFactory & factory)
factory.registerFunction<FunctionCutFragment>();
factory.registerFunction<FunctionCutQueryStringAndFragment>();
factory.registerFunction<FunctionCutURLParameter>();
factory.registerFunction<FunctionUnquoteUrl>();
factory.registerFunction<FunctionDecodeURLComponent>();
}
}

View File

@ -12,4 +12,4 @@ com
ru
ru
http://127.0.0.1/?query=hello world+foo+bar
/?query=hello world+foo+bar

View File

@ -15,4 +15,4 @@ SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain;
SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain;
SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain;
SELECT unquoteUrl('http://127.0.0.1/?query=hello%20world+foo%2Bbar') AS Url;
SELECT decodeURLComponent(pathFull('http://127.0.0.1/?query=hello%20world+foo%2Bbar')) AS Path;