mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
- use std::experimental::string_view
- rename unquoteUrl to decodeURLComponent
- fix code-style
This commit is contained in:
parent
bea52b8631
commit
529f75c8b4
@ -628,7 +628,7 @@ add_library (dbms
|
||||
src/Common/getNumberOfPhysicalCPUCores.cpp
|
||||
src/Common/randomSeed.cpp
|
||||
src/Common/ThreadPool.cpp
|
||||
src/Common/UrlUtils.cpp
|
||||
src/Common/URLUtils.cpp
|
||||
src/Common/hex.cpp
|
||||
|
||||
src/Core/Field.cpp
|
||||
|
@ -1,135 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
/// A lightweight non-owning read-only view into a subsequence of a string.
|
||||
template <
|
||||
typename TChar,
|
||||
typename TTraits = std::char_traits<TChar>
|
||||
>
|
||||
class StringViewImpl
|
||||
{
|
||||
public:
|
||||
using size_type = size_t;
|
||||
using traits_type = TTraits;
|
||||
using value_type = typename TTraits::char_type;
|
||||
|
||||
static constexpr size_type npos = size_type(-1);
|
||||
|
||||
public:
|
||||
inline StringViewImpl() noexcept
|
||||
: str(nullptr)
|
||||
, len(0)
|
||||
{
|
||||
}
|
||||
|
||||
constexpr inline StringViewImpl(const TChar* data_, size_t len_) noexcept
|
||||
: str(data_)
|
||||
, len(len_)
|
||||
{
|
||||
}
|
||||
|
||||
inline StringViewImpl(const std::basic_string<TChar>& str) noexcept
|
||||
: str(str.data())
|
||||
, len(str.size())
|
||||
{
|
||||
}
|
||||
|
||||
inline TChar at(size_type pos) const
|
||||
{
|
||||
if (pos >= len)
|
||||
throw std::out_of_range("pos must be less than len");
|
||||
return str[pos];
|
||||
}
|
||||
|
||||
inline TChar back() const noexcept
|
||||
{
|
||||
return str[len - 1];
|
||||
}
|
||||
|
||||
inline const TChar* data() const noexcept
|
||||
{
|
||||
return str;
|
||||
}
|
||||
|
||||
inline bool empty() const noexcept
|
||||
{
|
||||
return len == 0;
|
||||
}
|
||||
|
||||
inline TChar front() const noexcept
|
||||
{
|
||||
return str[0];
|
||||
}
|
||||
|
||||
inline bool null() const noexcept
|
||||
{
|
||||
assert(len == 0);
|
||||
return str == nullptr;
|
||||
}
|
||||
|
||||
inline size_type size() const noexcept
|
||||
{
|
||||
return len;
|
||||
}
|
||||
|
||||
public:
|
||||
/**
|
||||
* Returns a substring [pos, pos + count).
|
||||
* If the requested substring extends past the end of the string,
|
||||
* or if count == npos, the returned substring is [pos, size()).
|
||||
*/
|
||||
StringViewImpl substr(size_type pos, size_type count = npos) const
|
||||
{
|
||||
if (pos >= len)
|
||||
throw std::out_of_range("pos must be less than len");
|
||||
if (pos + count >= len || count == npos)
|
||||
return StringViewImpl(str + pos, len - pos);
|
||||
else
|
||||
return StringViewImpl(str + pos, count);
|
||||
}
|
||||
|
||||
public:
|
||||
inline operator bool () const noexcept
|
||||
{
|
||||
return !empty();
|
||||
}
|
||||
|
||||
inline TChar operator [] (size_type pos) const noexcept
|
||||
{
|
||||
return str[pos];
|
||||
}
|
||||
|
||||
inline bool operator < (const StringViewImpl& other) const noexcept
|
||||
{
|
||||
if (len < other.len)
|
||||
return true;
|
||||
if (len > other.len)
|
||||
return false;
|
||||
return TTraits::compare(str, other.str, len) < 0;
|
||||
}
|
||||
|
||||
inline bool operator == (const StringViewImpl& other) const noexcept
|
||||
{
|
||||
if (len == other.len)
|
||||
return TTraits::compare(str, other.str, len) == 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
const TChar* str;
|
||||
size_t len;
|
||||
};
|
||||
#include <experimental/string_view>
|
||||
|
||||
using StringView = std::experimental::string_view;
|
||||
|
||||
/// It creates StringView from literal constant at compile time.
|
||||
template <typename TChar, size_t size>
|
||||
constexpr inline StringViewImpl<TChar> MakeStringView(const TChar (&str)[size])
|
||||
constexpr inline std::experimental::basic_string_view<TChar> makeStringView(const TChar (&str)[size])
|
||||
{
|
||||
return StringViewImpl<TChar>(str, size - 1);
|
||||
return std::experimental::basic_string_view<TChar>(str, size - 1);
|
||||
}
|
||||
|
||||
|
||||
using StringView = StringViewImpl<char>;
|
||||
|
@ -3,12 +3,12 @@
|
||||
#include <DB/Common/StringView.h>
|
||||
|
||||
/// Percent decode of url data.
|
||||
std::string decodeUrl(const StringView& url);
|
||||
std::string decodeUrl(const StringView & url);
|
||||
|
||||
|
||||
/// Extracts scheme from given url.
|
||||
StringView getUrlScheme(const StringView& url);
|
||||
StringView getUrlScheme(const StringView & url);
|
||||
|
||||
|
||||
/// Extracts host from given url.
|
||||
StringView getUrlHost(const StringView& url);
|
||||
StringView getUrlHost(const StringView & url);
|
@ -1,3 +1,3 @@
|
||||
#pragma once
|
||||
|
||||
extern const char* const char2DigitTable;
|
||||
extern const char* const char_to_digit_table;
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include <DB/DataTypes/DataTypeString.h>
|
||||
#include <DB/Columns/ColumnString.h>
|
||||
#include <DB/Columns/ColumnConst.h>
|
||||
#include <DB/Common/UrlUtils.h>
|
||||
#include <DB/Common/URLUtils.h>
|
||||
#include <DB/Functions/FunctionsString.h>
|
||||
#include <DB/Functions/FunctionsStringSearch.h>
|
||||
#include <DB/Functions/FunctionsStringArray.h>
|
||||
@ -60,7 +60,7 @@ using Pos = const char *;
|
||||
|
||||
struct ExtractProtocol
|
||||
{
|
||||
static size_t getReserveLengthForElement() { return MakeStringView("https").size() + 1; }
|
||||
static size_t getReserveLengthForElement() { return makeStringView("https").size() + 1; }
|
||||
|
||||
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
|
||||
{
|
||||
@ -971,7 +971,7 @@ struct CutSubstringImpl
|
||||
};
|
||||
|
||||
|
||||
struct UrlDecodeImpl
|
||||
struct DecodeURLComponentImpl
|
||||
{
|
||||
static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets,
|
||||
ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets)
|
||||
@ -986,7 +986,7 @@ struct UrlDecodeImpl
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const char * current = reinterpret_cast<const char *>(&data[prev_offset]);
|
||||
std::string url = decodeUrl(StringView(current, offsets[i] - prev_offset - 1));
|
||||
std::string url(decodeUrl(StringView(current, offsets[i] - prev_offset - 1)));
|
||||
|
||||
res_data.resize(res_data.size() + url.size() + 1);
|
||||
memcpy(&res_data[res_offset], url.data(), url.size());
|
||||
@ -1022,7 +1022,7 @@ struct NamePathFull { static constexpr auto name = "pathFull"; };
|
||||
struct NameQueryString { static constexpr auto name = "queryString"; };
|
||||
struct NameFragment { static constexpr auto name = "fragment"; };
|
||||
struct NameQueryStringAndFragment { static constexpr auto name = "queryStringAndFragment"; };
|
||||
struct NameUnquoteUrl { static constexpr auto name = "unquoteUrl"; };
|
||||
struct NameDecodeURLComponent { static constexpr auto name = "decodeURLComponent"; };
|
||||
|
||||
struct NameCutToFirstSignificantSubdomain { static constexpr auto name = "cutToFirstSignificantSubdomain"; };
|
||||
|
||||
@ -1044,7 +1044,7 @@ using FunctionPathFull = FunctionStringToString<ExtractSubstringImpl<ExtractPath
|
||||
using FunctionQueryString = FunctionStringToString<ExtractSubstringImpl<ExtractQueryString<true> >, NameQueryString> ;
|
||||
using FunctionFragment = FunctionStringToString<ExtractSubstringImpl<ExtractFragment<true> >, NameFragment> ;
|
||||
using FunctionQueryStringAndFragment = FunctionStringToString<ExtractSubstringImpl<ExtractQueryStringAndFragment<true> >, NameQueryStringAndFragment>;
|
||||
using FunctionUnquoteUrl = FunctionStringToString<UrlDecodeImpl, NameUnquoteUrl>;
|
||||
using FunctionDecodeURLComponent = FunctionStringToString<DecodeURLComponentImpl, NameDecodeURLComponent>;
|
||||
|
||||
using FunctionCutToFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<CutToFirstSignificantSubdomain>, NameCutToFirstSignificantSubdomain>;
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
#include <DB/Common/hex.h>
|
||||
#include <DB/Common/StringUtils.h>
|
||||
#include <DB/Common/UrlUtils.h>
|
||||
#include <DB/Common/URLUtils.h>
|
||||
|
||||
std::string decodeUrl(const StringView& url)
|
||||
std::string decodeUrl(const StringView & url)
|
||||
{
|
||||
const char* p = url.data();
|
||||
const char* st = url.data();
|
||||
@ -14,8 +14,8 @@ std::string decodeUrl(const StringView& url)
|
||||
if (*p != '%' || end - p < 3)
|
||||
continue;
|
||||
|
||||
unsigned char h = char2DigitTable[static_cast<unsigned char>(p[1])];
|
||||
unsigned char l = char2DigitTable[static_cast<unsigned char>(p[2])];
|
||||
unsigned char h = char_to_digit_table[static_cast<unsigned char>(p[1])];
|
||||
unsigned char l = char_to_digit_table[static_cast<unsigned char>(p[2])];
|
||||
|
||||
if (h != 0xFF && l != 0xFF)
|
||||
{
|
||||
@ -38,7 +38,7 @@ std::string decodeUrl(const StringView& url)
|
||||
return result;
|
||||
}
|
||||
|
||||
StringView getUrlScheme(const StringView& url)
|
||||
StringView getUrlScheme(const StringView & url)
|
||||
{
|
||||
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
|
||||
const char* p = url.data();
|
||||
@ -61,7 +61,7 @@ StringView getUrlScheme(const StringView& url)
|
||||
}
|
||||
|
||||
|
||||
StringView getUrlHost(const StringView& url)
|
||||
StringView getUrlHost(const StringView & url)
|
||||
{
|
||||
StringView scheme = getUrlScheme(url);
|
||||
const char* p = url.data() + scheme.size();
|
@ -1,18 +1,20 @@
|
||||
#include <DB/Common/hex.h>
|
||||
|
||||
const char* const char2DigitTable = ("\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9
|
||||
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff");
|
||||
const char* const char_to_digit_table = (
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9
|
||||
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
);
|
||||
|
@ -27,7 +27,7 @@ void registerFunctionsURL(FunctionFactory & factory)
|
||||
factory.registerFunction<FunctionCutFragment>();
|
||||
factory.registerFunction<FunctionCutQueryStringAndFragment>();
|
||||
factory.registerFunction<FunctionCutURLParameter>();
|
||||
factory.registerFunction<FunctionUnquoteUrl>();
|
||||
factory.registerFunction<FunctionDecodeURLComponent>();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -12,4 +12,4 @@ com
|
||||
|
||||
ru
|
||||
ru
|
||||
http://127.0.0.1/?query=hello world+foo+bar
|
||||
/?query=hello world+foo+bar
|
||||
|
@ -15,4 +15,4 @@ SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain;
|
||||
SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain;
|
||||
SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain;
|
||||
|
||||
SELECT unquoteUrl('http://127.0.0.1/?query=hello%20world+foo%2Bbar') AS Url;
|
||||
SELECT decodeURLComponent(pathFull('http://127.0.0.1/?query=hello%20world+foo%2Bbar')) AS Path;
|
||||
|
Loading…
Reference in New Issue
Block a user