split FunctionsStringSearch.h (#572)

* split FunctionsStringSearch.h

* wip

* includes

* format
This commit is contained in:
proller 2017-03-10 20:52:36 +03:00 committed by alexey-milovidov
parent 39e8a38fa6
commit a2d78e674f
9 changed files with 1291 additions and 1213 deletions

View File

@ -6,7 +6,8 @@
#include <DB/Columns/ColumnConst.h> #include <DB/Columns/ColumnConst.h>
#include <DB/Columns/ColumnArray.h> #include <DB/Columns/ColumnArray.h>
#include <DB/Functions/IFunction.h> #include <DB/Functions/IFunction.h>
#include <DB/Functions/FunctionsStringSearch.h> #include <DB/Functions/Regexps.h>
#include <DB/DataTypes/DataTypeString.h>
namespace DB namespace DB

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,6 @@
#include <DB/Common/StringUtils.h> #include <DB/Common/StringUtils.h>
#include <DB/Common/StringView.h> #include <DB/Common/StringView.h>
#include <DB/Functions/FunctionsString.h> #include <DB/Functions/FunctionsString.h>
#include <DB/Functions/FunctionsStringSearch.h>
#include <DB/Functions/FunctionsStringArray.h> #include <DB/Functions/FunctionsStringArray.h>
#ifdef __APPLE__ #ifdef __APPLE__
@ -1019,54 +1018,4 @@ struct DecodeURLComponentImpl
ColumnString::Chars_t & res_data); ColumnString::Chars_t & res_data);
}; };
struct NameProtocol { static constexpr auto name = "protocol"; };
struct NameDomain { static constexpr auto name = "domain"; };
struct NameDomainWithoutWWW { static constexpr auto name = "domainWithoutWWW"; };
struct NameFirstSignificantSubdomain { static constexpr auto name = "firstSignificantSubdomain"; };
struct NameTopLevelDomain { static constexpr auto name = "topLevelDomain"; };
struct NamePath { static constexpr auto name = "path"; };
struct NamePathFull { static constexpr auto name = "pathFull"; };
struct NameQueryString { static constexpr auto name = "queryString"; };
struct NameFragment { static constexpr auto name = "fragment"; };
struct NameQueryStringAndFragment { static constexpr auto name = "queryStringAndFragment"; };
struct NameDecodeURLComponent { static constexpr auto name = "decodeURLComponent"; };
struct NameCutToFirstSignificantSubdomain { static constexpr auto name = "cutToFirstSignificantSubdomain"; };
struct NameCutWWW { static constexpr auto name = "cutWWW"; };
struct NameCutQueryString { static constexpr auto name = "cutQueryString"; };
struct NameCutFragment { static constexpr auto name = "cutFragment"; };
struct NameCutQueryStringAndFragment { static constexpr auto name = "cutQueryStringAndFragment"; };
struct NameExtractURLParameter { static constexpr auto name = "extractURLParameter"; };
struct NameCutURLParameter { static constexpr auto name = "cutURLParameter"; };
using FunctionProtocol = FunctionStringToString<ExtractSubstringImpl<ExtractProtocol>, NameProtocol> ;
using FunctionDomain = FunctionStringToString<ExtractSubstringImpl<ExtractDomain<false> >, NameDomain> ;
using FunctionDomainWithoutWWW = FunctionStringToString<ExtractSubstringImpl<ExtractDomain<true> >, NameDomainWithoutWWW>;
using FunctionFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<ExtractFirstSignificantSubdomain>, NameFirstSignificantSubdomain>;
using FunctionTopLevelDomain = FunctionStringToString<ExtractSubstringImpl<ExtractTopLevelDomain>, NameTopLevelDomain> ;
using FunctionPath = FunctionStringToString<ExtractSubstringImpl<ExtractPath>, NamePath> ;
using FunctionPathFull = FunctionStringToString<ExtractSubstringImpl<ExtractPathFull>, NamePathFull> ;
using FunctionQueryString = FunctionStringToString<ExtractSubstringImpl<ExtractQueryString<true> >, NameQueryString> ;
using FunctionFragment = FunctionStringToString<ExtractSubstringImpl<ExtractFragment<true> >, NameFragment> ;
using FunctionQueryStringAndFragment = FunctionStringToString<ExtractSubstringImpl<ExtractQueryStringAndFragment<true> >, NameQueryStringAndFragment>;
using FunctionDecodeURLComponent = FunctionStringToString<DecodeURLComponentImpl, NameDecodeURLComponent>;
using FunctionCutToFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<CutToFirstSignificantSubdomain>, NameCutToFirstSignificantSubdomain>;
using FunctionCutWWW = FunctionStringToString<CutSubstringImpl<ExtractWWW>, NameCutWWW> ;
using FunctionCutQueryString = FunctionStringToString<CutSubstringImpl<ExtractQueryString<false> >, NameCutQueryString> ;
using FunctionCutFragment = FunctionStringToString<CutSubstringImpl<ExtractFragment<false> >, NameCutFragment> ;
using FunctionCutQueryStringAndFragment = FunctionStringToString<CutSubstringImpl<ExtractQueryStringAndFragment<false> >, NameCutQueryStringAndFragment>;
using FunctionExtractURLParameter = FunctionsStringSearchToString<ExtractURLParameterImpl, NameExtractURLParameter>;
using FunctionCutURLParameter = FunctionsStringSearchToString<CutURLParameterImpl, NameCutURLParameter>;
using FunctionExtractURLParameters = FunctionTokens<ExtractURLParametersImpl>;
using FunctionExtractURLParameters = FunctionTokens<ExtractURLParametersImpl>;
using FunctionURLHierarchy = FunctionTokens<URLHierarchyImpl>;
using FunctionURLPathHierarchy = FunctionTokens<URLPathHierarchyImpl>;
using FunctionExtractURLParameterNames = FunctionTokens<ExtractURLParameterNamesImpl>;
} }

View File

@ -12,7 +12,6 @@
#include <DB/Columns/ColumnConst.h> #include <DB/Columns/ColumnConst.h>
#include <DB/Common/Volnitsky.h> #include <DB/Common/Volnitsky.h>
#include <DB/Functions/IFunction.h> #include <DB/Functions/IFunction.h>
#include <DB/Functions/FunctionsStringSearch.h>
#include <DB/IO/ReadBufferFromMemory.h> #include <DB/IO/ReadBufferFromMemory.h>
/** Функции для извлечения параметров визитов. /** Функции для извлечения параметров визитов.
@ -414,21 +413,5 @@ struct ExtractParamToStringImpl
}; };
struct NameVisitParamHas { static constexpr auto name = "visitParamHas"; };
struct NameVisitParamExtractUInt { static constexpr auto name = "visitParamExtractUInt"; };
struct NameVisitParamExtractInt { static constexpr auto name = "visitParamExtractInt"; };
struct NameVisitParamExtractFloat { static constexpr auto name = "visitParamExtractFloat"; };
struct NameVisitParamExtractBool { static constexpr auto name = "visitParamExtractBool"; };
struct NameVisitParamExtractRaw { static constexpr auto name = "visitParamExtractRaw"; };
struct NameVisitParamExtractString { static constexpr auto name = "visitParamExtractString"; };
using FunctionVisitParamHas = FunctionsStringSearch<ExtractParamImpl<HasParam>, NameVisitParamHas>;
using FunctionVisitParamExtractUInt = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<UInt64> >, NameVisitParamExtractUInt>;
using FunctionVisitParamExtractInt = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<Int64> >, NameVisitParamExtractInt>;
using FunctionVisitParamExtractFloat = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<Float64> >, NameVisitParamExtractFloat>;
using FunctionVisitParamExtractBool = FunctionsStringSearch<ExtractParamImpl<ExtractBool>, NameVisitParamExtractBool>;
using FunctionVisitParamExtractRaw = FunctionsStringSearchToString<ExtractParamToStringImpl<ExtractRaw>, NameVisitParamExtractRaw>;
using FunctionVisitParamExtractString = FunctionsStringSearchToString<ExtractParamToStringImpl<ExtractString>, NameVisitParamExtractString>;
} }

View File

@ -0,0 +1,44 @@
#pragma once
#include <DB/Common/OptimizedRegularExpression.h>
#include <DB/Functions/ObjectPool.h>
#include <DB/Functions/likePatternToRegexp.h>
namespace ProfileEvents
{
extern const Event RegexpCreated;
}
namespace DB {
namespace Regexps
{
using Regexp = OptimizedRegularExpressionImpl<false>;
using Pool = ObjectPoolMap<Regexp, String>;
template <bool like>
inline Regexp createRegexp(const std::string & pattern, int flags) { return {pattern, flags}; }
template <>
inline Regexp createRegexp<true>(const std::string & pattern, int flags) { return {likePatternToRegexp(pattern), flags}; }
template <bool like, bool no_capture>
inline Pool::Pointer get(const std::string & pattern)
{
/// C++11 has thread-safe function-local statics on most modern compilers.
static Pool known_regexps; /// Разные переменные для разных параметров шаблона.
return known_regexps.get(pattern, [&pattern]
{
int flags = OptimizedRegularExpression::RE_DOT_NL;
if (no_capture)
flags |= OptimizedRegularExpression::RE_NO_CAPTURE;
ProfileEvents::increment(ProfileEvents::RegexpCreated);
return new Regexp{createRegexp<like>(pattern, flags)};
});
}
}
}

View File

@ -0,0 +1,63 @@
#pragma once
#include <DB/Core/Types.h>
namespace DB
{
/// Переводит выражение LIKE в regexp re2. Например, abc%def -> ^abc.*def$
inline String likePatternToRegexp(const String & pattern)
{
String res;
res.reserve(pattern.size() * 2);
const char * pos = pattern.data();
const char * end = pos + pattern.size();
if (pos < end && *pos == '%')
++pos;
else
res = "^";
while (pos < end)
{
switch (*pos)
{
case '^': case '$': case '.': case '[': case '|': case '(': case ')': case '?': case '*': case '+': case '{':
res += '\\';
res += *pos;
break;
case '%':
if (pos + 1 != end)
res += ".*";
else
return res;
break;
case '_':
res += ".";
break;
case '\\':
++pos;
if (pos == end)
res += "\\\\";
else
{
if (*pos == '%' || *pos == '_')
res += *pos;
else
{
res += '\\';
res += *pos;
}
}
break;
default:
res += *pos;
break;
}
++pos;
}
res += '$';
return res;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
#include <DB/Common/hex.h> #include <DB/Common/hex.h>
#include <DB/Functions/FunctionFactory.h> #include <DB/Functions/FunctionFactory.h>
#include <DB/Functions/FunctionsURL.h> #include <DB/Functions/FunctionsURL.h>
#include <DB/Functions/FunctionsStringSearch.h>
#include <common/find_first_symbols.h> #include <common/find_first_symbols.h>
namespace DB namespace DB
@ -125,6 +126,55 @@ void DecodeURLComponentImpl::vector_fixed(const ColumnString::Chars_t & data, si
throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN); throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN);
} }
struct NameProtocol { static constexpr auto name = "protocol"; };
struct NameDomain { static constexpr auto name = "domain"; };
struct NameDomainWithoutWWW { static constexpr auto name = "domainWithoutWWW"; };
struct NameFirstSignificantSubdomain { static constexpr auto name = "firstSignificantSubdomain"; };
struct NameTopLevelDomain { static constexpr auto name = "topLevelDomain"; };
struct NamePath { static constexpr auto name = "path"; };
struct NamePathFull { static constexpr auto name = "pathFull"; };
struct NameQueryString { static constexpr auto name = "queryString"; };
struct NameFragment { static constexpr auto name = "fragment"; };
struct NameQueryStringAndFragment { static constexpr auto name = "queryStringAndFragment"; };
struct NameDecodeURLComponent { static constexpr auto name = "decodeURLComponent"; };
struct NameCutToFirstSignificantSubdomain { static constexpr auto name = "cutToFirstSignificantSubdomain"; };
struct NameCutWWW { static constexpr auto name = "cutWWW"; };
struct NameCutQueryString { static constexpr auto name = "cutQueryString"; };
struct NameCutFragment { static constexpr auto name = "cutFragment"; };
struct NameCutQueryStringAndFragment { static constexpr auto name = "cutQueryStringAndFragment"; };
struct NameExtractURLParameter { static constexpr auto name = "extractURLParameter"; };
struct NameCutURLParameter { static constexpr auto name = "cutURLParameter"; };
using FunctionProtocol = FunctionStringToString<ExtractSubstringImpl<ExtractProtocol>, NameProtocol> ;
using FunctionDomain = FunctionStringToString<ExtractSubstringImpl<ExtractDomain<false> >, NameDomain> ;
using FunctionDomainWithoutWWW = FunctionStringToString<ExtractSubstringImpl<ExtractDomain<true> >, NameDomainWithoutWWW>;
using FunctionFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<ExtractFirstSignificantSubdomain>, NameFirstSignificantSubdomain>;
using FunctionTopLevelDomain = FunctionStringToString<ExtractSubstringImpl<ExtractTopLevelDomain>, NameTopLevelDomain> ;
using FunctionPath = FunctionStringToString<ExtractSubstringImpl<ExtractPath>, NamePath> ;
using FunctionPathFull = FunctionStringToString<ExtractSubstringImpl<ExtractPathFull>, NamePathFull> ;
using FunctionQueryString = FunctionStringToString<ExtractSubstringImpl<ExtractQueryString<true> >, NameQueryString> ;
using FunctionFragment = FunctionStringToString<ExtractSubstringImpl<ExtractFragment<true> >, NameFragment> ;
using FunctionQueryStringAndFragment = FunctionStringToString<ExtractSubstringImpl<ExtractQueryStringAndFragment<true> >, NameQueryStringAndFragment>;
using FunctionDecodeURLComponent = FunctionStringToString<DecodeURLComponentImpl, NameDecodeURLComponent>;
using FunctionCutToFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<CutToFirstSignificantSubdomain>, NameCutToFirstSignificantSubdomain>;
using FunctionCutWWW = FunctionStringToString<CutSubstringImpl<ExtractWWW>, NameCutWWW> ;
using FunctionCutQueryString = FunctionStringToString<CutSubstringImpl<ExtractQueryString<false> >, NameCutQueryString> ;
using FunctionCutFragment = FunctionStringToString<CutSubstringImpl<ExtractFragment<false> >, NameCutFragment> ;
using FunctionCutQueryStringAndFragment = FunctionStringToString<CutSubstringImpl<ExtractQueryStringAndFragment<false> >, NameCutQueryStringAndFragment>;
using FunctionExtractURLParameter = FunctionsStringSearchToString<ExtractURLParameterImpl, NameExtractURLParameter>;
using FunctionCutURLParameter = FunctionsStringSearchToString<CutURLParameterImpl, NameCutURLParameter>;
using FunctionExtractURLParameters = FunctionTokens<ExtractURLParametersImpl>;
using FunctionExtractURLParameters = FunctionTokens<ExtractURLParametersImpl>;
using FunctionURLHierarchy = FunctionTokens<URLHierarchyImpl>;
using FunctionURLPathHierarchy = FunctionTokens<URLPathHierarchyImpl>;
using FunctionExtractURLParameterNames = FunctionTokens<ExtractURLParameterNamesImpl>;
void registerFunctionsURL(FunctionFactory & factory) void registerFunctionsURL(FunctionFactory & factory)
{ {

View File

@ -1,9 +1,31 @@
#include <DB/Functions/FunctionFactory.h> #include <DB/Functions/FunctionFactory.h>
#include <DB/Functions/FunctionsVisitParam.h> #include <DB/Functions/FunctionsVisitParam.h>
#include <DB/Functions/FunctionsStringSearch.h>
#include <DB/Functions/FunctionsString.h>
#include <DB/Functions/FunctionsURL.h>
namespace DB namespace DB
{ {
struct NameVisitParamHas { static constexpr auto name = "visitParamHas"; };
struct NameVisitParamExtractUInt { static constexpr auto name = "visitParamExtractUInt"; };
struct NameVisitParamExtractInt { static constexpr auto name = "visitParamExtractInt"; };
struct NameVisitParamExtractFloat { static constexpr auto name = "visitParamExtractFloat"; };
struct NameVisitParamExtractBool { static constexpr auto name = "visitParamExtractBool"; };
struct NameVisitParamExtractRaw { static constexpr auto name = "visitParamExtractRaw"; };
struct NameVisitParamExtractString { static constexpr auto name = "visitParamExtractString"; };
using FunctionVisitParamHas = FunctionsStringSearch<ExtractParamImpl<HasParam>, NameVisitParamHas>;
using FunctionVisitParamExtractUInt = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<UInt64> >, NameVisitParamExtractUInt>;
using FunctionVisitParamExtractInt = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<Int64> >, NameVisitParamExtractInt>;
using FunctionVisitParamExtractFloat = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<Float64> >, NameVisitParamExtractFloat>;
using FunctionVisitParamExtractBool = FunctionsStringSearch<ExtractParamImpl<ExtractBool>, NameVisitParamExtractBool>;
using FunctionVisitParamExtractRaw = FunctionsStringSearchToString<ExtractParamToStringImpl<ExtractRaw>, NameVisitParamExtractRaw>;
using FunctionVisitParamExtractString = FunctionsStringSearchToString<ExtractParamToStringImpl<ExtractString>, NameVisitParamExtractString>;
void registerFunctionsVisitParam(FunctionFactory & factory) void registerFunctionsVisitParam(FunctionFactory & factory)
{ {
factory.registerFunction<FunctionVisitParamHas>(); factory.registerFunction<FunctionVisitParamHas>();