mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
split FunctionsStringSearch.h (#572)
* split FunctionsStringSearch.h * wip * includes * format
This commit is contained in:
parent
39e8a38fa6
commit
a2d78e674f
@ -6,7 +6,8 @@
|
||||
#include <DB/Columns/ColumnConst.h>
|
||||
#include <DB/Columns/ColumnArray.h>
|
||||
#include <DB/Functions/IFunction.h>
|
||||
#include <DB/Functions/FunctionsStringSearch.h>
|
||||
#include <DB/Functions/Regexps.h>
|
||||
#include <DB/DataTypes/DataTypeString.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,6 @@
|
||||
#include <DB/Common/StringUtils.h>
|
||||
#include <DB/Common/StringView.h>
|
||||
#include <DB/Functions/FunctionsString.h>
|
||||
#include <DB/Functions/FunctionsStringSearch.h>
|
||||
#include <DB/Functions/FunctionsStringArray.h>
|
||||
|
||||
#ifdef __APPLE__
|
||||
@ -1019,54 +1018,4 @@ struct DecodeURLComponentImpl
|
||||
ColumnString::Chars_t & res_data);
|
||||
};
|
||||
|
||||
|
||||
/// Name tags for the URL functions. Each struct exposes one function name as a
/// compile-time string and is used as the name template argument of a Function* alias.

/// Tags used with ExtractSubstringImpl-based functions and decodeURLComponent.
struct NameProtocol { static constexpr auto name = "protocol"; };
struct NameDomain { static constexpr auto name = "domain"; };
struct NameDomainWithoutWWW { static constexpr auto name = "domainWithoutWWW"; };
struct NameFirstSignificantSubdomain { static constexpr auto name = "firstSignificantSubdomain"; };
struct NameTopLevelDomain { static constexpr auto name = "topLevelDomain"; };
struct NamePath { static constexpr auto name = "path"; };
struct NamePathFull { static constexpr auto name = "pathFull"; };
struct NameQueryString { static constexpr auto name = "queryString"; };
struct NameFragment { static constexpr auto name = "fragment"; };
struct NameQueryStringAndFragment { static constexpr auto name = "queryStringAndFragment"; };
struct NameDecodeURLComponent { static constexpr auto name = "decodeURLComponent"; };

struct NameCutToFirstSignificantSubdomain { static constexpr auto name = "cutToFirstSignificantSubdomain"; };

/// Tags used with CutSubstringImpl-based functions.
struct NameCutWWW { static constexpr auto name = "cutWWW"; };
struct NameCutQueryString { static constexpr auto name = "cutQueryString"; };
struct NameCutFragment { static constexpr auto name = "cutFragment"; };
struct NameCutQueryStringAndFragment { static constexpr auto name = "cutQueryStringAndFragment"; };

/// Tags used with FunctionsStringSearchToString-based functions.
struct NameExtractURLParameter { static constexpr auto name = "extractURLParameter"; };
struct NameCutURLParameter { static constexpr auto name = "cutURLParameter"; };
|
||||
|
||||
using FunctionProtocol = FunctionStringToString<ExtractSubstringImpl<ExtractProtocol>, NameProtocol> ;
|
||||
using FunctionDomain = FunctionStringToString<ExtractSubstringImpl<ExtractDomain<false> >, NameDomain> ;
|
||||
using FunctionDomainWithoutWWW = FunctionStringToString<ExtractSubstringImpl<ExtractDomain<true> >, NameDomainWithoutWWW>;
|
||||
using FunctionFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<ExtractFirstSignificantSubdomain>, NameFirstSignificantSubdomain>;
|
||||
using FunctionTopLevelDomain = FunctionStringToString<ExtractSubstringImpl<ExtractTopLevelDomain>, NameTopLevelDomain> ;
|
||||
using FunctionPath = FunctionStringToString<ExtractSubstringImpl<ExtractPath>, NamePath> ;
|
||||
using FunctionPathFull = FunctionStringToString<ExtractSubstringImpl<ExtractPathFull>, NamePathFull> ;
|
||||
using FunctionQueryString = FunctionStringToString<ExtractSubstringImpl<ExtractQueryString<true> >, NameQueryString> ;
|
||||
using FunctionFragment = FunctionStringToString<ExtractSubstringImpl<ExtractFragment<true> >, NameFragment> ;
|
||||
using FunctionQueryStringAndFragment = FunctionStringToString<ExtractSubstringImpl<ExtractQueryStringAndFragment<true> >, NameQueryStringAndFragment>;
|
||||
using FunctionDecodeURLComponent = FunctionStringToString<DecodeURLComponentImpl, NameDecodeURLComponent>;
|
||||
|
||||
using FunctionCutToFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<CutToFirstSignificantSubdomain>, NameCutToFirstSignificantSubdomain>;
|
||||
|
||||
using FunctionCutWWW = FunctionStringToString<CutSubstringImpl<ExtractWWW>, NameCutWWW> ;
|
||||
using FunctionCutQueryString = FunctionStringToString<CutSubstringImpl<ExtractQueryString<false> >, NameCutQueryString> ;
|
||||
using FunctionCutFragment = FunctionStringToString<CutSubstringImpl<ExtractFragment<false> >, NameCutFragment> ;
|
||||
using FunctionCutQueryStringAndFragment = FunctionStringToString<CutSubstringImpl<ExtractQueryStringAndFragment<false> >, NameCutQueryStringAndFragment>;
|
||||
|
||||
using FunctionExtractURLParameter = FunctionsStringSearchToString<ExtractURLParameterImpl, NameExtractURLParameter>;
|
||||
using FunctionCutURLParameter = FunctionsStringSearchToString<CutURLParameterImpl, NameCutURLParameter>;
|
||||
using FunctionExtractURLParameters = FunctionTokens<ExtractURLParametersImpl>;
|
||||
using FunctionExtractURLParameters = FunctionTokens<ExtractURLParametersImpl>;
|
||||
using FunctionURLHierarchy = FunctionTokens<URLHierarchyImpl>;
|
||||
using FunctionURLPathHierarchy = FunctionTokens<URLPathHierarchyImpl>;
|
||||
using FunctionExtractURLParameterNames = FunctionTokens<ExtractURLParameterNamesImpl>;
|
||||
|
||||
}
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include <DB/Columns/ColumnConst.h>
|
||||
#include <DB/Common/Volnitsky.h>
|
||||
#include <DB/Functions/IFunction.h>
|
||||
#include <DB/Functions/FunctionsStringSearch.h>
|
||||
#include <DB/IO/ReadBufferFromMemory.h>
|
||||
|
||||
/** Функции для извлечения параметров визитов.
|
||||
@ -414,21 +413,5 @@ struct ExtractParamToStringImpl
|
||||
};
|
||||
|
||||
|
||||
/// Name tags for the visitParam* functions. Each struct exposes one function name as a
/// compile-time string and is used as the name template argument of a FunctionVisitParam* alias.
struct NameVisitParamHas { static constexpr auto name = "visitParamHas"; };
struct NameVisitParamExtractUInt { static constexpr auto name = "visitParamExtractUInt"; };
struct NameVisitParamExtractInt { static constexpr auto name = "visitParamExtractInt"; };
struct NameVisitParamExtractFloat { static constexpr auto name = "visitParamExtractFloat"; };
struct NameVisitParamExtractBool { static constexpr auto name = "visitParamExtractBool"; };
struct NameVisitParamExtractRaw { static constexpr auto name = "visitParamExtractRaw"; };
struct NameVisitParamExtractString { static constexpr auto name = "visitParamExtractString"; };
|
||||
|
||||
|
||||
using FunctionVisitParamHas = FunctionsStringSearch<ExtractParamImpl<HasParam>, NameVisitParamHas>;
|
||||
using FunctionVisitParamExtractUInt = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<UInt64> >, NameVisitParamExtractUInt>;
|
||||
using FunctionVisitParamExtractInt = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<Int64> >, NameVisitParamExtractInt>;
|
||||
using FunctionVisitParamExtractFloat = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<Float64> >, NameVisitParamExtractFloat>;
|
||||
using FunctionVisitParamExtractBool = FunctionsStringSearch<ExtractParamImpl<ExtractBool>, NameVisitParamExtractBool>;
|
||||
using FunctionVisitParamExtractRaw = FunctionsStringSearchToString<ExtractParamToStringImpl<ExtractRaw>, NameVisitParamExtractRaw>;
|
||||
using FunctionVisitParamExtractString = FunctionsStringSearchToString<ExtractParamToStringImpl<ExtractString>, NameVisitParamExtractString>;
|
||||
|
||||
}
|
||||
|
44
dbms/include/DB/Functions/Regexps.h
Normal file
44
dbms/include/DB/Functions/Regexps.h
Normal file
@ -0,0 +1,44 @@
|
||||
#pragma once
|
||||
#include <DB/Common/OptimizedRegularExpression.h>
|
||||
#include <DB/Functions/ObjectPool.h>
|
||||
#include <DB/Functions/likePatternToRegexp.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event RegexpCreated;
|
||||
}
|
||||
|
||||
|
||||
namespace DB {
|
||||
|
||||
|
||||
namespace Regexps
|
||||
{
|
||||
using Regexp = OptimizedRegularExpressionImpl<false>;
|
||||
using Pool = ObjectPoolMap<Regexp, String>;
|
||||
|
||||
template <bool like>
|
||||
inline Regexp createRegexp(const std::string & pattern, int flags) { return {pattern, flags}; }
|
||||
|
||||
template <>
|
||||
inline Regexp createRegexp<true>(const std::string & pattern, int flags) { return {likePatternToRegexp(pattern), flags}; }
|
||||
|
||||
template <bool like, bool no_capture>
|
||||
inline Pool::Pointer get(const std::string & pattern)
|
||||
{
|
||||
/// C++11 has thread-safe function-local statics on most modern compilers.
|
||||
static Pool known_regexps; /// Разные переменные для разных параметров шаблона.
|
||||
|
||||
return known_regexps.get(pattern, [&pattern]
|
||||
{
|
||||
int flags = OptimizedRegularExpression::RE_DOT_NL;
|
||||
if (no_capture)
|
||||
flags |= OptimizedRegularExpression::RE_NO_CAPTURE;
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::RegexpCreated);
|
||||
return new Regexp{createRegexp<like>(pattern, flags)};
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
}
|
63
dbms/include/DB/Functions/likePatternToRegexp.h
Normal file
63
dbms/include/DB/Functions/likePatternToRegexp.h
Normal file
@ -0,0 +1,63 @@
|
||||
#pragma once
|
||||
|
||||
#include <DB/Core/Types.h>
|
||||
|
||||
namespace DB
{

/** Translates a LIKE expression into an re2 regular expression. Example: abc%def -> ^abc.*def$
  *
  * Rules, as implemented:
  *  - a leading `%` is dropped and no `^` anchor is emitted; otherwise the result starts with `^`;
  *  - a trailing `%` ends the translation without a `$` anchor; otherwise the result ends with `$`;
  *  - any other `%` becomes `.*`, and `_` becomes `.`;
  *  - the characters ^ $ . [ | ( ) ? * + { are escaped with a backslash;
  *  - `\%` and `\_` yield a literal `%` / `_`; a backslash followed by any other character
  *    is copied through together with that character; a trailing lone backslash becomes `\\`.
  */
inline String likePatternToRegexp(const String & pattern)
{
    String res;
    /// Worst case every character is escaped (two output characters each), plus both anchors.
    res.reserve(pattern.size() * 2 + 2);

    const char * pos = pattern.data();
    const char * const end = pos + pattern.size();

    if (pos < end && *pos == '%')
        ++pos;
    else
        res += '^';

    for (; pos < end; ++pos)
    {
        switch (*pos)
        {
            case '^': case '$': case '.': case '[': case '|': case '(': case ')': case '?': case '*': case '+': case '{':
                res += '\\';
                res += *pos;
                break;

            case '%':
                /// A trailing % means "anything may follow": leave the regexp unanchored at the end.
                if (pos + 1 == end)
                    return res;
                res += ".*";
                break;

            case '_':
                res += '.';
                break;

            case '\\':
                if (pos + 1 == end)
                {
                    /// Lone backslash at the end of the pattern: match it literally.
                    /// The cursor is not moved here, so it never goes beyond `end`.
                    res += "\\\\";
                    break;
                }

                ++pos;
                /// % and _ are not special in a regexp, so the escaped form is the bare character.
                if (*pos != '%' && *pos != '_')
                    res += '\\';
                res += *pos;
                break;

            default:
                res += *pos;
                break;
        }
    }

    res += '$';
    return res;
}

}
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,7 @@
|
||||
#include <DB/Common/hex.h>
|
||||
#include <DB/Functions/FunctionFactory.h>
|
||||
#include <DB/Functions/FunctionsURL.h>
|
||||
#include <DB/Functions/FunctionsStringSearch.h>
|
||||
#include <common/find_first_symbols.h>
|
||||
|
||||
namespace DB
|
||||
@ -125,6 +126,55 @@ void DecodeURLComponentImpl::vector_fixed(const ColumnString::Chars_t & data, si
|
||||
throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
/// Name tags for the URL functions. Each struct exposes one function name as a
/// compile-time string and is used as the name template argument of a Function* alias.

/// Tags used with ExtractSubstringImpl-based functions and decodeURLComponent.
struct NameProtocol { static constexpr auto name = "protocol"; };
struct NameDomain { static constexpr auto name = "domain"; };
struct NameDomainWithoutWWW { static constexpr auto name = "domainWithoutWWW"; };
struct NameFirstSignificantSubdomain { static constexpr auto name = "firstSignificantSubdomain"; };
struct NameTopLevelDomain { static constexpr auto name = "topLevelDomain"; };
struct NamePath { static constexpr auto name = "path"; };
struct NamePathFull { static constexpr auto name = "pathFull"; };
struct NameQueryString { static constexpr auto name = "queryString"; };
struct NameFragment { static constexpr auto name = "fragment"; };
struct NameQueryStringAndFragment { static constexpr auto name = "queryStringAndFragment"; };
struct NameDecodeURLComponent { static constexpr auto name = "decodeURLComponent"; };

struct NameCutToFirstSignificantSubdomain { static constexpr auto name = "cutToFirstSignificantSubdomain"; };

/// Tags used with CutSubstringImpl-based functions.
struct NameCutWWW { static constexpr auto name = "cutWWW"; };
struct NameCutQueryString { static constexpr auto name = "cutQueryString"; };
struct NameCutFragment { static constexpr auto name = "cutFragment"; };
struct NameCutQueryStringAndFragment { static constexpr auto name = "cutQueryStringAndFragment"; };

/// Tags used with FunctionsStringSearchToString-based functions.
struct NameExtractURLParameter { static constexpr auto name = "extractURLParameter"; };
struct NameCutURLParameter { static constexpr auto name = "cutURLParameter"; };
|
||||
|
||||
using FunctionProtocol = FunctionStringToString<ExtractSubstringImpl<ExtractProtocol>, NameProtocol> ;
|
||||
using FunctionDomain = FunctionStringToString<ExtractSubstringImpl<ExtractDomain<false> >, NameDomain> ;
|
||||
using FunctionDomainWithoutWWW = FunctionStringToString<ExtractSubstringImpl<ExtractDomain<true> >, NameDomainWithoutWWW>;
|
||||
using FunctionFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<ExtractFirstSignificantSubdomain>, NameFirstSignificantSubdomain>;
|
||||
using FunctionTopLevelDomain = FunctionStringToString<ExtractSubstringImpl<ExtractTopLevelDomain>, NameTopLevelDomain> ;
|
||||
using FunctionPath = FunctionStringToString<ExtractSubstringImpl<ExtractPath>, NamePath> ;
|
||||
using FunctionPathFull = FunctionStringToString<ExtractSubstringImpl<ExtractPathFull>, NamePathFull> ;
|
||||
using FunctionQueryString = FunctionStringToString<ExtractSubstringImpl<ExtractQueryString<true> >, NameQueryString> ;
|
||||
using FunctionFragment = FunctionStringToString<ExtractSubstringImpl<ExtractFragment<true> >, NameFragment> ;
|
||||
using FunctionQueryStringAndFragment = FunctionStringToString<ExtractSubstringImpl<ExtractQueryStringAndFragment<true> >, NameQueryStringAndFragment>;
|
||||
using FunctionDecodeURLComponent = FunctionStringToString<DecodeURLComponentImpl, NameDecodeURLComponent>;
|
||||
|
||||
using FunctionCutToFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<CutToFirstSignificantSubdomain>, NameCutToFirstSignificantSubdomain>;
|
||||
|
||||
using FunctionCutWWW = FunctionStringToString<CutSubstringImpl<ExtractWWW>, NameCutWWW> ;
|
||||
using FunctionCutQueryString = FunctionStringToString<CutSubstringImpl<ExtractQueryString<false> >, NameCutQueryString> ;
|
||||
using FunctionCutFragment = FunctionStringToString<CutSubstringImpl<ExtractFragment<false> >, NameCutFragment> ;
|
||||
using FunctionCutQueryStringAndFragment = FunctionStringToString<CutSubstringImpl<ExtractQueryStringAndFragment<false> >, NameCutQueryStringAndFragment>;
|
||||
|
||||
using FunctionExtractURLParameter = FunctionsStringSearchToString<ExtractURLParameterImpl, NameExtractURLParameter>;
|
||||
using FunctionCutURLParameter = FunctionsStringSearchToString<CutURLParameterImpl, NameCutURLParameter>;
|
||||
using FunctionExtractURLParameters = FunctionTokens<ExtractURLParametersImpl>;
|
||||
using FunctionExtractURLParameters = FunctionTokens<ExtractURLParametersImpl>;
|
||||
using FunctionURLHierarchy = FunctionTokens<URLHierarchyImpl>;
|
||||
using FunctionURLPathHierarchy = FunctionTokens<URLPathHierarchyImpl>;
|
||||
using FunctionExtractURLParameterNames = FunctionTokens<ExtractURLParameterNamesImpl>;
|
||||
|
||||
|
||||
void registerFunctionsURL(FunctionFactory & factory)
|
||||
{
|
||||
|
@ -1,9 +1,31 @@
|
||||
#include <DB/Functions/FunctionFactory.h>
|
||||
#include <DB/Functions/FunctionsVisitParam.h>
|
||||
#include <DB/Functions/FunctionsStringSearch.h>
|
||||
#include <DB/Functions/FunctionsString.h>
|
||||
#include <DB/Functions/FunctionsURL.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Name tags for the visitParam* functions. Each struct exposes one function name as a
/// compile-time string and is used as the name template argument of a FunctionVisitParam* alias.
struct NameVisitParamHas { static constexpr auto name = "visitParamHas"; };
struct NameVisitParamExtractUInt { static constexpr auto name = "visitParamExtractUInt"; };
struct NameVisitParamExtractInt { static constexpr auto name = "visitParamExtractInt"; };
struct NameVisitParamExtractFloat { static constexpr auto name = "visitParamExtractFloat"; };
struct NameVisitParamExtractBool { static constexpr auto name = "visitParamExtractBool"; };
struct NameVisitParamExtractRaw { static constexpr auto name = "visitParamExtractRaw"; };
struct NameVisitParamExtractString { static constexpr auto name = "visitParamExtractString"; };
|
||||
|
||||
|
||||
using FunctionVisitParamHas = FunctionsStringSearch<ExtractParamImpl<HasParam>, NameVisitParamHas>;
|
||||
using FunctionVisitParamExtractUInt = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<UInt64> >, NameVisitParamExtractUInt>;
|
||||
using FunctionVisitParamExtractInt = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<Int64> >, NameVisitParamExtractInt>;
|
||||
using FunctionVisitParamExtractFloat = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<Float64> >, NameVisitParamExtractFloat>;
|
||||
using FunctionVisitParamExtractBool = FunctionsStringSearch<ExtractParamImpl<ExtractBool>, NameVisitParamExtractBool>;
|
||||
using FunctionVisitParamExtractRaw = FunctionsStringSearchToString<ExtractParamToStringImpl<ExtractRaw>, NameVisitParamExtractRaw>;
|
||||
using FunctionVisitParamExtractString = FunctionsStringSearchToString<ExtractParamToStringImpl<ExtractString>, NameVisitParamExtractString>;
|
||||
|
||||
|
||||
|
||||
void registerFunctionsVisitParam(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionVisitParamHas>();
|
||||
|
Loading…
Reference in New Issue
Block a user