/* URL function implementations: a percent-decoding helper, protocol extraction, decodeURLComponent column processing, function name tags, function type aliases and factory registration. NOTE(review): include targets and several template argument lists are not visible in this text - confirm against the original file. */ #include #include #include #include #include namespace DB { /// We assume that size of the dst buf isn't less than src_size. /* Percent-decodes src_size bytes starting at src into dst and returns the number of bytes written to dst. A percent sign followed by two characters for which unhex does not return 0xFF is replaced by the single byte (high << 4) + low. A percent sign whose next two characters are not both accepted is copied through unchanged together with those two characters, and scanning resumes after them. A percent sign with fewer than two characters after it ends the scan and the remaining tail is copied unchanged. NOTE(review): judging by its name, memcpySmallAllowReadWriteOverflow15 presumably may read and write up to 15 bytes past the requested range - confirm that both buffers are padded accordingly. */ static size_t decodeURL(const char * src, size_t src_size, char * dst) { /* src_prev_pos marks the start of the literal run not yet copied to dst; src_curr_pos is the scan position */ const char * src_prev_pos = src; const char * src_curr_pos = src; const char * src_end = src + src_size; char * dst_pos = dst; while (true) { src_curr_pos = find_first_symbols<'%'>(src_curr_pos, src_end); if (src_curr_pos == src_end) { break; } else if (src_end - src_curr_pos < 3) { /* fewer than two characters follow the percent sign: stop and let the tail be copied as is */ src_curr_pos = src_end; break; } else { unsigned char high = unhex(src_curr_pos[1]); unsigned char low = unhex(src_curr_pos[2]); if (high != 0xFF && low != 0xFF) { /* both characters were accepted by unhex: flush the pending literal run, then emit the decoded byte */ unsigned char octet = (high << 4) + low; size_t bytes_to_copy = src_curr_pos - src_prev_pos; memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy); dst_pos += bytes_to_copy; *dst_pos = octet; ++dst_pos; src_prev_pos = src_curr_pos + 3; } /* advance past the three-character sequence whether or not it was decoded; an undecoded one stays in the pending literal run */ src_curr_pos += 3; } } /* copy the literal tail that follows the last decoded sequence */ if (src_prev_pos < src_curr_pos) { size_t bytes_to_copy = src_curr_pos - src_prev_pos; memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy); dst_pos += bytes_to_copy; } return dst_pos - dst; } /* Returns the size of the string view made from the https literal, plus one. */ size_t ExtractProtocol::getReserveLengthForElement() { return makeStringView("https").size() + 1; } /* Sets res_data to data and res_size to the length of the scheme returned by getURLScheme. res_size stays zero when the scheme is empty, when fewer than four bytes remain from the end of the scheme to the end of the input, or when the byte right after the scheme is not a colon. */ void ExtractProtocol::execute(Pos data, size_t size, Pos & res_data, size_t & res_size) { res_data = data; res_size = 0; StringView scheme = getURLScheme(StringView(data, size)); Pos pos = data + scheme.size(); if (scheme.empty() || (data + size) - pos < 4) return; if (pos[0] == ':') res_size = pos - data; } /* Runs decodeURL over every element of a string column. Element i occupies the bytes of data from the previous offset (zero for the first element) up to offsets[i]. Decoded bytes are appended to res_data and the running end position is stored in res_offsets[i]. res_data is first resized to data.size(), which is enough because decodeURL writes at most one byte per input byte, and is shrunk to the bytes actually written at the end. NOTE(review): the target types of the two reinterpret_cast expressions are not visible in this text - confirm against the original file. */ void DecodeURLComponentImpl::vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets, ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets) { res_data.resize(data.size()); size_t size = offsets.size(); res_offsets.resize(size); size_t prev_offset = 0; size_t res_offset = 0; for (size_t i = 0; i < size; ++i) { const char * src_data = reinterpret_cast(&data[prev_offset]); size_t src_size = offsets[i] 
- prev_offset; size_t dst_size = decodeURL(src_data, src_size, reinterpret_cast(res_data.data() + res_offset)); res_offset += dst_size; res_offsets[i] = res_offset; prev_offset = offsets[i]; } /* drop the unused part of the buffer that was sized to the input length */ res_data.resize(res_offset); } /* FixedString input is not supported: always throws Exception with code ErrorCodes::ILLEGAL_COLUMN and uses none of its parameters. */ void DecodeURLComponentImpl::vector_fixed(const ColumnString::Chars_t & data, size_t n, ColumnString::Chars_t & res_data) { throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN); } /* Tag structs: each one exposes a function name as a static constexpr member called name. */ struct NameProtocol { static constexpr auto name = "protocol"; }; struct NameDomain { static constexpr auto name = "domain"; }; struct NameDomainWithoutWWW { static constexpr auto name = "domainWithoutWWW"; }; struct NameFirstSignificantSubdomain { static constexpr auto name = "firstSignificantSubdomain"; }; struct NameTopLevelDomain { static constexpr auto name = "topLevelDomain"; }; struct NamePath { static constexpr auto name = "path"; }; struct NamePathFull { static constexpr auto name = "pathFull"; }; struct NameQueryString { static constexpr auto name = "queryString"; }; struct NameFragment { static constexpr auto name = "fragment"; }; struct NameQueryStringAndFragment { static constexpr auto name = "queryStringAndFragment"; }; struct NameDecodeURLComponent { static constexpr auto name = "decodeURLComponent"; }; struct NameCutToFirstSignificantSubdomain { static constexpr auto name = "cutToFirstSignificantSubdomain"; }; struct NameCutWWW { static constexpr auto name = "cutWWW"; }; struct NameCutQueryString { static constexpr auto name = "cutQueryString"; }; struct NameCutFragment { static constexpr auto name = "cutFragment"; }; struct NameCutQueryStringAndFragment { static constexpr auto name = "cutQueryStringAndFragment"; }; struct NameExtractURLParameter { static constexpr auto name = "extractURLParameter"; }; struct NameCutURLParameter { static constexpr auto name = "cutURLParameter"; }; /* Function type aliases built from the FunctionStringToString, FunctionsStringSearchToString and FunctionTokens templates. NOTE(review): most template argument lists are not visible in this text, so which implementation each alias binds to cannot be read from here - confirm against the original file. */ using FunctionProtocol = FunctionStringToString, NameProtocol>; using FunctionDomain = FunctionStringToString>, 
NameDomain>; using FunctionDomainWithoutWWW = FunctionStringToString>, NameDomainWithoutWWW>; using FunctionFirstSignificantSubdomain = FunctionStringToString, NameFirstSignificantSubdomain>; using FunctionTopLevelDomain = FunctionStringToString, NameTopLevelDomain>; using FunctionPath = FunctionStringToString, NamePath>; using FunctionPathFull = FunctionStringToString, NamePathFull>; using FunctionQueryString = FunctionStringToString>, NameQueryString>; using FunctionFragment = FunctionStringToString>, NameFragment>; using FunctionQueryStringAndFragment = FunctionStringToString>, NameQueryStringAndFragment>; using FunctionDecodeURLComponent = FunctionStringToString; using FunctionCutToFirstSignificantSubdomain = FunctionStringToString, NameCutToFirstSignificantSubdomain>; using FunctionCutWWW = FunctionStringToString, NameCutWWW>; using FunctionCutQueryString = FunctionStringToString>, NameCutQueryString>; using FunctionCutFragment = FunctionStringToString>, NameCutFragment>; using FunctionCutQueryStringAndFragment = FunctionStringToString>, NameCutQueryStringAndFragment>; using FunctionExtractURLParameter = FunctionsStringSearchToString; using FunctionCutURLParameter = FunctionsStringSearchToString; /* NOTE(review): FunctionExtractURLParameters is declared twice in a row with the same visible definition; the second declaration looks redundant. */ using FunctionExtractURLParameters = FunctionTokens; using FunctionExtractURLParameters = FunctionTokens; using FunctionURLHierarchy = FunctionTokens; using FunctionURLPathHierarchy = FunctionTokens; using FunctionExtractURLParameterNames = FunctionTokens; /* Calls registerFunction on the given factory once per URL function, 22 calls in total. NOTE(review): the template argument of each registerFunction call is not visible in this text - confirm which function types are registered and in which order. */ void registerFunctionsURL(FunctionFactory & factory) { factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); 
factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); } }