#pragma once #include #include namespace DB { /** URL processing functions. See implementation in separate .cpp files. * All functions are not strictly follow RFC, instead they are maximally simplified for performance reasons. * * Functions for extraction parts of URL. * If URL has nothing like, then empty string is returned. * * domain * domainWithoutWWW * topLevelDomain * protocol * path * queryString * fragment * queryStringAndFragment * * Functions, removing parts from URL. * If URL has nothing like, then it is returned unchanged. * * cutWWW * cutFragment * cutQueryString * cutQueryStringAndFragment * * Extract value of parameter in query string or in fragment identifier. Return empty string, if URL has no such parameter. * If there are many parameters with same name - return value of first one. Value is not %-decoded. * * extractURLParameter(URL, name) * * Extract all parameters from URL in form of array of strings name=value. * extractURLParameters(URL) * * Extract names of all parameters from URL in form of array of strings. * extractURLParameterNames(URL) * * Remove specified parameter from URL. * cutURLParameter(URL, name) * * Get array of URL 'hierarchy' as in Yandex.Metrica tree-like reports. See docs. * URLHierarchy(URL) */ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; } using Pos = const char *; /** Select part of string using the Extractor. */ template struct ExtractSubstringImpl { static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { size_t size = offsets.size(); res_offsets.resize(size); res_data.reserve(size * Extractor::getReserveLengthForElement()); size_t prev_offset = 0; size_t res_offset = 0; /// Matched part. Pos start; size_t length; for (size_t i = 0; i < size; ++i) { Extractor::execute(reinterpret_cast(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length); res_data.resize(res_data.size() + length + 1); memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], start, length); res_offset += length + 1; res_data[res_offset - 1] = 0; res_offsets[i] = res_offset; prev_offset = offsets[i]; } } static void constant(const std::string & data, std::string & res_data) { Pos start; size_t length; Extractor::execute(data.data(), data.size(), start, length); res_data.assign(start, length); } static void vector_fixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) { throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN); } }; /** Delete part of string using the Extractor. */ template struct CutSubstringImpl { static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { res_data.reserve(data.size()); size_t size = offsets.size(); res_offsets.resize(size); size_t prev_offset = 0; size_t res_offset = 0; /// Matched part. Pos start; size_t length; for (size_t i = 0; i < size; ++i) { const char * current = reinterpret_cast(&data[prev_offset]); Extractor::execute(current, offsets[i] - prev_offset - 1, start, length); size_t start_index = start - reinterpret_cast(data.data()); res_data.resize(res_data.size() + offsets[i] - prev_offset - length); memcpySmallAllowReadWriteOverflow15( &res_data[res_offset], current, start - current); memcpySmallAllowReadWriteOverflow15( &res_data[res_offset + start - current], start + length, offsets[i] - start_index - length); res_offset += offsets[i] - prev_offset - length; res_offsets[i] = res_offset; prev_offset = offsets[i]; } } static void constant(const std::string & data, std::string & res_data) { Pos start; size_t length; Extractor::execute(data.data(), data.size(), start, length); res_data.reserve(data.size() - length); res_data.append(data.data(), start); res_data.append(start + length, data.data() + data.size()); } static void vector_fixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) { throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN); } }; }