ClickHouse/dbms/include/DB/Functions/FunctionsURL.h

#pragma once

#include <DB/DataTypes/DataTypeString.h>
#include <DB/Columns/ColumnString.h>
#include <DB/Columns/ColumnConst.h>
#include <DB/Common/StringUtils.h>
#include <DB/Common/StringView.h>
#include <DB/Functions/FunctionsString.h>
#include <DB/Functions/FunctionsStringSearch.h>
#include <DB/Functions/FunctionsStringArray.h>

#ifdef __APPLE__
#include <common/apple_memrchr.h>
#endif

namespace DB
{

/** URL processing functions.
  * None of these functions strictly follows the RFC; instead, they are simplified as much as possible for performance reasons.
  *
  * Functions for extraction parts of URL.
  * If the URL has no such part, an empty string is returned.
  *
  *  domain
  *  domainWithoutWWW
  *  topLevelDomain
  *  protocol
  *  path
  *  queryString
  *  fragment
  *  queryStringAndFragment
  *
  * Functions, removing parts from URL.
  * If the URL has no such part, it is returned unchanged.
  *
  *  cutWWW
  *  cutFragment
  *  cutQueryString
  *  cutQueryStringAndFragment
  *
  * Extract value of parameter in query string or in fragment identifier. Return empty string, if URL has no such parameter.
  * If there are many parameters with same name - return value of first one. Value is not %-decoded.
  *
  *  extractURLParameter(URL, name)
  *
  * Extract all parameters from URL in form of array of strings name=value.
  *  extractURLParameters(URL)
  *
  * Extract names of all parameters from URL in form of array of strings.
  *  extractURLParameterNames(URL)
  *
  * Remove specified parameter from URL.
  *  cutURLParameter(URL, name)
  *
  * Get array of URL 'hierarchy' as in Yandex.Metrica tree-like reports. See docs.
  *  URLHierarchy(URL)
  */

/// Position inside a string: a pointer to a constant character.
using Pos = const char *;


/** Extracts the scheme (protocol) from the given URL.
  *
  * A scheme is an ASCII letter followed by any number of ASCII letters, digits, '+', '-' or '.':
  *   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  *
  * Returns a view of the leading scheme characters of `url`, or an empty StringView
  * if `url` is empty or does not start with an ASCII letter.
  * The character that follows the scheme is not checked here.
  */
inline StringView getURLScheme(const StringView & url)
{
	const char * p = url.data();
	const char * end = url.data() + url.size();

	/// An empty view has no first character to inspect, so it must not be dereferenced.
	if (p == end || !isAlphaASCII(*p))
		return StringView();

	for (++p; p < end; ++p)
	{
		if (!(isAlphaNumericASCII(*p) || *p == '+' || *p == '-' || *p == '.'))
			break;
	}

	return StringView(url.data(), p - url.data());
}


/** Extracts the host from the given URL.
  *
  * The URL must start with a scheme (see getURLScheme) immediately followed by "://".
  * The host then runs up to the first ':', '/', '?' or '#' (or the end of the URL);
  * every '@' met before that point restarts the host right after it.
  *
  * Returns an empty StringView if there is no scheme, no "://" after it, or the host is empty.
  */
inline StringView getURLHost(const StringView & url)
{
	StringView scheme = getURLScheme(url);
	const char * p = url.data() + scheme.size();
	const char * end = url.data() + url.size();

	/// A colon must follow the scheme and the authority component must start with "//".
	/// All three characters have to lie inside the view before any of them is read.
	if (end - p < 3 || p[0] != ':' || p[1] != '/' || p[2] != '/')
		return StringView();

	p += 3;

	const char * st = p;

	for (; p < end; ++p)
	{
		if (*p == '@')
		{
			/// Everything before '@' was not the host; start over after it.
			st = p + 1;
		}
		else if (*p == ':' || *p == '/' || *p == '?' || *p == '#')
		{
			break;
		}
	}

	return (p == st) ? StringView() : StringView(st, p - st);
}


/** Extractor used for the 'protocol' function.
  * Only declared here; the definitions are not visible in this header.
  */
struct ExtractProtocol
{
	/// Per-element size hint: ExtractSubstringImpl::vector multiplies it by the number of rows to reserve the result buffer.
	static size_t getReserveLengthForElement();

	/// Reports the found piece of [data, data + size) through res_data / res_size.
	/// NOTE(review): presumably yields the scheme without "://" (ExtractWWW skips three more characters after it) - confirm against the definition.
	static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size);
};

template <bool without_www>
struct ExtractDomain
{
	static size_t getReserveLengthForElement() { return 15; }

	static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
	{
		StringView host = getURLHost(StringView(data, size));

		if (host.empty())
		{
			res_data = data;
			res_size = 0;
		}
		else
		{
			if (without_www && host.size() > 4 && !strncmp(host.data(), "www.", 4))
				host = host.substr(4);

			res_data = host.data();
			res_size = host.size();
		}
	}
};

struct ExtractFirstSignificantSubdomain
{
	static size_t getReserveLengthForElement() { return 10; }

	static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size, Pos * out_domain_end = nullptr)
	{
		res_data = data;
		res_size = 0;

		Pos tmp;
		size_t domain_length;
		ExtractDomain<true>::execute(data, size, tmp, domain_length);

		if (domain_length == 0)
			return;

		if (out_domain_end)
			*out_domain_end = tmp + domain_length;

		/// cut useless dot
		if (tmp[domain_length - 1] == '.')
			--domain_length;

		res_data = tmp;
		res_size = domain_length;

		auto begin = tmp;
		auto end = begin + domain_length;
		const char * last_3_periods[3]{};
		auto pos = static_cast<const char *>(memchr(begin, '.', domain_length));

		while (pos)
		{
			last_3_periods[2] = last_3_periods[1];
			last_3_periods[1] = last_3_periods[0];
			last_3_periods[0] = pos;
			pos = static_cast<const char *>(memchr(pos + 1, '.', end - pos - 1));
		}

		if (!last_3_periods[0])
			return;

		if (!last_3_periods[1])
		{
			res_size = last_3_periods[0] - begin;
			return;
		}

		if (!last_3_periods[2])
			last_3_periods[2] = begin - 1;

		if (!strncmp(last_3_periods[1] + 1, "com.", 4)		/// Note that in ColumnString every value has zero byte after it.
			|| !strncmp(last_3_periods[1] + 1, "net.", 4)
			|| !strncmp(last_3_periods[1] + 1, "org.", 4)
			|| !strncmp(last_3_periods[1] + 1, "co.", 3))
		{
			res_data += last_3_periods[2] + 1 - begin;
			res_size = last_3_periods[1] - last_3_periods[2] - 1;
			return;
		}

		res_data += last_3_periods[1] + 1 - begin;
		res_size = last_3_periods[0] - last_3_periods[1] - 1;
	}
};

struct CutToFirstSignificantSubdomain
{
	static size_t getReserveLengthForElement() { return 15; }

	static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size)
	{
		res_data = data;
		res_size = 0;

		Pos tmp_data;
		size_t tmp_length;
		Pos domain_end;
		ExtractFirstSignificantSubdomain::execute(data, size, tmp_data, tmp_length, &domain_end);

		if (tmp_length == 0)
			return;

		res_data = tmp_data;
		res_size = domain_end - tmp_data;
	}
};

struct ExtractTopLevelDomain
{
	static size_t getReserveLengthForElement() { return 5; }

	static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
	{
		StringView host = getURLHost(StringView(data, size));

		res_data = data;
		res_size = 0;

		if (!host.empty())
		{
			if (host.back() == '.')
				host = StringView(host.data(), host.size() - 1);

			Pos last_dot = reinterpret_cast<Pos>(memrchr(host.data(), '.', host.size()));

			if (!last_dot)
				return;
			/// Для IPv4-адресов не выделяем ничего.
			if (last_dot[1] <= '9')
				return;

			res_data = last_dot + 1;
			res_size = (host.data() + host.size()) - res_data;
		}
	}
};

struct ExtractPath
{
	static size_t getReserveLengthForElement() { return 25; }

	static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
	{
		res_data = data;
		res_size = 0;

		Pos pos = data;
		Pos end = pos + size;

		if (nullptr != (pos = strchr(data, '/')) && pos[1] == '/' && nullptr != (pos = strchr(pos + 2, '/')))
		{
			Pos query_string_or_fragment = strpbrk(pos, "?#");

			res_data = pos;
			res_size = (query_string_or_fragment ? query_string_or_fragment : end) - res_data;
		}
	}
};

struct ExtractPathFull
{
	static size_t getReserveLengthForElement() { return 30; }

	static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size)
	{
		res_data = data;
		res_size = 0;

		Pos pos = data;
		Pos end = pos + size;

		if (nullptr != (pos = strchr(data, '/')) && pos[1] == '/' && nullptr != (pos = strchr(pos + 2, '/')))
		{
			res_data = pos;
			res_size = end - res_data;
		}
	}
};

template <bool without_leading_char>
struct ExtractQueryString
{
	static size_t getReserveLengthForElement() { return 10; }

	static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
	{
		res_data = data;
		res_size = 0;

		Pos pos = data;
		Pos end = pos + size;

		if (nullptr != (pos = strchr(data, '?')))
		{
			Pos fragment = strchr(pos, '#');

			res_data = pos + (without_leading_char ? 1 : 0);
			res_size = (fragment ? fragment : end) - res_data;
		}
	}
};

template <bool without_leading_char>
struct ExtractFragment
{
	static size_t getReserveLengthForElement() { return 10; }

	static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
	{
		res_data = data;
		res_size = 0;

		Pos pos = data;
		Pos end = pos + size;

		if (nullptr != (pos = strchr(data, '#')))
		{
			res_data = pos + (without_leading_char ? 1 : 0);
			res_size = end - res_data;
		}
	}
};

template <bool without_leading_char>
struct ExtractQueryStringAndFragment
{
	static size_t getReserveLengthForElement() { return 20; }

	static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
	{
		res_data = data;
		res_size = 0;

		Pos pos = data;
		Pos end = pos + size;

		if (nullptr != (pos = strchr(data, '?')))
		{
			res_data = pos + (without_leading_char ? 1 : 0);
			res_size = end - res_data;
		}
		else if (nullptr != (pos = strchr(data, '#')))
		{
			res_data = pos;
			res_size = end - res_data;
		}
	}
};

/// С точкой на конце.
struct ExtractWWW
{
	static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
	{
		res_data = data;
		res_size = 0;

		Pos pos = data;
		Pos end = pos + size;

		Pos tmp;
		size_t protocol_length;
		ExtractProtocol::execute(data, size, tmp, protocol_length);
		pos += protocol_length + 3;

		if (pos >= end || pos[-1] != '/' || pos[-2] != '/')
			return;

		if (pos + 4 < end && !strncmp(pos, "www.", 4))
		{
			res_data = pos;
			res_size = 4;
		}
	}
};


/** For every string of the column, extracts the value of the URL parameter named `pattern`.
  * The parameter is searched for after the first '?' or '#'; an occurrence of "name=" counts
  * only if it is directly preceded by '?', '#' or '&'. The value runs up to the next '&' or '#'
  * (or to the end of the string). If the parameter is absent, an empty string is written.
  * Relies on every string being zero-terminated.
  */
struct ExtractURLParameterImpl
{
	static void vector(const ColumnString::Chars_t & data,
					    const ColumnString::Offsets_t & offsets,
					    std::string pattern,
						ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets)
	{
		res_data.reserve(data.size()  / 5);
		res_offsets.resize(offsets.size());

		pattern += '=';
		const char * const needle = pattern.c_str();
		const size_t needle_len = pattern.size();

		size_t src_begin = 0;
		size_t dst_pos = 0;

		for (size_t row = 0; row < offsets.size(); ++row)
		{
			const size_t src_end = offsets[row];
			const char * const url = reinterpret_cast<const char *>(&data[src_begin]);

			/// Start of the parameter value, or nullptr if the parameter was not found.
			const char * value = nullptr;

			if (const char * const query = strpbrk(url, "?#"))
			{
				for (value = strstr(query + 1, needle); value != nullptr; value = strstr(value, needle))
				{
					const char preceding = value[-1];
					value += needle_len;

					/// "name=" must start a parameter, not merely end a longer name.
					if (preceding == '?' || preceding == '#' || preceding == '&')
						break;
				}
			}

			if (value == nullptr)
			{
				res_data.resize(dst_pos + 1);
			}
			else
			{
				const char * value_end = strpbrk(value, "&#");
				if (value_end == nullptr)
					value_end = value + strlen(value);

				const size_t value_len = value_end - value;

				res_data.resize(dst_pos + value_len + 1);
				memcpySmallAllowReadWriteOverflow15(&res_data[dst_pos], value, value_len);
				dst_pos += value_len;
			}

			res_data[dst_pos] = 0;
			++dst_pos;
			res_offsets[row] = dst_pos;

			src_begin = src_end;
		}
	}
};


/** For every string of the column, removes the URL parameter named `pattern` together with its value.
  * The parameter is searched for after the first '?' or '#'; an occurrence of "name=" counts
  * only if it is directly preceded by '?', '#' or '&'. Occurrences that are merely the tail of a longer
  * name (e.g. "xa=" when cutting "a") are skipped and the search continues, the same way as in
  * ExtractURLParameterImpl. Only the first matching parameter is removed.
  * One adjacent '&' is removed as well: the one after the value if present, otherwise the one before the name.
  * If the parameter is absent, the string is copied unchanged.
  * Relies on every string being zero-terminated.
  */
struct CutURLParameterImpl
{
	static void vector(const ColumnString::Chars_t & data,
					    const ColumnString::Offsets_t & offsets,
					    std::string pattern,
						ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets)
	{
		res_data.reserve(data.size());
		res_offsets.resize(offsets.size());

		pattern += '=';
		const char * param_str = pattern.c_str();
		size_t param_len = pattern.size();

		size_t prev_offset = 0;
		size_t res_offset = 0;

		for (size_t i = 0; i < offsets.size(); ++i)
		{
			size_t cur_offset = offsets[i];

			const char * url_begin = reinterpret_cast<const char *>(&data[prev_offset]);
			/// Points at the last byte of the current element (its terminating zero).
			const char * url_end = reinterpret_cast<const char *>(&data[cur_offset]) - 1;

			/// [begin_pos, end_pos) is the range to remove; it stays empty if the parameter is not found.
			const char * begin_pos = url_begin;
			const char * end_pos = begin_pos;

			do
			{
				const char * begin = strpbrk(url_begin, "?#");
				if (begin == nullptr)
					break;

				/// Find the first occurrence of "name=" that really starts a parameter.
				const char * pos = begin + 1;
				while (nullptr != (pos = strstr(pos, param_str)))
				{
					if (pos[-1] == '?' || pos[-1] == '#' || pos[-1] == '&')
						break;

					pos += param_len;
				}

				if (pos == nullptr)
					break;

				begin_pos = pos;
				end_pos = begin_pos + param_len;

				/// Skip the value.
				while (*end_pos && *end_pos != '&' && *end_pos != '#')
					++end_pos;

				/// Capture the '&' before or after the parameter.
				if (*end_pos == '&')
					++end_pos;
				else if (begin_pos[-1] == '&')
					--begin_pos;
			} while (false);

			size_t cut_length = (url_end - url_begin) - (end_pos - begin_pos);
			res_data.resize(res_offset + cut_length + 1);
			memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], url_begin, begin_pos - url_begin);
			memcpySmallAllowReadWriteOverflow15(&res_data[res_offset] + (begin_pos - url_begin), end_pos, url_end - end_pos);
			res_offset += cut_length + 1;
			res_data[res_offset - 1] = 0;
			res_offsets[i] = res_offset;

			prev_offset = cur_offset;
		}
	}
};


/** Token generator for extractURLParameters: yields the URL parameters one by one.
  * Parameters are looked for after the first '?' or '#'. A token starts after the latest '?'
  * met while scanning and ends before the next '&' or '#' (or at the end of the string when
  * the token contains '='). Relies on the string being zero-terminated.
  */
class ExtractURLParametersImpl
{
private:
	Pos pos;
	Pos end;
	bool first;

public:
	static constexpr auto name = "extractURLParameters";
	static String getName() { return name; }

	static size_t getNumberOfArguments() { return 1; }

	static void checkArguments(const DataTypes & arguments)
	{
		if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
			throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
			ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
	}

	void init(Block & block, const ColumnNumbers & arguments) {}

	/// Returns the position of the argument that is the column of strings.
	size_t getStringsArgumentPosition()
	{
		return 0;
	}

	/// Called for each next string.
	void set(Pos pos_, Pos end_)
	{
		pos = pos_;
		end = end_;
		first = true;
	}

	/// Get the next token, if any, or return false.
	bool get(Pos & token_begin, Pos & token_end)
	{
		if (nullptr == pos)
			return false;

		if (first)
		{
			/// On the first call, jump to the beginning of the query string or the fragment.
			first = false;
			pos = strpbrk(pos, "?#");
			if (nullptr == pos)
				return false;
			++pos;
		}

		/// Find the delimiter that ends the parameter name; every '?' on the way restarts the token.
		for (;;)
		{
			token_begin = pos;
			pos = strpbrk(pos, "=&#?");
			if (nullptr == pos)
				return false;

			if ('?' != *pos)
				break;

			++pos;
		}

		if ('=' == *pos)
		{
			/// The parameter has a value: it lasts until the next '&' or '#', or until the end of the string.
			pos = strpbrk(pos + 1, "&#");
			if (nullptr == pos)
			{
				token_end = end;
				return true;
			}
		}

		token_end = pos;
		++pos;

		return true;
	}
};

/** Token generator for extractURLParameterNames: yields the names of the URL parameters one by one.
  * The first name is looked for after the first '?' or '#', every next one after the next '&' or '#'.
  * A name starts after the latest '?' met while scanning and ends before the next '=', '&' or '#'.
  * Relies on the string being zero-terminated.
  */
class ExtractURLParameterNamesImpl
{
private:
	Pos pos;
	Pos end;
	bool first;

public:
	static constexpr auto name = "extractURLParameterNames";
	static String getName() { return name; }

	static size_t getNumberOfArguments() { return 1; }

	static void checkArguments(const DataTypes & arguments)
	{
		if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
			throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
			ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
	}

	/// Returns the position of the argument that is the column of strings.
	size_t getStringsArgumentPosition()
	{
		return 0;
	}

	void init(Block & block, const ColumnNumbers & arguments) {}

	/// Called for each next string.
	void set(Pos pos_, Pos end_)
	{
		pos = pos_;
		end = end_;
		first = true;
	}

	/// Get the next token, if any, or return false.
	bool get(Pos & token_begin, Pos & token_end)
	{
		if (nullptr == pos)
			return false;

		/// The first name follows '?' or '#', all the others follow '&' or '#'.
		const char * const separators = first ? "?#" : "&#";
		first = false;

		pos = strpbrk(pos, separators);
		if (nullptr == pos)
			return false;
		++pos;

		/// Find the end of the name; every '?' on the way restarts the token.
		for (;;)
		{
			token_begin = pos;

			pos = strpbrk(pos, "=&#?");
			if (nullptr == pos)
				return false;

			token_end = pos;

			if ('?' != *pos)
				return true;

			++pos;
		}
	}
};

/** Token generator for URLHierarchy.
  * Every token starts at the beginning of the URL. The first one ends after the domain
  * (including the '/', '?' or '#' that follows it, if any); each next one is extended by one more
  * piece of the path or query, up to and including the next '/', '?' or '#'.
  * URLs without "scheme://" followed by at least one more character are returned as a single token.
  */
class URLHierarchyImpl
{
private:
	Pos begin;
	Pos pos;
	Pos end;

public:
	static constexpr auto name = "URLHierarchy";
	static String getName() { return name; }

	static size_t getNumberOfArguments() { return 1; }

	static void checkArguments(const DataTypes & arguments)
	{
		if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
			throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
			ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
	}

	void init(Block & block, const ColumnNumbers & arguments) {}

	/// Returns the position of the argument that is the column of strings.
	size_t getStringsArgumentPosition()
	{
		return 0;
	}

	/// Called for each next string.
	void set(Pos pos_, Pos end_)
	{
		begin = pos = pos_;
		end = end_;
	}

	/// Get the next token, if any, or return false.
	bool get(Pos & token_begin, Pos & token_end)
	{
		/// Code from URLParser.

		if (pos == end)
			return false;

		if (pos == begin)
		{
			/// Parse everything that precedes the path.

			/// Assume that the protocol has already been converted to lower case.
			/// The bounds are inclusive: 'a', 'z', '0' and '9' are valid scheme characters too.
			while (pos < end && ((*pos >= 'a' && *pos <= 'z') || (*pos >= '0' && *pos <= '9')))
				++pos;

			/** The hierarchy is computed only for URLs that have a protocol followed by two slashes
			 * (http, file are suitable; mailto, magnet are not), with at least something after the two slashes.
			 * For all the others, simply return the full URL as the only element of the hierarchy.
			 */
			if (pos == begin || pos == end || !(*pos++ == ':' && pos < end && *pos++ == '/' && pos < end && *pos++ == '/' && pos < end))
			{
				pos = end;
				token_begin = begin;
				token_end = end;
				return true;
			}

			/// For simplicity, the domain is everything after the protocol and the two slashes, up to the next slash, '?' or '#'.
			while (pos < end && !(*pos == '/' || *pos == '?' || *pos == '#'))
				++pos;

			if (pos != end)
				++pos;

			token_begin = begin;
			token_end = pos;

			return true;
		}

		/// Go to the next '/', '?' or '#', skipping all those at the beginning.
		while (pos < end && (*pos == '/' || *pos == '?' || *pos == '#'))
			++pos;
		if (pos == end)
			return false;
		while (pos < end && !(*pos == '/' || *pos == '?' || *pos == '#'))
			++pos;

		if (pos != end)
			++pos;

		token_begin = begin;
		token_end = pos;

		return true;
	}
};


/** Token generator for URLPathHierarchy.
  * Works like URLHierarchyImpl, but every token starts at the end of the domain
  * (at the '/', '?' or '#' that follows it), so the protocol and the domain are not included,
  * and there is no separate token for the domain itself.
  * URLs without "scheme://" followed by at least one more character produce no tokens.
  */
class URLPathHierarchyImpl
{
private:
	Pos begin;
	Pos pos;
	Pos end;
	Pos start;

public:
	static constexpr auto name = "URLPathHierarchy";
	static String getName() { return name; }

	static size_t getNumberOfArguments() { return 1; }

	static void checkArguments(const DataTypes & arguments)
	{
		if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
			throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
			ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
	}

	void init(Block & block, const ColumnNumbers & arguments) {}

	/// Returns the position of the argument that is the column of strings.
	size_t getStringsArgumentPosition()
	{
		return 0;
	}

	/// Called for each next string.
	void set(Pos pos_, Pos end_)
	{
		begin = pos = pos_;
		start = begin;
		end = end_;
	}

	/// Get the next token, if any, or return false.
	bool get(Pos & token_begin, Pos & token_end)
	{
		/// Code from URLParser.

		if (pos == end)
			return false;

		if (pos == begin)
		{
			/// Parse everything that precedes the path.

			/// Assume that the protocol has already been converted to lower case.
			/// The bounds are inclusive: 'a', 'z', '0' and '9' are valid scheme characters too.
			while (pos < end && ((*pos >= 'a' && *pos <= 'z') || (*pos >= '0' && *pos <= '9')))
				++pos;

			/** The hierarchy is computed only for URLs that have a protocol followed by two slashes
			 * (http, file are suitable; mailto, magnet are not), with at least something after the two slashes.
			 * For all the others, simply return an empty array.
			 */
			if (pos == begin || pos == end || !(*pos++ == ':' && pos < end && *pos++ == '/' && pos < end && *pos++ == '/' && pos < end))
			{
				pos = end;
				return false;
			}

			/// For simplicity, the domain is everything after the protocol and the two slashes, up to the next slash, '?' or '#'.
			while (pos < end && !(*pos == '/' || *pos == '?' || *pos == '#'))
				++pos;

			/// All tokens start here, right after the domain.
			start = pos;

			if (pos != end)
				++pos;
		}

		/// Go to the next '/', '?' or '#', skipping all those at the beginning.
		while (pos < end && (*pos == '/' || *pos == '?' || *pos == '#'))
			++pos;
		if (pos == end)
			return false;
		while (pos < end && !(*pos == '/' || *pos == '?' || *pos == '#'))
			++pos;

		if (pos != end)
			++pos;

		token_begin = start;
		token_end = pos;

		return true;
	}
};


/** Выделить кусок строки, используя Extractor.
  */
template <typename Extractor>
struct ExtractSubstringImpl
{
	static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets,
		ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets)
	{
		size_t size = offsets.size();
		res_offsets.resize(size);
		res_data.reserve(size * Extractor::getReserveLengthForElement());

		size_t prev_offset = 0;
		size_t res_offset = 0;

		/// Выделенный кусок.
		Pos start;
		size_t length;

		for (size_t i = 0; i < size; ++i)
		{
			Extractor::execute(reinterpret_cast<const char *>(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length);

			res_data.resize(res_data.size() + length + 1);
			memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], start, length);
			res_offset += length + 1;
			res_data[res_offset - 1] = 0;

			res_offsets[i] = res_offset;
			prev_offset = offsets[i];
		}
	}

	static void constant(const std::string & data,
		std::string & res_data)
	{
		Pos start;
		size_t length;
		Extractor::execute(data.data(), data.size(), start, length);
		res_data.assign(start, length);
	}

	static void vector_fixed(const ColumnString::Chars_t & data, size_t n,
		ColumnString::Chars_t & res_data)
	{
		throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN);
	}
};


/** Удалить кусок строки, используя Extractor.
  */
template <typename Extractor>
struct CutSubstringImpl
{
	static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets,
		ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets)
	{
		res_data.reserve(data.size());
		size_t size = offsets.size();
		res_offsets.resize(size);

		size_t prev_offset = 0;
		size_t res_offset = 0;

		/// Выделенный кусок.
		Pos start;
		size_t length;

		for (size_t i = 0; i < size; ++i)
		{
			const char * current = reinterpret_cast<const char *>(&data[prev_offset]);
			Extractor::execute(current, offsets[i] - prev_offset - 1, start, length);
			size_t start_index = start - reinterpret_cast<const char *>(&data[0]);

			res_data.resize(res_data.size() + offsets[i] - prev_offset - length);
			memcpySmallAllowReadWriteOverflow15(
				&res_data[res_offset], current, start - current);
			memcpySmallAllowReadWriteOverflow15(
				&res_data[res_offset + start - current], start + length, offsets[i] - start_index - length);
			res_offset += offsets[i] - prev_offset - length;

			res_offsets[i] = res_offset;
			prev_offset = offsets[i];
		}
	}

	static void constant(const std::string & data,
		std::string & res_data)
	{
		Pos start;
		size_t length;
		Extractor::execute(data.data(), data.size(), start, length);
		res_data.reserve(data.size() - length);
		res_data.append(data.data(), start);
		res_data.append(start + length, data.data() + data.size());
	}

	static void vector_fixed(const ColumnString::Chars_t & data, size_t n,
		ColumnString::Chars_t & res_data)
	{
		throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN);
	}
};


/** Percent-decoding of URL data; used for the 'decodeURLComponent' function.
  * Only declared here; the definitions are not visible in this header.
  * The three entry points have the same signatures as those of ExtractSubstringImpl and CutSubstringImpl.
  */
struct DecodeURLComponentImpl
{
	/// Variant for a column of strings (data + offsets in, data + offsets out).
	static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets,
		ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets);

	/// Variant for a single constant string.
	static void constant(const std::string & data,
		std::string & res_data);

	/// Variant for a FixedString column.
	/// NOTE(review): the other implementations in this file throw here - confirm what this one does.
	static void vector_fixed(const ColumnString::Chars_t & data, size_t n,
		ColumnString::Chars_t & res_data);
};


/// Tag types that carry the SQL-visible names of the functions.

/// Functions extracting a part of the URL.
struct NameProtocol { static constexpr const char * name = "protocol"; };
struct NameDomain { static constexpr const char * name = "domain"; };
struct NameDomainWithoutWWW { static constexpr const char * name = "domainWithoutWWW"; };
struct NameFirstSignificantSubdomain { static constexpr const char * name = "firstSignificantSubdomain"; };
struct NameTopLevelDomain { static constexpr const char * name = "topLevelDomain"; };
struct NamePath { static constexpr const char * name = "path"; };
struct NamePathFull { static constexpr const char * name = "pathFull"; };
struct NameQueryString { static constexpr const char * name = "queryString"; };
struct NameFragment { static constexpr const char * name = "fragment"; };
struct NameQueryStringAndFragment { static constexpr const char * name = "queryStringAndFragment"; };
struct NameDecodeURLComponent { static constexpr const char * name = "decodeURLComponent"; };

struct NameCutToFirstSignificantSubdomain { static constexpr const char * name = "cutToFirstSignificantSubdomain"; };

/// Functions removing a part of the URL.
struct NameCutWWW { static constexpr const char * name = "cutWWW"; };
struct NameCutQueryString { static constexpr const char * name = "cutQueryString"; };
struct NameCutFragment { static constexpr const char * name = "cutFragment"; };
struct NameCutQueryStringAndFragment { static constexpr const char * name = "cutQueryStringAndFragment"; };

/// Functions working with a single URL parameter.
struct NameExtractURLParameter { static constexpr const char * name = "extractURLParameter"; };
struct NameCutURLParameter { static constexpr const char * name = "cutURLParameter"; };

/// Functions extracting a part of the URL.
using FunctionProtocol = FunctionStringToString<ExtractSubstringImpl<ExtractProtocol>, NameProtocol>;
using FunctionDomain = FunctionStringToString<ExtractSubstringImpl<ExtractDomain<false> >, NameDomain>;
using FunctionDomainWithoutWWW = FunctionStringToString<ExtractSubstringImpl<ExtractDomain<true> >, NameDomainWithoutWWW>;
using FunctionFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<ExtractFirstSignificantSubdomain>, NameFirstSignificantSubdomain>;
using FunctionTopLevelDomain = FunctionStringToString<ExtractSubstringImpl<ExtractTopLevelDomain>, NameTopLevelDomain>;
using FunctionPath = FunctionStringToString<ExtractSubstringImpl<ExtractPath>, NamePath>;
using FunctionPathFull = FunctionStringToString<ExtractSubstringImpl<ExtractPathFull>, NamePathFull>;
using FunctionQueryString = FunctionStringToString<ExtractSubstringImpl<ExtractQueryString<true> >, NameQueryString>;
using FunctionFragment = FunctionStringToString<ExtractSubstringImpl<ExtractFragment<true> >, NameFragment>;
using FunctionQueryStringAndFragment = FunctionStringToString<ExtractSubstringImpl<ExtractQueryStringAndFragment<true> >, NameQueryStringAndFragment>;
using FunctionDecodeURLComponent = FunctionStringToString<DecodeURLComponentImpl, NameDecodeURLComponent>;

using FunctionCutToFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<CutToFirstSignificantSubdomain>, NameCutToFirstSignificantSubdomain>;

/// Functions removing a part of the URL. The extractors keep the leading '?' / '#' so that it is removed too.
using FunctionCutWWW = FunctionStringToString<CutSubstringImpl<ExtractWWW>, NameCutWWW>;
using FunctionCutQueryString = FunctionStringToString<CutSubstringImpl<ExtractQueryString<false> >, NameCutQueryString>;
using FunctionCutFragment = FunctionStringToString<CutSubstringImpl<ExtractFragment<false> >, NameCutFragment>;
using FunctionCutQueryStringAndFragment = FunctionStringToString<CutSubstringImpl<ExtractQueryStringAndFragment<false> >, NameCutQueryStringAndFragment>;

/// Functions working with URL parameters and the URL hierarchy.
using FunctionExtractURLParameter = FunctionsStringSearchToString<ExtractURLParameterImpl, NameExtractURLParameter>;
using FunctionCutURLParameter = FunctionsStringSearchToString<CutURLParameterImpl, NameCutURLParameter>;
using FunctionExtractURLParameters = FunctionTokens<ExtractURLParametersImpl>;
using FunctionURLHierarchy = FunctionTokens<URLHierarchyImpl>;
using FunctionURLPathHierarchy = FunctionTokens<URLPathHierarchyImpl>;
using FunctionExtractURLParameterNames = FunctionTokens<ExtractURLParameterNamesImpl>;

}
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+								#pragma once
 								#include <DB/DataTypes/DataTypeString.h>
 								#include <DB/Columns/ColumnString.h>
 								#include <DB/Columns/ColumnConst.h>
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
+								#include <DB/Common/StringUtils.h>
 								#include <DB/Common/StringView.h>
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+								#include <DB/Functions/FunctionsString.h>
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+								#include <DB/Functions/FunctionsStringSearch.h>
 								#include <DB/Functions/FunctionsStringArray.h>
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
-												Make it compilable on OS X

It's still hackish and dirty, but server and client compies.

Server starts, but throwes meaningless exception on any query.

Client seems to be working fine.

Linux compilation might (but shouldn't) be broken (not tested).

											
										
										
											2016-10-26 22:27:38 +00:00
+								#ifdef __APPLE__
 								#include <common/apple_memrchr.h>
 								#endif
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
 								namespace DB
 								{
-												Fixed error in 'firstSignificantSubdomain' function [#METR-20000].

											
										
										
											2016-10-20 05:21:49 +00:00
+								/** URL processing functions.
 								  * All functions are not strictly follow RFC, instead they are maximally simplified for performance reasons.
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+								  *
-												Fixed error in 'firstSignificantSubdomain' function [#METR-20000].

											
										
										
											2016-10-20 05:21:49 +00:00
+								  * Functions for extraction parts of URL.
 								  * If URL has nothing like, then empty string is returned.
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
+								  *
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+								  *  domain
 								  *  domainWithoutWWW
 								  *  topLevelDomain
 								  *  protocol
 								  *  path
 								  *  queryString
 								  *  fragment
 								  *  queryStringAndFragment
 								  *
-												Fixed error in 'firstSignificantSubdomain' function [#METR-20000].

											
										
										
											2016-10-20 05:21:49 +00:00
+								  * Functions, removing parts from URL.
 								  * If URL has nothing like, then it is retured unchanged.
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
+								  *
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+								  *  cutWWW
 								  *  cutFragment
 								  *  cutQueryString
 								  *  cutQueryStringAndFragment
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-21 03:45:48 +00:00
+								  *
-												Fixed error in 'firstSignificantSubdomain' function [#METR-20000].

											
										
										
											2016-10-20 05:21:49 +00:00
+								  * Extract value of parameter in query string or in fragment identifier. Return empty string, if URL has no such parameter.
 								  * If there are many parameters with same name - return value of first one. Value is not %-decoded.
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-21 03:45:48 +00:00
+								  *
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+								  *  extractURLParameter(URL, name)
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
+								  *
-												Fixed error in 'firstSignificantSubdomain' function [#METR-20000].

											
										
										
											2016-10-20 05:21:49 +00:00
+								  * Extract all parameters from URL in form of array of strings name=value.
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+								  *  extractURLParameters(URL)
-												ClickHouse: Added function extractURLParameterNames(URL) [#CONV-8285]


											
										
										
											2013-08-02 13:55:43 +00:00
+								  *
-												Fixed error in 'firstSignificantSubdomain' function [#METR-20000].

											
										
										
											2016-10-20 05:21:49 +00:00
+								  * Extract names of all parameters from URL in form of array of strings.
-												Fixed coding style. Changed extractAll behaviour [#CONV-8285]

Now extractAll works without overcrossing.
New version:
#extractAll('abba', 'abbabba)
>['abba']

Old one:
#extractAll('abba', 'abbabba)
>['abba', 'abba']


											
										
										
											2013-08-05 08:40:56 +00:00
+								  *  extractURLParameterNames(URL)
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
+								  *
-												Fixed error in 'firstSignificantSubdomain' function [#METR-20000].

											
										
										
											2016-10-20 05:21:49 +00:00
+								  * Remove specified parameter from URL.
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+								  *  cutURLParameter(URL, name)
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
+								  *
-												Fixed error in 'firstSignificantSubdomain' function [#METR-20000].

											
										
										
											2016-10-20 05:21:49 +00:00
+								  * Get array of URL 'hierarchy' as in Yandex.Metrica tree-like reports. See docs.
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+								  *  URLHierarchy(URL)
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+								  */
-												Using std::shared_ptr for data types [#METR-21503].

											
										
										
											2016-05-28 10:35:44 +00:00
 								/// Pointer to a character inside a string buffer; the Extract* functors below take their input as Pos and return the result start through a Pos reference.
+								using Pos = const char *;
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
 								/// Extracts scheme from given url.
-												style

											
										
										
											2016-12-15 12:33:50 +00:00
+								inline StringView getURLScheme(const StringView & url)
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+								{
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
+									// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
-												style

											
										
										
											2016-12-15 18:59:07 +00:00
+									const char * p = url.data();
 									const char * end = url.data() + url.size();
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
+									if (isAlphaASCII(*p))
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+									{
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
+										for (++p; p < end; ++p)
 										{
 											if (!(isAlphaNumericASCII(*p) || *p == '+' || *p == '-' || *p == '.'))
 											{
 												break;
 											}
 										}
 										return StringView(url.data(), p - url.data());
 									}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
+									return StringView();
 								}
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
+								/// Extracts host from given url.
-												style

											
										
										
											2016-12-15 12:33:50 +00:00
+								inline StringView getURLHost(const StringView & url)
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
+								{
-												style

											
										
										
											2016-12-15 12:33:50 +00:00
+									StringView scheme = getURLScheme(url);
-												style

											
										
										
											2016-12-15 18:59:07 +00:00
+									const char * p = url.data() + scheme.size();
 									const char * end = url.data() + url.size();
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
 									// Colon must follows after scheme.
 									if (p == end || *p != ':')
 										return StringView();
 									// Authority component must starts with "//".
 									if (end - p < 2 || (p[1] != '/' || p[2] != '/'))
 										return StringView();
 									else
 										p += 3;
-												style

											
										
										
											2016-12-15 18:59:07 +00:00
+									const char * st = p;
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
 									for (; p < end; ++p)
 									{
 										if (*p == '@')
 										{
 											st = p + 1;
 										}
 										else if (*p == ':' || *p == '/' || *p == '?' || *p == '#')
 										{
 											break;
 										}
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+									}
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
 									return (p == st) ? StringView() : StringView(st, p - st);
 								}
 								/// Functor that locates the protocol part of a URL.
 								/// Both members are only declared here; their definitions are not in this part of the file.
 								struct ExtractProtocol
 								{
 									/// Reports the found substring through res_data (start) and res_size (length).
 									static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size);

 									/// Presumably an estimate of the result length per element, as in the other functors - confirm against the definition.
 									static size_t getReserveLengthForElement();
 								};
 								template <bool without_www>
 								struct ExtractDomain
 								{
 									static size_t getReserveLengthForElement() { return 15; }
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+									static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
 									{
-												style

											
										
										
											2016-12-15 12:33:50 +00:00
+										StringView host = getURLHost(StringView(data, size));
-												host extraction functionality was moved to UrlUtils

											
										
										
											2016-12-09 19:31:16 +00:00
 										if (host.empty())
 										{
 											res_data = data;
 											res_size = 0;
 										}
 										else
 										{
 											if (without_www && host.size() > 4 && !strncmp(host.data(), "www.", 4))
 												host = host.substr(4);
 											res_data = host.data();
 											res_size = host.size();
 										}
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+									}
 								};
-												dbms: implement firstSignificantSubdomain, cutToFirstSignificantSubdomain. [#METR-13151]

											
										
										
											2014-10-27 15:16:11 +00:00
+								struct ExtractFirstSignificantSubdomain
 								{
 									static size_t getReserveLengthForElement() { return 10; }
 									static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size, Pos * out_domain_end = nullptr)
 									{
 										res_data = data;
 										res_size = 0;
 										Pos tmp;
 										size_t domain_length;
 										ExtractDomain<true>::execute(data, size, tmp, domain_length);
 										if (domain_length == 0)
 											return;
 										if (out_domain_end)
 											*out_domain_end = tmp + domain_length;
 										/// cut useless dot
 										if (tmp[domain_length - 1] == '.')
 											--domain_length;
 										res_data = tmp;
 										res_size = domain_length;
 										auto begin = tmp;
 										auto end = begin + domain_length;
 										const char * last_3_periods[3]{};
 										auto pos = static_cast<const char *>(memchr(begin, '.', domain_length));
 										while (pos)
 										{
 											last_3_periods[2] = last_3_periods[1];
 											last_3_periods[1] = last_3_periods[0];
 											last_3_periods[0] = pos;
 											pos = static_cast<const char *>(memchr(pos + 1, '.', end - pos - 1));
 										}
 										if (!last_3_periods[0])
 											return;
 										if (!last_3_periods[1])
 										{
 											res_size = last_3_periods[0] - begin;
 											return;
 										}
 										if (!last_3_periods[2])
 											last_3_periods[2] = begin - 1;
-												Fixed error in 'firstSignificantSubdomain' function [#METR-20000].

											
										
										
											2016-10-20 05:21:49 +00:00
+										if (!strncmp(last_3_periods[1] + 1, "com.", 4)		/// Note that in ColumnString every value has zero byte after it.
 											|| !strncmp(last_3_periods[1] + 1, "net.", 4)
 											|| !strncmp(last_3_periods[1] + 1, "org.", 4)
 											|| !strncmp(last_3_periods[1] + 1, "co.", 3))
-												dbms: implement firstSignificantSubdomain, cutToFirstSignificantSubdomain. [#METR-13151]

											
										
										
											2014-10-27 15:16:11 +00:00
+										{
 											res_data += last_3_periods[2] + 1 - begin;
 											res_size = last_3_periods[1] - last_3_periods[2] - 1;
 											return;
 										}
 										res_data += last_3_periods[1] + 1 - begin;
 										res_size = last_3_periods[0] - last_3_periods[1] - 1;
 									}
 								};
 								struct CutToFirstSignificantSubdomain
 								{
 									static size_t getReserveLengthForElement() { return 15; }
 									static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size)
 									{
 										res_data = data;
 										res_size = 0;
 										Pos tmp_data;
 										size_t tmp_length;
 										Pos domain_end;
 										ExtractFirstSignificantSubdomain::execute(data, size, tmp_data, tmp_length, &domain_end);
 										if (tmp_length == 0)
 											return;
 										res_data = tmp_data;
 										res_size = domain_end - tmp_data;
 									}
 								};
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+								struct ExtractTopLevelDomain
 								{
 									static size_t getReserveLengthForElement() { return 5; }
 									static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
 									{
-												style

											
										
										
											2016-12-15 12:33:50 +00:00
+										StringView host = getURLHost(StringView(data, size));
-												fix extraction of TLD from fqdn

											
										
										
											2016-12-09 22:49:21 +00:00
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+										res_data = data;
 										res_size = 0;
-												fix extraction of TLD from fqdn

											
										
										
											2016-12-09 22:49:21 +00:00
+										if (!host.empty())
 										{
 											if (host.back() == '.')
 												host = StringView(host.data(), host.size() - 1);
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
-												fix extraction of TLD from fqdn

											
										
										
											2016-12-09 22:49:21 +00:00
+											Pos last_dot = reinterpret_cast<Pos>(memrchr(host.data(), '.', host.size()));
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
-												fix extraction of TLD from fqdn

											
										
										
											2016-12-09 22:49:21 +00:00
+											if (!last_dot)
 												return;
 											/// Для IPv4-адресов не выделяем ничего.
 											if (last_dot[1] <= '9')
 												return;
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
-												fix extraction of TLD from fqdn

											
										
										
											2016-12-09 22:49:21 +00:00
+											res_data = last_dot + 1;
 											res_size = (host.data() + host.size()) - res_data;
 										}
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+									}
 								};
 								struct ExtractPath
 								{
 									static size_t getReserveLengthForElement() { return 25; }
 									static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
 									{
 										res_data = data;
 										res_size = 0;
 										Pos pos = data;
 										Pos end = pos + size;
-												Improvement [#METR-2807].

											
										
										
											2014-04-08 07:47:51 +00:00
+										if (nullptr != (pos = strchr(data, '/')) && pos[1] == '/' && nullptr != (pos = strchr(pos + 2, '/')))
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+										{
 											Pos query_string_or_fragment = strpbrk(pos, "?#");
 											res_data = pos;
 											res_size = (query_string_or_fragment ? query_string_or_fragment : end) - res_data;
 										}
 									}
 								};
-												dbms: add pathFull function. [#METR-13679]

											
										
										
											2014-12-05 13:31:48 +00:00
+								struct ExtractPathFull
 								{
 									static size_t getReserveLengthForElement() { return 30; }
 									static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size)
 									{
 										res_data = data;
 										res_size = 0;
 										Pos pos = data;
 										Pos end = pos + size;
 										if (nullptr != (pos = strchr(data, '/')) && pos[1] == '/' && nullptr != (pos = strchr(pos + 2, '/')))
 										{
-												dbms: restore leading slash in pathFull. [#METR-13679]

											
										
										
											2014-12-19 14:17:02 +00:00
+											res_data = pos;
-												dbms: add pathFull function. [#METR-13679]

											
										
										
											2014-12-05 13:31:48 +00:00
+											res_size = end - res_data;
 										}
 									}
 								};
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+								template <bool without_leading_char>
 								struct ExtractQueryString
 								{
 									static size_t getReserveLengthForElement() { return 10; }
 									static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
 									{
 										res_data = data;
 										res_size = 0;
 										Pos pos = data;
 										Pos end = pos + size;
-												Improvement [#METR-2807].

											
										
										
											2014-04-08 07:47:51 +00:00
+										if (nullptr != (pos = strchr(data, '?')))
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+										{
 											Pos fragment = strchr(pos, '#');
 											res_data = pos + (without_leading_char ? 1 : 0);
 											res_size = (fragment ? fragment : end) - res_data;
 										}
 									}
 								};
 								template <bool without_leading_char>
 								struct ExtractFragment
 								{
 									static size_t getReserveLengthForElement() { return 10; }
 									static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
 									{
 										res_data = data;
 										res_size = 0;
 										Pos pos = data;
 										Pos end = pos + size;
-												Improvement [#METR-2807].

											
										
										
											2014-04-08 07:47:51 +00:00
+										if (nullptr != (pos = strchr(data, '#')))
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+										{
 											res_data = pos + (without_leading_char ? 1 : 0);
 											res_size = end - res_data;
 										}
 									}
 								};
 								template <bool without_leading_char>
 								struct ExtractQueryStringAndFragment
 								{
 									static size_t getReserveLengthForElement() { return 20; }
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+									static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
 									{
 										res_data = data;
 										res_size = 0;
 										Pos pos = data;
 										Pos end = pos + size;
-												Improvement [#METR-2807].

											
										
										
											2014-04-08 07:47:51 +00:00
+										if (nullptr != (pos = strchr(data, '?')))
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+										{
 											res_data = pos + (without_leading_char ? 1 : 0);
 											res_size = end - res_data;
 										}
-												dbms: refactor IFunction descendants' registration in FunctionFactory

											
										
										
											2014-11-12 17:23:26 +00:00
+										else if (nullptr != (pos = strchr(data, '#')))
 										{
 											res_data = pos;
 											res_size = end - res_data;
 										}
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+									}
 								};
 								/// С точкой на конце.
 								struct ExtractWWW
 								{
 									static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
 									{
 										res_data = data;
 										res_size = 0;
 										Pos pos = data;
 										Pos end = pos + size;
 										Pos tmp;
 										size_t protocol_length;
 										ExtractProtocol::execute(data, size, tmp, protocol_length);
 										pos += protocol_length + 3;
-												Fixed error in URL functions [#METR-2944].

											
										
										
											2016-11-21 02:39:37 +00:00
+										if (pos >= end || pos[-1] != '/' || pos[-2] != '/')
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+											return;
 										if (pos + 4 < end && !strncmp(pos, "www.", 4))
 										{
 											res_data = pos;
 											res_size = 4;
 										}
 									}
 								};
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+								struct ExtractURLParameterImpl
 								{
-												dbms: preparation [#CONV-2944].



											
										
										
											2013-09-15 05:51:43 +00:00
+									static void vector(const ColumnString::Chars_t & data,
-												Separated ColumnString from ColumnArray and ColumnFixedString from ColumnFixedArray; removed ColumnFixedArray [#CONV-2944].



											
										
										
											2013-05-05 15:25:25 +00:00
+													    const ColumnString::Offsets_t & offsets,
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+													    std::string pattern,
-												dbms: preparation [#CONV-2944].



											
										
										
											2013-09-15 05:51:43 +00:00
+														ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets)
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+									{
 										res_data.reserve(data.size()  / 5);
 										res_offsets.resize(offsets.size());
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+										pattern += '=';
 										const char * param_str = pattern.c_str();
 										size_t param_len = pattern.size();
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+										size_t prev_offset = 0;
 										size_t res_offset = 0;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+										for (size_t i = 0; i < offsets.size(); ++i)
 										{
 											size_t cur_offset = offsets[i];
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												dbms: function extractURLParameter: fixed error and removed support for ; [#METR-17461].

											
										
										
											2015-07-24 19:23:22 +00:00
+											const char * str = reinterpret_cast<const char *>(&data[prev_offset]);
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												dbms: function extractURLParameter: fixed error and removed support for ; [#METR-17461].

											
										
										
											2015-07-24 19:23:22 +00:00
+											const char * pos = nullptr;
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
+											const char * begin = strpbrk(str, "?#");
-												dbms: function extractURLParameter: fixed error and removed support for ; [#METR-17461].

											
										
										
											2015-07-24 19:23:22 +00:00
+											if (begin != nullptr)
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+											{
-												dbms: function extractURLParameter: fixed error and removed support for ; [#METR-17461].

											
										
										
											2015-07-24 19:23:22 +00:00
+												pos = begin + 1;
 												while (true)
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+												{
-												dbms: function extractURLParameter: fixed error and removed support for ; [#METR-17461].

											
										
										
											2015-07-24 19:23:22 +00:00
+													pos = strstr(pos, param_str);
 													if (pos == nullptr)
 														break;
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
+													if (pos[-1] != '?' && pos[-1] != '#' && pos[-1] != '&')
-												dbms: function extractURLParameter: fixed error and removed support for ; [#METR-17461].

											
										
										
											2015-07-24 19:23:22 +00:00
+													{
 														pos += param_len;
 														continue;
 													}
 													else
 													{
 														pos += param_len;
 														break;
 													}
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+												}
-												dbms: function extractURLParameter: fixed error and removed support for ; [#METR-17461].

											
										
										
											2015-07-24 19:23:22 +00:00
+											}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												Improvement [#METR-2807].

											
										
										
											2014-04-08 07:31:51 +00:00
+											if (pos != nullptr)
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+											{
-												dbms: function extractURLParameter: fixed error and removed support for ; [#METR-17461].

											
										
										
											2015-07-24 19:23:22 +00:00
+												const char * end = strpbrk(pos, "&#");
-												Improvement [#METR-2807].

											
										
										
											2014-04-08 07:31:51 +00:00
+												if (end == nullptr)
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+													end = pos + strlen(pos);
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+												res_data.resize(res_offset + (end - pos) + 1);
-												Attempt to improve performance [#METR-20892].

											
										
										
											2016-04-15 00:33:21 +00:00
+												memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], pos, end - pos);
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+												res_offset += end - pos;
 											}
 											else
 											{
 												res_data.resize(res_offset + 1);
 											}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+											res_data[res_offset] = 0;
 											++res_offset;
 											res_offsets[i] = res_offset;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
+											prev_offset = cur_offset;
 										}
 									}
 								};
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+								struct CutURLParameterImpl
 								{
-												dbms: preparation [#CONV-2944].



											
										
										
											2013-09-15 05:51:43 +00:00
+									static void vector(const ColumnString::Chars_t & data,
-												Separated ColumnString from ColumnArray and ColumnFixedString from ColumnFixedArray; removed ColumnFixedArray [#CONV-2944].



											
										
										
											2013-05-05 15:25:25 +00:00
+													    const ColumnString::Offsets_t & offsets,
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+													    std::string pattern,
-												dbms: preparation [#CONV-2944].



											
										
										
											2013-09-15 05:51:43 +00:00
+														ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets)
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+									{
 										res_data.reserve(data.size());
 										res_offsets.resize(offsets.size());
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+										pattern += '=';
 										const char * param_str = pattern.c_str();
 										size_t param_len = pattern.size();
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+										size_t prev_offset = 0;
 										size_t res_offset = 0;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+										for (size_t i = 0; i < offsets.size(); ++i)
 										{
 											size_t cur_offset = offsets[i];
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+											const char * url_begin = reinterpret_cast<const char *>(&data[prev_offset]);
 											const char * url_end = reinterpret_cast<const char *>(&data[cur_offset]) - 1;
 											const char * begin_pos = url_begin;
 											const char * end_pos = begin_pos;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+											do
 											{
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
+												const char * begin = strpbrk(url_begin, "?#");
-												Improvement [#METR-2807].

											
										
										
											2014-04-08 07:31:51 +00:00
+												if (begin == nullptr)
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+													break;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+												const char * pos = strstr(begin + 1, param_str);
-												Improvement [#METR-2807].

											
										
										
											2014-04-08 07:31:51 +00:00
+												if (pos == nullptr)
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+													break;
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
 												if (pos[-1] != '?' && pos[-1] != '#' && pos[-1] != '&')
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+												{
-												Improvement [#METR-2807].

											
										
										
											2014-04-08 07:31:51 +00:00
+													pos = nullptr;
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+													break;
 												}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+												begin_pos = pos;
 												end_pos = begin_pos + param_len;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+												/// Пропустим значение.
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
+												while (*end_pos && *end_pos != '&' && *end_pos != '#')
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+													++end_pos;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
+												/// Захватим '&' до или после параметра.
 												if (*end_pos == '&')
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+													++end_pos;
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
+												else if (begin_pos[-1] == '&')
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+													--begin_pos;
 											} while (false);
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: fixed function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:39:47 +00:00
+											size_t cut_length = (url_end - url_begin) - (end_pos - begin_pos);
 											res_data.resize(res_offset + cut_length + 1);
-												Attempt to improve performance [#METR-20892].

											
										
										
											2016-04-15 00:33:21 +00:00
+											memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], url_begin, begin_pos - url_begin);
 											memcpySmallAllowReadWriteOverflow15(&res_data[res_offset] + (begin_pos - url_begin), end_pos, url_end - end_pos);
-												clickhouse: fixed function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:39:47 +00:00
+											res_offset += cut_length + 1;
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+											res_data[res_offset - 1] = 0;
 											res_offsets[i] = res_offset;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function cutURLParameter [#CONV-6788].


											
										
										
											2013-03-18 10:27:45 +00:00
+											prev_offset = cur_offset;
 										}
 									}
 								};
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+								class ExtractURLParametersImpl
 								{
 								private:
 									Pos pos;
 									Pos end;
 									bool first;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+								public:
-												dbms: refactor IFunction descendants' registration in FunctionFactory

											
										
										
											2014-11-12 17:23:26 +00:00
+									static constexpr auto name = "extractURLParameters";
 									static String getName() { return name; }
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												Functions: little better [#METR-2944].

											
										
										
											2016-12-29 19:38:10 +00:00
+									static size_t getNumberOfArguments() { return 1; }
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+									static void checkArguments(const DataTypes & arguments)
 									{
-												dbms, contrib: GCC 6 build fixes [#METR-20000]

											
										
										
											2016-05-23 00:40:28 +00:00
+										if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
 											throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
 											ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+									}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+									void init(Block & block, const ColumnNumbers & arguments) {}
-												Changed order of arguments in extractAll(s, re).[#CONV-8285]


											
										
										
											2013-08-07 11:25:02 +00:00
 									/// Возвращает позицию аргумента, являющегося столбцом строк
 									size_t getStringsArgumentPosition()
 									{
 										return 0;
 									}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+									/// Вызывается для каждой следующей строки.
 									void set(Pos pos_, Pos end_)
 									{
 										pos = pos_;
 										end = end_;
 										first = true;
 									}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+									/// Получить следующий токен, если есть, или вернуть false.
 									bool get(Pos & token_begin, Pos & token_end)
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
+									{
-												Improvement [#METR-2807].

											
										
										
											2014-04-08 07:31:51 +00:00
+										if (pos == nullptr)
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
+											return false;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
+										if (first)
 										{
 											first = false;
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
+											pos = strpbrk(pos, "?#");
-												Improvement [#METR-2807].

											
										
										
											2014-04-08 07:31:51 +00:00
+											if (pos == nullptr)
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
+												return false;
 											++pos;
 										}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
+										while (true)
 										{
 											token_begin = pos;
 											pos = strpbrk(pos, "=&#?");
 											if (pos == nullptr)
 												return false;
 											if (*pos == '?')
 											{
 												++pos;
 												continue;
 											}
 											break;
 										}
 										if (*pos == '&' || *pos == '#')
 										{
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+											token_end = pos++;
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
+										}
 										else
 										{
 											++pos;
 											pos = strpbrk(pos, "&#");
 											if (pos == nullptr)
 												token_end = end;
 											else
 												token_end = pos++;
 										}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
+										return true;
 									}
 								};
-												ClickHouse: Added function extractURLParameterNames(URL) [#CONV-8285]


											
										
										
											2013-08-02 13:55:43 +00:00
+								class ExtractURLParameterNamesImpl
 								{
 								private:
 									Pos pos;
 									Pos end;
 									bool first;
 								public:
-												dbms: refactor IFunction descendants' registration in FunctionFactory

											
										
										
											2014-11-12 17:23:26 +00:00
+									static constexpr auto name = "extractURLParameterNames";
 									static String getName() { return name; }
-												ClickHouse: Added function extractURLParameterNames(URL) [#CONV-8285]


											
										
										
											2013-08-02 13:55:43 +00:00
-												Functions: little better [#METR-2944].

											
										
										
											2016-12-29 19:38:10 +00:00
+									static size_t getNumberOfArguments() { return 1; }
-												ClickHouse: Added function extractURLParameterNames(URL) [#CONV-8285]


											
										
										
											2013-08-02 13:55:43 +00:00
+									static void checkArguments(const DataTypes & arguments)
 									{
-												dbms, contrib: GCC 6 build fixes [#METR-20000]

											
										
										
											2016-05-23 00:40:28 +00:00
+										if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
 											throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
 											ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-												ClickHouse: Added function extractURLParameterNames(URL) [#CONV-8285]


											
										
										
											2013-08-02 13:55:43 +00:00
+									}
-												Changed order of arguments in extractAll(s, re).[#CONV-8285]


											
										
										
											2013-08-07 11:25:02 +00:00
+									/// Возвращает позицию аргумента, являющегося столбцом строк
 									size_t getStringsArgumentPosition()
 									{
 										return 0;
 									}
-												ClickHouse: Added function extractURLParameterNames(URL) [#CONV-8285]


											
										
										
											2013-08-02 13:55:43 +00:00
+									void init(Block & block, const ColumnNumbers & arguments) {}
 									/// Вызывается для каждой следующей строки.
 									void set(Pos pos_, Pos end_)
 									{
 										pos = pos_;
 										end = end_;
 										first = true;
 									}
 									/// Получить следующий токен, если есть, или вернуть false.
 									bool get(Pos & token_begin, Pos & token_end)
 									{
-												Improvement [#METR-2807].

											
										
										
											2014-04-08 07:31:51 +00:00
+										if (pos == nullptr)
-												ClickHouse: Added function extractURLParameterNames(URL) [#CONV-8285]


											
										
										
											2013-08-02 13:55:43 +00:00
+											return false;
 										if (first)
 										{
 											first = false;
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
+											pos = strpbrk(pos, "?#");
-												ClickHouse: Added function extractURLParameterNames(URL) [#CONV-8285]


											
										
										
											2013-08-02 13:55:43 +00:00
+										}
-												Fixed coding style. Changed extractAll behaviour [#CONV-8285]

Now extractAll works without overcrossing.
New version:
#extractAll('abba', 'abbabba)
>['abba']

Old one:
#extractAll('abba', 'abbabba)
>['abba', 'abba']


											
										
										
											2013-08-05 08:40:56 +00:00
+										else
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
+											pos = strpbrk(pos, "&#");
-												Fixed coding style. Changed extractAll behaviour [#CONV-8285]

Now extractAll works without overcrossing.
New version:
#extractAll('abba', 'abbabba)
>['abba']

Old one:
#extractAll('abba', 'abbabba)
>['abba', 'abba']


											
										
										
											2013-08-05 08:40:56 +00:00
-												Improvement [#METR-2807].

											
										
										
											2014-04-08 07:31:51 +00:00
+										if (pos == nullptr)
-												Fixed coding style. Changed extractAll behaviour [#CONV-8285]

Now extractAll works without overcrossing.
New version:
#extractAll('abba', 'abbabba)
>['abba']

Old one:
#extractAll('abba', 'abbabba)
>['abba', 'abba']


											
										
										
											2013-08-05 08:40:56 +00:00
+											return false;
-												ClickHouse: Added function extractURLParameterNames(URL) [#CONV-8285]


											
										
										
											2013-08-02 13:55:43 +00:00
+										++pos;
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
+										while (true)
 										{
 											token_begin = pos;
-												ClickHouse: Added function extractURLParameterNames(URL) [#CONV-8285]


											
										
										
											2013-08-02 13:55:43 +00:00
-												dbms: improved URL-parsing functions for URLs with parameters without values and "parameters" after fragment identifier [#METR-19806].

											
										
										
											2016-01-26 21:24:09 +00:00
+											pos = strpbrk(pos, "=&#?");
 											if (pos == nullptr)
 												return false;
 											else
 												token_end = pos;
 											if (*pos == '?')
 											{
 												++pos;
 												continue;
 											}
 											break;
 										}
-												ClickHouse: Added function extractURLParameterNames(URL) [#CONV-8285]


											
										
										
											2013-08-02 13:55:43 +00:00
 										return true;
 									}
 								};
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
 								class URLHierarchyImpl
 								{
 								private:
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+									Pos begin;
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
+									Pos pos;
 									Pos end;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
+								public:
-												dbms: refactor IFunction descendants' registration in FunctionFactory

											
										
										
											2014-11-12 17:23:26 +00:00
+									static constexpr auto name = "URLHierarchy";
 									static String getName() { return name; }
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												Functions: little better [#METR-2944].

											
										
										
											2016-12-29 19:38:10 +00:00
+									static size_t getNumberOfArguments() { return 1; }
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
+									static void checkArguments(const DataTypes & arguments)
 									{
-												dbms, contrib: GCC 6 build fixes [#METR-20000]

											
										
										
											2016-05-23 00:40:28 +00:00
+										if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
 											throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
 											ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
+									}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
+									void init(Block & block, const ColumnNumbers & arguments) {}
-												Changed order of arguments in extractAll(s, re).[#CONV-8285]


											
										
										
											2013-08-07 11:25:02 +00:00
 									/// Возвращает позицию аргумента, являющегося столбцом строк
 									size_t getStringsArgumentPosition()
 									{
 										return 0;
 									}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
+									/// Вызывается для каждой следующей строки.
 									void set(Pos pos_, Pos end_)
 									{
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+										begin = pos = pos_;
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
+										end = end_;
 									}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: slightly fixed extractURLParameters [#CONV-6788].


											
										
										
											2013-03-06 09:00:58 +00:00
+									/// Получить следующий токен, если есть, или вернуть false.
 									bool get(Pos & token_begin, Pos & token_end)
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+									{
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+										/// Код из URLParser.
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+										if (pos == end)
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+											return false;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+										if (pos == begin)
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+										{
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+											/// Распарсим всё, что идёт до пути
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+											/// Предположим, что протокол уже переведён в нижний регистр.
 											while (pos < end && ((*pos > 'a' && *pos < 'z') || (*pos > '0' && *pos < '9')))
 												++pos;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+											/** Будем вычислять иерархию только для URL-ов, в которых есть протокол, и после него идут два слеша.
 											 * (http, file - подходят, mailto, magnet - не подходят), и после двух слешей ещё хоть что-нибудь есть
 											 * Для остальных просто вернём полный URL как единственный элемент иерархии.
 											 */
 											if (pos == begin || pos == end || !(*pos++ == ':' && pos < end && *pos++ == '/' && pos < end && *pos++ == '/' && pos < end))
 											{
 												pos = end;
 												token_begin = begin;
 												token_end = end;
 												return true;
 											}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+											/// Доменом для простоты будем считать всё, что после протокола и двух слешей, до следующего слеша или до ? или до #
 											while (pos < end && !(*pos == '/' || *pos == '?' || *pos == '#'))
 												++pos;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+											if (pos != end)
 												++pos;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+											token_begin = begin;
 											token_end = pos;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+											return true;
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+										}
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+										/// Идём до следующего / или ? или #, пропуская все те, что вначале.
 										while (pos < end && (*pos == '/' || *pos == '?' || *pos == '#'))
 											++pos;
 										if (pos == end)
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+											return false;
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+										while (pos < end && !(*pos == '/' || *pos == '?' || *pos == '#'))
 											++pos;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+										if (pos != end)
 											++pos;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added URLHierarchy function [#CONV-6788].


											
										
										
											2013-03-06 11:22:17 +00:00
+										token_begin = begin;
 										token_end = pos;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												clickhouse: added function extractURLParameters [#CONV-6788].


											
										
										
											2013-03-05 13:30:23 +00:00
+										return true;
 									}
 								};
-												functions: URLPathHierarchy implementation [METR-9922]



											
										
										
											2014-02-11 19:18:38 +00:00
+								class URLPathHierarchyImpl
 								{
 								private:
 									Pos begin;
 									Pos pos;
 									Pos end;
 									Pos start;
 								public:
-												dbms: refactor IFunction descendants' registration in FunctionFactory

											
										
										
											2014-11-12 17:23:26 +00:00
+									static constexpr auto name = "URLPathHierarchy";
 									static String getName() { return name; }
-												functions: URLPathHierarchy implementation [METR-9922]



											
										
										
											2014-02-11 19:18:38 +00:00
-												Functions: little better [#METR-2944].

											
										
										
											2016-12-29 19:38:10 +00:00
+									static size_t getNumberOfArguments() { return 1; }
-												functions: URLPathHierarchy implementation [METR-9922]



											
										
										
											2014-02-11 19:18:38 +00:00
+									static void checkArguments(const DataTypes & arguments)
 									{
-												dbms, contrib: GCC 6 build fixes [#METR-20000]

											
										
										
											2016-05-23 00:40:28 +00:00
+										if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
 											throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
 											ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-												functions: URLPathHierarchy implementation [METR-9922]



											
										
										
											2014-02-11 19:18:38 +00:00
+									}
 									void init(Block & block, const ColumnNumbers & arguments) {}
 									/// Возвращает позицию аргумента, являющегося столбцом строк
 									size_t getStringsArgumentPosition()
 									{
 										return 0;
 									}
 									/// Вызывается для каждой следующей строки.
 									void set(Pos pos_, Pos end_)
 									{
 										begin = pos = pos_;
 										start = begin;
 										end = end_;
 									}
 									/// Получить следующий токен, если есть, или вернуть false.
 									bool get(Pos & token_begin, Pos & token_end)
 									{
 										/// Код из URLParser.
 										if (pos == end)
 											return false;
 										if (pos == begin)
 										{
 											/// Распарсим всё, что идёт до пути
 											/// Предположим, что протокол уже переведён в нижний регистр.
 											while (pos < end && ((*pos > 'a' && *pos < 'z') || (*pos > '0' && *pos < '9')))
 												++pos;
 											/** Будем вычислять иерархию только для URL-ов, в которых есть протокол, и после него идут два слеша.
 											 * (http, file - подходят, mailto, magnet - не подходят), и после двух слешей ещё хоть что-нибудь есть
 											 * Для остальных просто вернём пустой массив.
 											 */
 											if (pos == begin || pos == end || !(*pos++ == ':' && pos < end && *pos++ == '/' && pos < end && *pos++ == '/' && pos < end))
 											{
 												pos = end;
 												return false;
 											}
 											/// Доменом для простоты будем считать всё, что после протокола и двух слешей, до следующего слеша или до ? или до #
 											while (pos < end && !(*pos == '/' || *pos == '?' || *pos == '#'))
 												++pos;
 											start = pos;
 											if (pos != end)
 												++pos;
 										}
 										/// Идём до следующего / или ? или #, пропуская все те, что вначале.
 										while (pos < end && (*pos == '/' || *pos == '?' || *pos == '#'))
 											++pos;
 										if (pos == end)
 											return false;
 										while (pos < end && !(*pos == '/' || *pos == '?' || *pos == '#'))
 											++pos;
 										if (pos != end)
 											++pos;
 										token_begin = start;
 										token_end = pos;
 										return true;
 									}
 								};
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+								/** Выделить кусок строки, используя Extractor.
 								  */
 								template <typename Extractor>
 								struct ExtractSubstringImpl
 								{
-												dbms: preparation [#CONV-2944].



											
										
										
											2013-09-15 05:51:43 +00:00
+									static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets,
 										ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets)
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+									{
 										size_t size = offsets.size();
 										res_offsets.resize(size);
-												clickhouse: probably slightly improved performance of URL functions [#CONV-2807].


											
										
										
											2013-04-17 12:52:55 +00:00
+										res_data.reserve(size * Extractor::getReserveLengthForElement());
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+										size_t prev_offset = 0;
 										size_t res_offset = 0;
 										/// Выделенный кусок.
 										Pos start;
 										size_t length;
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+										for (size_t i = 0; i < size; ++i)
 										{
 											Extractor::execute(reinterpret_cast<const char *>(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length);
-												dbms: improved performance on short queries [#METR-11571].

											
										
										
											2014-06-26 00:58:14 +00:00
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+											res_data.resize(res_data.size() + length + 1);
-												Attempt to improve performance [#METR-20892].

											
										
										
											2016-04-15 00:33:21 +00:00
+											memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], start, length);
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+											res_offset += length + 1;
 											res_data[res_offset - 1] = 0;
 											res_offsets[i] = res_offset;
 											prev_offset = offsets[i];
 										}
 									}
 									static void constant(const std::string & data,
 										std::string & res_data)
 									{
 										Pos start;
 										size_t length;
 										Extractor::execute(data.data(), data.size(), start, length);
 										res_data.assign(start, length);
 									}
-												dbms: preparation [#CONV-2944].



											
										
										
											2013-09-15 05:51:43 +00:00
+									static void vector_fixed(const ColumnString::Chars_t & data, size_t n,
 										ColumnString::Chars_t & res_data)
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+									{
 										throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN);
 									}
 								};
 								/** Удалить кусок строки, используя Extractor.
 								  */
 								template <typename Extractor>
 								struct CutSubstringImpl
 								{
-												dbms: preparation [#CONV-2944].



											
										
										
											2013-09-15 05:51:43 +00:00
+									static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets,
 										ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets)
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+									{
 										res_data.reserve(data.size());
 										size_t size = offsets.size();
 										res_offsets.resize(size);
 										size_t prev_offset = 0;
 										size_t res_offset = 0;
 										/// Выделенный кусок.
 										Pos start;
 										size_t length;
 										for (size_t i = 0; i < size; ++i)
 										{
 											const char * current = reinterpret_cast<const char *>(&data[prev_offset]);
 											Extractor::execute(current, offsets[i] - prev_offset - 1, start, length);
 											size_t start_index = start - reinterpret_cast<const char *>(&data[0]);
 											res_data.resize(res_data.size() + offsets[i] - prev_offset - length);
-												Attempt to improve performance [#METR-20892].

											
										
										
											2016-04-15 00:33:21 +00:00
+											memcpySmallAllowReadWriteOverflow15(
 												&res_data[res_offset], current, start - current);
 											memcpySmallAllowReadWriteOverflow15(
 												&res_data[res_offset + start - current], start + length, offsets[i] - start_index - length);
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+											res_offset += offsets[i] - prev_offset - length;
 											res_offsets[i] = res_offset;
 											prev_offset = offsets[i];
 										}
 									}
 									static void constant(const std::string & data,
 										std::string & res_data)
 									{
 										Pos start;
 										size_t length;
 										Extractor::execute(data.data(), data.size(), start, length);
-												dbms: fix cut* functions for constant strings

											
										
										
											2014-12-05 11:44:37 +00:00
+										res_data.reserve(data.size() - length);
 										res_data.append(data.data(), start);
 										res_data.append(start + length, data.data() + data.size());
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+									}
-												dbms: preparation [#CONV-2944].



											
										
										
											2013-09-15 05:51:43 +00:00
+									static void vector_fixed(const ColumnString::Chars_t & data, size_t n,
 										ColumnString::Chars_t & res_data)
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
+									{
 										throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN);
 									}
 								};
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
+								/// Percent decode of url data.
-												- use std::experimental::string_view
- rename unquoteUrl to decodeURLComponent
- fix code-style

											
										
										
											2016-12-12 06:09:00 +00:00
+								struct DecodeURLComponentImpl
-												implement unquoteUrl

											
										
										
											2016-12-10 21:04:58 +00:00
+								{
 									static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets,
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
+										ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets);
-												implement unquoteUrl

											
										
										
											2016-12-10 21:04:58 +00:00
 									static void constant(const std::string & data,
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
+										std::string & res_data);
-												implement unquoteUrl

											
										
										
											2016-12-10 21:04:58 +00:00
 									static void vector_fixed(const ColumnString::Chars_t & data, size_t n,
-												move all url's functions to FunctionsURL

											
										
										
											2016-12-15 12:05:05 +00:00
+										ColumnString::Chars_t & res_data);
-												implement unquoteUrl

											
										
										
											2016-12-10 21:04:58 +00:00
+								};
-												dbms: refactor IFunction descendants' registration in FunctionFactory

											
										
										
											2014-11-12 17:23:26 +00:00
+								struct NameProtocol 					{ static constexpr auto name = "protocol"; };
 								struct NameDomain 						{ static constexpr auto name = "domain"; };
 								struct NameDomainWithoutWWW 			{ static constexpr auto name = "domainWithoutWWW"; };
 								struct NameFirstSignificantSubdomain	{ static constexpr auto name = "firstSignificantSubdomain"; };
 								struct NameTopLevelDomain 				{ static constexpr auto name = "topLevelDomain"; };
 								struct NamePath 						{ static constexpr auto name = "path"; };
-												dbms: add pathFull function. [#METR-13679]

											
										
										
											2014-12-05 13:31:48 +00:00
+								struct NamePathFull						{ static constexpr auto name = "pathFull"; };
-												dbms: refactor IFunction descendants' registration in FunctionFactory

											
										
										
											2014-11-12 17:23:26 +00:00
+								struct NameQueryString					{ static constexpr auto name = "queryString"; };
 								struct NameFragment 					{ static constexpr auto name = "fragment"; };
 								struct NameQueryStringAndFragment		{ static constexpr auto name = "queryStringAndFragment"; };
-												- use std::experimental::string_view
- rename unquoteUrl to decodeURLComponent
- fix code-style

											
										
										
											2016-12-12 06:09:00 +00:00
+								struct NameDecodeURLComponent           { static constexpr auto name = "decodeURLComponent"; };
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
-												dbms: refactor IFunction descendants' registration in FunctionFactory

											
										
										
											2014-11-12 17:23:26 +00:00
+								struct NameCutToFirstSignificantSubdomain { static constexpr auto name = "cutToFirstSignificantSubdomain"; };
-												dbms: implement firstSignificantSubdomain, cutToFirstSignificantSubdomain. [#METR-13151]

											
										
										
											2014-10-27 15:16:11 +00:00
-												dbms: refactor IFunction descendants' registration in FunctionFactory

											
										
										
											2014-11-12 17:23:26 +00:00
+								struct NameCutWWW 						{ static constexpr auto name = "cutWWW"; };
 								struct NameCutQueryString				{ static constexpr auto name = "cutQueryString"; };
 								struct NameCutFragment 					{ static constexpr auto name = "cutFragment"; };
 								struct NameCutQueryStringAndFragment 	{ static constexpr auto name = "cutQueryStringAndFragment"; };
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
-												dbms: refactor IFunction descendants' registration in FunctionFactory

											
										
										
											2014-11-12 17:23:26 +00:00
+								struct NameExtractURLParameter			{ static constexpr auto name = "extractURLParameter"; };
 								struct NameCutURLParameter 				{ static constexpr auto name = "cutURLParameter"; };
-												clickhouse: added function extractURLParameter [#CONV-6788].


											
										
										
											2013-03-05 12:12:47 +00:00
-												Using std::shared_ptr for data types [#METR-21503].

											
										
										
											2016-05-28 10:35:44 +00:00
+								using FunctionProtocol = FunctionStringToString<ExtractSubstringImpl<ExtractProtocol>, 				NameProtocol>	 	;
 								using FunctionDomain = FunctionStringToString<ExtractSubstringImpl<ExtractDomain<false> >, 		NameDomain>	 		;
 								using FunctionDomainWithoutWWW = FunctionStringToString<ExtractSubstringImpl<ExtractDomain<true>  >, 		NameDomainWithoutWWW>;
 								using FunctionFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<ExtractFirstSignificantSubdomain>, NameFirstSignificantSubdomain>;
 								using FunctionTopLevelDomain = FunctionStringToString<ExtractSubstringImpl<ExtractTopLevelDomain>, 		NameTopLevelDomain>	;
 								using FunctionPath = FunctionStringToString<ExtractSubstringImpl<ExtractPath>, 					NamePath>			;
 								using FunctionPathFull = FunctionStringToString<ExtractSubstringImpl<ExtractPathFull>,				NamePathFull>		;
 								using FunctionQueryString = FunctionStringToString<ExtractSubstringImpl<ExtractQueryString<true> >, 	NameQueryString>	;
 								using FunctionFragment = FunctionStringToString<ExtractSubstringImpl<ExtractFragment<true> >, 		NameFragment>		;
 								using FunctionQueryStringAndFragment = FunctionStringToString<ExtractSubstringImpl<ExtractQueryStringAndFragment<true> >, NameQueryStringAndFragment>;
-												- use std::experimental::string_view
- rename unquoteUrl to decodeURLComponent
- fix code-style

											
										
										
											2016-12-12 06:09:00 +00:00
+								using FunctionDecodeURLComponent = FunctionStringToString<DecodeURLComponentImpl, NameDecodeURLComponent>;
-												Using std::shared_ptr for data types [#METR-21503].

											
										
										
											2016-05-28 10:35:44 +00:00
 								using FunctionCutToFirstSignificantSubdomain = FunctionStringToString<ExtractSubstringImpl<CutToFirstSignificantSubdomain>, NameCutToFirstSignificantSubdomain>;
 								using FunctionCutWWW = FunctionStringToString<CutSubstringImpl<ExtractWWW>, 						NameCutWWW>			;
 								using FunctionCutQueryString = FunctionStringToString<CutSubstringImpl<ExtractQueryString<false> >, 		NameCutQueryString>	;
 								using FunctionCutFragment = FunctionStringToString<CutSubstringImpl<ExtractFragment<false> >, 			NameCutFragment>	;
 								using FunctionCutQueryStringAndFragment = FunctionStringToString<CutSubstringImpl<ExtractQueryStringAndFragment<false> >, NameCutQueryStringAndFragment>;
 								using FunctionExtractURLParameter = FunctionsStringSearchToString<ExtractURLParameterImpl, NameExtractURLParameter>;
 								using FunctionCutURLParameter = FunctionsStringSearchToString<CutURLParameterImpl, NameCutURLParameter>;
 								using FunctionExtractURLParameters = FunctionTokens<ExtractURLParametersImpl>;
 								using FunctionExtractURLParameters = FunctionTokens<ExtractURLParametersImpl>;
 								using FunctionURLHierarchy = FunctionTokens<URLHierarchyImpl>;
 								using FunctionURLPathHierarchy = FunctionTokens<URLPathHierarchyImpl>;
 								using FunctionExtractURLParameterNames = FunctionTokens<ExtractURLParameterNamesImpl>;
-												dbms: development [#CONV-2944].



											
										
										
											2012-07-16 03:42:36 +00:00
 								}