dbms: refactor position function family, remove debug versions. [#METR-18210]

2024-11-26 17:41:59 +00:00 · 2015-10-14 15:12:56 +03:00 · 2015-10-14 15:12:56 +03:00 · 957c8ecafe
commit 957c8ecafe
parent 3e3dfa0362
5 changed files with 739 additions and 1167 deletions
--- a/dbms/include/DB/Common/StringSearcher.h
+++ b/dbms/include/DB/Common/StringSearcher.h
@ -0,0 +1,657 @@
+#pragma once
+
+#include <DB/Common/UTF8Helpers.h>
+#include <statdaemons/ext/range.hpp>
+#include <Poco/UTF8Encoding.h>
+#include <Poco/Unicode.h>
+#include <stdint.h>
+#include <string.h>
+
+
+namespace DB
+{
+
+
+/// Performs case-sensitive and case-insensitive substring search in ASCII and UTF-8 strings
+template <bool CaseSensitive, bool ASCII> class StringSearcher;
+
+/// Case-insensitive UTF-8 searcher
+template <> class StringSearcher<false, false>
+{
+	using UTF8SequenceBuffer = UInt8[6]; /// scratch space for one encoded code point
+
+	static constexpr auto n = sizeof(__m128i); /// octets in one SSE register
+
+	const int page_size = getpagesize(); /// queried once per searcher, used by page_safe()
+
+	/// string to be searched for
+	const char * const needle;
+	const std::size_t needle_size;
+	bool first_needle_symbol_is_ascii{}; /// set when the first octet of `needle` is below 0x80
+	/// lower and uppercase variants of the first octet of the first character in `needle`
+	UInt8 l{};
+	UInt8 u{};
+	/// vectors filled with `l` and `u`, for determining leftmost position of the first symbol
+	__m128i patl, patu;
+	/// lower and uppercase variants of the first 16 octets of `needle`, zero-padded when the needle is shorter
+	__m128i cachel = _mm_setzero_si128(), cacheu = _mm_setzero_si128();
+	int cachemask{}; /// bit `i` is set when octet `i` of the cache vectors holds a needle octet
+	std::size_t cache_valid_len{}; /// summed lengths of the sequences added while `cache_actual_len` stayed below `n`
+	std::size_t cache_actual_len{}; /// summed lengths of all sequences visited while filling the cache
+	/// true when the `n` octets starting at `ptr` lie within a single memory page
+	bool page_safe(const void * const ptr) const
+	{
+		return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
+	}
+
+public:
+	StringSearcher(const char * const needle, const std::size_t needle_size) /// stores the pointer only, `needle` is not copied
+		: needle{needle}, needle_size{needle_size}
+	{
+		if (0 == needle_size) /// empty needle: nothing to precompute
+			return;
+
+		static const Poco::UTF8Encoding utf8;
+		UTF8SequenceBuffer l_seq, u_seq;
+
+		auto needle_pos = reinterpret_cast<const UInt8 *>(needle);
+		if (*needle_pos < 0x80u)
+		{
+			first_needle_symbol_is_ascii = true;
+			l = std::tolower(*needle_pos);
+			u = std::toupper(*needle_pos);
+		}
+		else
+		{
+			const auto first_u32 = utf8.convert(needle_pos);
+			const auto first_l_u32 = Poco::Unicode::toLower(first_u32);
+			const auto first_u_u32 = Poco::Unicode::toUpper(first_u32);
+
+			/// lower and uppercase variants of the first octet of the first character in `needle`
+			utf8.convert(first_l_u32, l_seq, sizeof(l_seq));
+			l = l_seq[0];
+			utf8.convert(first_u_u32, u_seq, sizeof(u_seq));
+			u = u_seq[0];
+		}
+
+		/// for detecting leftmost position of the first symbol
+		patl = _mm_set1_epi8(l);
+		patu = _mm_set1_epi8(u);
+		/// lower and uppercase vectors of first 16 octets of `needle`
+
+		const auto needle_end = needle_pos + needle_size;
+
+		for (std::size_t i = 0; i < n;) /// `i` counts the cache octets produced so far
+		{
+			if (needle_pos == needle_end) /// needle exhausted: pad the cache with a zero octet
+			{
+				cachel = _mm_srli_si128(cachel, 1);
+				cacheu = _mm_srli_si128(cacheu, 1);
+				++i;
+
+				continue;
+			}
+
+			const auto src_len = DB::UTF8::seqLength(*needle_pos);
+			const auto c_u32 = utf8.convert(needle_pos);
+
+			const auto c_l_u32 = Poco::Unicode::toLower(c_u32);
+			const auto c_u_u32 = Poco::Unicode::toUpper(c_u32);
+
+			const auto dst_l_len = static_cast<UInt8>(utf8.convert(c_l_u32, l_seq, sizeof(l_seq)));
+			const auto dst_u_len = static_cast<UInt8>(utf8.convert(c_u_u32, u_seq, sizeof(u_seq)));
+
+			/// @note case mapping may change the encoded length (rare, but allowed by Unicode); such needles are rejected
+			if (!(dst_l_len == dst_u_len && dst_u_len == src_len))
+				throw DB::Exception{
+					"UTF8 sequences with different lowercase and uppercase lengths are not supported",
+					DB::ErrorCodes::UNSUPPORTED_PARAMETER
+				};
+
+			cache_actual_len += src_len;
+			if (cache_actual_len < n) /// a sequence counts as valid only while the running total stays below `n`
+				cache_valid_len += src_len;
+
+			for (std::size_t j = 0; j < src_len && i < n; ++j, ++i) /// append the sequence octet by octet until the cache is full
+			{
+				cachel = _mm_srli_si128(cachel, 1);
+				cacheu = _mm_srli_si128(cacheu, 1);
+
+				if (needle_pos != needle_end)
+				{
+					cachel = _mm_insert_epi8(cachel, l_seq[j], n - 1); /// enters at the top lane, reaches lane `i` after the remaining shifts
+					cacheu = _mm_insert_epi8(cacheu, u_seq[j], n - 1);
+
+					cachemask |= 1 << i;
+					++needle_pos;
+				}
+			}
+		}
+	}
+
+	bool compare(const UInt8 * pos) const /// true when `needle` matches case-insensitively at `pos`; no haystack end is checked here
+	{
+		static const Poco::UTF8Encoding utf8;
+
+		if (page_safe(pos)) /// vector path: the first 16 octets are checked at once
+		{
+			const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
+			const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
+			const auto v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu);
+			const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
+			const auto mask = _mm_movemask_epi8(v_against_l_or_u);
+
+			if (0xffff == cachemask) /// needle fills the whole cache, the rest is compared code point by code point
+			{
+				if (mask == cachemask)
+				{
+					pos += cache_valid_len;
+					auto needle_pos = needle + cache_valid_len;
+					const auto needle_end = needle + needle_size;
+
+					while (needle_pos < needle_end &&
+						   Poco::Unicode::toLower(utf8.convert(pos)) ==
+						   Poco::Unicode::toLower(utf8.convert(reinterpret_cast<const UInt8 *>(needle_pos))))
+					{
+						/// @note assuming sequences for lowercase and uppercase have exact same length
+						const auto len = DB::UTF8::seqLength(*pos);
+						pos += len, needle_pos += len;
+					}
+
+					if (needle_pos == needle_end)
+						return true;
+				}
+			}
+			else if ((mask & cachemask) == cachemask) /// shorter needle: only the lanes holding needle octets have to match
+				return true;
+
+			return false;
+		}
+
+		if (*pos == l || *pos == u) /// scalar path, taken when a 16-octet load at `pos` would cross a page boundary
+		{
+			pos += first_needle_symbol_is_ascii;
+			auto needle_pos = needle + first_needle_symbol_is_ascii;
+			const auto needle_end = needle + needle_size;
+
+			while (needle_pos < needle_end &&
+				   Poco::Unicode::toLower(utf8.convert(pos)) ==
+				   Poco::Unicode::toLower(utf8.convert(reinterpret_cast<const UInt8 *>(needle_pos))))
+			{
+				const auto len = DB::UTF8::seqLength(*pos);
+				pos += len, needle_pos += len;
+			}
+
+			if (needle_pos == needle_end)
+				return true;
+		}
+
+		return false;
+	}
+
+	const UInt8 * search(const UInt8 * haystack, const UInt8 * const haystack_end) const /// position of the first match, `haystack_end` when there is none
+	{
+		if (0 == needle_size) /// an empty needle matches at the very beginning
+			return haystack;
+
+		static const Poco::UTF8Encoding utf8;
+
+		const auto needle_begin = reinterpret_cast<const UInt8 *>(needle);
+		const auto needle_end = needle_begin + needle_size;
+
+		while (haystack < haystack_end)
+		{
+			if (haystack + n <= haystack_end && page_safe(haystack)) /// look for the first octet (either case) in the next 16 octets
+			{
+				const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
+				const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
+				const auto v_against_u = _mm_cmpeq_epi8(v_haystack, patu);
+				const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
+
+				const auto mask = _mm_movemask_epi8(v_against_l_or_u);
+
+				if (mask == 0) /// first octet does not occur in this window
+				{
+					haystack += n;
+					DB::UTF8::syncForward(haystack, haystack_end); /// skip continuation octets left over from a split sequence
+					continue;
+				}
+
+				const auto offset = _bit_scan_forward(mask); /// index of the leftmost candidate
+				haystack += offset;
+
+				if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
+				{
+					const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
+					const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
+					const auto v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu);
+					const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
+					const auto mask = _mm_movemask_epi8(v_against_l_or_u);
+
+					if (0xffff == cachemask)
+					{
+						if (mask == cachemask)
+						{
+							auto haystack_pos = haystack + cache_valid_len;
+							auto needle_pos = needle_begin + cache_valid_len;
+
+							while (haystack_pos < haystack_end && needle_pos < needle_end &&
+								   Poco::Unicode::toLower(utf8.convert(haystack_pos)) ==
+								   Poco::Unicode::toLower(utf8.convert(needle_pos)))
+							{
+								/// @note assuming sequences for lowercase and uppercase have exact same length
+								const auto len = DB::UTF8::seqLength(*haystack_pos);
+								haystack_pos += len, needle_pos += len;
+							}
+
+							if (needle_pos == needle_end)
+								return haystack;
+						}
+					}
+					else if ((mask & cachemask) == cachemask)
+						return haystack;
+
+					/// first octet was ok, but not the first 16, move to start of next sequence and reapply
+					haystack += DB::UTF8::seqLength(*haystack);
+					continue;
+				}
+			}
+
+			if (haystack == haystack_end)
+				return haystack_end;
+
+			if (*haystack == l || *haystack == u) /// scalar path: fewer than 16 octets remain or the position is not page safe
+			{
+				auto haystack_pos = haystack + first_needle_symbol_is_ascii;
+				auto needle_pos = needle_begin + first_needle_symbol_is_ascii;
+
+				while (haystack_pos < haystack_end && needle_pos < needle_end &&
+					   Poco::Unicode::toLower(utf8.convert(haystack_pos)) ==
+					   Poco::Unicode::toLower(utf8.convert(needle_pos)))
+				{
+					const auto len = DB::UTF8::seqLength(*haystack_pos);
+					haystack_pos += len, needle_pos += len;
+				}
+
+				if (needle_pos == needle_end)
+					return haystack;
+			}
+
+			/// advance to the start of the next sequence
+			haystack += DB::UTF8::seqLength(*haystack);
+		}
+
+		return haystack_end;
+	}
+};
+
+/// Case-insensitive ASCII searcher
+template <> class StringSearcher<false, true>
+{
+	/// octets in one SSE register
+	static constexpr auto n = sizeof(__m128i);
+
+	/// queried once per searcher, used by page_safe()
+	const int page_size = getpagesize();
+
+	/// string to be searched for
+	const char * const needle;
+	const std::size_t needle_size;
+	/// lower and uppercase variants of the first character in `needle`
+	UInt8 l{};
+	UInt8 u{};
+	/// vectors filled with `l` and `u`, for determining leftmost position of the first symbol
+	__m128i patl, patu;
+	/// lower and uppercase vectors of first 16 characters of `needle`, zero-padded when the needle is shorter
+	__m128i cachel = _mm_setzero_si128(), cacheu = _mm_setzero_si128();
+	/// bit `i` is set when octet `i` of the cache vectors holds a needle character
+	int cachemask{};
+
+	/// true when the `n` octets starting at `ptr` lie within a single memory page
+	bool page_safe(const void * const ptr) const
+	{
+		return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
+	}
+
+public:
+	/// Precomputes the first-character patterns and the 16-octet prefix cache.
+	/// Only the pointer is stored, so `needle` has to outlive the searcher.
+	StringSearcher(const char * const needle, const std::size_t needle_size)
+		: needle{needle}, needle_size{needle_size}
+	{
+		if (0 == needle_size)
+			return;
+
+		/// read the needle as unsigned octets: std::tolower/std::toupper are undefined for negative arguments
+		auto needle_pos = reinterpret_cast<const UInt8 *>(needle);
+
+		l = std::tolower(*needle_pos);
+		u = std::toupper(*needle_pos);
+
+		patl = _mm_set1_epi8(l);
+		patu = _mm_set1_epi8(u);
+
+		const auto needle_end = needle_pos + needle_size;
+
+		for (const auto i : ext::range(0, n))
+		{
+			/// every step shifts the cache down by one lane, so needle octet `i` ends up in lane `i`
+			cachel = _mm_srli_si128(cachel, 1);
+			cacheu = _mm_srli_si128(cacheu, 1);
+
+			if (needle_pos != needle_end)
+			{
+				cachel = _mm_insert_epi8(cachel, std::tolower(*needle_pos), n - 1);
+				cacheu = _mm_insert_epi8(cacheu, std::toupper(*needle_pos), n - 1);
+				cachemask |= 1 << i;
+				++needle_pos;
+			}
+		}
+	}
+
+	/// Returns true when `needle` matches case-insensitively at `pos`.
+	/// No haystack end is passed in: the caller has to guarantee that `needle_size` octets are readable at `pos`.
+	bool compare(const UInt8 * pos) const
+	{
+		/// an empty needle matches everywhere, regardless of which path below would be taken
+		if (0 == needle_size)
+			return true;
+
+		/// compare as unsigned octets, see the note in the constructor
+		const auto needle_begin = reinterpret_cast<const UInt8 *>(needle);
+		const auto needle_end = needle_begin + needle_size;
+
+		if (page_safe(pos))
+		{
+			/// vector path: the first 16 octets are checked at once
+			const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
+			const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
+			const auto v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu);
+			const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
+			const auto mask = _mm_movemask_epi8(v_against_l_or_u);
+
+			if (0xffff == cachemask)
+			{
+				/// needle is at least 16 octets long, the rest is compared one octet at a time
+				if (mask == cachemask)
+				{
+					pos += n;
+					auto needle_pos = needle_begin + n;
+
+					while (needle_pos < needle_end && std::tolower(*pos) == std::tolower(*needle_pos))
+						++pos, ++needle_pos;
+
+					if (needle_pos == needle_end)
+						return true;
+				}
+			}
+			else if ((mask & cachemask) == cachemask)
+				return true;
+
+			return false;
+		}
+
+		/// scalar path, taken when a 16-octet load at `pos` would cross a page boundary
+		if (*pos == l || *pos == u)
+		{
+			++pos;
+			auto needle_pos = needle_begin + 1;
+
+			while (needle_pos < needle_end && std::tolower(*pos) == std::tolower(*needle_pos))
+				++pos, ++needle_pos;
+
+			if (needle_pos == needle_end)
+				return true;
+		}
+
+		return false;
+	}
+
+	/// Returns the position of the first case-insensitive match in [haystack, haystack_end),
+	/// or `haystack_end` when there is none. An empty needle matches at `haystack`.
+	const UInt8 * search(const UInt8 * haystack, const UInt8 * const haystack_end) const
+	{
+		if (0 == needle_size)
+			return haystack;
+
+		const auto needle_begin = reinterpret_cast<const UInt8 *>(needle);
+		const auto needle_end = needle_begin + needle_size;
+
+		while (haystack < haystack_end)
+		{
+			if (haystack + n <= haystack_end && page_safe(haystack))
+			{
+				/// look for the first character (either case) in the next 16 octets
+				const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
+				const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
+				const auto v_against_u = _mm_cmpeq_epi8(v_haystack, patu);
+				const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
+
+				const auto mask = _mm_movemask_epi8(v_against_l_or_u);
+
+				/// first character does not occur in this window
+				if (mask == 0)
+				{
+					haystack += n;
+					continue;
+				}
+
+				/// jump to the leftmost candidate
+				const auto offset = _bit_scan_forward(mask);
+				haystack += offset;
+
+				if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
+				{
+					/// compare 16 octets at the candidate with the cached needle prefix
+					const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
+					const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
+					const auto v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu);
+					const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
+					const auto mask = _mm_movemask_epi8(v_against_l_or_u);
+
+					if (0xffff == cachemask)
+					{
+						if (mask == cachemask)
+						{
+							auto haystack_pos = haystack + n;
+							auto needle_pos = needle_begin + n;
+
+							while (haystack_pos < haystack_end && needle_pos < needle_end &&
+								   std::tolower(*haystack_pos) == std::tolower(*needle_pos))
+								++haystack_pos, ++needle_pos;
+
+							if (needle_pos == needle_end)
+								return haystack;
+						}
+					}
+					else if ((mask & cachemask) == cachemask)
+						return haystack;
+
+					++haystack;
+					continue;
+				}
+			}
+
+			if (haystack == haystack_end)
+				return haystack_end;
+
+			/// scalar path: fewer than 16 octets remain or the position is not page safe
+			if (*haystack == l || *haystack == u)
+			{
+				auto haystack_pos = haystack + 1;
+				auto needle_pos = needle_begin + 1;
+
+				while (haystack_pos < haystack_end && needle_pos < needle_end &&
+					   std::tolower(*haystack_pos) == std::tolower(*needle_pos))
+					++haystack_pos, ++needle_pos;
+
+				if (needle_pos == needle_end)
+					return haystack;
+			}
+
+			++haystack;
+		}
+
+		return haystack_end;
+	}
+};
+
+/// Case-sensitive searcher (both ASCII and UTF-8)
+template <bool ASCII> class StringSearcher<true, ASCII>
+{
+	/// octets in one SSE register
+	static constexpr auto n = sizeof(__m128i);
+
+	/// queried once per searcher, used by page_safe()
+	const int page_size = getpagesize();
+
+	/// string to be searched for
+	const char * const needle;
+	const std::size_t needle_size;
+	/// first character in `needle`
+	UInt8 first{};
+	/// vector filled with `first`, for determining leftmost position of the first symbol
+	__m128i pattern;
+	/// vector of first 16 characters of `needle`, zero-padded when the needle is shorter
+	__m128i cache = _mm_setzero_si128();
+	/// bit `i` is set when octet `i` of `cache` holds a needle character
+	int cachemask{};
+
+	/// true when the `n` octets starting at `ptr` lie within a single memory page
+	bool page_safe(const void * const ptr) const
+	{
+		return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
+	}
+
+public:
+	/// Precomputes the first-character pattern and the 16-octet prefix cache.
+	/// Only the pointer is stored, so `needle` has to outlive the searcher.
+	StringSearcher(const char * const needle, const std::size_t needle_size)
+		: needle{needle}, needle_size{needle_size}
+	{
+		if (0 == needle_size)
+			return;
+
+		auto needle_pos = needle;
+
+		first = *needle_pos;
+
+		pattern = _mm_set1_epi8(first);
+
+		const auto needle_end = needle_pos + needle_size;
+
+		for (const auto i : ext::range(0, n))
+		{
+			/// every step shifts the cache down by one lane, so needle octet `i` ends up in lane `i`
+			cache = _mm_srli_si128(cache, 1);
+
+			if (needle_pos != needle_end)
+			{
+				cache = _mm_insert_epi8(cache, *needle_pos, n - 1);
+				cachemask |= 1 << i;
+				++needle_pos;
+			}
+		}
+	}
+
+	/// Returns true when `needle` occurs at `pos`.
+	/// No haystack end is passed in: the caller has to guarantee that `needle_size` octets are readable at `pos`.
+	bool compare(const UInt8 * pos) const
+	{
+		/// an empty needle matches everywhere, regardless of which path below would be taken
+		if (0 == needle_size)
+			return true;
+
+		/// Compare octets with matching signedness. The haystack is `UInt8` (presumably unsigned, it is declared
+		/// in DB/Core/Types.h); comparing it with plain `char` never matches octets >= 0x80 where `char` is signed.
+		const auto needle_begin = reinterpret_cast<const UInt8 *>(needle);
+		const auto needle_end = needle_begin + needle_size;
+
+		if (page_safe(pos))
+		{
+			/// vector path: the first 16 octets are checked at once
+			const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
+			const auto v_against_cache = _mm_cmpeq_epi8(v_haystack, cache);
+			const auto mask = _mm_movemask_epi8(v_against_cache);
+
+			if (0xffff == cachemask)
+			{
+				/// needle is at least 16 octets long, the rest is compared one octet at a time
+				if (mask == cachemask)
+				{
+					pos += n;
+					auto needle_pos = needle_begin + n;
+
+					while (needle_pos < needle_end && *pos == *needle_pos)
+						++pos, ++needle_pos;
+
+					if (needle_pos == needle_end)
+						return true;
+				}
+			}
+			else if ((mask & cachemask) == cachemask)
+				return true;
+
+			return false;
+		}
+
+		/// scalar path, taken when a 16-octet load at `pos` would cross a page boundary
+		if (*pos == first)
+		{
+			++pos;
+			auto needle_pos = needle_begin + 1;
+
+			while (needle_pos < needle_end && *pos == *needle_pos)
+				++pos, ++needle_pos;
+
+			if (needle_pos == needle_end)
+				return true;
+		}
+
+		return false;
+	}
+
+	/// Returns the position of the first match in [haystack, haystack_end),
+	/// or `haystack_end` when there is none. An empty needle matches at `haystack`.
+	const UInt8 * search(const UInt8 * haystack, const UInt8 * const haystack_end) const
+	{
+		if (0 == needle_size)
+			return haystack;
+
+		const auto needle_begin = reinterpret_cast<const UInt8 *>(needle);
+		const auto needle_end = needle_begin + needle_size;
+
+		while (haystack < haystack_end)
+		{
+			if (haystack + n <= haystack_end && page_safe(haystack))
+			{
+				/// find first character
+				const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
+				const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack, pattern);
+
+				const auto mask = _mm_movemask_epi8(v_against_pattern);
+
+				/// first character not present in 16 octets starting at `haystack`
+				if (mask == 0)
+				{
+					haystack += n;
+					continue;
+				}
+
+				/// jump to the leftmost candidate
+				const auto offset = _bit_scan_forward(mask);
+				haystack += offset;
+
+				if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
+				{
+					/// check for first 16 octets
+					const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
+					const auto v_against_cache = _mm_cmpeq_epi8(v_haystack, cache);
+					const auto mask = _mm_movemask_epi8(v_against_cache);
+
+					if (0xffff == cachemask)
+					{
+						if (mask == cachemask)
+						{
+							auto haystack_pos = haystack + n;
+							auto needle_pos = needle_begin + n;
+
+							while (haystack_pos < haystack_end && needle_pos < needle_end &&
+								   *haystack_pos == *needle_pos)
+								++haystack_pos, ++needle_pos;
+
+							if (needle_pos == needle_end)
+								return haystack;
+						}
+					}
+					else if ((mask & cachemask) == cachemask)
+						return haystack;
+
+					++haystack;
+					continue;
+				}
+			}
+
+			if (haystack == haystack_end)
+				return haystack_end;
+
+			/// scalar path: fewer than 16 octets remain or the position is not page safe
+			if (*haystack == first)
+			{
+				auto haystack_pos = haystack + 1;
+				auto needle_pos = needle_begin + 1;
+
+				while (haystack_pos < haystack_end && needle_pos < needle_end &&
+					   *haystack_pos == *needle_pos)
+					++haystack_pos, ++needle_pos;
+
+				if (needle_pos == needle_end)
+					return haystack;
+			}
+
+			++haystack;
+		}
+
+		return haystack_end;
+	}
+};
+
+
+using ASCIICaseSensitiveStringSearcher = StringSearcher<true, true>; /// exact octet match; served by the same partial specialization as the UTF-8 case-sensitive alias
+using ASCIICaseInsensitiveStringSearcher = StringSearcher<false, true>; /// octets compared through std::tolower
+using UTF8CaseSensitiveStringSearcher = StringSearcher<true, false>; /// exact octet match
+using UTF8CaseInsensitiveStringSearcher = StringSearcher<false, false>; /// code points compared through Poco::Unicode::toLower
+
+
+}
--- a/dbms/include/DB/Common/UTF8Helpers.h
+++ b/dbms/include/DB/Common/UTF8Helpers.h
@ -0,0 +1,54 @@
+#pragma once
+
+#include <DB/Core/Types.h>
+#include <x86intrin.h>
+
+
+namespace DB
+{
+
+
+namespace UTF8
+{
+
+
+static const UInt8 CONTINUATION_OCTET_MASK = 0b11000000u; /// selects the two most significant bits of an octet
+static const UInt8 CONTINUATION_OCTET = 0b10000000u; /// value of those two bits in a continuation octet (10xxxxxx)
+
+/// tells whether `octet` has the form 10xxxxxx, i.e. continues a UTF-8 sequence started by an earlier octet
+static bool isContinuationOctet(const UInt8 octet)
+{
+	const auto leading_bits = octet & CONTINUATION_OCTET_MASK;
+	return CONTINUATION_OCTET == leading_bits;
+}
+
+/// moves `s` backward until it points at a non-continuation octet; no lower bound is checked
+static void syncBackward(const UInt8 * & s)
+{
+	const UInt8 * cursor = s;
+
+	while (isContinuationOctet(*cursor))
+		--cursor;
+
+	s = cursor;
+}
+
+/// moves `s` forward until either first non-continuation octet or string end is met;
+/// when `end` is omitted (null) no upper bound is applied, mirroring syncBackward, instead of silently doing nothing:
+/// the caller then has to guarantee that a non-continuation octet follows
+static void syncForward(const UInt8 * & s, const UInt8 * const end = nullptr)
+{
+	while ((nullptr == end || s < end) && isContinuationOctet(*s))
+		++s;
+}
+
+/// returns UTF-8 code point sequence length judging by its first octet;
+/// continuation octets and 0xFF yield 1, so the result is never zero and callers always advance
+static std::size_t seqLength(const UInt8 first_octet)
+{
+	/// ASCII; 0xFF is handled here as well: it has no zero bit, and `_bit_scan_reverse(0)` is undefined
+	if (first_octet < 0x80u || first_octet == 0xFFu)
+		return 1;
+
+	const std::size_t bits = 8;
+	/// index of the most significant zero bit, i.e. 7 minus the number of leading one bits
+	const auto first_zero = _bit_scan_reverse(static_cast<UInt8>(~first_octet));
+
+	return bits - 1 - first_zero;
+}
+
+
+}
+
+
+}
--- a/dbms/include/DB/Common/Volnitsky.h
+++ b/dbms/include/DB/Common/Volnitsky.h
@ -1,5 +1,6 @@
 #pragma once

+#include <DB/Common/StringSearcher.h>
 #include <statdaemons/ext/range.hpp>
 #include <Poco/UTF8Encoding.h>
 #include <Poco/Unicode.h>
@ -132,11 +133,13 @@ protected:
 };


-/// Primary template for case sensitive comparison
-template <bool CaseSensitive, bool ASCII> struct VolnitskyImpl : VolnitskyBase<VolnitskyImpl<CaseSensitive, ASCII>>
+template <bool CaseSensitive, bool ASCII> struct VolnitskyImpl;
+
+/// Case sensitive comparison
+template <bool ASCII> struct VolnitskyImpl<true, ASCII> : VolnitskyBase<VolnitskyImpl<true, ASCII>>
 {
 	VolnitskyImpl(const char * const needle, const size_t needle_size, const size_t haystack_size_hint = 0)
-		: VolnitskyBase<VolnitskyImpl<CaseSensitive, ASCII>>{needle, needle_size, haystack_size_hint},
+		: VolnitskyBase<VolnitskyImpl<true, ASCII>>{needle, needle_size, haystack_size_hint},
 		  fallback_searcher{needle, needle_size}
 	{
 	}
@ -152,189 +155,12 @@ template <bool CaseSensitive, bool ASCII> struct VolnitskyImpl : VolnitskyBase<V
 		return fallback_searcher.compare(pos);
 	}

-	class Searcher
-	{
-		static constexpr auto n = sizeof(__m128i);
-
-		const int page_size = getpagesize();
-
-		/// string to be searched for
-		const char * const needle;
-		const std::size_t needle_size;
-		/// first character in `needle`
-		UInt8 first{};
-		/// vector filled `first` for determining leftmost position of the first symbol
-		__m128i pattern;
-		/// vector of first 16 characters of `needle`
-		__m128i cache = _mm_setzero_si128();
-		int cachemask{};
-
-		bool page_safe(const void * const ptr) const
-		{
-			return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
-		}
-
-	public:
-		Searcher(const char * const needle, const std::size_t needle_size)
-			: needle{needle}, needle_size{needle_size}
-		{
-			if (0 == needle_size)
-				return;
-
-			auto needle_pos = needle;
-
-			first = *needle_pos;
-
-			pattern = _mm_set1_epi8(first);
-
-			const auto needle_end = needle_pos + needle_size;
-
-			for (const auto i : ext::range(0, n))
-			{
-				cache = _mm_srli_si128(cache, 1);
-
-				if (needle_pos != needle_end)
-				{
-					cache = _mm_insert_epi8(cache, *needle_pos, n - 1);
-					cachemask |= 1 << i;
-					++needle_pos;
-				}
-			}
-		}
-
-		bool compare(const UInt8 * pos) const
-		{
-			if (page_safe(pos))
-			{
-				const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
-				const auto v_against_cache = _mm_cmpeq_epi8(v_haystack, cache);
-				const auto mask = _mm_movemask_epi8(v_against_cache);
-
-				if (0xffff == cachemask)
-				{
-					if (mask == cachemask)
-					{
-						pos += n;
-						auto needle_pos = needle + n;
-						const auto needle_end = needle + needle_size;
-
-						while (needle_pos < needle_end && *pos == *needle_pos)
-							++pos, ++needle_pos;
-
-						if (needle_pos == needle_end)
-							return true;
-					}
-				}
-				else if ((mask & cachemask) == cachemask)
-					return true;
-
-				return false;
-			}
-
-			if (*pos == first)
-			{
-				++pos;
-				auto needle_pos = needle + 1;
-				const auto needle_end = needle + needle_size;
-
-				while (needle_pos < needle_end && *pos == *needle_pos)
-					++pos, ++needle_pos;
-
-				if (needle_pos == needle_end)
-					return true;
-			}
-
-			return false;
-		}
-
-		const UInt8 * find(const UInt8 * haystack, const UInt8 * const haystack_end) const
-		{
-			if (0 == needle_size)
-				return haystack;
-
-			const auto needle_begin = reinterpret_cast<const UInt8 *>(needle);
-			const auto needle_end = needle_begin + needle_size;
-
-			while (haystack < haystack_end)
-			{
-				/// @todo supposedly for long strings spanning across multiple pages. Why don't we use this technique in other places?
-				if (haystack + n <= haystack_end && page_safe(haystack))
-				{
-					/// find first character
-					const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
-					const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack, pattern);
-
-					const auto mask = _mm_movemask_epi8(v_against_pattern);
-
-					/// first character not present in 16 octets starting at `haystack`
-					if (mask == 0)
-					{
-						haystack += n;
-						continue;
-					}
-
-					const auto offset = _bit_scan_forward(mask);
-					haystack += offset;
-
-					if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
-					{
-						/// check for first 16 octets
-						const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
-						const auto v_against_cache = _mm_cmpeq_epi8(v_haystack, cache);
-						const auto mask = _mm_movemask_epi8(v_against_cache);
-
-						if (0xffff == cachemask)
-						{
-							if (mask == cachemask)
-							{
-								auto haystack_pos = haystack + n;
-								auto needle_pos = needle_begin + n;
-
-								while (haystack_pos < haystack_end && needle_pos < needle_end &&
-									   *haystack_pos == *needle_pos)
-									++haystack_pos, ++needle_pos;
-
-								if (needle_pos == needle_end)
-									return haystack;
-							}
-						}
-						else if ((mask & cachemask) == cachemask)
-							return haystack;
-
-						++haystack;
-						continue;
-					}
-				}
-
-				if (haystack == haystack_end)
-					return haystack_end;
-
-				if (*haystack == first)
-				{
-					auto haystack_pos = haystack + 1;
-					auto needle_pos = needle_begin + 1;
-
-					while (haystack_pos < haystack_end && needle_pos < needle_end &&
-						   *haystack_pos == *needle_pos)
-						++haystack_pos, ++needle_pos;
-
-					if (needle_pos == needle_end)
-						return haystack;
-				}
-
-				++haystack;
-			}
-
-			return haystack_end;
-		}
-	};
-
-	Searcher fallback_searcher;
-
 	const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
 	{
-		return fallback_searcher.find(haystack, haystack_end);
+		return fallback_searcher.search(haystack, haystack_end);
 	}
+
+	DB::ASCIICaseSensitiveStringSearcher fallback_searcher;
 };

 /// Case-insensitive ASCII
@ -400,197 +226,12 @@ template <> struct VolnitskyImpl<false, true> : VolnitskyBase<VolnitskyImpl<fals
 		return fallback_searcher.compare(pos);
 	}

-	class Searcher
-	{
-		static constexpr auto n = sizeof(__m128i);
-
-		const int page_size = getpagesize();
-
-		/// string to be searched for
-		const char * const needle;
-		const std::size_t needle_size;
-		/// lower and uppercase variants of the first character in `needle`
-		UInt8 l{};
-		UInt8 u{};
-		/// vectors filled with `l` and `u`, for determining leftmost position of the first symbol
-		__m128i patl, patu;
-		/// lower and uppercase vectors of first 16 characters of `needle`
-		__m128i cachel = _mm_setzero_si128(), cacheu = _mm_setzero_si128();
-		int cachemask{};
-
-		bool page_safe(const void * const ptr) const
-		{
-			return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
-		}
-
-	public:
-		Searcher(const char * const needle, const std::size_t needle_size)
-		: needle{needle}, needle_size{needle_size}
-		{
-			if (0 == needle_size)
-				return;
-
-			auto needle_pos = needle;
-
-			l = std::tolower(*needle_pos);
-			u = std::toupper(*needle_pos);
-
-			patl = _mm_set1_epi8(l);
-			patu = _mm_set1_epi8(u);
-
-			const auto needle_end = needle_pos + needle_size;
-
-			for (const auto i : ext::range(0, n))
-			{
-				cachel = _mm_srli_si128(cachel, 1);
-				cacheu = _mm_srli_si128(cacheu, 1);
-
-				if (needle_pos != needle_end)
-				{
-					cachel = _mm_insert_epi8(cachel, std::tolower(*needle_pos), n - 1);
-					cacheu = _mm_insert_epi8(cacheu, std::toupper(*needle_pos), n - 1);
-					cachemask |= 1 << i;
-					++needle_pos;
-				}
-			}
-		}
-
-		bool compare(const UInt8 * pos) const
-		{
-			if (page_safe(pos))
-			{
-				const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
-				const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
-				const auto v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu);
-				const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
-				const auto mask = _mm_movemask_epi8(v_against_l_or_u);
-
-				if (0xffff == cachemask)
-				{
-					if (mask == cachemask)
-					{
-						pos += n;
-						auto needle_pos = needle + n;
-						const auto needle_end = needle + needle_size;
-
-						while (needle_pos < needle_end && std::tolower(*pos) == std::tolower(*needle_pos))
-							++pos, ++needle_pos;
-
-						if (needle_pos == needle_end)
-							return true;
-					}
-				}
-				else if ((mask & cachemask) == cachemask)
-					return true;
-
-				return false;
-			}
-
-			if (*pos == l || *pos == u)
-			{
-				++pos;
-				auto needle_pos = needle + 1;
-				const auto needle_end = needle + needle_size;
-
-				while (needle_pos < needle_end && std::tolower(*pos) == std::tolower(*needle_pos))
-					++pos, ++needle_pos;
-
-				if (needle_pos == needle_end)
-					return true;
-			}
-
-			return false;
-		}
-
-		const UInt8 * find(const UInt8 * haystack, const UInt8 * const haystack_end) const
-		{
-			if (0 == needle_size)
-				return haystack;
-
-			const auto needle_begin = reinterpret_cast<const UInt8 *>(needle);
-			const auto needle_end = needle_begin + needle_size;
-
-			while (haystack < haystack_end)
-			{
-				/// @todo supposedly for long strings spanning across multiple pages. Why don't we use this technique in other places?
-				if (haystack + n <= haystack_end && page_safe(haystack))
-				{
-					const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
-					const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
-					const auto v_against_u = _mm_cmpeq_epi8(v_haystack, patu);
-					const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
-
-					const auto mask = _mm_movemask_epi8(v_against_l_or_u);
-
-					if (mask == 0)
-					{
-						haystack += n;
-						continue;
-					}
-
-					const auto offset = _bit_scan_forward(mask);
-					haystack += offset;
-
-					if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
-					{
-						const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
-						const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
-						const auto v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu);
-						const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
-						const auto mask = _mm_movemask_epi8(v_against_l_or_u);
-
-						if (0xffff == cachemask)
-						{
-							if (mask == cachemask)
-							{
-								auto haystack_pos = haystack + n;
-								auto needle_pos = needle_begin + n;
-
-								while (haystack_pos < haystack_end && needle_pos < needle_end &&
-									   std::tolower(*haystack_pos) == std::tolower(*needle_pos))
-									++haystack_pos, ++needle_pos;
-
-								if (needle_pos == needle_end)
-									return haystack;
-							}
-						}
-						else if ((mask & cachemask) == cachemask)
-							return haystack;
-
-						++haystack;
-						continue;
-					}
-				}
-
-				if (haystack == haystack_end)
-					return haystack_end;
-
-				if (*haystack == l || *haystack == u)
-				{
-					auto haystack_pos = haystack + 1;
-					auto needle_pos = needle_begin + 1;
-
-					while (haystack_pos < haystack_end && needle_pos < needle_end &&
-						   std::tolower(*haystack_pos) == std::tolower(*needle_pos))
-						++haystack_pos, ++needle_pos;
-
-					if (needle_pos == needle_end)
-						return haystack;
-				}
-
-				++haystack;
-			}
-
-			return haystack_end;
-		}
-	};
-
-	Searcher fallback_searcher;
-
 	const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
 	{
-		return fallback_searcher.find(haystack, haystack_end);
+		return fallback_searcher.search(haystack, haystack_end);
 	}
+
+	DB::ASCIICaseInsensitiveStringSearcher fallback_searcher;
 };

 /// Case-sensitive UTF-8
@ -660,11 +301,11 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal

 			static const Poco::UTF8Encoding utf8;

-			if (utf8_is_continuation_octet(c[1]))
+			if (DB::UTF8::isContinuationOctet(c[1]))
 			{
 				/// ngram is inside a sequence
 				auto seq_pos = pos;
-				utf8_sync_backward(seq_pos);
+				DB::UTF8::syncBackward(seq_pos);

 				const auto u32 = utf8.convert(seq_pos);
 				const auto l_u32 = Poco::Unicode::toLower(u32);
@ -698,7 +339,7 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
 				/// ngram is on the boundary of two sequences
 				/// first sequence may start before u_pos if it is not ASCII
 				auto first_seq_pos = pos;
-				utf8_sync_backward(first_seq_pos);
+				DB::UTF8::syncBackward(first_seq_pos);

 				const auto first_u32 = utf8.convert(first_seq_pos);
 				const auto first_l_u32 = Poco::Unicode::toLower(first_u32);
@ -784,321 +425,17 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
 		}
 	}

-	static const UInt8 utf8_continuation_octet_mask = 0b11000000u;
-	static const UInt8 utf8_continuation_octet = 0b10000000u;
-
-	/// return true if `octet` binary repr starts with 10 (octet is a UTF-8 sequence continuation)
-	static bool utf8_is_continuation_octet(const UInt8 octet)
-	{
-		return (octet & utf8_continuation_octet_mask) == utf8_continuation_octet;
-	}
-
-	/// moves `s` backward until either first non-continuation octet
-	static void utf8_sync_backward(const UInt8 * & s)
-	{
-		while (utf8_is_continuation_octet(*s))
-			--s;
-	}
-
-	/// moves `s` forward until either first non-continuation octet or string end is met
-	static void utf8_sync_forward(const UInt8 * & s, const UInt8 * const end = nullptr)
-	{
-		while (s < end && utf8_is_continuation_octet(*s))
-			++s;
-	}
-
-	/// returns UTF-8 code point sequence length judging by it's first octet
-	static std::size_t utf8_seq_length(const UInt8 first_octet)
-	{
-		if (first_octet < 0x80u)
-			return 1;
-
-		const std::size_t bits = 8;
-		const auto first_zero = _bit_scan_reverse(static_cast<UInt8>(~first_octet));
-
-		return bits - 1 - first_zero;
-	}
-
 	bool compare(const UInt8 * const pos) const
 	{
 		return fallback_searcher.compare(pos);
 	}

-	class Searcher
-	{
-		using UTF8SequenceBuffer = UInt8[6];
-
-		static constexpr auto n = sizeof(__m128i);
-
-		const int page_size = getpagesize();
-
-		/// string to be searched for
-		const char * const needle;
-		const std::size_t needle_size;
-		bool first_needle_symbol_is_ascii{};
-		/// lower and uppercase variants of the first octet of the first character in `needle`
-		UInt8 l{};
-		UInt8 u{};
-		/// vectors filled with `l` and `u`, for determining leftmost position of the first symbol
-		__m128i patl, patu;
-		/// lower and uppercase vectors of first 16 characters of `needle`
-		__m128i cachel = _mm_setzero_si128(), cacheu = _mm_setzero_si128();
-		int cachemask{};
-		std::size_t cache_valid_len{};
-		std::size_t cache_actual_len{};
-
-		bool page_safe(const void * const ptr) const
-		{
-			return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
-		}
-
-	public:
-		Searcher(const char * const needle, const std::size_t needle_size)
-		: needle{needle}, needle_size{needle_size}
-		{
-			if (0 == needle_size)
-				return;
-
-			static const Poco::UTF8Encoding utf8;
-			UTF8SequenceBuffer l_seq, u_seq;
-
-			auto needle_pos = reinterpret_cast<const UInt8 *>(needle);
-			if (*needle_pos < 0x80u)
-			{
-				first_needle_symbol_is_ascii = true;
-				l = std::tolower(*needle_pos);
-				u = std::toupper(*needle_pos);
-			}
-			else
-			{
-				const auto first_u32 = utf8.convert(needle_pos);
-				const auto first_l_u32 = Poco::Unicode::toLower(first_u32);
-				const auto first_u_u32 = Poco::Unicode::toUpper(first_u32);
-
-				/// lower and uppercase variants of the first octet of the first character in `needle`
-				utf8.convert(first_l_u32, l_seq, sizeof(l_seq));
-				l = l_seq[0];
-				utf8.convert(first_u_u32, u_seq, sizeof(u_seq));
-				u = u_seq[0];
-			}
-
-			/// for detecting leftmost position of the first symbol
-			patl = _mm_set1_epi8(l);
-			patu = _mm_set1_epi8(u);
-			/// lower and uppercase vectors of first 16 octets of `needle`
-
-			const auto needle_end = needle_pos + needle_size;
-
-			for (std::size_t i = 0; i < n;)
-			{
-				if (needle_pos == needle_end)
-				{
-					cachel = _mm_srli_si128(cachel, 1);
-					cacheu = _mm_srli_si128(cacheu, 1);
-					++i;
-
-					continue;
-				}
-
-				const auto src_len = utf8_seq_length(*needle_pos);
-				const auto c_u32 = utf8.convert(needle_pos);
-
-				const auto c_l_u32 = Poco::Unicode::toLower(c_u32);
-				const auto c_u_u32 = Poco::Unicode::toUpper(c_u32);
-
-				const auto dst_l_len = static_cast<UInt8>(utf8.convert(c_l_u32, l_seq, sizeof(l_seq)));
-				const auto dst_u_len = static_cast<UInt8>(utf8.convert(c_u_u32, u_seq, sizeof(u_seq)));
-
-				/// @note Unicode standard states it is a rare but possible occasion
-				if (!(dst_l_len == dst_u_len && dst_u_len == src_len))
-					throw DB::Exception{
-						"UTF8 sequences with different lowercase and uppercase lengths are not supported",
-						DB::ErrorCodes::UNSUPPORTED_PARAMETER
-					};
-
-				cache_actual_len += src_len;
-				if (cache_actual_len < n)
-					cache_valid_len += src_len;
-
-				for (std::size_t j = 0; j < src_len && i < n; ++j, ++i)
-				{
-					cachel = _mm_srli_si128(cachel, 1);
-					cacheu = _mm_srli_si128(cacheu, 1);
-
-					if (needle_pos != needle_end)
-					{
-						cachel = _mm_insert_epi8(cachel, l_seq[j], n - 1);
-						cacheu = _mm_insert_epi8(cacheu, u_seq[j], n - 1);
-
-						cachemask |= 1 << i;
-						++needle_pos;
-					}
-				}
-			}
-		}
-
-		bool compare(const UInt8 * pos) const
-		{
-			static const Poco::UTF8Encoding utf8;
-
-			if (page_safe(pos))
-			{
-				const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
-				const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
-				const auto v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu);
-				const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
-				const auto mask = _mm_movemask_epi8(v_against_l_or_u);
-
-				if (0xffff == cachemask)
-				{
-					if (mask == cachemask)
-					{
-						pos += cache_valid_len;
-						auto needle_pos = needle + cache_valid_len;
-						const auto needle_end = needle + needle_size;
-
-						while (needle_pos < needle_end &&
-							   Poco::Unicode::toLower(utf8.convert(pos)) ==
-							   Poco::Unicode::toLower(utf8.convert(reinterpret_cast<const UInt8 *>(needle_pos))))
-						{
-							/// @note assuming sequences for lowercase and uppercase have exact same length
-							const auto len = utf8_seq_length(*pos);
-							pos += len, needle_pos += len;
-						}
-
-						if (needle_pos == needle_end)
-							return true;
-					}
-				}
-				else if ((mask & cachemask) == cachemask)
-					return true;
-
-				return false;
-			}
-
-			if (*pos == l || *pos == u)
-			{
-				pos += first_needle_symbol_is_ascii;
-				auto needle_pos = needle + first_needle_symbol_is_ascii;
-				const auto needle_end = needle + needle_size;
-
-				while (needle_pos < needle_end &&
-					   Poco::Unicode::toLower(utf8.convert(pos)) ==
-					   Poco::Unicode::toLower(utf8.convert(reinterpret_cast<const UInt8 *>(needle_pos))))
-				{
-					const auto len = utf8_seq_length(*pos);
-					pos += len, needle_pos += len;
-				}
-
-				if (needle_pos == needle_end)
-					return true;
-			}
-
-			return false;
-		}
-
-		const UInt8 * find(const UInt8 * haystack, const UInt8 * const haystack_end) const
-		{
-			if (0 == needle_size)
-				return haystack;
-
-			static const Poco::UTF8Encoding utf8;
-
-			const auto needle_begin = reinterpret_cast<const UInt8 *>(needle);
-			const auto needle_end = needle_begin + needle_size;
-
-			while (haystack < haystack_end)
-			{
-				if (haystack + n <= haystack_end && page_safe(haystack))
-				{
-					const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
-					const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
-					const auto v_against_u = _mm_cmpeq_epi8(v_haystack, patu);
-					const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
-
-					const auto mask = _mm_movemask_epi8(v_against_l_or_u);
-
-					if (mask == 0)
-					{
-						haystack += n;
-						utf8_sync_forward(haystack, haystack_end);
-						continue;
-					}
-
-					const auto offset = _bit_scan_forward(mask);
-					haystack += offset;
-
-					if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
-					{
-						const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
-						const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
-						const auto v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu);
-						const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
-						const auto mask = _mm_movemask_epi8(v_against_l_or_u);
-
-						if (0xffff == cachemask)
-						{
-							if (mask == cachemask)
-							{
-								auto haystack_pos = haystack + cache_valid_len;
-								auto needle_pos = needle_begin + cache_valid_len;
-
-								while (haystack_pos < haystack_end && needle_pos < needle_end &&
-									   Poco::Unicode::toLower(utf8.convert(haystack_pos)) ==
-									   Poco::Unicode::toLower(utf8.convert(needle_pos)))
-								{
-									/// @note assuming sequences for lowercase and uppercase have exact same length
-									const auto len = utf8_seq_length(*haystack_pos);
-									haystack_pos += len, needle_pos += len;
-								}
-
-								if (needle_pos == needle_end)
-									return haystack;
-							}
-						}
-						else if ((mask & cachemask) == cachemask)
-							return haystack;
-
-						/// first octet was ok, but not the first 16, move to start of next sequence and reapply
-						haystack += utf8_seq_length(*haystack);
-						continue;
-					}
-				}
-
-				if (haystack == haystack_end)
-					return haystack_end;
-
-				if (*haystack == l || *haystack == u)
-				{
-					auto haystack_pos = haystack + first_needle_symbol_is_ascii;
-					auto needle_pos = needle_begin + first_needle_symbol_is_ascii;
-
-					while (haystack_pos < haystack_end && needle_pos < needle_end &&
-						   Poco::Unicode::toLower(utf8.convert(haystack_pos)) ==
-						   Poco::Unicode::toLower(utf8.convert(needle_pos)))
-					{
-						const auto len = utf8_seq_length(*haystack_pos);
-						haystack_pos += len, needle_pos += len;
-					}
-
-					if (needle_pos == needle_end)
-						return haystack;
-				}
-
-				/// advance to the start of the next sequence
-				haystack += utf8_seq_length(*haystack);
-			}
-
-			return haystack_end;
-		}
-	};
-
-	Searcher fallback_searcher;
-
 	const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
 	{
-		return fallback_searcher.find(haystack, haystack_end);
+		return fallback_searcher.search(haystack, haystack_end);
 	}
+
+	DB::UTF8CaseInsensitiveStringSearcher fallback_searcher;
 };


--- a/dbms/include/DB/Functions/FunctionsStringSearch.h
+++ b/dbms/include/DB/Functions/FunctionsStringSearch.h
@ -13,6 +13,7 @@
 #include <DB/Columns/ColumnConst.h>
 #include <DB/Common/Volnitsky.h>
 #include <DB/Functions/IFunction.h>
+#include <DB/Common/StringSearcher.h>
 #include <re2/re2.h>
 #include <re2/stringpiece.h>
 #include <Poco/UTF8Encoding.h>
@ -52,7 +53,7 @@ namespace DB
  */


-template <bool CaseSensitive, bool EnforceSSE = false>
+template <bool CaseSensitive>
 struct PositionImpl
 {
 	typedef UInt64 ResultType;
@ -70,7 +71,7 @@ struct PositionImpl
 		/// Текущий индекс в массиве строк.
 		size_t i = 0;

-		VolnitskyImpl<CaseSensitive, true> searcher(needle.data(), needle.size(), EnforceSSE ? 1 : end - pos);
+		VolnitskyImpl<CaseSensitive, true> searcher(needle.data(), needle.size(), end - pos);

 		/// Искать будем следующее вхождение сразу во всех строках.
 		while (pos < end && end != (pos = searcher.search(pos, end - pos)))
@ -112,44 +113,7 @@ struct PositionImpl
 };


-namespace
-{
-
-
-const UInt8 utf8_continuation_octet_mask = 0b11000000u;
-const UInt8 utf8_continuation_octet = 0b10000000u;
-
-
-/// return true if `octet` binary repr starts with 10 (octet is a UTF-8 sequence continuation)
-bool utf8_is_continuation_octet(const UInt8 octet)
-{
-	return (octet & utf8_continuation_octet_mask) == utf8_continuation_octet;
-}
-
-/// moves `s` forward until either first non-continuation octet or string end is met
-void utf8_sync_forward(const UInt8 * & s, const UInt8 * const end = nullptr)
-{
-	while (s < end && utf8_is_continuation_octet(*s))
-		++s;
-}
-
-/// returns UTF-8 code point sequence length judging by it's first octet
-std::size_t utf8_seq_length(const UInt8 first_octet)
-{
-	if (first_octet < 0x80u)
-		return 1;
-
-	const std::size_t bits = 8;
-	const auto first_zero = _bit_scan_reverse(static_cast<UInt8>(~first_octet));
-
-	return bits - 1 - first_zero;
-}
-
-
-}
-
-
-template <bool CaseSensitive, bool EnforceSSE = false>
+template <bool CaseSensitive>
 struct PositionUTF8Impl
 {
 	typedef UInt64 ResultType;
@ -165,7 +129,7 @@ struct PositionUTF8Impl
 		/// Текущий индекс в массиве строк.
 		size_t i = 0;

-		VolnitskyImpl<CaseSensitive, false> searcher(needle.data(), needle.size(), EnforceSSE ? 1 : end - pos);
+		VolnitskyImpl<CaseSensitive, false> searcher(needle.data(), needle.size(), end - pos);

 		/// Искать будем следующее вхождение сразу во всех строках.
 		while (pos < end && end != (pos = searcher.search(pos, end - pos)))
@ -183,7 +147,7 @@ struct PositionUTF8Impl
 				/// А теперь надо найти, сколько кодовых точек находится перед pos.
 				res[i] = 1;
 				for (const UInt8 * c = begin + (i != 0 ? offsets[i - 1] : 0); c < pos; ++c)
-					if (!utf8_is_continuation_octet(*c))
+					if (!UTF8::isContinuationOctet(*c))
 						++res[i];
 			}
 			else
@ -225,7 +189,7 @@ struct PositionUTF8Impl
 			/// А теперь надо найти, сколько кодовых точек находится перед pos.
 			res = 1;
 			for (const auto i : ext::range(0, pos))
-				if (!utf8_is_continuation_octet(static_cast<UInt8>(data[i])))
+				if (!UTF8::isContinuationOctet(static_cast<UInt8>(data[i])))
 					++res;
 		}
 		else
@ -236,143 +200,6 @@ struct PositionUTF8Impl

 struct PositionCaseInsensitiveImpl
 {
-private:
-	class CaseInsensitiveSearcher
-	{
-		static constexpr auto n = sizeof(__m128i);
-
-		const int page_size = getpagesize();
-
-		/// string to be searched for
-		const std::string & needle;
-		/// lower and uppercase variants of the first character in `needle`
-		UInt8 l{};
-		UInt8 u{};
-		/// vectors filled with `l` and `u`, for determining leftmost position of the first symbol
-		__m128i patl, patu;
-		/// lower and uppercase vectors of first 16 characters of `needle`
-		__m128i cachel = _mm_setzero_si128(), cacheu = _mm_setzero_si128();
-		int cachemask{};
-
-		bool page_safe(const void * const ptr) const
-		{
-			return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
-		}
-
-	public:
-		CaseInsensitiveSearcher(const std::string & needle) : needle(needle)
-		{
-			if (needle.empty())
-				return;
-
-			auto needle_pos = needle.data();
-
-			l = std::tolower(*needle_pos);
-			u = std::toupper(*needle_pos);
-
-			patl = _mm_set1_epi8(l);
-			patu = _mm_set1_epi8(u);
-
-			const auto needle_end = needle_pos + needle.size();
-
-			for (const auto i : ext::range(0, n))
-			{
-				cachel = _mm_srli_si128(cachel, 1);
-				cacheu = _mm_srli_si128(cacheu, 1);
-
-				if (needle_pos != needle_end)
-				{
-					cachel = _mm_insert_epi8(cachel, std::tolower(*needle_pos), n - 1);
-					cacheu = _mm_insert_epi8(cacheu, std::toupper(*needle_pos), n - 1);
-					cachemask |= 1 << i;
-					++needle_pos;
-				}
-			}
-		}
-
-		const UInt8 * find(const UInt8 * haystack, const UInt8 * const haystack_end) const
-		{
-			if (needle.empty())
-				return haystack;
-
-			const auto needle_begin = reinterpret_cast<const UInt8 *>(needle.data());
-			const auto needle_end = needle_begin + needle.size();
-
-			while (haystack < haystack_end)
-			{
-				/// @todo supposedly for long strings spanning across multiple pages. Why don't we use this technique in other places?
-				if (haystack + n <= haystack_end && page_safe(haystack))
-				{
-					const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
-					const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
-					const auto v_against_u = _mm_cmpeq_epi8(v_haystack, patu);
-					const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
-
-					const auto mask = _mm_movemask_epi8(v_against_l_or_u);
-
-					if (mask == 0)
-					{
-						haystack += n;
-						continue;
-					}
-
-					const auto offset = _bit_scan_forward(mask);
-					haystack += offset;
-
-					if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
-					{
-						const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
-						const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
-						const auto v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu);
-						const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
-						const auto mask = _mm_movemask_epi8(v_against_l_or_u);
-
-						if (0xffff == cachemask)
-						{
-							if (mask == cachemask)
-							{
-								auto haystack_pos = haystack + n;
-								auto needle_pos = needle_begin + n;
-
-								while (haystack_pos < haystack_end && needle_pos < needle_end &&
-									   std::tolower(*haystack_pos) == std::tolower(*needle_pos))
-									++haystack_pos, ++needle_pos;
-
-								if (needle_pos == needle_end)
-									return haystack;
-							}
-						}
-						else if ((mask & cachemask) == cachemask)
-							return haystack;
-
-						++haystack;
-						continue;
-					}
-				}
-
-				if (haystack == haystack_end)
-					return haystack_end;
-
-				if (*haystack == l || *haystack == u)
-				{
-					auto haystack_pos = haystack + 1;
-					auto needle_pos = needle_begin + 1;
-
-					while (haystack_pos < haystack_end && needle_pos < needle_end &&
-						   std::tolower(*haystack_pos) == std::tolower(*needle_pos))
-						++haystack_pos, ++needle_pos;
-
-					if (needle_pos == needle_end)
-						return haystack;
-				}
-
-				++haystack;
-			}
-
-			return haystack_end;
-		}
-	};
-
 public:
 	using ResultType = UInt64;

@ -380,7 +207,7 @@ public:
 		const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets, const std::string & needle,
 		PODArray<UInt64> & res)
 	{
-		const CaseInsensitiveSearcher searcher{needle};
+		const ASCIICaseInsensitiveStringSearcher searcher{needle.data(), needle.size()};

 		const UInt8 * begin = &data[0];
 		const UInt8 * pos = begin;
@ -390,7 +217,7 @@ public:
 		size_t i = 0;

 		/// Искать будем следующее вхождение сразу во всех строках.
-		while (pos < end && end != (pos = searcher.find(pos, end)))
+		while (pos < end && end != (pos = searcher.search(pos, end)))
 		{
 			/// Определим, к какому индексу оно относится.
 			while (begin + offsets[i] < pos)
@ -426,297 +253,6 @@ public:
 };


-struct PositionCaseInsensitiveUTF8Impl
-{
-private:
-	class CaseInsensitiveSearcher
-	{
-		using UTF8SequenceBuffer = UInt8[6];
-
-		static constexpr auto n = sizeof(__m128i);
-
-		const int page_size = getpagesize();
-
-		/// string to be searched for
-		const std::string & needle;
-		bool first_needle_symbol_is_ascii{};
-		/// lower and uppercase variants of the first octet of the first character in `needle`
-		UInt8 l{};
-		UInt8 u{};
-		/// vectors filled with `l` and `u`, for determining leftmost position of the first symbol
-		__m128i patl, patu;
-		/// lower and uppercase vectors of first 16 characters of `needle`
-		__m128i cachel = _mm_setzero_si128(), cacheu = _mm_setzero_si128();
-		int cachemask{};
-		std::size_t cache_valid_len{};
-		std::size_t cache_actual_len{};
-
-		bool page_safe(const void * const ptr) const
-		{
-			return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
-		}
-
-	public:
-		CaseInsensitiveSearcher(const std::string & needle) : needle(needle)
-		{
-			if (needle.empty())
-				return;
-
-			static const Poco::UTF8Encoding utf8;
-			UTF8SequenceBuffer l_seq, u_seq;
-
-			auto needle_pos = reinterpret_cast<const UInt8 *>(needle.data());
-			if (*needle_pos < 0x80u)
-			{
-				first_needle_symbol_is_ascii = true;
-				l = std::tolower(*needle_pos);
-				u = std::toupper(*needle_pos);
-			}
-			else
-			{
-				const auto first_u32 = utf8.convert(needle_pos);
-				const auto first_l_u32 = Poco::Unicode::toLower(first_u32);
-				const auto first_u_u32 = Poco::Unicode::toUpper(first_u32);
-
-				/// lower and uppercase variants of the first octet of the first character in `needle`
-				utf8.convert(first_l_u32, l_seq, sizeof(l_seq));
-				l = l_seq[0];
-				utf8.convert(first_u_u32, u_seq, sizeof(u_seq));
-				u = u_seq[0];
-			}
-
-			/// for detecting leftmost position of the first symbol
-			patl = _mm_set1_epi8(l);
-			patu = _mm_set1_epi8(u);
-			/// lower and uppercase vectors of first 16 octets of `needle`
-
-			const auto needle_end = needle_pos + needle.size();
-
-			for (std::size_t i = 0; i < n;)
-			{
-				if (needle_pos == needle_end)
-				{
-					cachel = _mm_srli_si128(cachel, 1);
-					cacheu = _mm_srli_si128(cacheu, 1);
-					++i;
-
-					continue;
-				}
-
-				const auto src_len = utf8_seq_length(*needle_pos);
-				const auto c_u32 = utf8.convert(needle_pos);
-
-				const auto c_l_u32 = Poco::Unicode::toLower(c_u32);
-				const auto c_u_u32 = Poco::Unicode::toUpper(c_u32);
-
-				const auto dst_l_len = static_cast<UInt8>(utf8.convert(c_l_u32, l_seq, sizeof(l_seq)));
-				const auto dst_u_len = static_cast<UInt8>(utf8.convert(c_u_u32, u_seq, sizeof(u_seq)));
-
-				/// @note Unicode standard states it is a rare but possible occasion
-				if (!(dst_l_len == dst_u_len && dst_u_len == src_len))
-					throw Exception{
-							"UTF8 sequences with different lowercase and uppercase lengths are not supported",
-							ErrorCodes::UNSUPPORTED_PARAMETER
-					};
-
-				cache_actual_len += src_len;
-				if (cache_actual_len < n)
-					cache_valid_len += src_len;
-
-				for (std::size_t j = 0; j < src_len && i < n; ++j, ++i)
-				{
-					cachel = _mm_srli_si128(cachel, 1);
-					cacheu = _mm_srli_si128(cacheu, 1);
-
-					if (needle_pos != needle_end)
-					{
-						cachel = _mm_insert_epi8(cachel, l_seq[j], n - 1);
-						cacheu = _mm_insert_epi8(cacheu, u_seq[j], n - 1);
-
-						cachemask |= 1 << i;
-						++needle_pos;
-					}
-				}
-			}
-		}
-
-		const UInt8 * find(const UInt8 * haystack, const UInt8 * const haystack_end) const
-		{
-			if (needle.empty())
-				return haystack;
-
-			static const Poco::UTF8Encoding utf8;
-
-			const auto needle_begin = reinterpret_cast<const UInt8 *>(needle.data());
-			const auto needle_end = needle_begin + needle.size();
-
-			while (haystack < haystack_end)
-			{
-				if (haystack + n <= haystack_end && page_safe(haystack))
-				{
-					const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
-					const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
-					const auto v_against_u = _mm_cmpeq_epi8(v_haystack, patu);
-					const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
-
-					const auto mask = _mm_movemask_epi8(v_against_l_or_u);
-
-					if (mask == 0)
-					{
-						haystack += n;
-						utf8_sync_forward(haystack, haystack_end);
-						continue;
-					}
-
-					const auto offset = _bit_scan_forward(mask);
-					haystack += offset;
-
-					if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
-					{
-						const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
-						const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
-						const auto v_against_u = _mm_cmpeq_epi8(v_haystack, cacheu);
-						const auto v_against_l_or_u = _mm_or_si128(v_against_l, v_against_u);
-						const auto mask = _mm_movemask_epi8(v_against_l_or_u);
-
-						if (0xffff == cachemask)
-						{
-							if (mask == cachemask)
-							{
-								auto haystack_pos = haystack + cache_valid_len;
-								auto needle_pos = needle_begin + cache_valid_len;
-
-								while (haystack_pos < haystack_end && needle_pos < needle_end &&
-									   Poco::Unicode::toLower(utf8.convert(haystack_pos)) ==
-									   Poco::Unicode::toLower(utf8.convert(needle_pos)))
-								{
-									/// @note assuming sequences for lowercase and uppercase have exact same length
-									const auto len = utf8_seq_length(*haystack_pos);
-									haystack_pos += len, needle_pos += len;
-								}
-
-								if (needle_pos == needle_end)
-									return haystack;
-							}
-						}
-						else if ((mask & cachemask) == cachemask)
-							return haystack;
-
-						/// first octet was ok, but not the first 16, move to start of next sequence and reapply
-						haystack += utf8_seq_length(*haystack);
-						continue;
-					}
-				}
-
-				if (haystack == haystack_end)
-					return haystack_end;
-
-				if (*haystack == l || *haystack == u)
-				{
-					auto haystack_pos = haystack + first_needle_symbol_is_ascii;
-					auto needle_pos = needle_begin + first_needle_symbol_is_ascii;
-
-					while (haystack_pos < haystack_end && needle_pos < needle_end &&
-						   Poco::Unicode::toLower(utf8.convert(haystack_pos)) ==
-						   Poco::Unicode::toLower(utf8.convert(needle_pos)))
-					{
-						const auto len = utf8_seq_length(*haystack_pos);
-						haystack_pos += len, needle_pos += len;
-					}
-
-					if (needle_pos == needle_end)
-						return haystack;
-				}
-
-				/// advance to the start of the next sequence
-				haystack += utf8_seq_length(*haystack);
-			}
-
-			return haystack_end;
-		}
-	};
-
-public:
-	using ResultType = UInt64;
-
-	static void vector(
-		const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets, const std::string & needle,
-		PODArray<UInt64> & res)
-	{
-		const CaseInsensitiveSearcher searcher{needle};
-
-		const UInt8 * begin = &data[0];
-		const UInt8 * pos = begin;
-		const UInt8 * end = pos + data.size();
-
-		/// Текущий индекс в массиве строк.
-		size_t i = 0;
-
-		/// Искать будем следующее вхождение сразу во всех строках.
-		while (pos < end && end != (pos = searcher.find(pos, end)))
-		{
-			/// Определим, к какому индексу оно относится.
-			while (begin + offsets[i] < pos)
-			{
-				res[i] = 0;
-				++i;
-			}
-
-			/// Проверяем, что вхождение не переходит через границы строк.
-			if (pos + needle.size() < begin + offsets[i])
-			{
-				/// А теперь надо найти, сколько кодовых точек находится перед pos.
-				res[i] = 1;
-				for (const UInt8 * c = begin + (i != 0 ? offsets[i - 1] : 0); c < pos; ++c)
-					if (!utf8_is_continuation_octet(*c))
-						++res[i];
-			}
-			else
-				res[i] = 0;
-
-			pos = begin + offsets[i];
-			++i;
-		}
-
-		memset(&res[i], 0, (res.size() - i) * sizeof(res[0]));
-	}
-
-	static void constant(std::string data, std::string needle, UInt64 & res)
-	{
-		static const Poco::UTF8Encoding utf8;
-
-		auto data_pos = reinterpret_cast<UInt8 *>(&data[0]);
-		const auto data_end = data_pos + data.size();
-		while (data_pos < data_end)
-		{
-			const auto len = utf8.convert(Poco::Unicode::toLower(utf8.convert(data_pos)), data_pos, data_end - data_pos);
-			data_pos += len;
-		}
-
-		auto needle_pos = reinterpret_cast<UInt8 *>(&needle[0]);
-		const auto needle_end = needle_pos + needle.size();
-		while (needle_pos < needle_end)
-		{
-			const auto len = utf8.convert(Poco::Unicode::toLower(utf8.convert(needle_pos)), needle_pos, needle_end - needle_pos);
-			needle_pos += len;
-		}
-
-		const auto pos = data.find(needle);
-		if (pos != std::string::npos)
-		{
-			/// А теперь надо найти, сколько кодовых точек находится перед pos.
-			res = 1;
-			for (const auto i : ext::range(0, pos))
-				if (!utf8_is_continuation_octet(static_cast<UInt8>(data[i])))
-					++res;
-		}
-		else
-			res = 0;
-	}
-};
-
-
-
 /// Переводит выражение LIKE в regexp re2. Например, abc%def -> ^abc.*def$
 inline String likePatternToRegexp(const String & pattern)
 {
@ -1761,12 +1297,8 @@ public:

 struct NamePosition 					{ static constexpr auto name = "position"; };
 struct NamePositionUTF8					{ static constexpr auto name = "positionUTF8"; };
-struct NamePositionSSE 					{ static constexpr auto name = "positionSSE"; };
-struct NamePositionUTF8SSE					{ static constexpr auto name = "positionUTF8SSE"; };
 struct NamePositionCaseInsensitive 		{ static constexpr auto name = "positionCaseInsensitive"; };
 struct NamePositionCaseInsensitiveUTF8	{ static constexpr auto name = "positionCaseInsensitiveUTF8"; };
-struct NamePositionCaseInsensitiveVolnitsky 		{ static constexpr auto name = "positionCaseInsensitiveVolnitsky"; };
-struct NamePositionCaseInsensitiveUTF8Volnitsky	{ static constexpr auto name = "positionCaseInsensitiveUTF8Volnitsky"; };
 struct NameMatch						{ static constexpr auto name = "match"; };
 struct NameLike							{ static constexpr auto name = "like"; };
 struct NameNotLike						{ static constexpr auto name = "notLike"; };
@ -1778,12 +1310,8 @@ struct NameReplaceRegexpAll				{ static constexpr auto name = "replaceRegexpAll"

 typedef FunctionsStringSearch<PositionImpl<true>, 				NamePosition> 						FunctionPosition;
 typedef FunctionsStringSearch<PositionUTF8Impl<true>, 			NamePositionUTF8> 					FunctionPositionUTF8;
-typedef FunctionsStringSearch<PositionImpl<true, true>, 				NamePositionSSE> 						FunctionPositionSSE;
-typedef FunctionsStringSearch<PositionUTF8Impl<true, true>, 			NamePositionUTF8SSE> 					FunctionPositionUTF8SSE;
 typedef FunctionsStringSearch<PositionCaseInsensitiveImpl,		NamePositionCaseInsensitive> 		FunctionPositionCaseInsensitive;
-typedef FunctionsStringSearch<PositionCaseInsensitiveUTF8Impl,	NamePositionCaseInsensitiveUTF8>	FunctionPositionCaseInsensitiveUTF8;
-typedef FunctionsStringSearch<PositionImpl<false>,				NamePositionCaseInsensitiveVolnitsky> 		FunctionPositionCaseInsensitiveVolnitsky;
-typedef FunctionsStringSearch<PositionUTF8Impl<false>,			NamePositionCaseInsensitiveUTF8Volnitsky>	FunctionPositionCaseInsensitiveUTF8Volnitsky;
+typedef FunctionsStringSearch<PositionUTF8Impl<false>,			NamePositionCaseInsensitiveUTF8>	FunctionPositionCaseInsensitiveUTF8;

 typedef FunctionsStringSearch<MatchImpl<false>, 				NameMatch> 							FunctionMatch;
 typedef FunctionsStringSearch<MatchImpl<true>, 					NameLike> 							FunctionLike;
--- a/dbms/src/Functions/FunctionsStringSearch.cpp
+++ b/dbms/src/Functions/FunctionsStringSearch.cpp
@ -12,12 +12,8 @@ void registerFunctionsStringSearch(FunctionFactory & factory)
 	factory.registerFunction<FunctionReplaceRegexpAll>();
 	factory.registerFunction<FunctionPosition>();
 	factory.registerFunction<FunctionPositionUTF8>();
-	factory.registerFunction<FunctionPositionSSE>();
-	factory.registerFunction<FunctionPositionUTF8SSE>();
 	factory.registerFunction<FunctionPositionCaseInsensitive>();
 	factory.registerFunction<FunctionPositionCaseInsensitiveUTF8>();
-	factory.registerFunction<FunctionPositionCaseInsensitiveVolnitsky>();
-	factory.registerFunction<FunctionPositionCaseInsensitiveUTF8Volnitsky>();
 	factory.registerFunction<FunctionMatch>();
 	factory.registerFunction<FunctionLike>();
 	factory.registerFunction<FunctionNotLike>();