MultiVolnitsky added with tests and some benchmark, many multiFunctions are added to support multistring search

2024-09-20 08:40:50 +00:00 · 2019-01-14 18:54:47 +03:00 · 2019-01-14 18:54:47 +03:00 · a5669b6b7a
commit a5669b6b7a
parent 72df7ceee6
6 changed files with 24802 additions and 442 deletions
--- a/dbms/src/Common/Volnitsky.h
+++ b/dbms/src/Common/Volnitsky.h
@ -1,15 +1,19 @@
 #pragma once

-#include <Common/StringSearcher.h>
-#include <Common/StringUtils/StringUtils.h>
+#include <vector>
+#include <stdint.h>
+#include <string.h>
+#include <Columns/ColumnConst.h>
+#include <Columns/ColumnString.h>
+#include <Columns/ColumnVector.h>
 #include <Core/Types.h>
 #include <Poco/UTF8Encoding.h>
 #include <Poco/Unicode.h>
+#include <Common/StringSearcher.h>
+#include <Common/StringUtils/StringUtils.h>
+#include <common/StringRef.h>
 #include <common/unaligned.h>
 #include <ext/range.h>
-#include <stdint.h>
-#include <string.h>
-

 /** Search for a substring in a string by Volnitsky's algorithm
  * http://volnitsky.com/project/str_search/
@ -28,117 +32,38 @@
  * - if it did not match, we check the next cell of the hash table from the collision resolution chain;
  * - if not found, skip to haystack almost the size of the needle bytes;
  *
-  * Unaligned memory access is used.
+  * MultiVersion:
+  * - Add bigrams to hash table with string index. Then the usual Volnitsky search is used.
+  * - We are adding while searching, limiting the number of fallback searchers and the total number of added bigrams
  */


 namespace DB
 {
-
-
-/// @todo store lowercase needle to speed up in case there are numerous occurrences of bigrams from needle in haystack
-template <typename CRTP>
-class VolnitskyBase
+namespace VolnitskyTraits
 {
-protected:
-    using Offset = UInt8;    /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255.
-    using Ngram = UInt16;    /// n-gram (2 bytes).
+    using Offset = UInt8; /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255.
+    using Id = UInt8; /// Index of the string, must not be greater than 255.
+    using Ngram = UInt16; /// n-gram (2 bytes).

-    const UInt8 * const needle;
-    const size_t needle_size;
-    const UInt8 * const needle_end = needle + needle_size;
-    /// For how long we move, if the n-gram from haystack is not found in the hash table.
-    const size_t step = needle_size - sizeof(Ngram) + 1;
-
-    /** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
-      *  storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
-    static const size_t hash_size = 64 * 1024;    /// Fits into the L2 cache (of common Intel CPUs).
-    Offset hash[hash_size];    /// Hash table.
+    /** Fits into the L2 cache (of common Intel CPUs).
+     * This number is extremely good for compilers as it is numeric_limits<UInt16>::max() + 1 and there are optimizations with movzwl and other instructions with 2 bytes
+     */
+    static constexpr size_t hash_size = 64 * 1024;

    /// min haystack size to use main algorithm instead of fallback
-    static constexpr auto min_haystack_size_for_algorithm = 20000;
-    const bool fallback; /// Do we need to use the fallback algorithm.
+    static constexpr size_t min_haystack_size_for_algorithm = 20000;

-public:
-    /** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
-      * If you specify it small enough, the fallback algorithm will be used,
-      *  since it is considered that it's useless to waste time initializing the hash table.
-      */
-    VolnitskyBase(const char * const needle, const size_t needle_size, size_t haystack_size_hint = 0)
-    : needle{reinterpret_cast<const UInt8 *>(needle)}, needle_size{needle_size},
-      fallback{
-          needle_size < 2 * sizeof(Ngram)
-          || needle_size >= std::numeric_limits<Offset>::max()
-          || (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm)}
+    static inline bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0)
    {
-        if (fallback)
-            return;
-
-        memset(hash, 0, sizeof(hash));
-
-        /// int is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
-        for (auto i = static_cast<int>(needle_size - sizeof(Ngram)); i >= 0; --i)
-            self().putNGram(this->needle + i, i + 1, this->needle);
+        return needle_size < 2 * sizeof(Ngram) || needle_size >= std::numeric_limits<Offset>::max()
+            || (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm);
    }

+    /// Reads the n-gram (2 bytes) that starts at `pos` using an unaligned load.
+    static inline Ngram toNGram(const UInt8 * const pos)
+    {
+        const Ngram ngram = unalignedLoad<Ngram>(pos);
+        return ngram;
+    }

-    /// If not found, the end of the haystack is returned.
-    const UInt8 * search(const UInt8 * const haystack, const size_t haystack_size) const
-    {
-        if (needle_size == 0)
-            return haystack;
-
-        const auto haystack_end = haystack + haystack_size;
-
-        if (needle_size == 1 || fallback || haystack_size <= needle_size)
-            return self().search_fallback(haystack, haystack_end);
-
-        /// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
-        const auto * pos = haystack + needle_size - sizeof(Ngram);
-        for (; pos <= haystack_end - needle_size; pos += step)
-        {
-            /// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
-            for (size_t cell_num = toNGram(pos) % hash_size; hash[cell_num];
-                 cell_num = (cell_num + 1) % hash_size)
-            {
-                /// When found - compare bytewise, using the offset from the hash table.
-                const auto res = pos - (hash[cell_num] - 1);
-
-                if (self().compare(res))
-                    return res;
-            }
-        }
-
-        /// The remaining tail.
-        return self().search_fallback(pos - step + 1, haystack_end);
-    }
-
-    const char * search(const char * haystack, size_t haystack_size) const
-    {
-        return reinterpret_cast<const char *>(search(reinterpret_cast<const UInt8 *>(haystack), haystack_size));
-    }
-
-protected:
-    CRTP & self() { return static_cast<CRTP &>(*this); }
-    const CRTP & self() const { return const_cast<VolnitskyBase *>(this)->self(); }
-
-    static Ngram toNGram(const UInt8 * const pos)
-    {
-        return unalignedLoad<Ngram>(pos);
-    }
-
-    void putNGramBase(const Ngram ngram, const int offset)
-    {
-        /// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
-        size_t cell_num = ngram % hash_size;
-
-        while (hash[cell_num])
-            cell_num = (cell_num + 1) % hash_size; /// Search for the next free cell.
-
-        hash[cell_num] = offset;
-    }
-
-    void putNGramASCIICaseInsensitive(const UInt8 * const pos, const int offset)
+    template <class Callback>
+    static inline void putNGramASCIICaseInsensitive(const UInt8 * const pos, const int offset, const Callback & putNGramBase)
    {
        struct Chars
        {
@ -186,274 +111,554 @@ protected:
            /// 1 combination: 01
            putNGramBase(n, offset);
    }
-};

-
-template <bool CaseSensitive, bool ASCII> struct VolnitskyImpl;
-
-/// Case sensitive comparison
-template <bool ASCII> struct VolnitskyImpl<true, ASCII> : VolnitskyBase<VolnitskyImpl<true, ASCII>>
-{
-    VolnitskyImpl(const char * const needle_, const size_t needle_size_, const size_t haystack_size_hint = 0)
-        : VolnitskyBase<VolnitskyImpl<true, ASCII>>{needle_, needle_size_, haystack_size_hint},
-          fallback_searcher{needle_, needle_size_}
+    template <bool CaseSensitive, bool ASCII, class Callback>
+    static inline void
+    putNGram(const UInt8 * const pos, const int offset, [[maybe_unused]] const UInt8 * const begin, const Callback & putNGramBase)
    {
-    }
-
-    void putNGram(const UInt8 * const pos, const int offset, const UInt8 * const /*begin*/)
-    {
-        this->putNGramBase(this->toNGram(pos), offset);
-    }
-
-    bool compare(const UInt8 * const pos) const
-    {
-        /// @todo: maybe just use memcmp for this case and rely on internal SSE optimization as in case with memcpy?
-        return fallback_searcher.compare(pos);
-    }
-
-    const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
-    {
-        return fallback_searcher.search(haystack, haystack_end);
-    }
-
-    ASCIICaseSensitiveStringSearcher fallback_searcher;
-};
-
-/// Case-insensitive ASCII
-template <> struct VolnitskyImpl<false, true> : VolnitskyBase<VolnitskyImpl<false, true>>
-{
-    VolnitskyImpl(const char * const needle_, const size_t needle_size_, const size_t haystack_size_hint = 0)
-        : VolnitskyBase{needle_, needle_size_, haystack_size_hint}, fallback_searcher{needle_, needle_size_}
-    {
-    }
-
-    void putNGram(const UInt8 * const pos, const int offset, const UInt8 * const /*begin*/)
-    {
-        putNGramASCIICaseInsensitive(pos, offset);
-    }
-
-    bool compare(const UInt8 * const pos) const
-    {
-        return fallback_searcher.compare(pos);
-    }
-
-    const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
-    {
-        return fallback_searcher.search(haystack, haystack_end);
-    }
-
-    ASCIICaseInsensitiveStringSearcher fallback_searcher;
-};
-
-/// Case-sensitive UTF-8
-template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<false, false>>
-{
-    VolnitskyImpl(const char * const needle_, const size_t needle_size_, const size_t haystack_size_hint = 0)
-        : VolnitskyBase{needle_, needle_size_, haystack_size_hint}, fallback_searcher{needle_, needle_size_}
-    {
-    }
-
-    void putNGram(const UInt8 * const pos, const int offset, const UInt8 * const begin)
-    {
-        struct Chars
-        {
-            UInt8 c0;
-            UInt8 c1;
-        };
-
-        union
-        {
-            Ngram n;
-            Chars chars;
-        };
-
-        n = toNGram(pos);
-
-        if (isascii(chars.c0) && isascii(chars.c1))
-        {
-            putNGramASCIICaseInsensitive(pos, offset);
-        }
+        if constexpr (CaseSensitive)
+            putNGramBase(toNGram(pos), offset);
        else
        {
-            /** n-gram (in the case of n = 2)
-              *  can be entirely located within one code point,
-              *  or intersect with two code points.
-              *
-              * In the first case, you need to consider up to two alternatives - this code point in upper and lower case,
-              *  and in the second case - up to four alternatives - fragments of two code points in all combinations of cases.
-              *
-              * It does not take into account the dependence of the case-transformation from the locale (for example - Turkish `Ii`)
-              *  as well as composition / decomposition and other features.
-              *
-              * It also does not work if characters with lower and upper cases are represented by different number of bytes or code points.
-              */
-
-            using Seq = UInt8[6];
-
-            static const Poco::UTF8Encoding utf8;
-
-            if (UTF8::isContinuationOctet(chars.c1))
+            if constexpr (ASCII)
+                putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
+            else
            {
-                /// ngram is inside a sequence
-                auto seq_pos = pos;
-                UTF8::syncBackward(seq_pos, begin);
+                struct Chars
+                {
+                    UInt8 c0;
+                    UInt8 c1;
+                };

-                const auto u32 = utf8.convert(seq_pos);
-                const auto l_u32 = Poco::Unicode::toLower(u32);
-                const auto u_u32 = Poco::Unicode::toUpper(u32);
+                union
+                {
+                    VolnitskyTraits::Ngram n;
+                    Chars chars;
+                };

-                /// symbol is case-independent
-                if (l_u32 == u_u32)
-                    putNGramBase(n, offset);
+                n = toNGram(pos);
+
+                if (isascii(chars.c0) && isascii(chars.c1))
+                    putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
                else
                {
-                    /// where is the given ngram in respect to the start of UTF-8 sequence?
-                    const auto seq_ngram_offset = pos - seq_pos;
+                    /** n-gram (in the case of n = 2)
+                      *  can be entirely located within one code point,
+                      *  or intersect with two code points.
+                      *
+                      * In the first case, you need to consider up to two alternatives - this code point in upper and lower case,
+                      *  and in the second case - up to four alternatives - fragments of two code points in all combinations of cases.
+                      *
+                      * It does not take into account the dependence of the case-transformation from the locale (for example - Turkish `Ii`)
+                      *  as well as composition / decomposition and other features.
+                      *
+                      * It also does not work if characters with lower and upper cases are represented by different number of bytes or code points.
+                      */

-                    Seq seq;
+                    using Seq = UInt8[6];

-                    /// put ngram for lowercase
-                    utf8.convert(l_u32, seq, sizeof(seq));
-                    chars.c0 = seq[seq_ngram_offset];
-                    chars.c1 = seq[seq_ngram_offset + 1];
-                    putNGramBase(n, offset);
+                    static const Poco::UTF8Encoding utf8;

-                    /// put ngram for uppercase
-                    utf8.convert(u_u32, seq, sizeof(seq));
-                    chars.c0 = seq[seq_ngram_offset];
-                    chars.c1 = seq[seq_ngram_offset + 1];
-                    putNGramBase(n, offset);
+                    if (UTF8::isContinuationOctet(chars.c1))
+                    {
+                        /// ngram is inside a sequence
+                        auto seq_pos = pos;
+                        UTF8::syncBackward(seq_pos, begin);
+
+                        const auto u32 = utf8.convert(seq_pos);
+                        const auto l_u32 = Poco::Unicode::toLower(u32);
+                        const auto u_u32 = Poco::Unicode::toUpper(u32);
+
+                        /// symbol is case-independent
+                        if (l_u32 == u_u32)
+                            putNGramBase(n, offset);
+                        else
+                        {
+                            /// where is the given ngram in respect to the start of UTF-8 sequence?
+                            const auto seq_ngram_offset = pos - seq_pos;
+
+                            Seq seq;
+
+                            /// put ngram for lowercase
+                            utf8.convert(l_u32, seq, sizeof(seq));
+                            chars.c0 = seq[seq_ngram_offset];
+                            chars.c1 = seq[seq_ngram_offset + 1];
+                            putNGramBase(n, offset);
+
+                            /// put ngram for uppercase
+                            utf8.convert(u_u32, seq, sizeof(seq));
+                            chars.c0 = seq[seq_ngram_offset];
+                            chars.c1 = seq[seq_ngram_offset + 1];
+                            putNGramBase(n, offset);
+                        }
+                    }
+                    else
+                    {
+                        /// ngram is on the boundary of two sequences
+                        /// first sequence may start before pos if it is not ASCII
+                        auto first_seq_pos = pos;
+                        UTF8::syncBackward(first_seq_pos, begin);
+                        /// where is the given ngram in respect to the start of first UTF-8 sequence?
+                        const auto seq_ngram_offset = pos - first_seq_pos;
+
+                        const auto first_u32 = utf8.convert(first_seq_pos);
+                        const auto first_l_u32 = Poco::Unicode::toLower(first_u32);
+                        const auto first_u_u32 = Poco::Unicode::toUpper(first_u32);
+
+                        /// second sequence always starts immediately after pos
+                        auto second_seq_pos = pos + 1;
+
+                        const auto second_u32 = utf8.convert(second_seq_pos); /// TODO This assumes valid UTF-8 or zero byte after needle.
+                        const auto second_l_u32 = Poco::Unicode::toLower(second_u32);
+                        const auto second_u_u32 = Poco::Unicode::toUpper(second_u32);
+
+                        /// both symbols are case-independent
+                        if (first_l_u32 == first_u_u32 && second_l_u32 == second_u_u32)
+                        {
+                            putNGramBase(n, offset);
+                        }
+                        else if (first_l_u32 == first_u_u32)
+                        {
+                            /// first symbol is case-independent
+                            Seq seq;
+
+                            /// put ngram for lowercase
+                            utf8.convert(second_l_u32, seq, sizeof(seq));
+                            chars.c1 = seq[0];
+                            putNGramBase(n, offset);
+
+                            /// put ngram from uppercase, if it is different
+                            utf8.convert(second_u_u32, seq, sizeof(seq));
+                            if (chars.c1 != seq[0])
+                            {
+                                chars.c1 = seq[0];
+                                putNGramBase(n, offset);
+                            }
+                        }
+                        else if (second_l_u32 == second_u_u32)
+                        {
+                            /// second symbol is case-independent
+                            Seq seq;
+
+                            /// put ngram for lowercase
+                            utf8.convert(first_l_u32, seq, sizeof(seq));
+                            chars.c0 = seq[seq_ngram_offset];
+                            putNGramBase(n, offset);
+
+                            /// put ngram for uppercase, if it is different
+                            utf8.convert(first_u_u32, seq, sizeof(seq));
+                            if (chars.c0 != seq[seq_ngram_offset])
+                            {
+                                chars.c0 = seq[seq_ngram_offset];
+                                putNGramBase(n, offset);
+                            }
+                        }
+                        else
+                        {
+                            Seq first_l_seq;
+                            Seq first_u_seq;
+                            Seq second_l_seq;
+                            Seq second_u_seq;
+
+                            utf8.convert(first_l_u32, first_l_seq, sizeof(first_l_seq));
+                            utf8.convert(first_u_u32, first_u_seq, sizeof(first_u_seq));
+                            utf8.convert(second_l_u32, second_l_seq, sizeof(second_l_seq));
+                            utf8.convert(second_u_u32, second_u_seq, sizeof(second_u_seq));
+
+                            auto c0l = first_l_seq[seq_ngram_offset];
+                            auto c0u = first_u_seq[seq_ngram_offset];
+                            auto c1l = second_l_seq[0];
+                            auto c1u = second_u_seq[0];
+
+                            /// ngram for ll
+                            chars.c0 = c0l;
+                            chars.c1 = c1l;
+                            putNGramBase(n, offset);
+
+                            if (c0l != c0u)
+                            {
+                                /// ngram for Ul
+                                chars.c0 = c0u;
+                                chars.c1 = c1l;
+                                putNGramBase(n, offset);
+                            }
+
+                            if (c1l != c1u)
+                            {
+                                /// ngram for lU
+                                chars.c0 = c0l;
+                                chars.c1 = c1u;
+                                putNGramBase(n, offset);
+                            }
+
+                            if (c0l != c0u && c1l != c1u)
+                            {
+                                /// ngram for UU
+                                chars.c0 = c0u;
+                                chars.c1 = c1u;
+                                putNGramBase(n, offset);
+                            }
+                        }
+                    }
                }
            }
+        }
+    }
+
+} // namespace VolnitskyTraits
+
+template <bool CaseSensitive, bool ASCII, class FallbackSearcher>
+class MultiVolnitskyBase
+{
+private:
+    /// needles to search for; stored by reference, so the vector must outlive the searcher
+    const std::vector<String> & needles;
+
+
+    /// fallback searchers
+    std::vector<size_t> fallback_needles;
+    std::vector<FallbackSearcher> fallback_searchers;
+
+    /// Cell of the hash table. A plain struct is used because std::pair<> is not POD
+    struct OffsetId
+    {
+        VolnitskyTraits::Id id; /// index of the needle in `needles`
+        VolnitskyTraits::Offset off; /// offset + 1 of the bigram inside that needle, as used by the search loops; 0 means the cell is empty
+    };
+
+    OffsetId hash[VolnitskyTraits::hash_size];
+
+    /// step for each bunch of strings
+    size_t step;
+
+    /// index of the first needle that has not been put into a batch yet
+    size_t last;
+
+    /// limit for adding to the hash table. In the worst case, with case-insensitive search, the table will be at most half full
+    static constexpr size_t small_limit = VolnitskyTraits::hash_size / 8;
+
+public:
+    /** Remembers the needles by reference (they must outlive this object) and reserves room
+      *  for one fallback searcher per needle. The searchers themselves and the hash table
+      *  are filled later, batch by batch, in reset().
+      */
+    MultiVolnitskyBase(const std::vector<String> & needles_)
+        : needles{needles_}
+        , step{0}
+        , last{0}
+    {
+        const size_t needles_count = needles.size();
+        fallback_searchers.reserve(needles_count);
+    }
+
+    /** Looks for every needle in every haystack string.
+      * The result is a row-major table of haystack_offsets.size() x needles.size() pointers:
+      *  the entry [j * needles.size() + i] points into haystack_data at an occurrence of needle i
+      *  inside haystack j, or stays nullptr if nothing was found.
+      * Needles are processed in batches prepared by reset(); every batch scans all haystacks.
+      */
+    std::vector<const char *> search_all(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets)
+    {
+        constexpr size_t ngram_size = sizeof(VolnitskyTraits::Ngram);
+        const size_t rows = haystack_offsets.size();
+        const size_t needles_count = needles.size();
+        std::vector<const char *> ans(needles_count * rows, nullptr);
+
+        while (!reset())
+        {
+            /// false if the current batch has put nothing into the hash table
+            const bool has_hashed_needles = step != std::numeric_limits<size_t>::max();
+
+            size_t begin_offset = 0;
+            size_t row_base = 0;
+            for (size_t row = 0; row < rows; ++row, row_base += needles_count)
+            {
+                const auto end_offset = haystack_offsets[row];
+                const auto * haystack = &haystack_data[begin_offset];
+                const auto * haystack_end = haystack + end_offset - begin_offset - 1;
+                begin_offset = end_offset;
+
+                /// needles that did not go to the hash table are searched one by one
+                for (const size_t needle_idx : fallback_needles)
+                {
+                    const UInt8 * found = fallback_searchers[needle_idx].search(haystack, haystack_end);
+                    if (found != haystack_end)
+                        ans[row_base + needle_idx] = reinterpret_cast<const char *>(found);
+                }
+
+                if (!has_hashed_needles)
+                    continue;
+
+                for (const auto * pos = haystack + step - ngram_size; pos <= haystack_end - ngram_size; pos += step)
+                {
+                    /// walk the collision chain of the bigram at `pos`; a zero offset marks a free cell
+                    for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
+                         cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
+                    {
+                        const OffsetId & cell = hash[cell_num];
+
+                        /// the needle would have to start before the haystack
+                        if (pos < haystack + cell.off - 1)
+                            continue;
+
+                        const auto candidate = pos - (cell.off - 1);
+                        const size_t needle_idx = cell.id;
+                        auto & slot = ans[row_base + needle_idx];
+
+                        /// already found for this haystack, or the needle does not fit into the rest of it
+                        if (slot || candidate + needles[needle_idx].size() > haystack_end)
+                            continue;
+
+                        if (fallback_searchers[needle_idx].compare(candidate))
+                            slot = reinterpret_cast<const char *>(candidate);
+                    }
+                }
+            }
+        }
+        return ans;
+    }
+
+    /** For every haystack string, reports whether at least one of the needles occurs in it.
+      *
+      * The needles are processed in batches: each reset() call refills the hash table with the next
+      *  bunch of needles, and every batch scans the haystacks again.
+      * A hit found by one batch must survive the following batches, so a haystack that is already
+      *  marked as matched is neither rescanned nor overwritten.
+      *
+      * haystack_data - concatenated haystack strings;
+      * haystack_offsets - for each string, the offset of the end of the string in haystack_data;
+      *  the `- 1` below excludes the last byte of each string
+      *  (presumably the terminating zero stored by ColumnString - confirm).
+      * Returns one char per haystack string: non-zero if some needle was found, 0 otherwise.
+      */
+    std::vector<char> search(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets)
+    {
+        const size_t haystack_string_size = haystack_offsets.size();
+        std::vector<char> ans(haystack_string_size, 0);
+        while (!reset())
+        {
+            size_t prev_offset = 0;
+            for (size_t j = 0; j < haystack_string_size; ++j)
+            {
+                /// Keep a match found by a previous batch of needles instead of overwriting it.
+                if (!ans[j])
+                {
+                    const auto * haystack = &haystack_data[prev_offset];
+                    const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
+                    ans[j] = search_one(haystack, haystack_end);
+                }
+                prev_offset = haystack_offsets[j];
+            }
+        }
+        return ans;
+    }
+
+    /** For every haystack string, stores the value returned by search_one_index().
+      * The needles are handled in batches prepared by reset(), and all haystack strings
+      *  are scanned once per batch.
+      * NOTE(review): the result of a later batch overwrites the one of an earlier batch for the same
+      *  haystack; whether that is intended depends on the return convention of search_one_index() - confirm.
+      */
+    std::vector<size_t> search_index(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets)
+    {
+        const size_t rows = haystack_offsets.size();
+        std::vector<size_t> ans(rows, 0);
+        while (!reset())
+        {
+            size_t begin_offset = 0;
+            for (size_t row = 0; row < rows; ++row)
+            {
+                const auto end_offset = haystack_offsets[row];
+                const auto * haystack = &haystack_data[begin_offset];
+                const auto * haystack_end = haystack + end_offset - begin_offset - 1;
+                ans[row] = search_one_index(haystack, haystack_end);
+                begin_offset = end_offset;
+            }
+        }
+        return ans;
+    }
+
+private:
+    bool reset()
+    {
+        if (last == needles.size())
+            return true;
+
+        memset(hash, 0, sizeof(hash));
+        fallback_needles.clear();
+        step = std::numeric_limits<size_t>::max();
+
+        size_t buf = 0;
+        size_t size = needles.size();
+
+        for (; last < size; ++last)
+        {
+            const char * cur_needle_data = needles[last].data();
+            const size_t cur_needle_size = needles[last].size();
+
+            /// save the indices of fallback searchers
+            if (VolnitskyTraits::isFallbackNeedle(cur_needle_size))
+            {
+                fallback_needles.push_back(last);
+            }
            else
            {
-                /// ngram is on the boundary of two sequences
-                /// first sequence may start before u_pos if it is not ASCII
-                auto first_seq_pos = pos;
-                UTF8::syncBackward(first_seq_pos, begin);
-                /// where is the given ngram in respect to the start of first UTF-8 sequence?
-                const auto seq_ngram_offset = pos - first_seq_pos;
+                /// put all bigrams
+                auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset) {
+                    return this->putNGramBase(ngram, offset, this->last);
+                };

-                const auto first_u32 = utf8.convert(first_seq_pos);
-                const auto first_l_u32 = Poco::Unicode::toLower(first_u32);
-                const auto first_u_u32 = Poco::Unicode::toUpper(first_u32);
+                buf += cur_needle_size - sizeof(VolnitskyTraits::Ngram) + 1;

-                /// second sequence always start immediately after u_pos
-                auto second_seq_pos = pos + 1;
-
-                const auto second_u32 = utf8.convert(second_seq_pos);    /// TODO This assumes valid UTF-8 or zero byte after needle.
-                const auto second_l_u32 = Poco::Unicode::toLower(second_u32);
-                const auto second_u_u32 = Poco::Unicode::toUpper(second_u32);
-
-                /// both symbols are case-independent
-                if (first_l_u32 == first_u_u32 && second_l_u32 == second_u_u32)
+                if (buf > small_limit)
                {
-                    putNGramBase(n, offset);
+                    break;
                }
-                else if (first_l_u32 == first_u_u32)
+
+                step = std::min(step, cur_needle_size - sizeof(VolnitskyTraits::Ngram) + 1);
+                for (auto i = static_cast<int>(cur_needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i)
                {
-                    /// first symbol is case-independent
-                    Seq seq;
-
-                    /// put ngram for lowercase
-                    utf8.convert(second_l_u32, seq, sizeof(seq));
-                    chars.c1 = seq[0];
-                    putNGramBase(n, offset);
-
-                    /// put ngram from uppercase, if it is different
-                    utf8.convert(second_u_u32, seq, sizeof(seq));
-                    if (chars.c1 != seq[0])
-                    {
-                        chars.c1 = seq[0];
-                        putNGramBase(n, offset);
-                    }
+                    VolnitskyTraits::putNGram<CaseSensitive, ASCII>(
+                        reinterpret_cast<const UInt8 *>(cur_needle_data) + i,
+                        i + 1,
+                        reinterpret_cast<const UInt8 *>(cur_needle_data),
+                        callback);
                }
-                else if (second_l_u32 == second_u_u32)
+            }
+            fallback_searchers.emplace_back(cur_needle_data, cur_needle_size);
+        }
+        return false;
+    }
+
+    inline bool search_one(const UInt8 * haystack, const UInt8 * haystack_end) /// Returns true as soon as any needle is found in [haystack, haystack_end), false otherwise.
+    {
+        const size_t fallback_size = fallback_needles.size(); /// fallback_needles holds indices of the needles that are searched one by one
+        for (size_t i = 0; i < fallback_size; ++i)
+            if (fallback_searchers[fallback_needles[i]].search(haystack, haystack_end) != haystack_end) /// search() yields haystack_end when nothing is found
+                return true;
+
+        /// The hash-table pass runs only when step is not max(); max() is treated as "nothing to look up" (presumably no needle was put into the hash table).
+        if (step != std::numeric_limits<size_t>::max())
+        {
+            const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram); /// the first probed bigram ends exactly `step` bytes into the haystack
+            for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
+            {
+                for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
+                     cell_num = (cell_num + 1) % VolnitskyTraits::hash_size) /// walk the collision chain until a cell with off == 0
                {
-                    /// second symbol is case-independent
-                    Seq seq;
-
-                    /// put ngram for lowercase
-                    utf8.convert(first_l_u32, seq, sizeof(seq));
-                    chars.c0 = seq[seq_ngram_offset];
-                    putNGramBase(n, offset);
-
-                    /// put ngram for uppercase, if it is different
-                    utf8.convert(first_u_u32, seq, sizeof(seq));
-                    if (chars.c0 != seq[seq_ngram_offset])
+                    if (pos >= haystack + hash[cell_num].off - 1) /// the candidate start must not lie before the haystack
                    {
-                        chars.c0 = seq[seq_ngram_offset];
-                        putNGramBase(n, offset);
-                    }
-                }
-                else
-                {
-                    Seq first_l_seq;
-                    Seq first_u_seq;
-                    Seq second_l_seq;
-                    Seq second_u_seq;
-
-                    utf8.convert(first_l_u32, first_l_seq, sizeof(first_l_seq));
-                    utf8.convert(first_u_u32, first_u_seq, sizeof(first_u_seq));
-                    utf8.convert(second_l_u32, second_l_seq, sizeof(second_l_seq));
-                    utf8.convert(second_u_u32, second_u_seq, sizeof(second_u_seq));
-
-                    auto c0l = first_l_seq[seq_ngram_offset];
-                    auto c0u = first_u_seq[seq_ngram_offset];
-                    auto c1l = second_l_seq[0];
-                    auto c1u = second_u_seq[0];
-
-                    /// ngram for ll
-                    chars.c0 = c0l;
-                    chars.c1 = c1l;
-                    putNGramBase(n, offset);
-
-                    if (c0l != c0u)
-                    {
-                        /// ngram for Ul
-                        chars.c0 = c0u;
-                        chars.c1 = c1l;
-                        putNGramBase(n, offset);
-                    }
-
-                    if (c1l != c1u)
-                    {
-                        /// ngram for lU
-                        chars.c0 = c0l;
-                        chars.c1 = c1u;
-                        putNGramBase(n, offset);
-                    }
-
-                    if (c0l != c0u && c1l != c1u)
-                    {
-                        /// ngram for UU
-                        chars.c0 = c0u;
-                        chars.c1 = c1u;
-                        putNGramBase(n, offset);
+                        const auto res = pos - (hash[cell_num].off - 1); /// where the needle would begin if this bigram hit is genuine
+                        const size_t ind = hash[cell_num].id; /// index of the needle this cell was stored for
+                        if (res + needles[ind].size() <= haystack_end && fallback_searchers[ind].compare(res)) /// the needle must fit before haystack_end, then it is verified by compare()
+                            return true;
                    }
                }
            }
        }
+        return false;
    }

-    bool compare(const UInt8 * const pos) const
+    inline size_t search_one_index(const UInt8 * haystack, const UInt8 * haystack_end) /// Returns the 1-based index of the lowest-numbered needle found in [haystack, haystack_end), or 0 if none is found.
    {
-        return fallback_searcher.compare(pos);
+        const size_t fallback_size = fallback_needles.size();
+
+        size_t ans = std::numeric_limits<size_t>::max(); /// max() doubles as "no match yet"
+
+        for (size_t i = 0; i < fallback_size; ++i)
+            if (fallback_searchers[fallback_needles[i]].search(haystack, haystack_end) != haystack_end)
+                ans = std::min(ans, fallback_needles[i]); /// no early exit: a needle with a smaller index may still match
+
+        /// The hash-table pass runs only when step is not max(); max() is treated as "nothing to look up" (presumably no needle was put into the hash table).
+        if (step != std::numeric_limits<size_t>::max())
+        {
+            const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram); /// the first probed bigram ends exactly `step` bytes into the haystack
+            for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
+            {
+                for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
+                     cell_num = (cell_num + 1) % VolnitskyTraits::hash_size) /// walk the collision chain until a cell with off == 0
+                {
+                    if (pos >= haystack + hash[cell_num].off - 1) /// the candidate start must not lie before the haystack
+                    {
+                        const auto res = pos - (hash[cell_num].off - 1); /// where the needle would begin if this bigram hit is genuine
+                        const size_t ind = hash[cell_num].id; /// index of the needle this cell was stored for
+                        if (res + needles[ind].size() <= haystack_end && fallback_searchers[ind].compare(res))
+                            ans = std::min(ans, ind); /// the whole haystack is scanned; only the smallest matching index is kept
+                    }
+                }
+            }
+        }
+
+        /*
+        * NOTE: if nothing was found, ans is still max() and ans + 1 wraps around to zero (size_t is unsigned).
+        * The value can therefore be std::copy'd into the result as is: needle indices are reported starting with one, zero means "not found".
+        */
+        return ans + 1;
    }

-    const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
+    void putNGramBase(const VolnitskyTraits::Ngram ngram, const int offset, const size_t num) /// Stores needle index `num` together with `offset` for `ngram` in the hash table.
    {
-        return fallback_searcher.search(haystack, haystack_end);
-    }
+        size_t cell_num = ngram % VolnitskyTraits::hash_size; /// home cell of this n-gram

-    UTF8CaseInsensitiveStringSearcher fallback_searcher;
+        while (hash[cell_num].off) /// off == 0 marks a free cell, so `offset` is expected to be non-zero (TODO confirm for all callers)
+            cell_num = (cell_num + 1) % VolnitskyTraits::hash_size; /// linear probing with wrap-around; does not terminate if no cell is free
+
+        hash[cell_num] = {static_cast<VolnitskyTraits::Id>(num), static_cast<VolnitskyTraits::Offset>(offset)}; /// both values are narrowed to the table's cell types
+    }
 };

+/// Single-needle Volnitsky searcher: a bigram hash table plus a FallbackSearcher used for verification. @todo store lowercase needle to speed up in case there are numerous occurrences of bigrams from needle in haystack
+template <bool CaseSensitive, bool ASCII, class FallbackSearcher>
+class VolnitskyBase
+{
+protected:
+    const UInt8 * const needle; /// not owned; the caller's buffer is referenced, not copied
+    const size_t needle_size;
+    const UInt8 * const needle_end = needle + needle_size;
+    /// How far the probe position advances when the n-gram read from the haystack is not found in the hash table.
+    const size_t step = needle_size - sizeof(VolnitskyTraits::Ngram) + 1;

-using Volnitsky = VolnitskyImpl<true, true>;
-using VolnitskyUTF8 = VolnitskyImpl<true, false>;    /// exactly same as Volnitsky
-using VolnitskyCaseInsensitive = VolnitskyImpl<false, true>;    /// ignores non-ASCII bytes
-using VolnitskyCaseInsensitiveUTF8 = VolnitskyImpl<false, false>;
+    /** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
+      *  storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases. A zero cell is empty; stored values are 1-based offsets. */
+    VolnitskyTraits::Offset hash[VolnitskyTraits::hash_size]; /// Hash table.
+
+    const bool fallback; /// Do we need to use the fallback algorithm.
+
+    FallbackSearcher fallback_searcher; /// used alone in fallback mode; otherwise verifies hash hits and scans the haystack tail
+
+public:
+    /** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
+      * If you specify it small enough, the fallback algorithm will be used,
+      *  since it is considered that it's useless to waste time initializing the hash table.
+      */
+    VolnitskyBase(const char * const needle, const size_t needle_size, size_t haystack_size_hint = 0)
+        : needle{reinterpret_cast<const UInt8 *>(needle)}
+        , needle_size{needle_size}
+        , fallback{VolnitskyTraits::isFallbackNeedle(needle_size, haystack_size_hint)}
+        , fallback_searcher{needle, needle_size}
+    {
+        if (fallback)
+            return; /// hash stays uninitialized here; search() checks `fallback` before it ever reads the table
+
+        memset(hash, 0, sizeof(hash));
+
+        auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset) { return this->putNGramBase(ngram, offset); };
+        /// A signed int index is required: the loop counts down to zero, and an unsigned `i >= 0` would always be true.
+        for (auto i = static_cast<int>(needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i)
+            VolnitskyTraits::putNGram<CaseSensitive, ASCII>(this->needle + i, i + 1, this->needle, callback); /// from the last bigram to the first, each with 1-based offset i + 1
+    }
+
+
+    /// If not found, the end of the haystack is returned.
+    const UInt8 * search(const UInt8 * const haystack, const size_t haystack_size) const
+    {
+        if (needle_size == 0)
+            return haystack; /// an empty needle matches at the very beginning
+
+        const auto haystack_end = haystack + haystack_size;
+
+        if (fallback || haystack_size <= needle_size) /// a haystack not longer than the needle also goes to the plain searcher
+            return fallback_searcher.search(haystack, haystack_end);
+
+        /// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
+        const auto * pos = haystack + needle_size - sizeof(VolnitskyTraits::Ngram);
+        for (; pos <= haystack_end - needle_size; pos += step)
+        {
+            /// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
+            for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num];
+                 cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
+            {
+                /// When found - compare bytewise, using the offset from the hash table.
+                const auto res = pos - (hash[cell_num] - 1);
+
+                /// No bounds check here: haystacks are expected to be padded arrays, so compare() may read past the end (NOTE(review): confirm for every caller).
+                if (fallback_searcher.compare(res))
+                    return res;
+            }
+        }
+
+        return fallback_searcher.search(pos - step + 1, haystack_end); /// no hit in the strided pass: the rest, from pos - step + 1 on, is scanned by the plain searcher
+    }
+
+    const char * search(const char * haystack, size_t haystack_size) const /// char-pointer overload, forwards to the UInt8 version
+    {
+        return reinterpret_cast<const char *>(search(reinterpret_cast<const UInt8 *>(haystack), haystack_size));
+    }
+
+protected:
+    void putNGramBase(const VolnitskyTraits::Ngram ngram, const int offset)
+    {
+        /// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
+        size_t cell_num = ngram % VolnitskyTraits::hash_size;
+
+        while (hash[cell_num])
+            cell_num = (cell_num + 1) % VolnitskyTraits::hash_size; /// Search for the next free cell.
+
+        hash[cell_num] = offset; /// int is narrowed to VolnitskyTraits::Offset; longer needles are presumably routed to the fallback by isFallbackNeedle
+    }
+};
+
+using Volnitsky = VolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
+using VolnitskyUTF8 = VolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>; /// exactly same as Volnitsky
+using VolnitskyCaseInsensitive = VolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>; /// ignores non-ASCII bytes
+using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;
+
+using MultiVolnitsky = MultiVolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
+using MultiVolnitskyUTF8 = MultiVolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>;
+using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>;
+using MultiVolnitskyCaseInsensitiveUTF8 = MultiVolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;


 }
--- a/dbms/src/Functions/FunctionsStringSearch.cpp
+++ b/dbms/src/Functions/FunctionsStringSearch.cpp
@ -1,28 +1,28 @@
 #include <Functions/FunctionsStringSearch.h>

-#include <memory>
-#include <mutex>
-#include <Poco/UTF8String.h>
 #include <Columns/ColumnFixedString.h>
-#include <Common/Volnitsky.h>
+#include <Common/config.h>
+
 #include <DataTypes/DataTypeFixedString.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/Regexps.h>
 #include <IO/WriteHelpers.h>
-#include <Common/config.h>
 #include <re2/re2.h>
 #include <re2/stringpiece.h>
+#include <Poco/UTF8String.h>
+#include <Common/Volnitsky.h>
+
+#include <algorithm>
+#include <memory>

 #if USE_RE2_ST
-    #include <re2_st/re2.h> // Y_IGNORE
+#    include <re2_st/re2.h> // Y_IGNORE
 #else
-    #define re2_st re2
+#    define re2_st re2
 #endif

-
 namespace DB
 {
-
 namespace ErrorCodes
 {
    extern const int BAD_ARGUMENTS;
@ -35,7 +35,10 @@ namespace ErrorCodes
 struct PositionCaseSensitiveASCII
 {
    /// For searching single substring inside big-enough contiguous chunk of data. Coluld have slightly expensive initialization.
-    using SearcherInBigHaystack = VolnitskyImpl<true, true>;
+    using SearcherInBigHaystack = Volnitsky;
+
+    /// For search many substrings in one string
+    using MultiSearcherInBigHaystack = MultiVolnitsky;

    /// For searching single substring, that is different each time. This object is created for each row of data. It must have cheap initialization.
    using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher;
@ -50,23 +53,24 @@ struct PositionCaseSensitiveASCII
        return SearcherInSmallHaystack(needle_data, needle_size);
    }

-    /// Number of code points between 'begin' and 'end' (this has different behaviour for ASCII and UTF-8).
-    static size_t countChars(const char * begin, const char * end)
+    static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<String> & needles)
    {
-        return end - begin;
+        return MultiSearcherInBigHaystack(needles);
    }

+    /// Number of code points between 'begin' and 'end' (this has different behaviour for ASCII and UTF-8).
+    static size_t countChars(const char * begin, const char * end) { return end - begin; }
+
    /// Convert string to lowercase. Only for case-insensitive search.
    /// Implementation is permitted to be inefficient because it is called for single string.
-    static void toLowerIfNeed(std::string &)
-    {
-    }
+    static void toLowerIfNeed(std::string &) {}
 };

 struct PositionCaseInsensitiveASCII
 {
    /// `Volnitsky` is not used here, because one person has measured that this is better. It will be good if you question it.
    using SearcherInBigHaystack = ASCIICaseInsensitiveStringSearcher;
+    using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitive;
    using SearcherInSmallHaystack = LibCASCIICaseInsensitiveStringSearcher;

    static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t /*haystack_size_hint*/)
@ -79,20 +83,20 @@ struct PositionCaseInsensitiveASCII
        return SearcherInSmallHaystack(needle_data, needle_size);
    }

-    static size_t countChars(const char * begin, const char * end)
+    static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<String> & needles)
    {
-        return end - begin;
+        return MultiSearcherInBigHaystack(needles);
    }

-    static void toLowerIfNeed(std::string & s)
-    {
-        std::transform(std::begin(s), std::end(s), std::begin(s), tolower);
-    }
+    static size_t countChars(const char * begin, const char * end) { return end - begin; }
+
+    static void toLowerIfNeed(std::string & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); }
 };

 struct PositionCaseSensitiveUTF8
 {
-    using SearcherInBigHaystack = VolnitskyImpl<true, false>;
+    using SearcherInBigHaystack = VolnitskyUTF8;
+    using MultiSearcherInBigHaystack = MultiVolnitskyUTF8;
    using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher;

    static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint)
@ -105,6 +109,11 @@ struct PositionCaseSensitiveUTF8
        return SearcherInSmallHaystack(needle_data, needle_size);
    }

+    static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<String> & needles)
+    {
+        return MultiSearcherInBigHaystack(needles);
+    }
+
    static size_t countChars(const char * begin, const char * end)
    {
        size_t res = 0;
@ -114,14 +123,13 @@ struct PositionCaseSensitiveUTF8
        return res;
    }

-    static void toLowerIfNeed(std::string &)
-    {
-    }
+    static void toLowerIfNeed(std::string &) {}
 };

 struct PositionCaseInsensitiveUTF8
 {
-    using SearcherInBigHaystack = VolnitskyImpl<false, false>;
+    using SearcherInBigHaystack = VolnitskyCaseInsensitiveUTF8;
+    using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitiveUTF8;
    using SearcherInSmallHaystack = UTF8CaseInsensitiveStringSearcher; /// TODO Very suboptimal.

    static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint)
@ -134,6 +142,11 @@ struct PositionCaseInsensitiveUTF8
        return SearcherInSmallHaystack(needle_data, needle_size);
    }

+    static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<String> & needles)
+    {
+        return MultiSearcherInBigHaystack(needles);
+    }
+
    static size_t countChars(const char * begin, const char * end)
    {
        size_t res = 0;
@ -143,10 +156,7 @@ struct PositionCaseInsensitiveUTF8
        return res;
    }

-    static void toLowerIfNeed(std::string & s)
-    {
-        Poco::UTF8::toLowerInPlace(s);
-    }
+    static void toLowerIfNeed(std::string & s) { Poco::UTF8::toLowerInPlace(s); }
 };

 template <typename Impl>
@ -155,10 +165,8 @@ struct PositionImpl
    using ResultType = UInt64;

    /// Find one substring in many strings.
-    static void vector_constant(const ColumnString::Chars & data,
-        const ColumnString::Offsets & offsets,
-        const std::string & needle,
-        PaddedPODArray<UInt64> & res)
+    static void vector_constant(
+        const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & needle, PaddedPODArray<UInt64> & res)
    {
        const UInt8 * begin = data.data();
        const UInt8 * pos = begin;
@ -210,7 +218,8 @@ struct PositionImpl
    }

    /// Search each time for a different single substring inside each time different string.
-    static void vector_vector(const ColumnString::Chars & haystack_data,
+    static void vector_vector(
+        const ColumnString::Chars & haystack_data,
        const ColumnString::Offsets & haystack_offsets,
        const ColumnString::Chars & needle_data,
        const ColumnString::Offsets & needle_offsets,
@ -234,9 +243,9 @@ struct PositionImpl
            else
            {
                /// It is assumed that the StringSearcher is not very difficult to initialize.
-                typename Impl::SearcherInSmallHaystack searcher
-                    = Impl::createSearcherInSmallHaystack(reinterpret_cast<const char *>(&needle_data[prev_needle_offset]),
-                        needle_offsets[i] - prev_needle_offset - 1); /// zero byte at the end
+                typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack(
+                    reinterpret_cast<const char *>(&needle_data[prev_needle_offset]),
+                    needle_offsets[i] - prev_needle_offset - 1); /// zero byte at the end

                /// searcher returns a pointer to the found substring or to the end of `haystack`.
                size_t pos = searcher.search(&haystack_data[prev_haystack_offset], &haystack_data[haystack_offsets[i] - 1])
@ -244,8 +253,10 @@ struct PositionImpl

                if (pos != haystack_size)
                {
-                    res[i] = 1 + Impl::countChars(reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]),
-                                     reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset + pos]));
+                    res[i] = 1
+                        + Impl::countChars(
+                                 reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]),
+                                 reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset + pos]));
                }
                else
                    res[i] = 0;
@ -257,7 +268,8 @@ struct PositionImpl
    }

    /// Find many substrings in one line.
-    static void constant_vector(const String & haystack,
+    static void constant_vector(
+        const String & haystack,
        const ColumnString::Chars & needle_data,
        const ColumnString::Offsets & needle_offsets,
        PaddedPODArray<UInt64> & res)
@ -281,7 +293,8 @@ struct PositionImpl
                typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack(
                    reinterpret_cast<const char *>(&needle_data[prev_needle_offset]), needle_offsets[i] - prev_needle_offset - 1);

-                size_t pos = searcher.search(reinterpret_cast<const UInt8 *>(haystack.data()),
+                size_t pos = searcher.search(
+                                 reinterpret_cast<const UInt8 *>(haystack.data()),
                                 reinterpret_cast<const UInt8 *>(haystack.data()) + haystack.size())
                    - reinterpret_cast<const UInt8 *>(haystack.data());

@ -298,6 +311,71 @@ struct PositionImpl
    }
 };

+/// multiPosition* backend: one result per (haystack row, needle) pair, laid out row by row.
+template <typename Impl>
+struct MultiPositionImpl
+{
+    using ResultType = UInt64;
+
+    /// res[row * needles.size() + n] = 1-based position (in Impl::countChars units) of needle n in that row, or 0 if search_all gave null.
+    static void multi_constant_vector(
+        const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets,
+        const std::vector<String> & needles, PaddedPODArray<UInt64> & res)
+    {
+        const auto found = Impl::createMultiSearcherInBigHaystack(needles).search_all(haystack_data, haystack_offsets);
+        const size_t rows = haystack_offsets.size();
+        const size_t needles_per_row = needles.size();
+
+        for (size_t row = 0, flat = 0; row < rows; ++row)
+        {
+            for (size_t n = 0; n < needles_per_row; ++n, ++flat)
+            {
+                const char * const hit = found[flat];
+                if (!hit)
+                {
+                    res[flat] = 0;
+                    continue;
+                }
+                /// The row starts right after the previous row's end offset (offset 0 for the first row).
+                const char * const row_begin = reinterpret_cast<const char *>(&haystack_data[row == 0 ? 0 : haystack_offsets[row - 1]]);
+                res[flat] = 1 + Impl::countChars(row_begin, hit);
+            }
+        }
+    }
+};
+
+/// multiSearch* backend: forwards the whole haystack column to the multi-needle searcher's `search`.
+template <typename Impl>
+struct MultiSearchImpl
+{
+    using ResultType = UInt64;
+
+    /// Copies whatever `search` returns into `res`; `res` must already be large enough (std::copy does not grow it).
+    static void multi_constant_vector(
+        const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets,
+        const std::vector<String> & needles, PaddedPODArray<UInt64> & res)
+    {
+        const auto matches = Impl::createMultiSearcherInBigHaystack(needles).search(haystack_data, haystack_offsets);
+        std::copy(std::begin(matches), std::end(matches), std::begin(res));
+    }
+};
+
+/// firstMatch* backend: forwards the whole haystack column to the multi-needle searcher's `search_index`.
+template <typename Impl>
+struct FirstMatchImpl
+{
+    using ResultType = UInt64;
+
+    /// Copies the values returned by `search_index` into `res` (presumably 1-based needle indices, 0 for "no match"); `res` must already be sized.
+    static void multi_constant_vector(
+        const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets,
+        const std::vector<String> & needles, PaddedPODArray<UInt64> & res)
+    {
+        const auto indexes = Impl::createMultiSearcherInBigHaystack(needles).search_index(haystack_data, haystack_offsets);
+        std::copy(std::begin(indexes), std::end(indexes), std::begin(res));
+    }
+};
+

 /// Is the LIKE expression reduced to finding a substring in a string?
 inline bool likePatternIsStrstr(const String & pattern, String & res)
@ -348,10 +426,8 @@ struct MatchImpl
 {
    using ResultType = UInt8;

-    static void vector_constant(const ColumnString::Chars & data,
-        const ColumnString::Offsets & offsets,
-        const std::string & pattern,
-        PaddedPODArray<UInt8> & res)
+    static void vector_constant(
+        const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & pattern, PaddedPODArray<UInt8> & res)
    {
        if (offsets.empty())
            return;
@ -473,7 +549,8 @@ struct MatchImpl

                            if (required_substring_is_prefix)
                                res[i] = revert
-                                    ^ regexp->getRE2()->Match(re2_st::StringPiece(str_data, str_size),
+                                    ^ regexp->getRE2()->Match(
+                                          re2_st::StringPiece(str_data, str_size),
                                          reinterpret_cast<const char *>(pos) - str_data,
                                          str_size,
                                          re2_st::RE2::UNANCHORED,
@ -504,13 +581,15 @@ struct MatchImpl
        res = revert ^ regexp->match(data);
    }

-    template <typename... Args> static void vector_vector(Args &&...)
+    template <typename... Args>
+    static void vector_vector(Args &&...)
    {
        throw Exception("Functions 'like' and 'match' don't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
    }

    /// Search different needles in single haystack.
-    template <typename... Args> static void constant_vector(Args &&...)
+    template <typename... Args>
+    static void constant_vector(Args &&...)
    {
        throw Exception("Functions 'like' and 'match' don't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
    }
@ -519,7 +598,8 @@ struct MatchImpl

 struct ExtractImpl
 {
-    static void vector(const ColumnString::Chars & data,
+    static void vector(
+        const ColumnString::Chars & data,
        const ColumnString::Offsets & offsets,
        const std::string & pattern,
        ColumnString::Chars & res_data,
@ -613,16 +693,17 @@ struct ReplaceRegexpImpl

        for (const auto & it : instructions)
            if (it.first >= num_captures)
-                throw Exception("Invalid replace instruction in replacement string. Id: " + toString(it.first) + ", but regexp has only "
-                        + toString(num_captures - 1)
-                        + " subpatterns",
+                throw Exception(
+                    "Invalid replace instruction in replacement string. Id: " + toString(it.first) + ", but regexp has only "
+                        + toString(num_captures - 1) + " subpatterns",
                    ErrorCodes::BAD_ARGUMENTS);

        return instructions;
    }


-    static void processString(const re2_st::StringPiece & input,
+    static void processString(
+        const re2_st::StringPiece & input,
        ColumnString::Chars & res_data,
        ColumnString::Offset & res_offset,
        re2_st::RE2 & searcher,
@ -687,7 +768,8 @@ struct ReplaceRegexpImpl
    }


-    static void vector(const ColumnString::Chars & data,
+    static void vector(
+        const ColumnString::Chars & data,
        const ColumnString::Offsets & offsets,
        const std::string & needle,
        const std::string & replacement,
@ -715,7 +797,8 @@ struct ReplaceRegexpImpl
        }
    }

-    static void vector_fixed(const ColumnString::Chars & data,
+    static void vector_fixed(
+        const ColumnString::Chars & data,
        size_t n,
        const std::string & needle,
        const std::string & replacement,
@ -749,7 +832,8 @@ struct ReplaceRegexpImpl
 template <bool replace_one = false>
 struct ReplaceStringImpl
 {
-    static void vector(const ColumnString::Chars & data,
+    static void vector(
+        const ColumnString::Chars & data,
        const ColumnString::Offsets & offsets,
        const std::string & needle,
        const std::string & replacement,
@ -824,7 +908,8 @@ struct ReplaceStringImpl

    /// Note: this function converts fixed-length strings to variable-length strings
    ///       and each variable-length string should ends with zero byte.
-    static void vector_fixed(const ColumnString::Chars & data,
+    static void vector_fixed(
+        const ColumnString::Chars & data,
        size_t n,
        const std::string & needle,
        const std::string & replacement,
@ -851,7 +936,8 @@ struct ReplaceStringImpl
            const UInt8 * match = searcher.search(pos, end - pos);

 #define COPY_REST_OF_CURRENT_STRING() \
-    do { \
+    do \
+    { \
        const size_t len = begin + n * (i + 1) - pos; \
        res_data.resize(res_data.size() + len + 1); \
        memcpy(&res_data[res_offset], pos, len); \
@ -935,20 +1021,11 @@ class FunctionStringReplace : public IFunction
 {
 public:
    static constexpr auto name = Name::name;
-    static FunctionPtr create(const Context &)
-    {
-        return std::make_shared<FunctionStringReplace>();
-    }
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionStringReplace>(); }

-    String getName() const override
-    {
-        return name;
-    }
+    String getName() const override { return name; }

-    size_t getNumberOfArguments() const override
-    {
-        return 3;
-    }
+    size_t getNumberOfArguments() const override { return 3; }

    bool useDefaultImplementationForConstants() const override { return true; }
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
@ -956,15 +1033,18 @@ public:
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (!isStringOrFixedString(arguments[0]))
-            throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName(),
+            throw Exception(
+                "Illegal type " + arguments[0]->getName() + " of first argument of function " + getName(),
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        if (!isStringOrFixedString(arguments[1]))
-            throw Exception("Illegal type " + arguments[1]->getName() + " of second argument of function " + getName(),
+            throw Exception(
+                "Illegal type " + arguments[1]->getName() + " of second argument of function " + getName(),
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        if (!isStringOrFixedString(arguments[2]))
-            throw Exception("Illegal type " + arguments[2]->getName() + " of third argument of function " + getName(),
+            throw Exception(
+                "Illegal type " + arguments[2]->getName() + " of third argument of function " + getName(),
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        return std::make_shared<DataTypeString>();
@ -1025,6 +1105,54 @@ struct NamePositionCaseInsensitiveUTF8
 {
    static constexpr auto name = "positionCaseInsensitiveUTF8";
 };
+struct NameMultiPosition /// multiPosition*: name tags for the FunctionMultiPosition* aliases (backed by MultiPositionImpl)
+{
+    static constexpr auto name = "multiPosition"; /// presumably exposed as the SQL function name via Name::name, as FunctionStringReplace does
+};
+struct NameMultiPositionUTF8
+{
+    static constexpr auto name = "multiPositionUTF8";
+};
+struct NameMultiPositionCaseInsensitive
+{
+    static constexpr auto name = "multiPositionCaseInsensitive";
+};
+struct NameMultiPositionCaseInsensitiveUTF8
+{
+    static constexpr auto name = "multiPositionCaseInsensitiveUTF8";
+};
+struct NameMultiSearch /// multiSearch*: name tags for the FunctionMultiSearch* aliases (backed by MultiSearchImpl)
+{
+    static constexpr auto name = "multiSearch";
+};
+struct NameMultiSearchUTF8
+{
+    static constexpr auto name = "multiSearchUTF8";
+};
+struct NameMultiSearchCaseInsensitive
+{
+    static constexpr auto name = "multiSearchCaseInsensitive";
+};
+struct NameMultiSearchCaseInsensitiveUTF8
+{
+    static constexpr auto name = "multiSearchCaseInsensitiveUTF8";
+};
+struct NameFirstMatch /// firstMatch*: name tags for the FunctionFirstMatch* aliases (backed by FirstMatchImpl)
+{
+    static constexpr auto name = "firstMatch";
+};
+struct NameFirstMatchUTF8
+{
+    static constexpr auto name = "firstMatchUTF8";
+};
+struct NameFirstMatchCaseInsensitive
+{
+    static constexpr auto name = "firstMatchCaseInsensitive";
+};
+struct NameFirstMatchCaseInsensitiveUTF8
+{
+    static constexpr auto name = "firstMatchCaseInsensitiveUTF8";
+};
 struct NameMatch
 {
    static constexpr auto name = "match";
@ -1064,6 +1192,27 @@ using FunctionPositionCaseInsensitive = FunctionsStringSearch<PositionImpl<Posit
 using FunctionPositionCaseInsensitiveUTF8
    = FunctionsStringSearch<PositionImpl<PositionCaseInsensitiveUTF8>, NamePositionCaseInsensitiveUTF8>;

+using FunctionMultiPosition = FunctionsMultiStringPosition<MultiPositionImpl<PositionCaseSensitiveASCII>, NameMultiPosition>;
+using FunctionMultiPositionUTF8 = FunctionsMultiStringPosition<MultiPositionImpl<PositionCaseSensitiveUTF8>, NameMultiPositionUTF8>;
+using FunctionMultiPositionCaseInsensitive
+    = FunctionsMultiStringPosition<MultiPositionImpl<PositionCaseInsensitiveASCII>, NameMultiPositionCaseInsensitive>;
+using FunctionMultiPositionCaseInsensitiveUTF8
+    = FunctionsMultiStringPosition<MultiPositionImpl<PositionCaseInsensitiveUTF8>, NameMultiPositionCaseInsensitiveUTF8>;
+
+using FunctionMultiSearch = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveASCII>, NameMultiSearch>;
+using FunctionMultiSearchUTF8 = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveUTF8>, NameMultiSearchUTF8>;
+using FunctionMultiSearchCaseInsensitive
+    = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveASCII>, NameMultiSearchCaseInsensitive>;
+using FunctionMultiSearchCaseInsensitiveUTF8
+    = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchCaseInsensitiveUTF8>;
+
+using FunctionFirstMatch = FunctionsMultiStringSearch<FirstMatchImpl<PositionCaseSensitiveASCII>, NameFirstMatch>;
+using FunctionFirstMatchUTF8 = FunctionsMultiStringSearch<FirstMatchImpl<PositionCaseSensitiveUTF8>, NameFirstMatchUTF8>;
+using FunctionFirstMatchCaseInsensitive
+    = FunctionsMultiStringSearch<FirstMatchImpl<PositionCaseInsensitiveASCII>, NameFirstMatchCaseInsensitive>;
+using FunctionFirstMatchCaseInsensitiveUTF8
+    = FunctionsMultiStringSearch<FirstMatchImpl<PositionCaseInsensitiveUTF8>, NameFirstMatchCaseInsensitiveUTF8>;
+
 using FunctionMatch = FunctionsStringSearch<MatchImpl<false>, NameMatch>;
 using FunctionLike = FunctionsStringSearch<MatchImpl<true>, NameLike>;
 using FunctionNotLike = FunctionsStringSearch<MatchImpl<true, true>, NameNotLike>;
@ -1080,14 +1229,32 @@ void registerFunctionsStringSearch(FunctionFactory & factory)
    factory.registerFunction<FunctionReplaceAll>();
    factory.registerFunction<FunctionReplaceRegexpOne>();
    factory.registerFunction<FunctionReplaceRegexpAll>();
+
    factory.registerFunction<FunctionPosition>(FunctionFactory::CaseInsensitive);
    factory.registerFunction<FunctionPositionUTF8>();
    factory.registerFunction<FunctionPositionCaseInsensitive>();
    factory.registerFunction<FunctionPositionCaseInsensitiveUTF8>();
+
+    factory.registerFunction<FunctionMultiPosition>();
+    factory.registerFunction<FunctionMultiPositionUTF8>();
+    factory.registerFunction<FunctionMultiPositionCaseInsensitive>();
+    factory.registerFunction<FunctionMultiPositionCaseInsensitiveUTF8>();
+
+    factory.registerFunction<FunctionMultiSearch>();
+    factory.registerFunction<FunctionMultiSearchUTF8>();
+    factory.registerFunction<FunctionMultiSearchCaseInsensitive>();
+    factory.registerFunction<FunctionMultiSearchCaseInsensitiveUTF8>();
+
+    factory.registerFunction<FunctionFirstMatch>();
+    factory.registerFunction<FunctionFirstMatchUTF8>();
+    factory.registerFunction<FunctionFirstMatchCaseInsensitive>();
+    factory.registerFunction<FunctionFirstMatchCaseInsensitiveUTF8>();
+
    factory.registerFunction<FunctionMatch>();
    factory.registerFunction<FunctionLike>();
    factory.registerFunction<FunctionNotLike>();
    factory.registerFunction<FunctionExtract>();
+
    factory.registerAlias("locate", NamePosition::name, FunctionFactory::CaseInsensitive);
    factory.registerAlias("replace", NameReplaceAll::name, FunctionFactory::CaseInsensitive);
 }
--- a/dbms/src/Functions/FunctionsStringSearch.h
+++ b/dbms/src/Functions/FunctionsStringSearch.h
@ -1,17 +1,21 @@
 #pragma once

+#include <Columns/ColumnArray.h>
 #include <Columns/ColumnConst.h>
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnVector.h>
+#include <Core/Field.h>
+#include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypesNumber.h>
-#include <Functions/IFunction.h>
 #include <Functions/FunctionHelpers.h>
-
+#include <Functions/IFunction.h>
+#include <IO/WriteHelpers.h>
+#include <common/StringRef.h>
+#include <ext/range.h>

 namespace DB
 {
-
 /** Search and replace functions in strings:
  *
  * position(haystack, needle)     - the normal search for a substring in a string, returns the position (in bytes) of the found substring starting with 1, or 0 if no substring is found.
@ -35,12 +39,28 @@ namespace DB
  *
  * replaceRegexpOne(haystack, pattern, replacement) - replaces the pattern with the specified regexp, only the first occurrence.
  * replaceRegexpAll(haystack, pattern, replacement) - replaces the pattern with the specified type, all occurrences.
+  *
+  * multiPosition(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- find first occurrences (positions) of all the const patterns inside haystack
+  * multiPositionUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
+  * multiPositionCaseInsensitive(haystack, [pattern_1, pattern_2, ..., pattern_n])
+  * multiPositionCaseInsensitiveUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
+  *
+  * multiSearch(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- find any of the const patterns inside haystack and return 0 or 1
+  * multiSearchUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
+  * multiSearchCaseInsensitive(haystack, [pattern_1, pattern_2, ..., pattern_n])
+  * multiSearchCaseInsensitiveUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
+  *
+  * firstMatch(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- returns the first index of the matched string or zero if nothing was found
+  * firstMatchUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
+  * firstMatchCaseInsensitive(haystack, [pattern_1, pattern_2, ..., pattern_n])
+  * firstMatchCaseInsensitiveUTF8(haystack, [pattern_1, pattern_2, ..., pattern_n])
  */

 namespace ErrorCodes
 {
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int ILLEGAL_COLUMN;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
 }

 template <typename Impl, typename Name>
@ -48,20 +68,11 @@ class FunctionsStringSearch : public IFunction
 {
 public:
    static constexpr auto name = Name::name;
-    static FunctionPtr create(const Context &)
-    {
-        return std::make_shared<FunctionsStringSearch>();
-    }
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionsStringSearch>(); }

-    String getName() const override
-    {
-        return name;
-    }
+    String getName() const override { return name; }

-    size_t getNumberOfArguments() const override
-    {
-        return 2;
-    }
+    size_t getNumberOfArguments() const override { return 2; }

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
@ -90,7 +101,8 @@ public:
        {
            ResultType res{};
            Impl::constant_constant(col_haystack_const->getValue<String>(), col_needle_const->getValue<String>(), res);
-            block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(col_haystack_const->size(), toField(res));
+            block.getByPosition(result).column
+                = block.getByPosition(result).type->createColumnConst(col_haystack_const->size(), toField(res));
            return;
        }

@ -103,20 +115,22 @@ public:
        const ColumnString * col_needle_vector = checkAndGetColumn<ColumnString>(&*column_needle);

        if (col_haystack_vector && col_needle_vector)
-            Impl::vector_vector(col_haystack_vector->getChars(),
+            Impl::vector_vector(
+                col_haystack_vector->getChars(),
                col_haystack_vector->getOffsets(),
                col_needle_vector->getChars(),
                col_needle_vector->getOffsets(),
                vec_res);
        else if (col_haystack_vector && col_needle_const)
-            Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needle_const->getValue<String>(), vec_res);
+            Impl::vector_constant(
+                col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needle_const->getValue<String>(), vec_res);
        else if (col_haystack_const && col_needle_vector)
-            Impl::constant_vector(col_haystack_const->getValue<String>(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res);
+            Impl::constant_vector(
+                col_haystack_const->getValue<String>(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res);
        else
-            throw Exception("Illegal columns " + block.getByPosition(arguments[0]).column->getName() + " and "
-                    + block.getByPosition(arguments[1]).column->getName()
-                    + " of arguments of function "
-                    + getName(),
+            throw Exception(
+                "Illegal columns " + block.getByPosition(arguments[0]).column->getName() + " and "
+                    + block.getByPosition(arguments[1]).column->getName() + " of arguments of function " + getName(),
                ErrorCodes::ILLEGAL_COLUMN);

        block.getByPosition(result).column = std::move(col_res);
@ -129,20 +143,11 @@ class FunctionsStringSearchToString : public IFunction
 {
 public:
    static constexpr auto name = Name::name;
-    static FunctionPtr create(const Context &)
-    {
-        return std::make_shared<FunctionsStringSearchToString>();
-    }
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionsStringSearchToString>(); }

-    String getName() const override
-    {
-        return name;
-    }
+    String getName() const override { return name; }

-    size_t getNumberOfArguments() const override
-    {
-        return 2;
-    }
+    size_t getNumberOfArguments() const override { return 2; }

    bool useDefaultImplementationForConstants() const override { return true; }
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
@ -186,4 +191,160 @@ public:
    }
 };

+/** Wrapper for functions of the form f(haystack, [pattern_1, ..., pattern_n]) that produce,
+  * for every haystack row, an array with exactly one element per pattern.
+  *
+  * Impl must provide:
+  *  - ResultType: the element type of the flat result column;
+  *  - multi_constant_vector(haystack_chars, haystack_offsets, needles, res): fills res, which is
+  *    pre-sized to rows * needles.size().
+  *
+  * The pattern array must be a constant column. A constant haystack is handled by the default
+  * implementation for constants (it is materialized before executeImpl is called).
+  */
+template <typename Impl, typename Name>
+class FunctionsMultiStringPosition : public IFunction
+{
+public:
+    static constexpr auto name = Name::name;
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionsMultiStringPosition>(); }
+
+    String getName() const override { return name; }
+
+    size_t getNumberOfArguments() const override { return 2; }
+
+    /// Let the framework unwrap a constant haystack; the pattern array (argument 1) stays constant.
+    bool useDefaultImplementationForConstants() const override { return true; }
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+
+    /// Accepts (String, Array(String)) and returns Array(UInt64).
+    /// Throws ILLEGAL_TYPE_OF_ARGUMENT for any other argument types.
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        /// Note: the number of patterns is not known here (only types are), so the limit on the
+        /// number of patterns is enforced in executeImpl.
+        if (!isString(arguments[0]))
+            throw Exception(
+                "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
+        if (!array_type || !checkAndGetDataType<DataTypeString>(array_type->getNestedType().get()))
+            throw Exception(
+                "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
+    }
+
+    /// Throws ILLEGAL_COLUMN if the pattern array is not constant or the haystack is not a ColumnString,
+    /// and NUMBER_OF_ARGUMENTS_DOESNT_MATCH if more than 255 patterns are passed.
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
+    {
+        using ResultType = typename Impl::ResultType;
+
+        const ColumnPtr & column_haystack = block.getByPosition(arguments[0]).column;
+        const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
+
+        const ColumnPtr & arr_ptr = block.getByPosition(arguments[1]).column;
+        const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arr_ptr.get());
+
+        if (!col_const_arr)
+            throw Exception(
+                "Illegal column " + block.getByPosition(arguments[1]).column->getName() + ". The array is not const",
+                ErrorCodes::ILLEGAL_COLUMN);
+
+        /// Validate the haystack before allocating any result buffers.
+        if (!col_haystack_vector)
+            throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName(), ErrorCodes::ILLEGAL_COLUMN);
+
+        Array src_arr = col_const_arr->getValue<Array>();
+
+        /// The limit applies to the number of patterns, not to the number of function arguments (which is always 2).
+        /// NOTE(review): presumably the multi-pattern searcher keeps pattern indices in a UInt8 - confirm against Volnitsky.h.
+        if (src_arr.size() > std::numeric_limits<UInt8>::max())
+            throw Exception(
+                "Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(src_arr.size())
+                    + ", should be at most 255.",
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+        std::vector<String> refs;
+        refs.reserve(src_arr.size());
+
+        for (const auto & el : src_arr)
+            refs.emplace_back(el.get<String>());
+
+        const size_t column_haystack_size = column_haystack->size();
+        const size_t refs_size = refs.size();
+
+        auto col_res = ColumnVector<ResultType>::create();
+        auto col_offsets = ColumnArray::ColumnOffsets::create(column_haystack_size);
+
+        auto & vec_res = col_res->getData();
+        auto & offsets_res = col_offsets->getData();
+
+        /// Flat storage: refs_size consecutive results for every haystack row.
+        vec_res.resize(column_haystack_size * refs_size);
+
+        Impl::multi_constant_vector(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res);
+
+        /// Every row owns exactly refs_size elements, so the array offsets are multiples of refs_size.
+        size_t accum = refs_size;
+
+        for (size_t i = 0; i < column_haystack_size; ++i, accum += refs_size)
+            offsets_res[i] = accum;
+
+        block.getByPosition(result).column = ColumnArray::create(std::move(col_res), std::move(col_offsets));
+    }
+};
+
+/** Wrapper for functions of the form f(haystack, [pattern_1, ..., pattern_n]) that produce
+  * a single number per haystack row (multiSearch, firstMatch and their variants).
+  *
+  * Impl must provide:
+  *  - ResultType: the numeric type of the result column;
+  *  - multi_constant_vector(haystack_chars, haystack_offsets, needles, res): fills res, which is
+  *    pre-sized to the number of haystack rows.
+  *
+  * The pattern array must be a constant column. A constant haystack is handled by the default
+  * implementation for constants (it is materialized before executeImpl is called).
+  */
+template <typename Impl, typename Name>
+class FunctionsMultiStringSearch : public IFunction
+{
+public:
+    static constexpr auto name = Name::name;
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionsMultiStringSearch>(); }
+
+    String getName() const override { return name; }
+
+    size_t getNumberOfArguments() const override { return 2; }
+
+    /// Let the framework unwrap a constant haystack; the pattern array (argument 1) stays constant.
+    bool useDefaultImplementationForConstants() const override { return true; }
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+
+    /// Accepts (String, Array(String)) and returns a number of type Impl::ResultType.
+    /// Throws ILLEGAL_TYPE_OF_ARGUMENT for any other argument types.
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        /// Note: the number of patterns is not known here (only types are), so the limit on the
+        /// number of patterns is enforced in executeImpl.
+        if (!isString(arguments[0]))
+            throw Exception(
+                "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
+        if (!array_type || !checkAndGetDataType<DataTypeString>(array_type->getNestedType().get()))
+            throw Exception(
+                "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        return std::make_shared<DataTypeNumber<typename Impl::ResultType>>();
+    }
+
+    /// Throws ILLEGAL_COLUMN if the pattern array is not constant or the haystack is not a ColumnString,
+    /// and NUMBER_OF_ARGUMENTS_DOESNT_MATCH if more than 255 patterns are passed.
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
+    {
+        using ResultType = typename Impl::ResultType;
+
+        const ColumnPtr & column_haystack = block.getByPosition(arguments[0]).column;
+        const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
+
+        const ColumnPtr & arr_ptr = block.getByPosition(arguments[1]).column;
+        const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arr_ptr.get());
+
+        if (!col_const_arr)
+            throw Exception(
+                "Illegal column " + block.getByPosition(arguments[1]).column->getName() + ". The array is not const",
+                ErrorCodes::ILLEGAL_COLUMN);
+
+        /// Validate the haystack before allocating the result buffer.
+        if (!col_haystack_vector)
+            throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName(), ErrorCodes::ILLEGAL_COLUMN);
+
+        Array src_arr = col_const_arr->getValue<Array>();
+
+        /// The limit applies to the number of patterns, not to the number of function arguments (which is always 2).
+        /// NOTE(review): presumably the multi-pattern searcher keeps pattern indices in a UInt8 - confirm against Volnitsky.h.
+        if (src_arr.size() > std::numeric_limits<UInt8>::max())
+            throw Exception(
+                "Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(src_arr.size())
+                    + ", should be at most 255.",
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+        std::vector<String> refs;
+        refs.reserve(src_arr.size());
+
+        for (const auto & el : src_arr)
+            refs.emplace_back(el.get<String>());
+
+        const size_t column_haystack_size = column_haystack->size();
+
+        auto col_res = ColumnVector<ResultType>::create();
+
+        auto & vec_res = col_res->getData();
+
+        /// One result per haystack row.
+        vec_res.resize(column_haystack_size);
+
+        Impl::multi_constant_vector(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res);
+
+        block.getByPosition(result).column = std::move(col_res);
+    }
+};
+
 }
--- a/dbms/tests/performance/string_search/constant_column_comparison.xml
+++ b/dbms/tests/performance/string_search/constant_column_comparison.xml
@ -0,0 +1,46 @@
+<test>
+    <name>Constant column string search</name>
+
+    <tags>
+        <tag>search</tag>
+    </tags>
+
+    <preconditions>
+        <table_exists>hits_100m_single</table_exists>
+    </preconditions>
+
+    <type>loop</type>
+
+    <stop_conditions>
+        <all_of>
+            <iterations>5</iterations>
+            <min_time_not_changing_for_ms>10000</min_time_not_changing_for_ms>
+        </all_of>
+        <any_of>
+            <iterations>50</iterations>
+            <total_time_ms>60000</total_time_ms>
+        </any_of>
+    </stop_conditions>
+
+    <query><![CDATA[select count(position(URL, 'yandex')), count(position(URL, 'google')) FROM hits_100m_single]]></query>
+    <query><![CDATA[select count(multiPosition(URL, ['yandex', 'google'])) FROM hits_100m_single]]></query>
+    <query><![CDATA[select count(match(URL, 'yandex|google')) FROM hits_100m_single]]></query>
+
+    <query><![CDATA[select sum(match(URL, 'yandex')), sum(match(URL, 'google')), sum(match(URL, 'yahoo')), sum(match(URL, 'pikabu')) FROM hits_100m_single]]></query>
+    <query><![CDATA[select sum(multiSearch(URL, ['yandex', 'google', 'yahoo', 'pikabu'])) from hits_100m_single]]></query>
+    <query><![CDATA[select sum(match(URL, 'yandex|google|yahoo|pikabu')) FROM hits_100m_single]]></query>
+
+    <query><![CDATA[select sum(match(URL, 'yandex')), sum(match(URL, 'google')), sum(match(URL, 'http')) FROM hits_100m_single]]></query>
+    <query><![CDATA[select sum(multiSearch(URL, ['yandex', 'google', 'http'])) from hits_100m_single]]></query>
+    <query><![CDATA[select sum(match(URL, 'yandex|google|http')) FROM hits_100m_single]]></query>
+
+    <query><![CDATA[select sum(match(URL, 'yandex')), sum(match(URL, 'google')), sum(match(URL, 'facebook')), sum(match(URL, 'wikipedia')), sum(match(URL, 'reddit')) FROM hits_100m_single]]></query>
+    <query><![CDATA[select sum(multiSearch(URL, ['yandex', 'google', 'facebook', 'wikipedia', 'reddit'])) from hits_100m_single]]></query>
+    <query><![CDATA[select sum(match(URL, 'yandex|google|facebook|wikipedia|reddit')) FROM hits_100m_single]]></query>
+
+    <query><![CDATA[select sum(firstMatch(URL, ['yandex', 'google', 'http', 'facebook', 'google'])) from hits_100m_single]]></query>
+
+    <main_metric>
+        <min_time/>
+    </main_metric>
+</test>
--- a/dbms/tests/queries/0_stateless/00233_position_function_family.reference
+++ b/dbms/tests/queries/0_stateless/00233_position_function_family.reference
--- a/dbms/tests/queries/0_stateless/00233_position_function_family.sql
+++ b/dbms/tests/queries/0_stateless/00233_position_function_family.sql