diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp
index 7d96feba1f3..68f5b86877e 100644
--- a/src/Common/OptimizedRegularExpression.cpp
+++ b/src/Common/OptimizedRegularExpression.cpp
@@ -1,3 +1,4 @@
+#include <limits>
 #include <Common/Exception.h>
 #include <Common/PODArray.h>
 #include <Common/OptimizedRegularExpression.h>
@@ -14,13 +15,40 @@ namespace DB
     }
 }
 
+namespace
+{
 
-template <bool thread_safe>
-void OptimizedRegularExpressionImpl<thread_safe>::analyze(
+/// A literal string extracted from a regexp, with flags telling whether it starts / ends the analyzed (sub)expression.
+struct Literal
+{
+    std::string literal;
+    bool prefix = false; /// this literal string is the prefix of the whole string.
+    bool suffix = false; /// this literal string is the suffix of the whole string.
+    void clear() /// back to the default state: empty literal, neither prefix nor suffix.
+    {
+        literal.clear();
+        prefix = suffix = false;
+    }
+};
+
+using Literals = std::vector<Literal>;
+
+/// Length of the shortest literal in the set; an empty set yields 0.
+size_t shortest_literal_length(const Literals & literals)
+{
+    size_t min_size = literals.empty() ? 0 : std::numeric_limits<size_t>::max();
+    for (const auto & candidate : literals)
+        if (candidate.literal.size() < min_size)
+            min_size = candidate.literal.size();
+    return min_size;
+}
+
+const char * analyzeImpl(
     std::string_view regexp,
-    std::string & required_substring,
+    const char * pos,
+    Literal & required_substring,
     bool & is_trivial,
-    bool & required_substring_is_prefix)
+    Literals & global_alternatives)
 {
     /** The expression is trivial if all the metacharacters in it are escaped.
       * The non-alternative string is
@@ -30,12 +58,11 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
       *  and also avoid substrings of the form `http://` or `www` and some other
       *   (this is the hack for typical use case in web analytics applications).
       */
-    const char * begin = regexp.data();
-    const char * pos = begin;
+    const char * begin = pos;
     const char * end = regexp.data() + regexp.size();
+    bool is_first_call = begin == regexp.data();
     int depth = 0;
     is_trivial = true;
-    required_substring_is_prefix = false;
     required_substring.clear();
     bool has_alternative_on_depth_0 = false;
     bool has_case_insensitive_flag = false;
@@ -47,6 +74,80 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
     Substrings trivial_substrings(1);
     Substring * last_substring = &trivial_substrings.back();
 
+    Literals cur_alternatives;
+
+    /// Hand the pending alternatives over to global_alternatives and leave the pending set empty.
+    /// They replace the global set when it is empty, or when their shortest literal is longer
+    /// than the global set's shortest one (longer literals are treated as better quality).
+    auto finish_cur_alternatives = [&]()
+    {
+        if (cur_alternatives.empty())
+            return;
+
+        const bool take_current = global_alternatives.empty()
+            || shortest_literal_length(global_alternatives) < shortest_literal_length(cur_alternatives);
+
+        if (take_current)
+        {
+            /// copy-assignment already discards the previous contents, no separate clear() is needed.
+            global_alternatives = cur_alternatives;
+        }
+
+        cur_alternatives.clear();
+    };
+
+    /// Called when a non-literal construct ends the literal run that is being collected.
+    /// Only acts at nesting depth 0: pending alternatives flagged as suffix get the current literal
+    /// appended, the alternatives are finalized, and (unless create_new_substr is false) a fresh
+    /// empty substring is opened if the current one is non-empty.
+    auto finish_non_trivial_char = [&](bool create_new_substr = true)
+    {
+        if (depth != 0)
+            return;
+
+        for (auto & pending : cur_alternatives)
+            if (pending.suffix)
+                pending.literal += last_substring->first;
+
+        finish_cur_alternatives();
+
+        if (create_new_substr && !last_substring->first.empty())
+        {
+            trivial_substrings.resize(trivial_substrings.size() + 1);
+            last_substring = &trivial_substrings.back();
+        }
+    };
+
+    /// Fold the result of an analyzed group (xxxxx) into the current state: its required literal and its alternatives.
+    auto finish_group = [&](Literal & group_required_string, Literals & group_alternatives)
+    {
+        for (auto & alter : group_alternatives)
+        {
+            if (alter.prefix) /// the alternative starts the group, so the literal collected before the group precedes it
+            {
+                alter.literal = last_substring->first + alter.literal;
+            }
+        }
+
+        if (group_required_string.prefix) /// the group's literal directly continues the current substring
+            last_substring->first += group_required_string.literal;
+        else
+        {
+            finish_non_trivial_char();
+            last_substring->first = group_required_string.literal;
+        }
+        /// If the group's literal is also its suffix we can still append to it, so do not finish it; e.g. abc(de)fg should capture abcdefg.
+        if (!last_substring->first.empty() && !group_required_string.suffix)
+        {
+            trivial_substrings.resize(trivial_substrings.size() + 1);
+            last_substring = &trivial_substrings.back();
+        }
+
+        /// Finalize the alternatives pending so far, then make the group's alternatives the pending ones.
+        finish_cur_alternatives();
+        cur_alternatives = std::move(group_alternatives);
+    };
+
     bool in_curly_braces = false;
     bool in_square_braces = false;
 
@@ -73,25 +174,19 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
                     case '$':
                     case '.':
                     case '[':
+                    case ']':
                     case '?':
                     case '*':
                     case '+':
+                    case '-':
                     case '{':
-                        if (depth == 0 && !in_curly_braces && !in_square_braces)
-                        {
-                            if (last_substring->first.empty())
-                                last_substring->second = pos - begin;
-                            last_substring->first.push_back(*pos);
-                        }
-                        break;
+                    case '}':
+                    case '/':
+                        goto ordinary;
                     default:
                         /// all other escape sequences are not supported
                         is_trivial = false;
-                        if (!last_substring->first.empty())
-                        {
-                            trivial_substrings.resize(trivial_substrings.size() + 1);
-                            last_substring = &trivial_substrings.back();
-                        }
+                        finish_non_trivial_char();
                         break;
                 }
 
@@ -100,28 +195,19 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
             }
 
             case '|':
-                if (depth == 0)
-                    has_alternative_on_depth_0 = true;
                 is_trivial = false;
-                if (!in_square_braces && !last_substring->first.empty())
-                {
-                    trivial_substrings.resize(trivial_substrings.size() + 1);
-                    last_substring = &trivial_substrings.back();
-                }
                 ++pos;
+                if (depth == 0)
+                {
+                    has_alternative_on_depth_0 = true;
+                    goto finish;
+                }
                 break;
 
             case '(':
+                is_trivial = false;
                 if (!in_square_braces)
                 {
-                    ++depth;
-                    is_trivial = false;
-                    if (!last_substring->first.empty())
-                    {
-                        trivial_substrings.resize(trivial_substrings.size() + 1);
-                        last_substring = &trivial_substrings.back();
-                    }
-
                     /// Check for case-insensitive flag.
                     if (pos + 1 < end && pos[1] == '?')
                     {
@@ -143,6 +229,28 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
                                 break;
                         }
                     }
+                    if (pos + 2 < end && pos[1] == '?' && pos[2] == ':')
+                    {
+                        pos += 2;
+                    }
+                    Literal group_required_substr;
+                    bool group_is_trival = true;
+                    Literals group_alters;
+                    pos = analyzeImpl(regexp, pos + 1, group_required_substr, group_is_trival, group_alters);
+                    /// pos should be ')', if not, then it is not a valid regular expression
+                    if (pos == end)
+                        return pos;
+
+                    /// For ()? or ()* or (){0,1}, we can just ignore the whole group.
+                    if ((pos + 1 < end && (pos[1] == '?' || pos[1] == '*')) ||
+                        (pos + 2 < end && pos[1] == '{' && pos[2] == '0'))
+                    {
+                        finish_non_trivial_char();
+                    }
+                    else
+                    {
+                        finish_group(group_required_substr, group_alters);
+                    }
                 }
                 ++pos;
                 break;
@@ -151,11 +259,7 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
                 in_square_braces = true;
                 ++depth;
                 is_trivial = false;
-                if (!last_substring->first.empty())
-                {
-                    trivial_substrings.resize(trivial_substrings.size() + 1);
-                    last_substring = &trivial_substrings.back();
-                }
+                finish_non_trivial_char();
                 ++pos;
                 break;
 
@@ -163,38 +267,25 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
                 if (!in_square_braces)
                     goto ordinary;
 
-                in_square_braces = false;
                 --depth;
+                if (depth == 0)
+                    in_square_braces = false;
                 is_trivial = false;
-                if (!last_substring->first.empty())
-                {
-                    trivial_substrings.resize(trivial_substrings.size() + 1);
-                    last_substring = &trivial_substrings.back();
-                }
+                finish_non_trivial_char();
                 ++pos;
                 break;
 
             case ')':
                 if (!in_square_braces)
                 {
-                    --depth;
-                    is_trivial = false;
-                    if (!last_substring->first.empty())
-                    {
-                        trivial_substrings.resize(trivial_substrings.size() + 1);
-                        last_substring = &trivial_substrings.back();
-                    }
+                    goto finish;
                 }
                 ++pos;
                 break;
 
             case '^': case '$': case '.': case '+':
                 is_trivial = false;
-                if (!last_substring->first.empty() && !in_square_braces)
-                {
-                    trivial_substrings.resize(trivial_substrings.size() + 1);
-                    last_substring = &trivial_substrings.back();
-                }
+                finish_non_trivial_char();
                 ++pos;
                 break;
 
@@ -206,12 +297,11 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
                 [[fallthrough]];
             case '*':
                 is_trivial = false;
-                if (!last_substring->first.empty() && !in_square_braces)
+                if (depth == 0 && !last_substring->first.empty() && !in_square_braces)
                 {
                     last_substring->first.resize(last_substring->first.size() - 1);
-                    trivial_substrings.resize(trivial_substrings.size() + 1);
-                    last_substring = &trivial_substrings.back();
                 }
+                finish_non_trivial_char();
                 ++pos;
                 break;
 
@@ -236,13 +326,15 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
                 break;
         }
     }
+finish:
 
-    if (last_substring && last_substring->first.empty())
-        trivial_substrings.pop_back();
+    finish_non_trivial_char(false);
 
     if (!is_trivial)
     {
-        if (!has_alternative_on_depth_0 && !has_case_insensitive_flag)
+        /// we calculate required substring even though has_alternative_on_depth_0.
+        /// we will clear the required substring after putting it to alternatives.
+        if (!has_case_insensitive_flag)
         {
             /// We choose the non-alternative substring of the maximum length for first search.
 
@@ -262,19 +354,45 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
                 }
             }
 
-            if (max_length >= MIN_LENGTH_FOR_STRSTR)
+            if (max_length >= MIN_LENGTH_FOR_STRSTR || (!is_first_call && max_length > 0))
             {
-                required_substring = candidate_it->first;
-                required_substring_is_prefix = candidate_it->second == 0;
+                required_substring.literal = candidate_it->first;
+                required_substring.prefix = candidate_it->second == 0;
+                required_substring.suffix = candidate_it + 1 == trivial_substrings.end();
             }
         }
     }
     else if (!trivial_substrings.empty())
     {
-        required_substring = trivial_substrings.front().first;
-        required_substring_is_prefix = trivial_substrings.front().second == 0;
+        required_substring.literal = trivial_substrings.front().first;
+        required_substring.prefix = trivial_substrings.front().second == 0;
+        required_substring.suffix = true;
     }
 
+    /// if it is xxx|xxx|xxx, we should call the next xxx|xxx recursively and collect the result.
+    if (has_alternative_on_depth_0)
+    {
+        /// compare the quality of required substring and alternatives and choose the better one.
+        if (shortest_literal_length(global_alternatives) < required_substring.literal.size())
+            global_alternatives = {required_substring};
+        Literals next_alternatives;
+        /// these two values are useless: xxx|xxx can be neither trivial nor a prefix.
+        bool next_is_trivial = true;
+        pos = analyzeImpl(regexp, pos, required_substring, next_is_trivial, next_alternatives);
+        /// For xxx|xxx|xxx, we only combine the alternatives and return an empty required_substring.
+        if (next_alternatives.empty() || shortest_literal_length(next_alternatives) < required_substring.literal.size())
+        {
+            global_alternatives.push_back(required_substring);
+        }
+        else
+        {
+            global_alternatives.insert(global_alternatives.end(), next_alternatives.begin(), next_alternatives.end());
+        }
+        required_substring.clear();
+    }
+
+    return pos;
+
 /*    std::cerr
         << "regexp: " << regexp
         << ", is_trivial: " << is_trivial
@@ -282,12 +400,31 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
         << ", required_substring_is_prefix: " << required_substring_is_prefix
         << std::endl;*/
 }
+}
 
+/// Public entry point: runs analyzeImpl over the whole regexp and unpacks its Literal-based
+/// results into the plain string / flag outputs expected by callers.
+template <bool thread_safe>
+void OptimizedRegularExpressionImpl<thread_safe>::analyze(
+        std::string_view regexp_, std::string & required_substring, bool & is_trivial,
+        bool & required_substring_is_prefix, std::vector<std::string> & alternatives)
+{
+    Literal required;
+    Literals found_alternatives;
+    analyzeImpl(regexp_, regexp_.data(), required, is_trivial, found_alternatives);
+    required_substring_is_prefix = required.prefix;
+    required_substring = std::move(required.literal);
+    /// The literals are appended; whatever `alternatives` already holds is kept.
+    for (auto & found : found_alternatives)
+        alternatives.push_back(std::move(found.literal));
+}
 
 template <bool thread_safe>
 OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
 {
-    analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix);
+    std::vector<std::string> alternativesDummy; /// this vector extracts patterns a,b,c from pattern (a|b|c). for now it's not used.
+    analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix, alternativesDummy);
+
 
     /// Just three following options are supported
     if (options & (~(RE_CASELESS | RE_NO_CAPTURE | RE_DOT_NL)))
diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h
index d8ed1e205c8..f6b59f0a465 100644
--- a/src/Common/OptimizedRegularExpression.h
+++ b/src/Common/OptimizedRegularExpression.h
@@ -95,6 +95,15 @@ public:
         out_required_substring_is_prefix = required_substring_is_prefix;
     }
 
+    /// analyze function will extract the longest string literal or multiple alternative string literals from regexp for pre-checking if
+    /// a string contains the string literal(s). If not, we can tell this string can never match the regexp.
+    static void analyze(
+        std::string_view regexp_,
+        std::string & required_substring,
+        bool & is_trivial,
+        bool & required_substring_is_prefix,
+        std::vector<std::string> & alternatives);
+
 private:
     bool is_trivial;
     bool required_substring_is_prefix;
@@ -104,8 +113,6 @@ private:
     std::optional<DB::ASCIICaseInsensitiveStringSearcher> case_insensitive_substring_searcher;
     std::unique_ptr<RegexType> re2;
     unsigned number_of_subpatterns;
-
-    static void analyze(std::string_view regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix);
 };
 
 using OptimizedRegularExpression = OptimizedRegularExpressionImpl<true>;
diff --git a/src/Common/tests/gtest_optimize_re.cpp b/src/Common/tests/gtest_optimize_re.cpp
new file mode 100644
index 00000000000..556700f1fcc
--- /dev/null
+++ b/src/Common/tests/gtest_optimize_re.cpp
@@ -0,0 +1,46 @@
+#include <gtest/gtest.h>
+
+#include <Common/OptimizedRegularExpression.h>
+
+TEST(OptimizeRE, analyze)
+{
+    auto test_f = [](const std::string & regexp, const std::string & answer, std::vector<std::string> expect_alternatives = {}, bool trival_expected = false)
+    {
+        std::string required;
+        bool is_trivial;
+        bool is_prefix; /// filled by analyze() but not checked by this test
+        std::vector<std::string> alternatives;
+        OptimizedRegularExpression::analyze(regexp, required, is_trivial, is_prefix, alternatives);
+        std::cerr << regexp << std::endl; /// print the pattern under test before its expectations are evaluated
+        EXPECT_EQ(required, answer); /// the required substring reported by analyze()
+        EXPECT_EQ(alternatives, expect_alternatives); /// the alternative literals, compared in order
+        EXPECT_EQ(is_trivial, trival_expected);
+    };
+    test_f("abc", "abc", {}, true); /// the only case in this list that is expected to be trivial
+    test_f("c([^k]*)de", "");
+    test_f("abc(de)fg", "abcdefg");
+    test_f("abc(de|xyz)fg", "abc", {"abcdefg", "abcxyzfg"});
+    test_f("abc(de?f|xyz)fg", "abc", {"abcd", "abcxyzfg"});
+    test_f("abc|fgk|xyz", "", {"abc","fgk", "xyz"});
+    test_f("(abc)", "abc");
+    test_f("(abc|fgk)", "", {"abc","fgk"});
+    test_f("(abc|fgk)(e|f|zkh|)", "", {"abc","fgk"});
+    test_f("abc(abc|fg)xyzz", "xyzz", {"abcabcxyzz","abcfgxyzz"});
+    test_f("abc[k]xyzz", "xyzz");
+    test_f("(abc[k]xyzz)", "xyzz");
+    test_f("abc((de)fg(hi))jk", "abcdefghijk");
+    test_f("abc((?:de)fg(?:hi))jk", "abcdefghijk");
+    test_f("abc((de)fghi+zzz)jk", "abcdefghi");
+    test_f("abc((de)fg(hi))?jk", "abc"); /// a group followed by '?' is expected to contribute nothing
+    test_f("abc((de)fghi?zzz)jk", "abcdefgh");
+    test_f("abc(*cd)jk", "cdjk");
+    test_f(R"(abc(de|xyz|(\{xx\}))fg)", "abc", {"abcdefg", "abcxyzfg", "abc{xx}fg"});
+    test_f("abc(abc|fg)?xyzz", "xyzz");
+    test_f("abc(abc|fg){0,1}xyzz", "xyzz");
+    test_f("abc(abc|fg)xyzz|bcdd?k|bc(f|g|h?)z", "", {"abcabcxyzz", "abcfgxyzz", "bcd", "bc"});
+    test_f("abc(abc|fg)xyzz|bc(dd?x|kk?y|(f))k|bc(f|g|h?)z", "", {"abcabcxyzz", "abcfgxyzz", "bcd", "bck", "bcfk", "bc"});
+    test_f("((?:abc|efg|xyz)/[a-zA-Z0-9]{1-50})(/?[^ ]*|)", "", {"abc/", "efg/", "xyz/"});
+    test_f(R"([Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]{1,30})(?:-[A-Za-z]{1,30}|)|bingbot|\bYeti(?:-[a-z]{1,30}|)|Catchpoint(?: bot|)|[Cc]harlotte|Daumoa(?:-feedfetcher|)|(?:[a-zA-Z]{1,30}-|)Googlebot(?:-[a-zA-Z]{1,30}|))", "", {"pider-", "bingbot", "Yeti-", "Yeti", "Catchpoint bot", "Catchpoint", "harlotte", "Daumoa-feedfetcher", "Daumoa", "-Googlebot", "Googlebot"});
+    test_f("abc|(:?xx|yy|zz|x?)def", "", {"abc", "def"}); /// NOTE(review): "(:?" may have been meant as the non-capturing "(?:" - confirm
+    test_f("abc|(:?xx|yy|zz|x?){1,2}def", "", {"abc", "def"});
+}
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index ca89106dc08..9fa2ba0d32f 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -934,7 +934,7 @@ class IColumn;
     M(Bool, input_format_bson_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format BSON.", 0) \
     \
     M(Bool, regexp_dict_allow_other_sources, false, "Allow regexp_tree dictionary to use sources other than yaml source.", 0) \
-    M(Bool, regexp_dict_allow_hyperscan, false, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
+    M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
 
 // End of FORMAT_FACTORY_SETTINGS
 // Please add settings non-related to formats into the COMMON_SETTINGS above.
diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp
index caba2a52a51..c072ba78d46 100644
--- a/src/Dictionaries/RegExpTreeDictionary.cpp
+++ b/src/Dictionaries/RegExpTreeDictionary.cpp
@@ -9,9 +9,10 @@
 #include <Poco/Logger.h>
 #include <Poco/RegularExpression.h>
 
-#include "Common/Exception.h"
 #include <Common/ArenaUtils.h>
+#include <Common/Exception.h>
 #include <Common/logger_useful.h>
+#include <Common/OptimizedRegularExpression.h>
 #include <Core/ColumnsWithTypeAndName.h>
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypesNumber.h>
@@ -34,6 +35,7 @@
 
 #if USE_VECTORSCAN
 #    include <hs.h>
+#    include <hs_compile.h>
 #endif
 
 namespace DB
@@ -46,6 +48,7 @@ namespace ErrorCodes
     extern const int HYPERSCAN_CANNOT_SCAN_TEXT;
     extern const int UNSUPPORTED_METHOD;
     extern const int INCORRECT_DICTIONARY_DEFINITION;
+    extern const int LOGICAL_ERROR;
 }
 
 const std::string kRegExp = "regexp";
@@ -172,10 +175,6 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
     auto keys_column = block.getByName(kKeys).column;
     auto values_column = block.getByName(kValues).column;
 
-#ifdef USE_VECTORSCAN
-    SlowWithHyperscanChecker checker;
-#endif
-
     size_t size = block.rows();
     for (size_t i = 0; i < size; i++)
     {
@@ -219,12 +218,36 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
             }
         }
         regex_nodes.emplace(id, node);
+
 #if USE_VECTORSCAN
-        if (use_vectorscan && !checker.isSlow(regex))
+        String required_substring;
+        bool is_trivial, required_substring_is_prefix;
+        std::vector<std::string> alternatives;
+
+        if (use_vectorscan)
+            OptimizedRegularExpression::analyze(regex, required_substring, is_trivial, required_substring_is_prefix, alternatives);
+
+        for (auto & alter : alternatives)
         {
-            simple_regexps.push_back(regex);
+            if (alter.size() < 3)
+            {
+                alternatives.clear();
+                break;
+            }
+        }
+        if (!required_substring.empty())
+        {
+            simple_regexps.push_back(required_substring);
             regexp_ids.push_back(id);
         }
+        else if (!alternatives.empty())
+        {
+            for (auto & alternative : alternatives)
+            {
+                simple_regexps.push_back(alternative);
+                regexp_ids.push_back(id);
+            }
+        }
         else
 #endif
             complex_regexp_nodes.push_back(node);
@@ -284,20 +307,50 @@ void RegExpTreeDictionary::loadData()
             use_vectorscan = false;
         if (!use_vectorscan)
             return;
-        #if USE_VECTORSCAN
-        try
+
+#if USE_VECTORSCAN
+        std::vector<const char *> patterns;
+        std::vector<unsigned int> flags;
+        std::vector<size_t> lengths;
+
+        for (const std::string & simple_regexp : simple_regexps)
         {
-            std::vector<std::string_view> regexps_views(simple_regexps.begin(), simple_regexps.end());
-            hyperscan_regex = MultiRegexps::getOrSet<true, false>(regexps_views, std::nullopt);
-            hyperscan_regex->get();
+            patterns.push_back(simple_regexp.data());
+            lengths.push_back(simple_regexp.size());
+            flags.push_back(HS_FLAG_SINGLEMATCH);
         }
-        catch (Exception & e)
+
+        hs_database_t * db = nullptr;
+        hs_compile_error_t * compile_error;
+
+        std::unique_ptr<unsigned int[]> ids;
+        ids.reset(new unsigned int[patterns.size()]);
+        for (size_t i = 0; i < patterns.size(); i++)
+            ids[i] = static_cast<unsigned>(i+1);
+
+        hs_error_t err = hs_compile_lit_multi(patterns.data(), flags.data(), ids.get(), lengths.data(), static_cast<unsigned>(patterns.size()), HS_MODE_BLOCK, nullptr, &db, &compile_error);
+        origin_db = (db);
+        if (err != HS_SUCCESS)
         {
-            /// Some compile errors will be thrown as LOGICAL ERROR and cause crash, e.g. empty expression or expressions are too large.
-            /// We catch the error here and rethrow again.
-            throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "Error occurs when compiling regular expressions, reason: {}", e.message());
+            /// CompilerErrorPtr is a unique_ptr, so the error object is freed correctly when the exception is thrown.
+            MultiRegexps::CompilerErrorPtr error(compile_error);
+
+            if (error->expression < 0)
+                throw Exception::createRuntime(ErrorCodes::LOGICAL_ERROR, String(error->message));
+            else
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Pattern '{}' failed with error '{}'", patterns[error->expression], String(error->message));
         }
-        #endif
+
+        /// We allocate the scratch space only once, then copy it across multiple threads with hs_clone_scratch
+        /// function which is faster than allocating scratch space each time in each thread.
+        hs_scratch_t * scratch = nullptr;
+        err = hs_alloc_scratch(db, &scratch);
+        origin_scratch.reset(scratch);
+        /// If not HS_SUCCESS, it is guaranteed that the memory would not be allocated for scratch.
+        if (err != HS_SUCCESS)
+            throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not allocate scratch space for vectorscan");
+#endif
+
     }
     else
     {
@@ -396,47 +449,70 @@ bool RegExpTreeDictionary::setAttributes(
     return attributes_to_set.size() == attributes.size();
 }
 
-namespace
+/// Temporary per-key state for matching: collects the ids of the regex nodes that matched the key.
+struct MatchContext
 {
-    struct MatchContext
+    std::set<UInt64> matched_idx_set; /// ids of the nodes recorded as matched; queried by contains()
+    std::vector<std::pair<UInt64, UInt64>> matched_idx_sorted_list; /// (topological order, node id) pairs; ordered by sort()
+
+    const std::vector<UInt64> & regexp_ids ;
+    const std::unordered_map<UInt64, UInt64> & topology_order;
+    const char * data; /// the text passed to each node's match()
+    size_t length; /// length passed together with data
+    const std::map<UInt64, RegExpTreeDictionary::RegexTreeNodePtr> & regex_nodes;
+
+    size_t pre_match_counter = 0; /// number of insertIdx() calls
+    size_t match_counter = 0; /// number of insertIdx() calls whose node match() succeeded
+
+    MatchContext(
+        const std::vector<UInt64> & regexp_ids_,
+        const std::unordered_map<UInt64, UInt64> & topology_order_,
+        const char * data_, size_t length_,
+        const std::map<UInt64, RegExpTreeDictionary::RegexTreeNodePtr> & regex_nodes_
+    )
+    : regexp_ids(regexp_ids_),
+        topology_order(topology_order_),
+        data(data_),
+        length(length_),
+        regex_nodes(regex_nodes_)
+    {}
+
+    [[maybe_unused]]
+    void insertIdx(unsigned int idx)
     {
-        std::set<UInt64> matched_idx_set;
-        std::vector<std::pair<UInt64, UInt64>> matched_idx_sorted_list;
-
-        const std::vector<UInt64> & regexp_ids ;
-        const std::unordered_map<UInt64, UInt64> & topology_order;
-
-        MatchContext(const std::vector<UInt64> & regexp_ids_, const std::unordered_map<UInt64, UInt64> & topology_order_)
-            : regexp_ids(regexp_ids_), topology_order(topology_order_) {}
-
-        [[maybe_unused]]
-        void insertIdx(unsigned int idx)
+        UInt64 node_id = regexp_ids[idx-1]; /// idx is 1-based: position idx-1 in regexp_ids holds the node id
+        pre_match_counter++;
+        if (!regex_nodes.at(node_id)->match(data, length)) /// record the node only if its own match() accepts the text
         {
-            UInt64 node_id = regexp_ids[idx-1];
-            UInt64 topological_order = topology_order.at(node_id);
-            matched_idx_set.emplace(node_id);
-            matched_idx_sorted_list.push_back(std::make_pair(topological_order, node_id));
+            return;
         }
+        match_counter++;
+        matched_idx_set.emplace(node_id);
 
-        void insertNodeID(UInt64 id)
-        {
-            UInt64 topological_order = topology_order.at(id);
-            matched_idx_set.emplace(id);
-            matched_idx_sorted_list.push_back(std::make_pair(topological_order, id));
-        }
+        UInt64 topological_order = topology_order.at(node_id);
+        matched_idx_sorted_list.push_back(std::make_pair(topological_order, node_id));
+    }
 
-        /// Sort by topological order, which indicates the matching priorities.
-        void sort()
-        {
-            std::sort(matched_idx_sorted_list.begin(), matched_idx_sorted_list.end());
-        }
+    [[maybe_unused]]
+    void insertNodeID(UInt64 id)
+    {
+        matched_idx_set.emplace(id); /// recorded unconditionally: unlike insertIdx(), no match() check is done here
 
-        bool contains(UInt64 idx) const
-        {
-            return matched_idx_set.contains(idx);
-        }
-    };
-}
+        UInt64 topological_order = topology_order.at(id);
+        matched_idx_sorted_list.push_back(std::make_pair(topological_order, id));
+    }
+
+    /// Sort by topological order, which indicates the matching priorities.
+    void sort()
+    {
+        std::sort(matched_idx_sorted_list.begin(), matched_idx_sorted_list.end());
+    }
+
+    bool contains(UInt64 idx) const
+    {
+        return matched_idx_set.contains(idx);
+    }
+};
 
 std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::match(
     const ColumnString::Chars & keys_data,
@@ -449,7 +525,7 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::match(
     hs_scratch_t * scratch = nullptr;
     if (use_vectorscan)
     {
-        hs_error_t err = hs_clone_scratch(hyperscan_regex->get()->getScratch(), &scratch);
+        hs_error_t err = hs_clone_scratch(origin_scratch.get(), &scratch);
 
         if (err != HS_SUCCESS)
         {
@@ -476,11 +552,14 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::match(
         auto key_offset = keys_offsets[key_idx];
         UInt64 length = key_offset - offset - 1;
 
-        MatchContext match_result(regexp_ids, topology_order);
+        const char * begin = reinterpret_cast<const char *>(keys_data.data()) + offset;
+
+        MatchContext match_result(regexp_ids, topology_order, begin, length, regex_nodes);
 
 #if USE_VECTORSCAN
         if (use_vectorscan)
         {
+            /// pre-select all the possible matches
             auto on_match = [](unsigned int id,
                             unsigned long long /* from */, // NOLINT
                             unsigned long long /* to */, // NOLINT
@@ -490,8 +569,9 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::match(
                 static_cast<MatchContext *>(context)->insertIdx(id);
                 return 0;
             };
+
             hs_error_t err = hs_scan(
-                hyperscan_regex->get()->getDB(),
+                origin_db,
                 reinterpret_cast<const char *>(keys_data.data()) + offset,
                 static_cast<unsigned>(length),
                 0,
@@ -501,6 +581,7 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::match(
 
             if (err != HS_SUCCESS)
                 throw Exception(ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT, "Failed to scan data with vectorscan");
+
         }
 #endif
 
diff --git a/src/Dictionaries/RegExpTreeDictionary.h b/src/Dictionaries/RegExpTreeDictionary.h
index 32206f25429..17a0c6bbef3 100644
--- a/src/Dictionaries/RegExpTreeDictionary.h
+++ b/src/Dictionaries/RegExpTreeDictionary.h
@@ -33,6 +33,7 @@ namespace ErrorCodes
 
 class RegExpTreeDictionary : public IDictionary
 {
+    friend struct MatchContext;
 public:
     struct Configuration
     {
@@ -162,6 +163,8 @@ private:
     std::unordered_map<UInt64, UInt64> topology_order;
     #if USE_VECTORSCAN
     MultiRegexps::DeferredConstructedRegexpsPtr hyperscan_regex;
+    MultiRegexps::ScratchPtr origin_scratch;
+    hs_database_t* origin_db;
     #endif
 
     Poco::Logger * logger;