#pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #if USE_HYPERSCAN # if __has_include() # include # else # include # endif #endif namespace ProfileEvents { extern const Event RegexpCreated; } namespace DB { namespace ErrorCodes { extern const int CANNOT_ALLOCATE_MEMORY; extern const int LOGICAL_ERROR; } namespace Regexps { using Regexp = OptimizedRegularExpressionImpl; using Pool = ObjectPoolMap; template inline Regexp createRegexp(const std::string & pattern, int flags) { return {pattern, flags}; } template <> inline Regexp createRegexp(const std::string & pattern, int flags) { return {likePatternToRegexp(pattern), flags}; } template inline Pool::Pointer get(const std::string & pattern) { /// C++11 has thread-safe function-local statics on most modern compilers. static Pool known_regexps; /// Different variables for different pattern parameters. return known_regexps.get(pattern, [&pattern] { int flags = OptimizedRegularExpression::RE_DOT_NL; if (no_capture) flags |= OptimizedRegularExpression::RE_NO_CAPTURE; ProfileEvents::increment(ProfileEvents::RegexpCreated); return new Regexp{createRegexp(pattern, flags)}; }); } } #if USE_HYPERSCAN namespace MultiRegexps { template struct HyperscanDeleter { template void operator()(T * ptr) const { deleter(ptr); } }; using CompilerError = std::unique_ptr>; using ScratchPtr = std::unique_ptr>; using DataBasePtr = std::unique_ptr>; /// Database is thread safe across multiple threads and Scratch is not but we can copy it whenever we use it in the searcher class Regexps { public: Regexps(hs_database_t * db_, hs_scratch_t * scratch_) : db{db_}, scratch{scratch_} {} hs_database_t * getDB() const { return db.get(); }; hs_scratch_t * getScratch() const { return scratch.get(); }; private: DataBasePtr db; ScratchPtr scratch; }; struct Pool { /// Mutex for finding in map std::mutex mutex; /// Patterns + possible edit_distance to database and scratch std::map, std::optional>, Regexps> storage; }; template inline Regexps constructRegexps(const std::vector & str_patterns, std::optional edit_distance) { (void)edit_distance; /// Common pointers std::vector ptrns; std::vector flags; /// Pointer for external edit distance compilation std::vector ext_exprs; std::vector ext_exprs_ptrs; ptrns.reserve(str_patterns.size()); flags.reserve(str_patterns.size()); if constexpr (CompileForEditDistance) { ext_exprs.reserve(str_patterns.size()); ext_exprs_ptrs.reserve(str_patterns.size()); } for (const StringRef ref : str_patterns) { ptrns.push_back(ref.data); flags.push_back(HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_SINGLEMATCH); if constexpr (CompileForEditDistance) { ext_exprs.emplace_back(); ext_exprs.back().flags = HS_EXT_FLAG_EDIT_DISTANCE; ext_exprs.back().edit_distance = edit_distance.value(); ext_exprs_ptrs.push_back(&ext_exprs.back()); } } hs_database_t * db = nullptr; hs_compile_error_t * compile_error; std::unique_ptr ids; if constexpr (FindAnyIndex) { ids.reset(new unsigned int[ptrns.size()]); for (size_t i = 0; i < ptrns.size(); ++i) ids[i] = i + 1; } hs_error_t err; if constexpr (!CompileForEditDistance) err = hs_compile_multi( ptrns.data(), flags.data(), ids.get(), ptrns.size(), HS_MODE_BLOCK, nullptr, &db, &compile_error); else err = hs_compile_ext_multi( ptrns.data(), flags.data(), ids.get(), ext_exprs_ptrs.data(), ptrns.size(), HS_MODE_BLOCK, nullptr, &db, &compile_error); if (err != HS_SUCCESS) { CompilerError error(compile_error); if (error->expression < 0) throw Exception(String(error->message), ErrorCodes::LOGICAL_ERROR); else throw Exception( "Pattern '" + str_patterns[error->expression] + "' failed with error '" + String(error->message), ErrorCodes::LOGICAL_ERROR); } ProfileEvents::increment(ProfileEvents::RegexpCreated); hs_scratch_t * scratch = nullptr; err = hs_alloc_scratch(db, &scratch); if (err != HS_SUCCESS) throw Exception("Could not allocate scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY); return Regexps{db, scratch}; } /// If CompileForEditDistance is False, edit_distance must be nullopt template inline Regexps * get(const std::vector & patterns, std::optional edit_distance) { /// C++11 has thread-safe function-local statics on most modern compilers. static Pool known_regexps; /// Different variables for different pattern parameters. std::vector str_patterns; str_patterns.reserve(patterns.size()); for (const StringRef & ref : patterns) str_patterns.push_back(ref.toString()); std::unique_lock lock(known_regexps.mutex); auto it = known_regexps.storage.find({str_patterns, edit_distance}); if (known_regexps.storage.end() == it) it = known_regexps.storage.emplace( std::pair{str_patterns, edit_distance}, constructRegexps(str_patterns, edit_distance)).first; lock.unlock(); return &it->second; } } #endif // USE_HYPERSCAN }