2017-03-10 17:52:36 +00:00
|
|
|
#pragma once
|
2018-08-30 18:40:46 +00:00
|
|
|
|
2019-03-23 19:40:16 +00:00
|
|
|
#include <Functions/likePatternToRegexp.h>
|
2018-08-30 18:40:46 +00:00
|
|
|
#include <Common/ObjectPool.h>
|
2019-03-23 19:40:16 +00:00
|
|
|
#include <Common/OptimizedRegularExpression.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/ProfileEvents.h>
|
2019-03-23 19:40:16 +00:00
|
|
|
#include <common/StringRef.h>
|
|
|
|
|
|
|
|
#include <memory>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
2017-03-11 00:27:59 +00:00
|
|
|
|
2019-03-23 19:40:16 +00:00
|
|
|
#ifdef __SSSE3__
|
|
|
|
# include <hs.h>
|
|
|
|
#endif
|
2017-03-10 17:52:36 +00:00
|
|
|
|
|
|
|
namespace ProfileEvents
|
|
|
|
{
|
2019-03-23 19:40:16 +00:00
|
|
|
extern const Event RegexpCreated;
|
2017-03-10 17:52:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-01-10 00:04:08 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
2019-03-23 19:40:16 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int CANNOT_ALLOCATE_MEMORY;
|
|
|
|
extern const int LOGICAL_ERROR;
|
|
|
|
}
|
2017-03-10 17:52:36 +00:00
|
|
|
|
|
|
|
namespace Regexps
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Regular expression type produced by createRegexp() and stored in the Pool of this namespace.
using Regexp = OptimizedRegularExpressionImpl<false>;
|
|
|
|
/// Pool of Regexp objects; get() below looks entries up by the pattern string.
using Pool = ObjectPoolMap<Regexp, String>;
|
|
|
|
|
|
|
|
template <bool like>
|
2019-03-23 19:40:16 +00:00
|
|
|
inline Regexp createRegexp(const std::string & pattern, int flags)
|
|
|
|
{
|
|
|
|
return {pattern, flags};
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
template <>
|
2019-03-23 19:40:16 +00:00
|
|
|
inline Regexp createRegexp<true>(const std::string & pattern, int flags)
|
|
|
|
{
|
|
|
|
return {likePatternToRegexp(pattern), flags};
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
template <bool like, bool no_capture>
|
|
|
|
inline Pool::Pointer get(const std::string & pattern)
|
|
|
|
{
|
|
|
|
/// C++11 has thread-safe function-local statics on most modern compilers.
|
2019-03-23 19:40:16 +00:00
|
|
|
static Pool known_regexps; /// Different variables for different pattern parameters.
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
return known_regexps.get(pattern, [&pattern]
|
|
|
|
{
|
|
|
|
int flags = OptimizedRegularExpression::RE_DOT_NL;
|
|
|
|
if (no_capture)
|
|
|
|
flags |= OptimizedRegularExpression::RE_NO_CAPTURE;
|
|
|
|
|
|
|
|
ProfileEvents::increment(ProfileEvents::RegexpCreated);
|
|
|
|
return new Regexp{createRegexp<like>(pattern, flags)};
|
|
|
|
});
|
|
|
|
}
|
2017-03-10 17:52:36 +00:00
|
|
|
}
|
|
|
|
|
2019-03-23 19:40:16 +00:00
|
|
|
#ifdef __SSSE3__
|
|
|
|
|
|
|
|
namespace MultiRegexps
|
|
|
|
{
|
|
|
|
/** Stateless functor that forwards a pointer to the function `deleter`, which is fixed at compile time.
  * It is used in this file as the deleter type of std::unique_ptr for Hyperscan objects.
  *
  * Template parameters:
  *   Deleter - type of the function (pointer) to call.
  *   deleter - the function itself.
  */
template <typename Deleter, Deleter deleter>
struct HyperscanDeleter
{
    /// Calls `deleter(handle)`; the result of the call, if any, is discarded.
    template <typename Object>
    void operator()(Object * handle) const
    {
        deleter(handle);
    }
};
|
|
|
|
|
|
|
|
/// Owning pointer to a compiled Hyperscan database; the database is released with hs_free_database().
using Regexps = std::unique_ptr<hs_database_t, HyperscanDeleter<decltype(&hs_free_database), &hs_free_database>>;
|
|
|
|
|
|
|
|
/// Pool of compiled databases; get() below looks entries up by the whole list of pattern strings.
using Pool = ObjectPoolMap<Regexps, std::vector<String>>;
|
|
|
|
|
2019-03-23 22:49:38 +00:00
|
|
|
template <bool FindAnyIndex>
|
2019-03-23 19:40:16 +00:00
|
|
|
inline Pool::Pointer get(const std::vector<StringRef> & patterns)
|
|
|
|
{
|
|
|
|
/// C++11 has thread-safe function-local statics on most modern compilers.
|
|
|
|
static Pool known_regexps; /// Different variables for different pattern parameters.
|
|
|
|
|
|
|
|
std::vector<String> str_patterns;
|
|
|
|
str_patterns.reserve(patterns.size());
|
2019-03-23 23:56:52 +00:00
|
|
|
for (const StringRef & ref : patterns)
|
2019-03-23 19:40:16 +00:00
|
|
|
str_patterns.push_back(ref.toString());
|
|
|
|
|
|
|
|
return known_regexps.get(str_patterns, [&str_patterns]
|
|
|
|
{
|
|
|
|
std::vector<const char *> ptrns;
|
|
|
|
std::vector<unsigned int> flags;
|
|
|
|
ptrns.reserve(str_patterns.size());
|
|
|
|
flags.reserve(str_patterns.size());
|
|
|
|
for (const StringRef ref : str_patterns)
|
|
|
|
{
|
|
|
|
ptrns.push_back(ref.data);
|
2019-03-23 22:49:38 +00:00
|
|
|
flags.push_back(HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_SINGLEMATCH);
|
2019-03-23 19:40:16 +00:00
|
|
|
}
|
|
|
|
hs_database_t * db = nullptr;
|
|
|
|
hs_compile_error_t * compile_error;
|
|
|
|
|
2019-03-23 22:49:38 +00:00
|
|
|
|
|
|
|
std::unique_ptr<unsigned int[]> ids;
|
|
|
|
|
|
|
|
if constexpr (FindAnyIndex)
|
|
|
|
{
|
|
|
|
ids.reset(new unsigned int[ptrns.size()]);
|
|
|
|
for (size_t i = 0; i < ptrns.size(); ++i)
|
|
|
|
ids[i] = i + 1;
|
|
|
|
}
|
|
|
|
|
2019-03-23 19:40:16 +00:00
|
|
|
hs_error_t err
|
2019-03-23 22:49:38 +00:00
|
|
|
= hs_compile_multi(ptrns.data(), flags.data(), ids.get(), ptrns.size(), HS_MODE_BLOCK, nullptr, &db, &compile_error);
|
2019-03-23 19:40:16 +00:00
|
|
|
if (err != HS_SUCCESS)
|
|
|
|
{
|
|
|
|
std::unique_ptr<
|
|
|
|
hs_compile_error_t,
|
|
|
|
HyperscanDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>> error(compile_error);
|
|
|
|
|
|
|
|
if (error->expression < 0)
|
|
|
|
throw Exception(String(error->message), ErrorCodes::LOGICAL_ERROR);
|
|
|
|
else
|
|
|
|
throw Exception(
|
|
|
|
"Pattern '" + str_patterns[error->expression] + "' failed with error '" + String(error->message),
|
|
|
|
ErrorCodes::LOGICAL_ERROR);
|
|
|
|
}
|
|
|
|
|
|
|
|
ProfileEvents::increment(ProfileEvents::RegexpCreated);
|
|
|
|
|
|
|
|
return new Regexps{db};
|
|
|
|
});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif // __SSSE3__
|
|
|
|
|
2017-03-10 17:52:36 +00:00
|
|
|
}
|