2021-06-05 00:52:35 +00:00
|
|
|
#pragma once
|
|
|
|
|
2021-07-30 13:30:30 +00:00
|
|
|
#if !defined(ARCADIA_BUILD)
|
|
|
|
# include "config_core.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if USE_NLP
|
|
|
|
|
2021-06-05 00:52:35 +00:00
|
|
|
#include <common/types.h>
#include <Poco/Util/Application.h>

#include <memory>
#include <mutex>
#include <unordered_map>
|
|
|
|
|
2021-07-30 13:30:30 +00:00
|
|
|
|
2021-06-05 00:52:35 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-06-20 12:31:07 +00:00
|
|
|
/// Abstract interface for a lemmatizer: an object that maps a token to
/// another character sequence (its lemma).
///
/// Concrete implementations are supplied elsewhere; this header only fixes
/// the calling contract.
class ILemmatizer
{
public:
    /// Shared-ownership handle to the character array returned by lemmatize().
    using TokenPtr = std::shared_ptr<char []>;

    /// Produces the lemma for `token`.
    ///
    /// @param token  Input token as a C string.
    ///               NOTE(review): presumably null-terminated — confirm against implementations.
    /// @return       Shared pointer to the resulting character array.
    ///               NOTE(review): whether the result may be empty/null is
    ///               implementation-defined — verify against callers.
    virtual TokenPtr lemmatize(const char * token) = 0;

    /// Virtual so that implementations can be destroyed through a base pointer.
    virtual ~ILemmatizer() = default;
};
|
|
|
|
|
2021-06-05 00:52:35 +00:00
|
|
|
|
2021-06-19 18:52:09 +00:00
|
|
|
class Lemmatizers
|
|
|
|
{
|
2021-06-05 00:52:35 +00:00
|
|
|
public:
|
2021-06-20 12:31:07 +00:00
|
|
|
using LemmPtr = std::shared_ptr<ILemmatizer>;
|
2021-06-05 00:52:35 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
std::mutex mutex;
|
|
|
|
std::unordered_map<String, LemmPtr> lemmatizers;
|
2021-06-19 18:52:09 +00:00
|
|
|
std::unordered_map<String, String> paths;
|
2021-06-05 00:52:35 +00:00
|
|
|
|
|
|
|
public:
|
2021-06-20 12:31:07 +00:00
|
|
|
explicit Lemmatizers(const Poco::Util::AbstractConfiguration & config);
|
2021-06-05 00:52:35 +00:00
|
|
|
|
|
|
|
LemmPtr getLemmatizer(const String & name);
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|
2021-07-30 13:30:30 +00:00
|
|
|
|
|
|
|
#endif
|