// ClickHouse/src/Interpreters/Lemmatizers.h
#pragma once
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#if USE_NLP
2021-06-05 00:52:35 +00:00
#include <common/types.h>
#include <Poco/Util/Application.h>
#include <mutex>
#include <unordered_map>
2021-06-05 00:52:35 +00:00
namespace DB
{
2021-06-20 12:31:07 +00:00
/// Abstract interface for a lemmatizer: a single pure virtual operation that
/// maps an input token to a result buffer.
class ILemmatizer
{
public:
    /// Shared-ownership handle to the character array returned by lemmatize().
    using TokenPtr = std::shared_ptr<char[]>;

    /// Virtual so that implementations can be destroyed through a pointer to the interface.
    virtual ~ILemmatizer() = default;

    /// Produces the result for `token`.
    /// NOTE(review): `token` is presumably a NUL-terminated C string and the
    /// returned buffer presumably is as well - confirm against the implementations.
    virtual TokenPtr lemmatize(const char * token) = 0;
};
2021-06-05 00:52:35 +00:00
class Lemmatizers
{
2021-06-05 00:52:35 +00:00
public:
2021-06-20 12:31:07 +00:00
using LemmPtr = std::shared_ptr<ILemmatizer>;
2021-06-05 00:52:35 +00:00
private:
std::mutex mutex;
std::unordered_map<String, LemmPtr> lemmatizers;
std::unordered_map<String, String> paths;
2021-06-05 00:52:35 +00:00
public:
2021-06-20 12:31:07 +00:00
explicit Lemmatizers(const Poco::Util::AbstractConfiguration & config);
2021-06-05 00:52:35 +00:00
LemmPtr getLemmatizer(const String & name);
};
}
#endif