mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Add an ability to build ClickHouse without NLP functions
This commit is contained in:
parent
916594fe23
commit
4746002776
@ -542,6 +542,7 @@ include (cmake/find/libpqxx.cmake)
|
||||
include (cmake/find/nuraft.cmake)
|
||||
include (cmake/find/yaml-cpp.cmake)
|
||||
include (cmake/find/s2geometry.cmake)
|
||||
include (cmake/find/nlp.cmake)
|
||||
|
||||
if(NOT USE_INTERNAL_PARQUET_LIBRARY)
|
||||
set (ENABLE_ORC OFF CACHE INTERNAL "")
|
||||
|
32
cmake/find/nlp.cmake
Normal file
32
cmake/find/nlp.cmake
Normal file
@ -0,0 +1,32 @@
|
||||
# Configure-time switch for the optional NLP functions.
#
# Reads:
#   ENABLE_LIBRARIES          - supplies the default value of the ENABLE_NLP option.
#   ClickHouse_SOURCE_DIR     - root used to locate the contrib/ checkouts.
#   RECONFIGURE_MESSAGE_LEVEL - message() mode used when the feature has to be
#                               dropped (defined outside this file).
# Sets:
#   USE_NLP - 1 when ENABLE_NLP is on and all three contrib checkouts are
#             present, 0 on every other path.
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})

if (NOT ENABLE_NLP)
    # Assign the result explicitly, exactly like the other early exits below.
    # Without this, a USE_NLP value defined before this file is included
    # (e.g. passed with -DUSE_NLP=1) would stay visible to the `if (USE_NLP)`
    # checks elsewhere although the feature was switched off.
    set (USE_NLP 0)
    message (STATUS "NLP functions disabled")
    return()
endif()

# Each contrib directory is probed through a single file inside it; when that
# file is absent the submodule is reported as missing and NLP support is
# turned off.
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libstemmer_c/Makefile")
    message (WARNING "submodule contrib/libstemmer_c is missing. to fix try run: \n git submodule update --init --recursive")
    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libstemmer_c library, NLP functions will be disabled")
    set (USE_NLP 0)
    return()
endif ()

if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/wordnet-blast/CMakeLists.txt")
    message (WARNING "submodule contrib/wordnet-blast is missing. to fix try run: \n git submodule update --init --recursive")
    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal wordnet-blast library, NLP functions will be disabled")
    set (USE_NLP 0)
    return()
endif ()

if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lemmagen-c/README.md")
    message (WARNING "submodule contrib/lemmagen-c is missing. to fix try run: \n git submodule update --init --recursive")
    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal lemmagen-c library, NLP functions will be disabled")
    set (USE_NLP 0)
    return()
endif ()

# All three checks passed: enable the feature.
set (USE_NLP 1)

message (STATUS "Using Libraries for NLP functions: contrib/wordnet-blast, contrib/libstemmer_c, contrib/lemmagen-c")
|
9
contrib/CMakeLists.txt
vendored
9
contrib/CMakeLists.txt
vendored
@ -327,9 +327,12 @@ if (USE_NURAFT)
|
||||
endif()
|
||||
|
||||
add_subdirectory(fast_float)
|
||||
add_subdirectory(libstemmer-c-cmake)
|
||||
add_subdirectory(wordnet-blast-cmake)
|
||||
add_subdirectory(lemmagen-c-cmake)
|
||||
|
||||
if (USE_NLP)
|
||||
add_subdirectory(libstemmer-c-cmake)
|
||||
add_subdirectory(wordnet-blast-cmake)
|
||||
add_subdirectory(lemmagen-c-cmake)
|
||||
endif()
|
||||
|
||||
if (USE_SQLITE)
|
||||
add_subdirectory(sqlite-cmake)
|
||||
|
@ -473,9 +473,11 @@ endif ()
|
||||
|
||||
dbms_target_link_libraries(PRIVATE _boost_context)
|
||||
|
||||
dbms_target_link_libraries (PUBLIC stemmer)
|
||||
dbms_target_link_libraries (PUBLIC wnb)
|
||||
dbms_target_link_libraries (PUBLIC lemmagen)
|
||||
if (USE_NLP)
|
||||
dbms_target_link_libraries (PUBLIC stemmer)
|
||||
dbms_target_link_libraries (PUBLIC wnb)
|
||||
dbms_target_link_libraries (PUBLIC lemmagen)
|
||||
endif()
|
||||
|
||||
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")
|
||||
|
||||
|
@ -15,4 +15,5 @@
|
||||
#cmakedefine01 USE_LIBPQXX
|
||||
#cmakedefine01 USE_SQLITE
|
||||
#cmakedefine01 USE_NURAFT
|
||||
#cmakedefine01 USE_NLP
|
||||
#cmakedefine01 USE_KRB5
|
||||
|
@ -1,3 +1,9 @@
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "config_core.h"
|
||||
#endif
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
@ -116,3 +122,5 @@ void registerFunctionLemmatize(FunctionFactory & factory)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,6 @@
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "config_functions.h"
|
||||
# include "config_core.h"
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
@ -37,18 +38,20 @@ void registerFunctionCountMatches(FunctionFactory &);
|
||||
void registerFunctionEncodeXMLComponent(FunctionFactory &);
|
||||
void registerFunctionDecodeXMLComponent(FunctionFactory &);
|
||||
void registerFunctionExtractTextFromHTML(FunctionFactory &);
|
||||
void registerFunctionStem(FunctionFactory &);
|
||||
void registerFunctionSynonyms(FunctionFactory &);
|
||||
void registerFunctionLemmatize(FunctionFactory &);
|
||||
void registerFunctionToStringCutToZero(FunctionFactory &);
|
||||
|
||||
|
||||
#if USE_BASE64
|
||||
void registerFunctionBase64Encode(FunctionFactory &);
|
||||
void registerFunctionBase64Decode(FunctionFactory &);
|
||||
void registerFunctionTryBase64Decode(FunctionFactory &);
|
||||
#endif
|
||||
|
||||
#if USE_NLP
|
||||
void registerFunctionStem(FunctionFactory &);
|
||||
void registerFunctionSynonyms(FunctionFactory &);
|
||||
void registerFunctionLemmatize(FunctionFactory &);
|
||||
#endif
|
||||
|
||||
void registerFunctionsString(FunctionFactory & factory)
|
||||
{
|
||||
registerFunctionRepeat(factory);
|
||||
@ -81,15 +84,19 @@ void registerFunctionsString(FunctionFactory & factory)
|
||||
registerFunctionEncodeXMLComponent(factory);
|
||||
registerFunctionDecodeXMLComponent(factory);
|
||||
registerFunctionExtractTextFromHTML(factory);
|
||||
registerFunctionStem(factory);
|
||||
registerFunctionSynonyms(factory);
|
||||
registerFunctionLemmatize(factory);
|
||||
|
||||
registerFunctionToStringCutToZero(factory);
|
||||
#if USE_BASE64
|
||||
registerFunctionBase64Encode(factory);
|
||||
registerFunctionBase64Decode(factory);
|
||||
registerFunctionTryBase64Decode(factory);
|
||||
#endif
|
||||
|
||||
#if USE_NLP
|
||||
registerFunctionStem(factory);
|
||||
registerFunctionSynonyms(factory);
|
||||
registerFunctionLemmatize(factory);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,3 +1,9 @@
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "config_core.h"
|
||||
#endif
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
@ -116,3 +122,5 @@ void registerFunctionStem(FunctionFactory & factory)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,3 +1,9 @@
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "config_core.h"
|
||||
#endif
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
@ -114,3 +120,5 @@ void registerFunctionSynonyms(FunctionFactory & factory)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -351,8 +351,10 @@ struct ContextSharedPart
|
||||
|
||||
scope_guard dictionaries_xmls;
|
||||
|
||||
#if USE_NLP
|
||||
mutable std::optional<SynonymsExtensions> synonyms_extensions;
|
||||
mutable std::optional<Lemmatizers> lemmatizers;
|
||||
#endif
|
||||
|
||||
String default_profile_name; /// Default profile name used for default values.
|
||||
String system_profile_name; /// Profile used by system processes
|
||||
@ -1507,6 +1509,8 @@ void Context::loadDictionaries(const Poco::Util::AbstractConfiguration & config)
|
||||
std::make_unique<ExternalLoaderXMLConfigRepository>(config, "dictionaries_config"));
|
||||
}
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
SynonymsExtensions & Context::getSynonymsExtensions() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
@ -1526,6 +1530,7 @@ Lemmatizers & Context::getLemmatizers() const
|
||||
|
||||
return *shared->lemmatizers;
|
||||
}
|
||||
#endif
|
||||
|
||||
void Context::setProgressCallback(ProgressCallback callback)
|
||||
{
|
||||
|
@ -52,8 +52,6 @@ class AccessRightsElements;
|
||||
class EmbeddedDictionaries;
|
||||
class ExternalDictionariesLoader;
|
||||
class ExternalModelsLoader;
|
||||
class SynonymsExtensions;
|
||||
class Lemmatizers;
|
||||
class InterserverCredentials;
|
||||
using InterserverCredentialsPtr = std::shared_ptr<const InterserverCredentials>;
|
||||
class InterserverIOHandler;
|
||||
@ -116,6 +114,11 @@ using VolumePtr = std::shared_ptr<IVolume>;
|
||||
struct NamedSession;
|
||||
struct BackgroundTaskSchedulingSettings;
|
||||
|
||||
#if USE_NLP
|
||||
class SynonymsExtensions;
|
||||
class Lemmatizers;
|
||||
#endif
|
||||
|
||||
class Throttler;
|
||||
using ThrottlerPtr = std::shared_ptr<Throttler>;
|
||||
|
||||
@ -536,8 +539,10 @@ public:
|
||||
void tryCreateEmbeddedDictionaries() const;
|
||||
void loadDictionaries(const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
#if USE_NLP
|
||||
SynonymsExtensions & getSynonymsExtensions() const;
|
||||
Lemmatizers & getLemmatizers() const;
|
||||
#endif
|
||||
|
||||
void setExternalModelsConfig(const ConfigurationPtr & config, const std::string & config_name = "models_config");
|
||||
|
||||
|
@ -1,3 +1,10 @@
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "config_core.h"
|
||||
#endif
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
#include <Common/Exception.h>
|
||||
#include <Interpreters/Lemmatizers.h>
|
||||
#include <RdrLemmatizer.h>
|
||||
@ -89,3 +96,5 @@ Lemmatizers::LemmPtr Lemmatizers::getLemmatizer(const String & name)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,11 +1,19 @@
|
||||
#pragma once
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "config_core.h"
|
||||
#endif
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
#include <common/types.h>
|
||||
#include <Poco/Util/Application.h>
|
||||
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -37,3 +45,5 @@ public:
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,3 +1,9 @@
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "config_core.h"
|
||||
#endif
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
#include <Common/Exception.h>
|
||||
#include <Interpreters/SynonymsExtensions.h>
|
||||
|
||||
@ -49,7 +55,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
const Synset * getSynonyms(const std::string_view & token) const override
|
||||
const Synset * getSynonyms(std::string_view token) const override
|
||||
{
|
||||
auto it = table.find(token);
|
||||
|
||||
@ -68,7 +74,7 @@ private:
|
||||
public:
|
||||
explicit WordnetSynonymsExtension(const String & path) : wn(path) {}
|
||||
|
||||
const Synset * getSynonyms(const std::string_view & token) const override
|
||||
const Synset * getSynonyms(std::string_view token) const override
|
||||
{
|
||||
return wn.get_synset(std::string(token));
|
||||
}
|
||||
@ -147,3 +153,5 @@ SynonymsExtensions::ExtPtr SynonymsExtensions::getExtension(const String & name)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "config_core.h"
|
||||
#endif
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
#include <common/types.h>
|
||||
#include <Poco/Util/Application.h>
|
||||
|
||||
@ -17,7 +23,7 @@ class ISynonymsExtension
|
||||
public:
|
||||
using Synset = std::vector<String>;
|
||||
|
||||
virtual const Synset * getSynonyms(const std::string_view & token) const = 0;
|
||||
virtual const Synset * getSynonyms(std::string_view token) const = 0;
|
||||
|
||||
virtual ~ISynonymsExtension() = default;
|
||||
};
|
||||
@ -47,3 +53,5 @@ private:
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user