mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Add experimental flag
This commit is contained in:
parent
e20e88ece3
commit
02176fb4c7
@ -489,6 +489,7 @@ class IColumn;
|
||||
\
|
||||
/** Experimental functions */ \
|
||||
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
|
||||
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
|
||||
\
|
||||
\
|
||||
/** Obsolete settings that do nothing but are left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
|
||||
|
@ -18,6 +18,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -61,6 +62,9 @@ public:
|
||||
static constexpr auto name = "lemmatize";
|
||||
/// Factory method: builds a FunctionLemmatize from the lemmatizers held by `context`.
/// Throws Exception with ErrorCodes::SUPPORT_IS_DISABLED when the
/// `allow_experimental_nlp_functions` setting is not enabled in the context's settings.
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
/// The function is experimental: refuse to construct it unless the user opted in.
if (!context->getSettingsRef().allow_experimental_nlp_functions)
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Natural language processing function '{}' is experimental. Set `allow_experimental_nlp_functions` setting to enable it", name);
|
||||
|
||||
return std::make_shared<FunctionLemmatize>(context->getLemmatizers());
|
||||
}
|
||||
|
||||
|
@ -84,8 +84,8 @@ void registerFunctionsString(FunctionFactory & factory)
|
||||
registerFunctionEncodeXMLComponent(factory);
|
||||
registerFunctionDecodeXMLComponent(factory);
|
||||
registerFunctionExtractTextFromHTML(factory);
|
||||
|
||||
registerFunctionToStringCutToZero(factory);
|
||||
|
||||
#if USE_BASE64
|
||||
registerFunctionBase64Encode(factory);
|
||||
registerFunctionBase64Decode(factory);
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <libstemmer.h>
|
||||
|
||||
@ -19,6 +20,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -70,7 +72,14 @@ class FunctionStem : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "stem";
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionStem>(); }
|
||||
|
||||
/// Factory method: builds a FunctionStem; the context is consulted only for the settings check.
/// Throws Exception with ErrorCodes::SUPPORT_IS_DISABLED when the
/// `allow_experimental_nlp_functions` setting is not enabled in the context's settings.
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
/// The function is experimental: refuse to construct it unless the user opted in.
if (!context->getSettingsRef().allow_experimental_nlp_functions)
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Natural language processing function '{}' is experimental. Set `allow_experimental_nlp_functions` setting to enable it", name);
|
||||
|
||||
return std::make_shared<FunctionStem>();
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
|
@ -24,6 +24,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
}
|
||||
|
||||
class FunctionSynonyms : public IFunction
|
||||
@ -32,6 +33,9 @@ public:
|
||||
static constexpr auto name = "synonyms";
|
||||
/// Factory method: builds a FunctionSynonyms from the synonyms extensions held by `context`.
/// Throws Exception with ErrorCodes::SUPPORT_IS_DISABLED when the
/// `allow_experimental_nlp_functions` setting is not enabled in the context's settings.
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
/// The function is experimental: refuse to construct it unless the user opted in.
if (!context->getSettingsRef().allow_experimental_nlp_functions)
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Natural language processing function '{}' is experimental. Set `allow_experimental_nlp_functions` setting to enable it", name);
|
||||
|
||||
return std::make_shared<FunctionSynonyms>(context->getSynonymsExtensions());
|
||||
}
|
||||
|
||||
|
@ -27,22 +27,21 @@ def start_cluster():
|
||||
cluster.shutdown()
|
||||
|
||||
def test_lemmatize(start_cluster):
|
||||
assert instance.query("SELECT lemmatize('en', 'wolves')") == "wolf\n"
|
||||
assert instance.query("SELECT lemmatize('en', 'dogs')") == "dog\n"
|
||||
assert instance.query("SELECT lemmatize('en', 'looking')") == "look\n"
|
||||
assert instance.query("SELECT lemmatize('en', 'took')") == "take\n"
|
||||
assert instance.query("SELECT lemmatize('en', 'imported')") == "import\n"
|
||||
assert instance.query("SELECT lemmatize('en', 'tokenized')") == "tokenize\n"
|
||||
assert instance.query("SELECT lemmatize('en', 'flown')") == "fly\n"
|
||||
assert instance.query("SELECT lemmatize('en', 'wolves')", settings={"allow_experimental_nlp_functions": 1}) == "wolf\n"
|
||||
assert instance.query("SELECT lemmatize('en', 'dogs')", settings={"allow_experimental_nlp_functions": 1}) == "dog\n"
|
||||
assert instance.query("SELECT lemmatize('en', 'looking')", settings={"allow_experimental_nlp_functions": 1}) == "look\n"
|
||||
assert instance.query("SELECT lemmatize('en', 'took')", settings={"allow_experimental_nlp_functions": 1}) == "take\n"
|
||||
assert instance.query("SELECT lemmatize('en', 'imported')", settings={"allow_experimental_nlp_functions": 1}) == "import\n"
|
||||
assert instance.query("SELECT lemmatize('en', 'tokenized')", settings={"allow_experimental_nlp_functions": 1}) == "tokenize\n"
|
||||
assert instance.query("SELECT lemmatize('en', 'flown')", settings={"allow_experimental_nlp_functions": 1}) == "fly\n"
|
||||
|
||||
def test_synonyms_extensions(start_cluster):
|
||||
assert instance.query("SELECT synonyms('en', 'crucial')") == "['important','big','critical','crucial','essential']\n"
|
||||
assert instance.query("SELECT synonyms('en', 'cheerful')") == "['happy','cheerful','delighted','ecstatic']\n"
|
||||
assert instance.query("SELECT synonyms('en', 'yet')") == "['however','nonetheless','but','yet']\n"
|
||||
assert instance.query("SELECT synonyms('en', 'quiz')") == "['quiz','query','check','exam']\n"
|
||||
assert instance.query("SELECT synonyms('en', 'crucial')", settings={"allow_experimental_nlp_functions": 1}) == "['important','big','critical','crucial','essential']\n"
|
||||
assert instance.query("SELECT synonyms('en', 'cheerful')", settings={"allow_experimental_nlp_functions": 1}) == "['happy','cheerful','delighted','ecstatic']\n"
|
||||
assert instance.query("SELECT synonyms('en', 'yet')", settings={"allow_experimental_nlp_functions": 1}) == "['however','nonetheless','but','yet']\n"
|
||||
assert instance.query("SELECT synonyms('en', 'quiz')", settings={"allow_experimental_nlp_functions": 1}) == "['quiz','query','check','exam']\n"
|
||||
|
||||
assert instance.query("SELECT synonyms('ru', 'главный')") == "['важный','большой','высокий','хороший','главный']\n"
|
||||
assert instance.query("SELECT synonyms('ru', 'веселый')") == "['веселый','счастливый','живой','яркий','смешной']\n"
|
||||
assert instance.query("SELECT synonyms('ru', 'правда')") == "['хотя','однако','но','правда']\n"
|
||||
assert instance.query("SELECT synonyms('ru', 'экзамен')") == "['экзамен','испытание','проверка']\n"
|
||||
|
||||
assert instance.query("SELECT synonyms('ru', 'главный')", settings={"allow_experimental_nlp_functions": 1}) == "['важный','большой','высокий','хороший','главный']\n"
|
||||
assert instance.query("SELECT synonyms('ru', 'веселый')", settings={"allow_experimental_nlp_functions": 1}) == "['веселый','счастливый','живой','яркий','смешной']\n"
|
||||
assert instance.query("SELECT synonyms('ru', 'правда')", settings={"allow_experimental_nlp_functions": 1}) == "['хотя','однако','но','правда']\n"
|
||||
assert instance.query("SELECT synonyms('ru', 'экзамен')", settings={"allow_experimental_nlp_functions": 1}) == "['экзамен','испытание','проверка']\n"
|
||||
|
@ -1,4 +1,8 @@
|
||||
<test>
|
||||
<settings>
|
||||
<allow_experimental_nlp_functions>1</allow_experimental_nlp_functions>
|
||||
</settings>
|
||||
|
||||
<preconditions>
|
||||
<table_exists>hits_100m_single</table_exists>
|
||||
</preconditions>
|
||||
@ -13,4 +17,4 @@
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS hits_100m_words</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS hits_100m_words_ws</drop_query>
|
||||
</test>
|
||||
</test>
|
||||
|
@ -1,3 +1,5 @@
|
||||
SET allow_experimental_nlp_functions = 1;
|
||||
|
||||
SELECT splitByNonAlpha('It is quite a wonderful day, isn\'t it?');
|
||||
SELECT splitByNonAlpha('There is.... so much to learn!');
|
||||
SELECT splitByNonAlpha('22:00 email@yandex.ru');
|
||||
|
@ -1,3 +1,5 @@
|
||||
SET allow_experimental_nlp_functions = 1;
|
||||
|
||||
SELECT stem('en', 'given');
|
||||
SELECT stem('en', 'combinatorial');
|
||||
SELECT stem('en', 'collection');
|
||||
@ -20,4 +22,4 @@ SELECT stem('fr', 'maximiser');
|
||||
SELECT stem('fr', 'dépasser');
|
||||
SELECT stem('fr', 'intensivement');
|
||||
SELECT stem('fr', 'étudié');
|
||||
SELECT stem('fr', 'peuvent');
|
||||
SELECT stem('fr', 'peuvent');
|
||||
|
Loading…
Reference in New Issue
Block a user