Add experimental flag

This commit is contained in:
alesapin 2021-07-30 18:25:51 +03:00
parent e20e88ece3
commit 02176fb4c7
9 changed files with 45 additions and 20 deletions

View File

@ -489,6 +489,7 @@ class IColumn;
\
/** Experimental functions */ \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
\
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \

View File

@ -18,6 +18,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int SUPPORT_IS_DISABLED;
}
namespace
@ -61,6 +62,9 @@ public:
static constexpr auto name = "lemmatize";
/// Factory for the function named "lemmatize".
/// Throws SUPPORT_IS_DISABLED unless the `allow_experimental_nlp_functions` setting is enabled in the given context.
static FunctionPtr create(ContextPtr context)
{
/// Refuse to construct the function while the experimental NLP setting is off.
if (!context->getSettingsRef().allow_experimental_nlp_functions)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Natural language processing function '{}' is experimental. Set `allow_experimental_nlp_functions` setting to enable it", name);
/// The instance is constructed from the lemmatizers obtained from the context.
return std::make_shared<FunctionLemmatize>(context->getLemmatizers());
}

View File

@ -84,8 +84,8 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionEncodeXMLComponent(factory);
registerFunctionDecodeXMLComponent(factory);
registerFunctionExtractTextFromHTML(factory);
registerFunctionToStringCutToZero(factory);
#if USE_BASE64
registerFunctionBase64Encode(factory);
registerFunctionBase64Decode(factory);

View File

@ -9,6 +9,7 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
#include <libstemmer.h>
@ -19,6 +20,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int SUPPORT_IS_DISABLED;
}
namespace
@ -70,7 +72,14 @@ class FunctionStem : public IFunction
{
public:
static constexpr auto name = "stem";
/// Factory that ignores the passed context and returns a default-constructed FunctionStem; performs no settings check.
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionStem>(); }
/// Factory for the function named "stem".
/// Throws SUPPORT_IS_DISABLED unless the `allow_experimental_nlp_functions` setting is enabled in the given context.
static FunctionPtr create(ContextPtr context)
{
/// Refuse to construct the function while the experimental NLP setting is off.
if (!context->getSettingsRef().allow_experimental_nlp_functions)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Natural language processing function '{}' is experimental. Set `allow_experimental_nlp_functions` setting to enable it", name);
/// The context is used only for the settings check; the instance itself is default-constructed.
return std::make_shared<FunctionStem>();
}
String getName() const override { return name; }

View File

@ -24,6 +24,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int SUPPORT_IS_DISABLED;
}
class FunctionSynonyms : public IFunction
@ -32,6 +33,9 @@ public:
static constexpr auto name = "synonyms";
/// Factory for the function named "synonyms".
/// Throws SUPPORT_IS_DISABLED unless the `allow_experimental_nlp_functions` setting is enabled in the given context.
static FunctionPtr create(ContextPtr context)
{
/// Refuse to construct the function while the experimental NLP setting is off.
if (!context->getSettingsRef().allow_experimental_nlp_functions)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Natural language processing function '{}' is experimental. Set `allow_experimental_nlp_functions` setting to enable it", name);
/// The instance is constructed from the synonyms extensions obtained from the context.
return std::make_shared<FunctionSynonyms>(context->getSynonymsExtensions());
}

View File

@ -27,22 +27,21 @@ def start_cluster():
cluster.shutdown()
# Checks the output of lemmatize('en', ...) for several English words.
# The first group of assertions issues the queries without extra settings; the second group
# repeats the same queries with allow_experimental_nlp_functions=1 passed via `settings`.
def test_lemmatize(start_cluster):
assert instance.query("SELECT lemmatize('en', 'wolves')") == "wolf\n"
assert instance.query("SELECT lemmatize('en', 'dogs')") == "dog\n"
assert instance.query("SELECT lemmatize('en', 'looking')") == "look\n"
assert instance.query("SELECT lemmatize('en', 'took')") == "take\n"
assert instance.query("SELECT lemmatize('en', 'imported')") == "import\n"
assert instance.query("SELECT lemmatize('en', 'tokenized')") == "tokenize\n"
assert instance.query("SELECT lemmatize('en', 'flown')") == "fly\n"
assert instance.query("SELECT lemmatize('en', 'wolves')", settings={"allow_experimental_nlp_functions": 1}) == "wolf\n"
assert instance.query("SELECT lemmatize('en', 'dogs')", settings={"allow_experimental_nlp_functions": 1}) == "dog\n"
assert instance.query("SELECT lemmatize('en', 'looking')", settings={"allow_experimental_nlp_functions": 1}) == "look\n"
assert instance.query("SELECT lemmatize('en', 'took')", settings={"allow_experimental_nlp_functions": 1}) == "take\n"
assert instance.query("SELECT lemmatize('en', 'imported')", settings={"allow_experimental_nlp_functions": 1}) == "import\n"
assert instance.query("SELECT lemmatize('en', 'tokenized')", settings={"allow_experimental_nlp_functions": 1}) == "tokenize\n"
assert instance.query("SELECT lemmatize('en', 'flown')", settings={"allow_experimental_nlp_functions": 1}) == "fly\n"
# Checks the output of synonyms() for English ('en') and Russian ('ru') words.
# Each language's group of queries appears twice: once without extra settings and once
# with allow_experimental_nlp_functions=1 passed via `settings`.
def test_synonyms_extensions(start_cluster):
assert instance.query("SELECT synonyms('en', 'crucial')") == "['important','big','critical','crucial','essential']\n"
assert instance.query("SELECT synonyms('en', 'cheerful')") == "['happy','cheerful','delighted','ecstatic']\n"
assert instance.query("SELECT synonyms('en', 'yet')") == "['however','nonetheless','but','yet']\n"
assert instance.query("SELECT synonyms('en', 'quiz')") == "['quiz','query','check','exam']\n"
assert instance.query("SELECT synonyms('en', 'crucial')", settings={"allow_experimental_nlp_functions": 1}) == "['important','big','critical','crucial','essential']\n"
assert instance.query("SELECT synonyms('en', 'cheerful')", settings={"allow_experimental_nlp_functions": 1}) == "['happy','cheerful','delighted','ecstatic']\n"
assert instance.query("SELECT synonyms('en', 'yet')", settings={"allow_experimental_nlp_functions": 1}) == "['however','nonetheless','but','yet']\n"
assert instance.query("SELECT synonyms('en', 'quiz')", settings={"allow_experimental_nlp_functions": 1}) == "['quiz','query','check','exam']\n"
assert instance.query("SELECT synonyms('ru', 'главный')") == "['важный','большой','высокий','хороший','главный']\n"
assert instance.query("SELECT synonyms('ru', 'веселый')") == "['веселый','счастливый','живой','яркий','смешной']\n"
assert instance.query("SELECT synonyms('ru', 'правда')") == "['хотя','однако','но','правда']\n"
assert instance.query("SELECT synonyms('ru', 'экзамен')") == "['экзамен','испытание','проверка']\n"
assert instance.query("SELECT synonyms('ru', 'главный')", settings={"allow_experimental_nlp_functions": 1}) == "['важный','большой','высокий','хороший','главный']\n"
assert instance.query("SELECT synonyms('ru', 'веселый')", settings={"allow_experimental_nlp_functions": 1}) == "['веселый','счастливый','живой','яркий','смешной']\n"
assert instance.query("SELECT synonyms('ru', 'правда')", settings={"allow_experimental_nlp_functions": 1}) == "['хотя','однако','но','правда']\n"
assert instance.query("SELECT synonyms('ru', 'экзамен')", settings={"allow_experimental_nlp_functions": 1}) == "['экзамен','испытание','проверка']\n"

View File

@ -1,4 +1,8 @@
<test>
<settings>
<allow_experimental_nlp_functions>1</allow_experimental_nlp_functions>
</settings>
<preconditions>
<table_exists>hits_100m_single</table_exists>
</preconditions>
@ -13,4 +17,4 @@
<drop_query>DROP TABLE IF EXISTS hits_100m_words</drop_query>
<drop_query>DROP TABLE IF EXISTS hits_100m_words_ws</drop_query>
</test>
</test>

View File

@ -1,3 +1,5 @@
SET allow_experimental_nlp_functions = 1;
SELECT splitByNonAlpha('It is quite a wonderful day, isn\'t it?');
SELECT splitByNonAlpha('There is.... so much to learn!');
SELECT splitByNonAlpha('22:00 email@yandex.ru');

View File

@ -1,3 +1,5 @@
SET allow_experimental_nlp_functions = 1;
SELECT stem('en', 'given');
SELECT stem('en', 'combinatorial');
SELECT stem('en', 'collection');
@ -20,4 +22,4 @@ SELECT stem('fr', 'maximiser');
SELECT stem('fr', 'dépasser');
SELECT stem('fr', 'intensivement');
SELECT stem('fr', 'étudié');
SELECT stem('fr', 'peuvent');
SELECT stem('fr', 'peuvent');