diff --git a/programs/server/config.d/ext-en.txt b/programs/server/config.d/ext-en.txt new file mode 120000 index 00000000000..6bc78ab238a --- /dev/null +++ b/programs/server/config.d/ext-en.txt @@ -0,0 +1 @@ +../../../tests/config/ext-en.txt \ No newline at end of file diff --git a/programs/server/config.d/ext-ru.txt b/programs/server/config.d/ext-ru.txt new file mode 120000 index 00000000000..63ea415a66e --- /dev/null +++ b/programs/server/config.d/ext-ru.txt @@ -0,0 +1 @@ +../../../tests/config/ext-ru.txt \ No newline at end of file diff --git a/programs/server/config.d/lem-en.bin b/programs/server/config.d/lem-en.bin new file mode 120000 index 00000000000..d2c960cf013 --- /dev/null +++ b/programs/server/config.d/lem-en.bin @@ -0,0 +1 @@ +../../../tests/config/lem-en.bin \ No newline at end of file diff --git a/programs/server/config.d/nlp.xml b/programs/server/config.d/nlp.xml new file mode 120000 index 00000000000..def9d3b1eb4 --- /dev/null +++ b/programs/server/config.d/nlp.xml @@ -0,0 +1 @@ +../../../tests/config/nlp.xml \ No newline at end of file diff --git a/tests/integration/test_nlp/dictionaries/ext-en.txt b/tests/config/ext-en.txt similarity index 100% rename from tests/integration/test_nlp/dictionaries/ext-en.txt rename to tests/config/ext-en.txt diff --git a/tests/integration/test_nlp/dictionaries/ext-ru.txt b/tests/config/ext-ru.txt similarity index 100% rename from tests/integration/test_nlp/dictionaries/ext-ru.txt rename to tests/config/ext-ru.txt diff --git a/tests/config/install.sh b/tests/config/install.sh index 072787efbb3..efbb9e614ae 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -48,6 +48,7 @@ ln -sf $SRC_PATH/config.d/named_collection.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/ssl_certs.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/filesystem_cache_log.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/session_log.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/nlp.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/readonly.xml $DEST_SERVER_PATH/users.d/ @@ -75,6 +76,10 @@ ln -sf $SRC_PATH/test_function.xml $DEST_SERVER_PATH/ ln -sf $SRC_PATH/top_level_domains $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/ext-en.txt $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/ext-ru.txt $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/lem-en.bin $DEST_SERVER_PATH/ + ln -sf $SRC_PATH/server.key $DEST_SERVER_PATH/ ln -sf $SRC_PATH/server.crt $DEST_SERVER_PATH/ ln -sf $SRC_PATH/dhparam.pem $DEST_SERVER_PATH/ diff --git a/tests/integration/test_nlp/configs/dicts_config.xml b/tests/config/nlp.xml similarity index 65% rename from tests/integration/test_nlp/configs/dicts_config.xml rename to tests/config/nlp.xml index 8c05ea67e49..17b11741fbd 100644 --- a/tests/integration/test_nlp/configs/dicts_config.xml +++ b/tests/config/nlp.xml @@ -4,19 +4,19 @@ en plain - /etc/clickhouse-server/dictionaries/ext-en.txt + config.d/ext-en.txt ru plain - /etc/clickhouse-server/dictionaries/ext-ru.txt + config.d/ext-ru.txt en - /etc/clickhouse-server/dictionaries/lem-en.bin + config.d/lem-en.bin diff --git a/tests/integration/test_nlp/__init__.py b/tests/integration/test_nlp/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_nlp/dictionaries/lem-en.bin b/tests/integration/test_nlp/dictionaries/lem-en.bin deleted file mode 100644 index 8981bc1ead0..00000000000 Binary files a/tests/integration/test_nlp/dictionaries/lem-en.bin and /dev/null differ diff --git a/tests/integration/test_nlp/test.py b/tests/integration/test_nlp/test.py deleted file mode 100644 index e15c9ecfaa6..00000000000 --- a/tests/integration/test_nlp/test.py +++ /dev/null @@ -1,149 +0,0 @@ -import os -import sys - -import pytest - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - -from helpers.cluster import ClickHouseCluster - - -cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance("instance", main_configs=["configs/dicts_config.xml"]) - - -def copy_file_to_container(local_path, dist_path, container_id): - os.system( - "docker cp {local} {cont_id}:{dist}".format( - local=local_path, cont_id=container_id, dist=dist_path - ) - ) - - -@pytest.fixture(scope="module") -def start_cluster(): - try: - cluster.start() - - copy_file_to_container( - os.path.join(SCRIPT_DIR, "dictionaries/."), - "/etc/clickhouse-server/dictionaries", - instance.docker_id, - ) - - yield cluster - finally: - cluster.shutdown() - - -def test_lemmatize(start_cluster): - assert ( - instance.query( - "SELECT lemmatize('en', 'wolves')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "wolf\n" - ) - assert ( - instance.query( - "SELECT lemmatize('en', 'dogs')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "dog\n" - ) - assert ( - instance.query( - "SELECT lemmatize('en', 'looking')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "look\n" - ) - assert ( - instance.query( - "SELECT lemmatize('en', 'took')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "take\n" - ) - assert ( - instance.query( - "SELECT lemmatize('en', 'imported')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "import\n" - ) - assert ( - instance.query( - "SELECT lemmatize('en', 'tokenized')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "tokenize\n" - ) - assert ( - instance.query( - "SELECT lemmatize('en', 'flown')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "fly\n" - ) - - -def test_synonyms_extensions(start_cluster): - assert ( - instance.query( - "SELECT synonyms('en', 'crucial')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "['important','big','critical','crucial','essential']\n" - ) - assert ( - instance.query( - "SELECT synonyms('en', 'cheerful')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "['happy','cheerful','delighted','ecstatic']\n" - ) - assert ( - instance.query( - "SELECT synonyms('en', 'yet')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "['however','nonetheless','but','yet']\n" - ) - assert ( - instance.query( - "SELECT synonyms('en', 'quiz')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "['quiz','query','check','exam']\n" - ) - - assert ( - instance.query( - "SELECT synonyms('ru', 'главный')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "['важный','большой','высокий','хороший','главный']\n" - ) - assert ( - instance.query( - "SELECT synonyms('ru', 'веселый')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "['веселый','счастливый','живой','яркий','смешной']\n" - ) - assert ( - instance.query( - "SELECT synonyms('ru', 'правда')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "['хотя','однако','но','правда']\n" - ) - assert ( - instance.query( - "SELECT synonyms('ru', 'экзамен')", - settings={"allow_experimental_nlp_functions": 1}, - ) - == "['экзамен','испытание','проверка']\n" - ) diff --git a/tests/queries/0_stateless/02412_nlp.reference b/tests/queries/0_stateless/02412_nlp.reference new file mode 100644 index 00000000000..337022b9fb2 --- /dev/null +++ b/tests/queries/0_stateless/02412_nlp.reference @@ -0,0 +1,15 @@ +wolf +dog +look +take +import +tokenize +fly +['important','big','critical','crucial','essential'] +['happy','cheerful','delighted','ecstatic'] +['however','nonetheless','but','yet'] +['quiz','query','check','exam'] +['важный','большой','высокий','хороший','главный'] +['веселый','счастливый','живой','яркий','смешной'] +['хотя','однако','но','правда'] +['экзамен','испытание','проверка'] diff --git a/tests/queries/0_stateless/02412_nlp.sql b/tests/queries/0_stateless/02412_nlp.sql new file mode 100644 index 00000000000..6536b1beb70 --- /dev/null +++ b/tests/queries/0_stateless/02412_nlp.sql @@ -0,0 +1,18 @@ +SET allow_experimental_nlp_functions = 1; + +SELECT lemmatize('en', 'wolves'); +SELECT lemmatize('en', 'dogs'); +SELECT lemmatize('en', 'looking'); +SELECT lemmatize('en', 'took'); +SELECT lemmatize('en', 'imported'); +SELECT lemmatize('en', 'tokenized'); +SELECT lemmatize('en', 'flown'); + +SELECT synonyms('en', 'crucial'); +SELECT synonyms('en', 'cheerful'); +SELECT synonyms('en', 'yet'); +SELECT synonyms('en', 'quiz'); +SELECT synonyms('ru', 'главный'); +SELECT synonyms('ru', 'веселый'); +SELECT synonyms('ru', 'правда'); +SELECT synonyms('ru', 'экзамен');