Rewrite NLP tests from integration to functional

Alexey Milovidov 2022-08-26 22:36:34 +02:00
parent 12bbc4a276
commit 6414a01987
13 changed files with 45 additions and 152 deletions

View File

@@ -0,0 +1 @@
../../../tests/config/ext-en.txt

View File

@@ -0,0 +1 @@
../../../tests/config/ext-ru.txt

View File

@@ -0,0 +1 @@
../../../tests/config/lem-en.bin

View File

@@ -0,0 +1 @@
../../../tests/config/nlp.xml

View File

@@ -48,6 +48,7 @@ ln -sf $SRC_PATH/config.d/named_collection.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/ssl_certs.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/filesystem_cache_log.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/session_log.xml $DEST_SERVER_PATH/config.d/
+ln -sf $SRC_PATH/config.d/nlp.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/
ln -sf $SRC_PATH/users.d/readonly.xml $DEST_SERVER_PATH/users.d/
@@ -75,6 +76,10 @@ ln -sf $SRC_PATH/test_function.xml $DEST_SERVER_PATH/
ln -sf $SRC_PATH/top_level_domains $DEST_SERVER_PATH/
+ln -sf $SRC_PATH/ext-en.txt $DEST_SERVER_PATH/
+ln -sf $SRC_PATH/ext-ru.txt $DEST_SERVER_PATH/
+ln -sf $SRC_PATH/lem-en.bin $DEST_SERVER_PATH/
ln -sf $SRC_PATH/server.key $DEST_SERVER_PATH/
ln -sf $SRC_PATH/server.crt $DEST_SERVER_PATH/
ln -sf $SRC_PATH/dhparam.pem $DEST_SERVER_PATH/

View File

@@ -4,19 +4,19 @@
        <extension>
            <name>en</name>
            <type>plain</type>
-           <path>/etc/clickhouse-server/dictionaries/ext-en.txt</path>
+           <path>config.d/ext-en.txt</path>
        </extension>
        <extension>
            <name>ru</name>
            <type>plain</type>
-           <path>/etc/clickhouse-server/dictionaries/ext-ru.txt</path>
+           <path>config.d/ext-ru.txt</path>
        </extension>
    </synonyms_extensions>
    <lemmatizers>
        <lemmatizer>
            <lang>en</lang>
-           <path>/etc/clickhouse-server/dictionaries/lem-en.bin</path>
+           <path>config.d/lem-en.bin</path>
        </lemmatizer>
    </lemmatizers>
</clickhouse>
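
With the dictionaries now referenced by the relative config.d/ paths set up by the install script above, the same lookups can be tried by hand. A rough sketch, not part of this commit, assuming a server that has picked up config.d/nlp.xml together with the linked ext-en.txt, ext-ru.txt and lem-en.bin files:

# Sketch: call the experimental NLP functions against the dictionaries configured above.
# clickhouse-client accepts server settings as command-line options, so the experimental
# gate is enabled only for this invocation.
clickhouse-client --allow_experimental_nlp_functions=1 \
    --query "SELECT lemmatize('en', 'wolves'), synonyms('en', 'crucial')"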

View File

@@ -1,149 +0,0 @@
import os
import sys

import pytest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance("instance", main_configs=["configs/dicts_config.xml"])


def copy_file_to_container(local_path, dist_path, container_id):
    os.system(
        "docker cp {local} {cont_id}:{dist}".format(
            local=local_path, cont_id=container_id, dist=dist_path
        )
    )


@pytest.fixture(scope="module")
def start_cluster():
    try:
        cluster.start()
        copy_file_to_container(
            os.path.join(SCRIPT_DIR, "dictionaries/."),
            "/etc/clickhouse-server/dictionaries",
            instance.docker_id,
        )
        yield cluster
    finally:
        cluster.shutdown()


def test_lemmatize(start_cluster):
    assert (
        instance.query(
            "SELECT lemmatize('en', 'wolves')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "wolf\n"
    )
    assert (
        instance.query(
            "SELECT lemmatize('en', 'dogs')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "dog\n"
    )
    assert (
        instance.query(
            "SELECT lemmatize('en', 'looking')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "look\n"
    )
    assert (
        instance.query(
            "SELECT lemmatize('en', 'took')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "take\n"
    )
    assert (
        instance.query(
            "SELECT lemmatize('en', 'imported')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "import\n"
    )
    assert (
        instance.query(
            "SELECT lemmatize('en', 'tokenized')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "tokenize\n"
    )
    assert (
        instance.query(
            "SELECT lemmatize('en', 'flown')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "fly\n"
    )


def test_synonyms_extensions(start_cluster):
    assert (
        instance.query(
            "SELECT synonyms('en', 'crucial')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['important','big','critical','crucial','essential']\n"
    )
    assert (
        instance.query(
            "SELECT synonyms('en', 'cheerful')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['happy','cheerful','delighted','ecstatic']\n"
    )
    assert (
        instance.query(
            "SELECT synonyms('en', 'yet')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['however','nonetheless','but','yet']\n"
    )
    assert (
        instance.query(
            "SELECT synonyms('en', 'quiz')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['quiz','query','check','exam']\n"
    )
    assert (
        instance.query(
            "SELECT synonyms('ru', 'главный')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['важный','большой','высокий','хороший','главный']\n"
    )
    assert (
        instance.query(
            "SELECT synonyms('ru', 'веселый')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['веселый','счастливый','живой','яркий','смешной']\n"
    )
    assert (
        instance.query(
            "SELECT synonyms('ru', 'правда')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['хотя','однако','но','правда']\n"
    )
    assert (
        instance.query(
            "SELECT synonyms('ru', 'экзамен')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['экзамен','испытание','проверка']\n"
    )

View File

@@ -0,0 +1,15 @@
wolf
dog
look
take
import
tokenize
fly
['important','big','critical','crucial','essential']
['happy','cheerful','delighted','ecstatic']
['however','nonetheless','but','yet']
['quiz','query','check','exam']
['важный','большой','высокий','хороший','главный']
['веселый','счастливый','живой','яркий','смешной']
['хотя','однако','но','правда']
['экзамен','испытание','проверка']

View File

@@ -0,0 +1,18 @@
SET allow_experimental_nlp_functions = 1;
SELECT lemmatize('en', 'wolves');
SELECT lemmatize('en', 'dogs');
SELECT lemmatize('en', 'looking');
SELECT lemmatize('en', 'took');
SELECT lemmatize('en', 'imported');
SELECT lemmatize('en', 'tokenized');
SELECT lemmatize('en', 'flown');
SELECT synonyms('en', 'crucial');
SELECT synonyms('en', 'cheerful');
SELECT synonyms('en', 'yet');
SELECT synonyms('en', 'quiz');
SELECT synonyms('ru', 'главный');
SELECT synonyms('ru', 'веселый');
SELECT synonyms('ru', 'правда');
SELECT synonyms('ru', 'экзамен');
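
Since the checks now live in a functional SQL test rather than an integration-test cluster, they run through the ordinary functional-test driver. A rough sketch of a local run, not part of this commit, assuming a clickhouse-server already running with the configs linked by the install script above and that the new test's file name contains "nlp":

# Sketch: run the new functional NLP test locally via the test runner.
# clickhouse-test treats positional arguments as substring filters on test names.
cd tests
./clickhouse-test nlp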