Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-21 15:12:02 +00:00)
Rewrite NLP tests from integration to functional
Commit 6414a01987 (parent 12bbc4a276)
programs/server/config.d/ext-en.txt (new symbolic link, 1 line)

@@ -0,0 +1 @@
+../../../tests/config/ext-en.txt
programs/server/config.d/ext-ru.txt (new symbolic link, 1 line)

@@ -0,0 +1 @@
+../../../tests/config/ext-ru.txt
programs/server/config.d/lem-en.bin (new symbolic link, 1 line)

@@ -0,0 +1 @@
+../../../tests/config/lem-en.bin
programs/server/config.d/nlp.xml (new symbolic link, 1 line)

@@ -0,0 +1 @@
+../../../tests/config/nlp.xml
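These links let a server started from programs/server pick up the NLP dictionaries and config used by the functional tests. If they ever need to be recreated by hand, a minimal sketch from the repository root (the commands themselves are not part of this commit) would be:

# Recreate the four config.d symlinks added above (run from the repository root).
ln -sf ../../../tests/config/ext-en.txt programs/server/config.d/ext-en.txt
ln -sf ../../../tests/config/ext-ru.txt programs/server/config.d/ext-ru.txt
ln -sf ../../../tests/config/lem-en.bin programs/server/config.d/lem-en.bin
ln -sf ../../../tests/config/nlp.xml programs/server/config.d/nlp.xml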
tests/config/install.sh

@@ -48,6 +48,7 @@ ln -sf $SRC_PATH/config.d/named_collection.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/ssl_certs.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/filesystem_cache_log.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/session_log.xml $DEST_SERVER_PATH/config.d/
+ln -sf $SRC_PATH/config.d/nlp.xml $DEST_SERVER_PATH/config.d/

 ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/
 ln -sf $SRC_PATH/users.d/readonly.xml $DEST_SERVER_PATH/users.d/

@@ -75,6 +76,10 @@ ln -sf $SRC_PATH/test_function.xml $DEST_SERVER_PATH/

 ln -sf $SRC_PATH/top_level_domains $DEST_SERVER_PATH/

+ln -sf $SRC_PATH/ext-en.txt $DEST_SERVER_PATH/
+ln -sf $SRC_PATH/ext-ru.txt $DEST_SERVER_PATH/
+ln -sf $SRC_PATH/lem-en.bin $DEST_SERVER_PATH/

 ln -sf $SRC_PATH/server.key $DEST_SERVER_PATH/
 ln -sf $SRC_PATH/server.crt $DEST_SERVER_PATH/
 ln -sf $SRC_PATH/dhparam.pem $DEST_SERVER_PATH/
tests/config/nlp.xml

@@ -4,19 +4,19 @@
         <extension>
             <name>en</name>
             <type>plain</type>
-            <path>/etc/clickhouse-server/dictionaries/ext-en.txt</path>
+            <path>config.d/ext-en.txt</path>
         </extension>
         <extension>
             <name>ru</name>
             <type>plain</type>
-            <path>/etc/clickhouse-server/dictionaries/ext-ru.txt</path>
+            <path>config.d/ext-ru.txt</path>
         </extension>
     </synonyms_extensions>

     <lemmatizers>
         <lemmatizer>
             <lang>en</lang>
-            <path>/etc/clickhouse-server/dictionaries/lem-en.bin</path>
+            <path>config.d/lem-en.bin</path>
         </lemmatizer>
     </lemmatizers>
 </clickhouse>
Binary file not shown.
tests/integration/test_nlp/test.py (deleted; former contents shown below)

@@ -1,149 +0,0 @@

import os
import sys

import pytest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

from helpers.cluster import ClickHouseCluster


cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance("instance", main_configs=["configs/dicts_config.xml"])


def copy_file_to_container(local_path, dist_path, container_id):
    os.system(
        "docker cp {local} {cont_id}:{dist}".format(
            local=local_path, cont_id=container_id, dist=dist_path
        )
    )


@pytest.fixture(scope="module")
def start_cluster():
    try:
        cluster.start()

        copy_file_to_container(
            os.path.join(SCRIPT_DIR, "dictionaries/."),
            "/etc/clickhouse-server/dictionaries",
            instance.docker_id,
        )

        yield cluster
    finally:
        cluster.shutdown()


def test_lemmatize(start_cluster):
    assert (
        instance.query(
            "SELECT lemmatize('en', 'wolves')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "wolf\n"
    )
    assert (
        instance.query(
            "SELECT lemmatize('en', 'dogs')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "dog\n"
    )
    assert (
        instance.query(
            "SELECT lemmatize('en', 'looking')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "look\n"
    )
    assert (
        instance.query(
            "SELECT lemmatize('en', 'took')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "take\n"
    )
    assert (
        instance.query(
            "SELECT lemmatize('en', 'imported')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "import\n"
    )
    assert (
        instance.query(
            "SELECT lemmatize('en', 'tokenized')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "tokenize\n"
    )
    assert (
        instance.query(
            "SELECT lemmatize('en', 'flown')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "fly\n"
    )


def test_synonyms_extensions(start_cluster):
    assert (
        instance.query(
            "SELECT synonyms('en', 'crucial')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['important','big','critical','crucial','essential']\n"
    )
    assert (
        instance.query(
            "SELECT synonyms('en', 'cheerful')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['happy','cheerful','delighted','ecstatic']\n"
    )
    assert (
        instance.query(
            "SELECT synonyms('en', 'yet')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['however','nonetheless','but','yet']\n"
    )
    assert (
        instance.query(
            "SELECT synonyms('en', 'quiz')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['quiz','query','check','exam']\n"
    )

    assert (
        instance.query(
            "SELECT synonyms('ru', 'главный')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['важный','большой','высокий','хороший','главный']\n"
    )
    assert (
        instance.query(
            "SELECT synonyms('ru', 'веселый')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['веселый','счастливый','живой','яркий','смешной']\n"
    )
    assert (
        instance.query(
            "SELECT synonyms('ru', 'правда')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['хотя','однако','но','правда']\n"
    )
    assert (
        instance.query(
            "SELECT synonyms('ru', 'экзамен')",
            settings={"allow_experimental_nlp_functions": 1},
        )
        == "['экзамен','испытание','проверка']\n"
    )
tests/queries/0_stateless/02412_nlp.reference (new file, 15 lines)

@@ -0,0 +1,15 @@
+wolf
+dog
+look
+take
+import
+tokenize
+fly
+['important','big','critical','crucial','essential']
+['happy','cheerful','delighted','ecstatic']
+['however','nonetheless','but','yet']
+['quiz','query','check','exam']
+['важный','большой','высокий','хороший','главный']
+['веселый','счастливый','живой','яркий','смешной']
+['хотя','однако','но','правда']
+['экзамен','испытание','проверка']
tests/queries/0_stateless/02412_nlp.sql (new file, 18 lines)

@@ -0,0 +1,18 @@
+SET allow_experimental_nlp_functions = 1;
+
+SELECT lemmatize('en', 'wolves');
+SELECT lemmatize('en', 'dogs');
+SELECT lemmatize('en', 'looking');
+SELECT lemmatize('en', 'took');
+SELECT lemmatize('en', 'imported');
+SELECT lemmatize('en', 'tokenized');
+SELECT lemmatize('en', 'flown');
+
+SELECT synonyms('en', 'crucial');
+SELECT synonyms('en', 'cheerful');
+SELECT synonyms('en', 'yet');
+SELECT synonyms('en', 'quiz');
+SELECT synonyms('ru', 'главный');
+SELECT synonyms('ru', 'веселый');
+SELECT synonyms('ru', 'правда');
+SELECT synonyms('ru', 'экзамен');
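Once the configs from tests/config are installed, the new test runs like any other stateless test. A minimal sketch, assuming a local build and the in-tree functional test runner (neither command is part of this commit):

# Run the new test by name through the stateless test runner.
tests/clickhouse-test 02412_nlp
# Or send the queries directly to a running server that has nlp.xml and the dictionaries installed.
clickhouse-client --queries-file tests/queries/0_stateless/02412_nlp.sql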