2021-06-05 02:52:18 +00:00
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
# Put the parent of this file's directory at the front of the import path
# (presumably so the shared `helpers` package resolves -- confirm against the
# repository layout).
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
# Directory containing this test file, with symlinks resolved; used as the
# base for locating the local dictionaries directory.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
|
|
|
|
|
|
|
|
from helpers.cluster import ClickHouseCluster
|
|
|
|
|
|
|
|
|
|
|
|
# Cluster object shared by every test in this module, constructed from this
# file's path.
cluster = ClickHouseCluster(__file__)
|
2022-03-22 16:39:58 +00:00
|
|
|
# The single instance the tests query, registered under the name "instance"
# with configs/dicts_config.xml as its main config.
instance = cluster.add_instance("instance", main_configs=["configs/dicts_config.xml"])
|
|
|
|
|
2021-06-05 02:52:18 +00:00
|
|
|
|
|
|
|
def copy_file_to_container(local_path, dist_path, container_id):
    """Copy a local file or directory into a Docker container via ``docker cp``.

    Args:
        local_path: Source path on the host.
        dist_path: Destination path inside the container.
        container_id: Identifier of the target container.

    Raises:
        subprocess.CalledProcessError: If ``docker cp`` exits with a non-zero
            status.
    """
    # Imported locally so this helper carries its own dependency.
    import subprocess

    # Use an argument list rather than a formatted shell string: paths that
    # contain spaces or shell metacharacters reach docker verbatim, and a
    # failed copy is reported here instead of surfacing later as a confusing
    # assertion failure in a test.
    subprocess.check_call(
        [
            "docker",
            "cp",
            str(local_path),
            "{cont_id}:{dist}".format(cont_id=container_id, dist=dist_path),
        ]
    )
|
|
|
|
|
2021-06-05 02:52:18 +00:00
|
|
|
|
|
|
|
@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture that starts the cluster and yields it.

    After the cluster starts, the local ``dictionaries`` directory is copied
    into the instance's container at ``/etc/clickhouse-server/dictionaries``.
    The cluster is shut down when the fixture is torn down, and also when
    start-up or the copy raises.
    """
    try:
        cluster.start()

        # The trailing "/." is part of the source path handed to `docker cp`.
        dictionaries_src = os.path.join(SCRIPT_DIR, "dictionaries/.")
        dictionaries_dst = "/etc/clickhouse-server/dictionaries"
        copy_file_to_container(dictionaries_src, dictionaries_dst, instance.docker_id)

        yield cluster
    finally:
        cluster.shutdown()
|
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-06-05 02:52:18 +00:00
|
|
|
def test_lemmatize(start_cluster):
    """Check ``lemmatize('en', word)`` against expected results.

    Each query runs with ``allow_experimental_nlp_functions`` set to 1 and
    its raw output (including the trailing newline) is compared to the
    expected string.
    """
    expectations = (
        ("SELECT lemmatize('en', 'wolves')", "wolf\n"),
        ("SELECT lemmatize('en', 'dogs')", "dog\n"),
        ("SELECT lemmatize('en', 'looking')", "look\n"),
        ("SELECT lemmatize('en', 'took')", "take\n"),
        ("SELECT lemmatize('en', 'imported')", "import\n"),
        ("SELECT lemmatize('en', 'tokenized')", "tokenize\n"),
        ("SELECT lemmatize('en', 'flown')", "fly\n"),
    )

    for sql, expected in expectations:
        # A fresh settings dict per call, exactly as each standalone assert had.
        actual = instance.query(
            sql, settings={"allow_experimental_nlp_functions": 1}
        )
        assert actual == expected
|
|
|
|
|
2021-06-05 02:52:18 +00:00
|
|
|
|
|
|
|
def test_synonyms_extensions(start_cluster):
    """Check ``synonyms`` for the 'en' and 'ru' extensions.

    Each query runs with ``allow_experimental_nlp_functions`` set to 1 and
    its raw output (an array literal plus trailing newline) is compared to
    the expected string.
    """
    english_cases = (
        (
            "SELECT synonyms('en', 'crucial')",
            "['important','big','critical','crucial','essential']\n",
        ),
        (
            "SELECT synonyms('en', 'cheerful')",
            "['happy','cheerful','delighted','ecstatic']\n",
        ),
        (
            "SELECT synonyms('en', 'yet')",
            "['however','nonetheless','but','yet']\n",
        ),
        (
            "SELECT synonyms('en', 'quiz')",
            "['quiz','query','check','exam']\n",
        ),
    )
    russian_cases = (
        (
            "SELECT synonyms('ru', 'главный')",
            "['важный','большой','высокий','хороший','главный']\n",
        ),
        (
            "SELECT synonyms('ru', 'веселый')",
            "['веселый','счастливый','живой','яркий','смешной']\n",
        ),
        (
            "SELECT synonyms('ru', 'правда')",
            "['хотя','однако','но','правда']\n",
        ),
        (
            "SELECT synonyms('ru', 'экзамен')",
            "['экзамен','испытание','проверка']\n",
        ),
    )

    # English queries run first, then Russian, in the original order.
    for sql, expected in english_cases + russian_cases:
        actual = instance.query(
            sql, settings={"allow_experimental_nlp_functions": 1}
        )
        assert actual == expected
|