import os import sys import pytest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', main_configs=['configs/dicts_config.xml']) def copy_file_to_container(local_path, dist_path, container_id): os.system("docker cp {local} {cont_id}:{dist}".format(local=local_path, cont_id=container_id, dist=dist_path)) @pytest.fixture(scope="module") def start_cluster(): try: cluster.start() copy_file_to_container(os.path.join(SCRIPT_DIR, 'dictionaries/.'), '/etc/clickhouse-server/dictionaries', instance.docker_id) yield cluster finally: cluster.shutdown() def test_lemmatize(start_cluster): assert instance.query("SELECT lemmatize('en', 'wolves')", settings={"allow_experimental_nlp_functions": 1}) == "wolf\n" assert instance.query("SELECT lemmatize('en', 'dogs')", settings={"allow_experimental_nlp_functions": 1}) == "dog\n" assert instance.query("SELECT lemmatize('en', 'looking')", settings={"allow_experimental_nlp_functions": 1}) == "look\n" assert instance.query("SELECT lemmatize('en', 'took')", settings={"allow_experimental_nlp_functions": 1}) == "take\n" assert instance.query("SELECT lemmatize('en', 'imported')", settings={"allow_experimental_nlp_functions": 1}) == "import\n" assert instance.query("SELECT lemmatize('en', 'tokenized')", settings={"allow_experimental_nlp_functions": 1}) == "tokenize\n" assert instance.query("SELECT lemmatize('en', 'flown')", settings={"allow_experimental_nlp_functions": 1}) == "fly\n" def test_synonyms_extensions(start_cluster): assert instance.query("SELECT synonyms('en', 'crucial')", settings={"allow_experimental_nlp_functions": 1}) == "['important','big','critical','crucial','essential']\n" assert instance.query("SELECT synonyms('en', 'cheerful')", settings={"allow_experimental_nlp_functions": 1}) == "['happy','cheerful','delighted','ecstatic']\n" assert instance.query("SELECT synonyms('en', 'yet')", settings={"allow_experimental_nlp_functions": 1}) == "['however','nonetheless','but','yet']\n" assert instance.query("SELECT synonyms('en', 'quiz')", settings={"allow_experimental_nlp_functions": 1}) == "['quiz','query','check','exam']\n" assert instance.query("SELECT synonyms('ru', 'главный')", settings={"allow_experimental_nlp_functions": 1}) == "['важный','большой','высокий','хороший','главный']\n" assert instance.query("SELECT synonyms('ru', 'веселый')", settings={"allow_experimental_nlp_functions": 1}) == "['веселый','счастливый','живой','яркий','смешной']\n" assert instance.query("SELECT synonyms('ru', 'правда')", settings={"allow_experimental_nlp_functions": 1}) == "['хотя','однако','но','правда']\n" assert instance.query("SELECT synonyms('ru', 'экзамен')", settings={"allow_experimental_nlp_functions": 1}) == "['экзамен','испытание','проверка']\n"