added tests

This commit is contained in:
Nikolay Degterinsky 2021-06-05 02:52:18 +00:00
parent fb217ca916
commit 1d6a01dd6f
9 changed files with 167 additions and 0 deletions

View File

View File

@ -0,0 +1,48 @@
<?xml version="1.0"?>
<!-- Server configuration passed to the test instance via main_configs=['configs/config.xml']. -->
<yandex>
<!-- Logging: trace level, with separate main and error log files (size 1000M, count 10). -->
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
<size>1000M</size>
<count>10</count>
</logger>
<!-- Native TCP port, listening on the loopback address only. -->
<tcp_port>9000</tcp_port>
<listen_host>127.0.0.1</listen_host>
<!-- OpenSSL client settings: verification mode is "none" and invalid certificates are accepted. -->
<openSSL>
<client>
<cacheSessions>true</cacheSessions>
<verificationMode>none</verificationMode>
<invalidCertificateHandler>
<name>AcceptCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
<max_concurrent_queries>500</max_concurrent_queries>
<!-- 5368709120 = 5 * 1024^3 -->
<mark_cache_size>5368709120</mark_cache_size>
<path>./clickhouse/</path>
<users_config>users.xml</users_config>
<!-- Synonym extensions used by the synonyms() test queries: one plain-type extension per
     language name ("en", "ru"). The files live under /etc/clickhouse-server/dictionaries,
     the directory the test fixture copies its local dictionaries/ folder into. -->
<synonyms_extensions>
<extension>
<name>en</name>
<type>plain</type>
<path>/etc/clickhouse-server/dictionaries/ext-en.txt</path>
</extension>
<extension>
<name>ru</name>
<type>plain</type>
<path>/etc/clickhouse-server/dictionaries/ext-ru.txt</path>
</extension>
</synonyms_extensions>
<!-- Lemmatizer for language "en", used by the lemmatize('en', ...) test queries.
     NOTE(review): lem-en.bin is not among the files visible in this view; confirm it is
     shipped in the dictionaries/ folder. -->
<lemmatizers>
<lemmatizer>
<lang>en</lang>
<path>/etc/clickhouse-server/dictionaries/lem-en.bin</path>
</lemmatizer>
</lemmatizers>
</yandex>

View File

@ -0,0 +1,4 @@
important big critical crucial essential
happy cheerful delighted ecstatic
however nonetheless but yet
quiz query check exam

View File

@ -0,0 +1,4 @@
важный большой высокий хороший главный
веселый счастливый живой яркий смешной
хотя однако но правда
экзамен испытание проверка

View File

@ -0,0 +1,50 @@
import os
import sys
import pytest
# Put the directory two levels above this file at the front of sys.path
# (presumably so that the `helpers` package imported below resolves; verify against the test layout).
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Directory containing this test file; used to locate the local dictionaries/ folder.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import assert_eq_with_retry
# One cluster with a single instance named 'instance', configured from configs/config.xml.
cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('instance', main_configs=['configs/config.xml'])
def copy_file_to_container(local_path, dist_path, container_id):
    """Copy a local file or directory into a Docker container using ``docker cp``.

    Args:
        local_path: Source path on the host.
        dist_path: Destination path inside the container.
        container_id: ID (or name) of the target container.

    Raises:
        subprocess.CalledProcessError: if ``docker cp`` exits with a non-zero status.
    """
    # Imported locally so this function does not depend on the module's import block.
    import subprocess

    # An argument list (no shell) keeps paths with spaces or shell metacharacters intact,
    # and check_call surfaces a failed copy instead of silently continuing.
    destination = "{cont_id}:{dist}".format(cont_id=container_id, dist=dist_path)
    subprocess.check_call(["docker", "cp", local_path, destination])
@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture: start the cluster, install the dictionaries, yield the cluster.

    After the cluster is up, the contents of the local ``dictionaries`` folder are copied
    into ``/etc/clickhouse-server/dictionaries`` inside the instance's container.
    The cluster is shut down on exit, whether or not startup or the tests succeeded.
    """
    try:
        cluster.start()
        # The trailing '/.' is passed through to `docker cp` unchanged.
        dictionaries_source = os.path.join(SCRIPT_DIR, 'dictionaries/.')
        copy_file_to_container(
            dictionaries_source,
            '/etc/clickhouse-server/dictionaries',
            instance.docker_id,
        )
        yield cluster
    finally:
        cluster.shutdown()
def test_lemmatize(start_cluster):
    """Check that ``lemmatize('en', word)`` yields the expected lemma for each sample word."""
    cases = [
        ('wolves', 'wolf'),
        ('dogs', 'dog'),
        ('looking', 'look'),
        ('took', 'take'),
        ('imported', 'import'),
        ('tokenized', 'tokenize'),
        ('flown', 'fly'),
    ]
    for word, expected_lemma in cases:
        output = instance.query("SELECT lemmatize('en', '{}')".format(word))
        # clickhouse-client terminates each result row with a newline, so comparing the raw
        # output to the bare lemma would fail; strip it before comparing.
        # NOTE(review): assumes instance.query returns the client's raw text output.
        assert output.rstrip("\n") == expected_lemma, word
def test_synonyms_extensions(start_cluster):
    """Check that ``synonyms(lang, word)`` returns the whole synonym set containing ``word``.

    Each expected value is one line of the corresponding dictionary file (ext-en.txt or
    ext-ru.txt), rendered as an array literal.
    """
    cases = [
        ('en', 'crucial', "['important','big','critical','crucial','essential']"),
        ('en', 'cheerful', "['happy','cheerful','delighted','ecstatic']"),
        ('en', 'yet', "['however','nonetheless','but','yet']"),
        ('en', 'quiz', "['quiz','query','check','exam']"),
        ('ru', 'главный', "['важный','большой','высокий','хороший','главный']"),
        # The original expected value lacked the closing quote after 'смешной'.
        ('ru', 'веселый', "['веселый','счастливый','живой','яркий','смешной']"),
        ('ru', 'правда', "['хотя','однако','но','правда']"),
        ('ru', 'экзамен', "['экзамен','испытание','проверка']"),
    ]
    for lang, word, expected_synonyms in cases:
        output = instance.query("SELECT synonyms('{}', '{}')".format(lang, word))
        # clickhouse-client terminates each result row with a newline; strip it before comparing.
        # NOTE(review): assumes instance.query returns the client's raw text output.
        assert output.rstrip("\n") == expected_synonyms, (lang, word)

View File

@ -0,0 +1,8 @@
['It','is','quite','a','wonderful','day','isn','t','it']
['There','is','so','much','to','learn']
['22','00','email','yandex','ru']
['Токенизация','каких','либо','других','языков']
['It','is','quite','a','wonderful','day,','isn\'t','it?']
['There','is....','so','much','to','learn!']
['22:00','email@yandex.ru']
['Токенизация','каких-либо','других','языков?']

View File

@ -0,0 +1,9 @@
-- tokenize: per the reference output, punctuation is dropped and acts as a separator
-- (e.g. "isn't" becomes 'isn','t' and "22:00" becomes '22','00').
SELECT tokenize('It is quite a wonderful day, isn\'t it?');
SELECT tokenize('There is.... so much to learn!');
SELECT tokenize('22:00 email@yandex.ru');
SELECT tokenize('Токенизация каких-либо других языков?');
-- tokenizeWhitespace: the same four inputs; per the reference output, the text is split on
-- whitespace only and punctuation stays attached to the tokens.
SELECT tokenizeWhitespace('It is quite a wonderful day, isn\'t it?');
SELECT tokenizeWhitespace('There is.... so much to learn!');
SELECT tokenizeWhitespace('22:00 email@yandex.ru');
SELECT tokenizeWhitespace('Токенизация каких-либо других языков?');

View File

@ -0,0 +1,21 @@
given
combinatori
collect
possibl
studi
commonplac
pack
комбинаторн
получ
огранич
конечн
максимальн
суммарн
стоимост
remplissag
valeur
maximis
dépass
intens
étudi
peuvent

View File

@ -0,0 +1,23 @@
-- stem(language, word): one query per reference line, in the same order as the reference file.
-- English ('en')
SELECT stem('en', 'given');
SELECT stem('en', 'combinatorial');
SELECT stem('en', 'collection');
SELECT stem('en', 'possibility');
SELECT stem('en', 'studied');
SELECT stem('en', 'commonplace');
SELECT stem('en', 'packing');
-- Russian ('ru')
SELECT stem('ru', 'комбинаторной');
SELECT stem('ru', 'получила');
SELECT stem('ru', 'ограничена');
SELECT stem('ru', 'конечной');
SELECT stem('ru', 'максимальной');
SELECT stem('ru', 'суммарный');
SELECT stem('ru', 'стоимостью');
-- French ('fr'), including words with accented characters
SELECT stem('fr', 'remplissage');
SELECT stem('fr', 'valeur');
SELECT stem('fr', 'maximiser');
SELECT stem('fr', 'dépasser');
SELECT stem('fr', 'intensivement');
SELECT stem('fr', 'étudié');
SELECT stem('fr', 'peuvent');