mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-18 04:12:19 +00:00
added tests
This commit is contained in:
parent
fb217ca916
commit
1d6a01dd6f
0
tests/integration/test_nlp/__init__.py
Normal file
0
tests/integration/test_nlp/__init__.py
Normal file
48
tests/integration/test_nlp/configs/config.xml
Normal file
48
tests/integration/test_nlp/configs/config.xml
Normal file
@ -0,0 +1,48 @@
|
||||
<?xml version="1.0"?>
<!-- Minimal server config for the NLP integration tests (test_nlp/test.py). -->
<yandex>
    <logger>
        <level>trace</level>
        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
        <size>1000M</size>
        <count>10</count>
    </logger>

    <tcp_port>9000</tcp_port>
    <listen_host>127.0.0.1</listen_host>

    <openSSL>
        <client>
            <cacheSessions>true</cacheSessions>
            <!-- Certificates inside the test network are not verified. -->
            <verificationMode>none</verificationMode>
            <invalidCertificateHandler>
                <name>AcceptCertificateHandler</name>
            </invalidCertificateHandler>
        </client>
    </openSSL>

    <max_concurrent_queries>500</max_concurrent_queries>
    <mark_cache_size>5368709120</mark_cache_size>
    <path>./clickhouse/</path>
    <users_config>users.xml</users_config>

    <!-- Synonym dictionaries for synonyms(); the files are copied into the
         container under /etc/clickhouse-server/dictionaries by the test's
         start_cluster fixture. -->
    <synonyms_extensions>
        <extension>
            <name>en</name>
            <type>plain</type>
            <path>/etc/clickhouse-server/dictionaries/ext-en.txt</path>
        </extension>
        <extension>
            <name>ru</name>
            <type>plain</type>
            <path>/etc/clickhouse-server/dictionaries/ext-ru.txt</path>
        </extension>
    </synonyms_extensions>

    <!-- Lemmatizer dictionary for lemmatize(). -->
    <lemmatizers>
        <lemmatizer>
            <lang>en</lang>
            <path>/etc/clickhouse-server/dictionaries/lem-en.bin</path>
        </lemmatizer>
    </lemmatizers>
</yandex>
|
4
tests/integration/test_nlp/dictionaries/ext-en.txt
Normal file
4
tests/integration/test_nlp/dictionaries/ext-en.txt
Normal file
@ -0,0 +1,4 @@
|
||||
important big critical crucial essential
|
||||
happy cheerful delighted ecstatic
|
||||
however nonetheless but yet
|
||||
quiz query check exam
|
4
tests/integration/test_nlp/dictionaries/ext-ru.txt
Normal file
4
tests/integration/test_nlp/dictionaries/ext-ru.txt
Normal file
@ -0,0 +1,4 @@
|
||||
важный большой высокий хороший главный
|
||||
веселый счастливый живой яркий смешной
|
||||
хотя однако но правда
|
||||
экзамен испытание проверка
|
50
tests/integration/test_nlp/test.py
Normal file
50
tests/integration/test_nlp/test.py
Normal file
@ -0,0 +1,50 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
# Make the shared integration-test helpers (tests/integration/helpers)
# importable from this test directory.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Absolute directory of this test; used to locate the local dictionary files.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

from helpers.cluster import ClickHouseCluster
from helpers.test_tools import assert_eq_with_retry


cluster = ClickHouseCluster(__file__)

# Single instance carrying the NLP config (synonyms_extensions + lemmatizers).
instance = cluster.add_instance('instance', main_configs=['configs/config.xml'])
|
||||
|
||||
def copy_file_to_container(local_path, dist_path, container_id):
    """Copy a host path into a running container via ``docker cp``.

    Uses an argument list (no shell) so paths containing spaces or shell
    metacharacters are passed through verbatim, and raises
    ``CalledProcessError`` instead of silently ignoring a failed
    ``docker cp`` (the previous ``os.system`` call discarded the status).
    """
    import subprocess
    subprocess.run(
        ["docker", "cp", local_path, "{}:{}".format(container_id, dist_path)],
        check=True)
|
||||
|
||||
@pytest.fixture(scope="module")
def start_cluster():
    """Start the cluster once per module and always shut it down afterwards."""
    try:
        cluster.start()

        # config.xml references /etc/clickhouse-server/dictionaries/*, so the
        # local dictionary files must be pushed into the container before the
        # NLP functions are queried.
        copy_file_to_container(os.path.join(SCRIPT_DIR, 'dictionaries/.'), '/etc/clickhouse-server/dictionaries', instance.docker_id)

        yield cluster
    finally:
        # Shut down even if startup or the copy above failed.
        cluster.shutdown()
|
||||
|
||||
def test_lemmatize(start_cluster):
    """lemmatize('en', word) reduces an inflected English form to its lemma."""
    # instance.query() returns the raw TSV result, which is terminated by a
    # newline — the expected values must be "lemma\n", not the bare lemma,
    # otherwise every assertion fails unconditionally.
    assert instance.query("SELECT lemmatize('en', 'wolves')") == "wolf\n"
    assert instance.query("SELECT lemmatize('en', 'dogs')") == "dog\n"
    assert instance.query("SELECT lemmatize('en', 'looking')") == "look\n"
    assert instance.query("SELECT lemmatize('en', 'took')") == "take\n"
    assert instance.query("SELECT lemmatize('en', 'imported')") == "import\n"
    assert instance.query("SELECT lemmatize('en', 'tokenized')") == "tokenize\n"
    assert instance.query("SELECT lemmatize('en', 'flown')") == "fly\n"
|
||||
|
||||
def test_synonyms_extensions(start_cluster):
    """synonyms(lang, word) returns the whole synonym row from ext-*.txt."""
    # instance.query() output is newline-terminated TSV, so expected values
    # carry a trailing "\n". Also fixes the 'ru' expectation for 'веселый',
    # which was missing the closing quote on 'смешной' and could never match
    # the dictionary line it was copied from.
    assert instance.query("SELECT synonyms('en', 'crucial')") == "['important','big','critical','crucial','essential']\n"
    assert instance.query("SELECT synonyms('en', 'cheerful')") == "['happy','cheerful','delighted','ecstatic']\n"
    assert instance.query("SELECT synonyms('en', 'yet')") == "['however','nonetheless','but','yet']\n"
    assert instance.query("SELECT synonyms('en', 'quiz')") == "['quiz','query','check','exam']\n"

    assert instance.query("SELECT synonyms('ru', 'главный')") == "['важный','большой','высокий','хороший','главный']\n"
    assert instance.query("SELECT synonyms('ru', 'веселый')") == "['веселый','счастливый','живой','яркий','смешной']\n"
    assert instance.query("SELECT synonyms('ru', 'правда')") == "['хотя','однако','но','правда']\n"
    assert instance.query("SELECT synonyms('ru', 'экзамен')") == "['экзамен','испытание','проверка']\n"
|
||||
|
8
tests/queries/0_stateless/01889_tokenize.reference
Normal file
8
tests/queries/0_stateless/01889_tokenize.reference
Normal file
@ -0,0 +1,8 @@
|
||||
['It','is','quite','a','wonderful','day','isn','t','it']
|
||||
['There','is','so','much','to','learn']
|
||||
['22','00','email','yandex','ru']
|
||||
['Токенизация','каких','либо','других','языков']
|
||||
['It','is','quite','a','wonderful','day,','isn\'t','it?']
|
||||
['There','is....','so','much','to','learn!']
|
||||
['22:00','email@yandex.ru']
|
||||
['Токенизация','каких-либо','других','языков?']
|
9
tests/queries/0_stateless/01889_tokenize.sql
Normal file
9
tests/queries/0_stateless/01889_tokenize.sql
Normal file
@ -0,0 +1,9 @@
|
||||
-- tokenize(): splits on non-alphanumeric characters (per the .reference
-- output, '22:00' becomes ['22','00'] and punctuation is dropped).
SELECT tokenize('It is quite a wonderful day, isn\'t it?');
SELECT tokenize('There is.... so much to learn!');
SELECT tokenize('22:00 email@yandex.ru');
SELECT tokenize('Токенизация каких-либо других языков?');

-- tokenizeWhitespace(): splits on whitespace only, keeping punctuation
-- attached ('22:00' and 'email@yandex.ru' stay intact).
SELECT tokenizeWhitespace('It is quite a wonderful day, isn\'t it?');
SELECT tokenizeWhitespace('There is.... so much to learn!');
SELECT tokenizeWhitespace('22:00 email@yandex.ru');
SELECT tokenizeWhitespace('Токенизация каких-либо других языков?');
|
21
tests/queries/0_stateless/01890_stem.reference
Normal file
21
tests/queries/0_stateless/01890_stem.reference
Normal file
@ -0,0 +1,21 @@
|
||||
given
|
||||
combinatori
|
||||
collect
|
||||
possibl
|
||||
studi
|
||||
commonplac
|
||||
pack
|
||||
комбинаторн
|
||||
получ
|
||||
огранич
|
||||
конечн
|
||||
максимальн
|
||||
суммарн
|
||||
стоимост
|
||||
remplissag
|
||||
valeur
|
||||
maximis
|
||||
dépass
|
||||
intens
|
||||
étudi
|
||||
peuvent
|
23
tests/queries/0_stateless/01890_stem.sql
Normal file
23
tests/queries/0_stateless/01890_stem.sql
Normal file
@ -0,0 +1,23 @@
|
||||
-- stem(lang, word): language-specific stemming; expected stems are in
-- 01890_stem.reference.

-- English
SELECT stem('en', 'given');
SELECT stem('en', 'combinatorial');
SELECT stem('en', 'collection');
SELECT stem('en', 'possibility');
SELECT stem('en', 'studied');
SELECT stem('en', 'commonplace');
SELECT stem('en', 'packing');

-- Russian
SELECT stem('ru', 'комбинаторной');
SELECT stem('ru', 'получила');
SELECT stem('ru', 'ограничена');
SELECT stem('ru', 'конечной');
SELECT stem('ru', 'максимальной');
SELECT stem('ru', 'суммарный');
SELECT stem('ru', 'стоимостью');

-- French
SELECT stem('fr', 'remplissage');
SELECT stem('fr', 'valeur');
SELECT stem('fr', 'maximiser');
SELECT stem('fr', 'dépasser');
SELECT stem('fr', 'intensivement');
SELECT stem('fr', 'étudié');
SELECT stem('fr', 'peuvent');
|
Loading…
Reference in New Issue
Block a user