integration tests for StorageDictionary

This commit is contained in:
Nikolai Kochetov 2017-06-07 19:43:07 +03:00
parent 04e1a23d1a
commit af661f9be1
6 changed files with 451 additions and 0 deletions

View File

@ -0,0 +1,30 @@
<?xml version="1.0"?>
<!-- ClickHouse server configuration added for the StorageDictionary integration tests. -->
<yandex>
<!-- Trace-level logging into the files below. NOTE(review): size/count presumably control log rotation; confirm. -->
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
<size>1000M</size>
<count>10</count>
</logger>
<!-- Same port (9000) that the generated dictionary sources use to connect to localhost. -->
<tcp_port>9000</tcp_port>
<listen_host>127.0.0.1</listen_host>
<!-- Client-side SSL: no certificate verification, invalid certificates are accepted. -->
<openSSL>
<client>
<cacheSessions>true</cacheSessions>
<verificationMode>none</verificationMode>
<invalidCertificateHandler>
<name>AcceptCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
<max_concurrent_queries>500</max_concurrent_queries>
<!-- 5368709120 = 5 * 1024^3. -->
<mark_cache_size>5368709120</mark_cache_size>
<path>./clickhouse/</path>
<users_config>users.xml</users_config>
<!-- Every XML file matching this glob is read as a dictionary definition. -->
<dictionaries_config>/etc/clickhouse-server/config.d/*.xml</dictionaries_config>
</yandex>

View File

@ -0,0 +1,4 @@
0 0 0 0 50 13874 980694578 980694579 50 13874 980694578 980694579 0 0 4761183170873013810 2007-12-27 1970-01-02 06:51:14 0
1 1 1 1 48 57392 4083802160 4083802161 48 -8144 -211165136 -211165135 1.5 1.5 10577349846663553072 2037-06-02 1970-01-02 09:50:24 0
2 2 2 2 69 35909 1447922757 1447922758 69 -29627 1447922757 1447922758 3 3 18198135717204167749 1978-08-08 1970-01-02 03:52:21 1
3 3 3 3 250 1274 1029309690 1029309691 -6 1274 1029309690 1029309691 4.5 4.5 9624464864560415994 1973-06-28 1970-01-02 03:21:14 2
1 0 0 0 0 50 13874 980694578 980694579 50 13874 980694578 980694579 0 0 4761183170873013810 2007-12-27 1970-01-02 06:51:14 0
2 1 1 1 1 48 57392 4083802160 4083802161 48 -8144 -211165136 -211165135 1.5 1.5 10577349846663553072 2037-06-02 1970-01-02 09:50:24 0
3 2 2 2 2 69 35909 1447922757 1447922758 69 -29627 1447922757 1447922758 3 3 18198135717204167749 1978-08-08 1970-01-02 03:52:21 1
4 3 3 3 3 250 1274 1029309690 1029309691 -6 1274 1029309690 1029309691 4.5 4.5 9624464864560415994 1973-06-28 1970-01-02 03:21:14 2

View File

@ -0,0 +1,23 @@
<?xml version="1.0"?>
<!-- User configuration for the test server: a single "default" user with the "default" profile and quota. -->
<yandex>
<!-- The "default" profile carries no setting overrides. -->
<profiles>
<default>
</default>
</profiles>
<users>
<default>
<!-- Empty password; the generated dictionary sources connect as this user with an empty password. -->
<password></password>
<!-- ::/0 matches any address. -->
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
</default>
</users>
<!-- The "default" quota defines no limits here. -->
<quotas>
<default>
</default>
</quotas>
</yandex>

View File

@ -0,0 +1,265 @@
import os
import difflib
# Source data file names.
# NOTE(review): not referenced anywhere else in the visible code -- confirm
# whether this list is still needed.
files = [
    'key_simple.tsv',
    'key_complex_integers.tsv',
    'key_complex_mixed.tsv',
]

# Types of the generated dictionary attributes.  The two lists of defaults
# below are positional: entry i belongs to types[i].
types = (
    ['UInt8', 'UInt16', 'UInt32', 'UInt64'] +
    ['Int8', 'Int16', 'Int32', 'Int64'] +
    ['Float32', 'Float64'] +
    ['String'] +
    ['Date', 'DateTime']
)

# Defaults spelled as quoted literals for the string and date types.
explicit_defaults = (
    ['42'] * 4 +
    ['-42'] * 4 +
    ['1.5', '1.6'] +
    ["'explicit-default'"] +
    ["'2015-01-01'", "'2015-01-01 00:00:00'"]
)

# Unquoted defaults; generate_dictionaries() writes them into <null_value>.
implicit_defaults = (
    ['1'] * 4 +
    ['-1'] * 4 +
    ['2.71828'] * 2 +
    ['implicit-default'] +
    ['2015-11-25', '2015-11-25 00:00:00']
)
def generate_structure():
    """Describe the dictionaries under test.

    Returns:
        A fresh list of ``[name, key_type, has_parent]`` lists.  ``key_type``
        is 0 for a simple key, 1 for a complex (UInt8, UInt8) key and 2 for a
        complex (String, UInt8) key.
    """
    entries = (
        # Simple key dictionaries
        ('clickhouse_flat', 0, True),
        ('clickhouse_hashed', 0, True),
        ('clickhouse_cache', 0, True),
        # Complex key dictionaries with (UInt8, UInt8) key
        ('clickhouse_complex_integers_key_hashed', 1, False),
        ('clickhouse_complex_integers_key_cache', 1, False),
        # Complex key dictionaries with (String, UInt8) key
        ('clickhouse_complex_mixed_key_hashed', 2, False),
        ('clickhouse_complex_mixed_key_cache', 2, False),
    )
    return [list(entry) for entry in entries]
def generate_dictionaries(path, structure):
    """Write one dictionary XML config per entry of ``structure`` into ``path``.

    Every dictionary gets one attribute per entry of the module-level
    ``types`` list (named ``<Type>_``, with the matching ``implicit_defaults``
    entry as its null value), a ClickHouse source pointing at
    ``test.dictionary_source`` on localhost:9000 and, when requested, a
    hierarchical ``Parent`` attribute.

    Args:
        path: Directory the ``dictionary_<name>.xml`` files are written to.
            It is not created here, so it has to exist already.
        structure: Iterable of ``[name, key_idx, has_parent]`` entries, as
            returned by generate_structure().  ``key_idx`` selects the key
            definition: 0 simple ``id``, 1 (UInt8, UInt8), 2 (String, UInt8).

    Returns:
        List of the written file paths, in the order of ``structure``.
    """
    dictionary_skeleton = '''
<dictionaries>
<dictionary>
<name>{name}</name>
<source>
{source}
</source>
<lifetime>
<min>0</min>
<max>0</max>
</lifetime>
<layout>
{layout}
</layout>
<structure>
{key}
%s
{parent}
</structure>
</dictionary>
</dictionaries>'''
    attribute_skeleton = '''
<attribute>
<name>%s_</name>
<type>%s</type>
<null_value>%s</null_value>
</attribute>
'''
    # Fill the single %s slot with all attributes first; the {placeholders}
    # are resolved per dictionary further down.  A plain join replaces the
    # Python-2-only reduce() / tuple-unpacking lambda and yields the same text.
    attributes_xml = ''.join(
        attribute_skeleton % (type_name, type_name, default)
        for type_name, default in zip(types, implicit_defaults))
    dictionary_skeleton = dictionary_skeleton % attributes_xml

    source_clickhouse = '''
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>test</db>
<table>dictionary_source</table>
</clickhouse>
'''
    layout_flat = '<flat />'
    layout_hashed = '<hashed />'
    layout_cache = '<cache><size_in_cells>128</size_in_cells></cache>'
    layout_complex_key_hashed = '<complex_key_hashed />'
    layout_complex_key_cache = '<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>'

    key_simple = '''
<id>
<name>id</name>
</id>
'''
    key_complex_integers = '''
<key>
<attribute>
<name>key0</name>
<type>UInt8</type>
</attribute>
<attribute>
<name>key1</name>
<type>UInt8</type>
</attribute>
</key>
'''
    key_complex_mixed = '''
<key>
<attribute>
<name>key0_str</name>
<type>String</type>
</attribute>
<attribute>
<name>key1</name>
<type>UInt8</type>
</attribute>
</key>
'''
    # Indexed by the key_idx field of a structure entry.
    keys = [key_simple, key_complex_integers, key_complex_mixed]

    parent_attribute = '''
<attribute>
<name>Parent</name>
<type>UInt64</type>
<hierarchical>true</hierarchical>
<null_value>0</null_value>
</attribute>
'''

    # Paired positionally with ``structure``: entry i here configures
    # structure[i], so the order must match generate_structure().
    sources_and_layouts = [
        # Simple key dictionaries
        [source_clickhouse, layout_flat],
        [source_clickhouse, layout_hashed],
        [source_clickhouse, layout_cache],
        # Complex key dictionaries with (UInt8, UInt8) key
        [source_clickhouse, layout_complex_key_hashed],
        [source_clickhouse, layout_complex_key_cache],
        # Complex key dictionaries with (String, UInt8) key
        [source_clickhouse, layout_complex_key_hashed],
        [source_clickhouse, layout_complex_key_cache],
    ]

    file_names = []
    # zip() stops at the shorter sequence, so a shorter ``structure`` simply
    # uses the leading sources/layouts.
    for (name, key_idx, has_parent), (source, layout) in zip(structure, sources_and_layouts):
        filename = os.path.join(path, 'dictionary_%s.xml' % name)
        file_names.append(filename)
        # Explicit keywords instead of **locals(): the template only needs
        # these five values.
        dictionary_xml = dictionary_skeleton.format(
            name=name,
            source=source,
            layout=layout,
            key=keys[key_idx],
            parent=parent_attribute if has_parent else '')
        with open(filename, 'w') as xml_file:
            xml_file.write(dictionary_xml)

    return file_names
class DictionaryTestTable(object):
    """Reference copy of the dictionary source table used to verify query results.

    The table has four key columns (``id``, ``key0``, ``key0_str``, ``key1``)
    followed by the value columns (one per attribute type plus ``Parent``).
    ``rows`` holds the source rows as tuples of strings once
    create_clickhouse_source() has been called, and is ``None`` before that.
    """

    def __init__(self, source_file_name):
        """Remember the source file and parse the column layout.

        Args:
            source_file_name: Path of the tab-separated file whose columns
                follow ``self.structure``; it is only read by
                create_clickhouse_source().
        """
        self.structure = '''id UInt64, key0 UInt8, key0_str String, key1 UInt8,
UInt8_ UInt8, UInt16_ UInt16, UInt32_ UInt32, UInt64_ UInt64,
Int8_ Int8, Int16_ Int16, Int32_ Int32, Int64_ Int64,
Float32_ Float32, Float64_ Float64,
String_ String,
Date_ Date, DateTime_ DateTime, Parent UInt64'''

        # A real list (not map()) because it is sliced right below; map()
        # returns a non-subscriptable iterator on Python 3.
        self.names_and_types = [column.split() for column in self.structure.split(',')]
        self.keys_names_and_types = self.names_and_types[:4]
        self.values_names_and_types = self.names_and_types[4:]
        self.source_file_name = source_file_name
        self.rows = None

    def create_clickhouse_source(self, instance):
        """Create ``test.dictionary_source`` on ``instance`` and fill it from the source file.

        Also stores the parsed rows in ``self.rows`` for later comparisons.

        Args:
            instance: Object with a ``query(sql)`` method that executes the
                given SQL text.
        """
        query = '''
create database if not exists test;
drop table if exists test.dictionary_source;
create table test.dictionary_source (%s) engine=Log; insert into test.dictionary_source values %s ;
'''
        quoted_types = ('String', 'Date', 'DateTime')
        column_types = [column_type for _, column_type in self.names_and_types]

        with open(self.source_file_name) as source_file:
            # Skip empty lines (e.g. the one after a trailing newline).
            lines = [line for line in source_file.read().split('\n') if line]

        self.rows = [tuple(line.split('\t')) for line in lines]

        def wrap_value(value, column_type):
            # Values are taken verbatim from the fixture; no escaping is done.
            return "'" + value + "'" if column_type in quoted_types else value

        values = ','.join(
            '(' + ','.join(wrap_value(value, column_type)
                           for value, column_type in zip(row, column_types)) + ')'
            for row in self.rows)
        instance.query(query % (self.structure, values))

    def get_structure_for_keys(self, keys, enable_parent=True):
        """Return the ``name type`` column list for a table with the given key columns.

        Args:
            keys: Names of the key columns to include.
            enable_parent: When false, the ``Parent`` column is left out.

        Returns:
            The selected key columns followed by the value columns, as text
            usable inside ``CREATE TABLE (...)``.
        """
        key_columns = ','.join(
            name + ' ' + column_type
            for name, column_type in self.keys_names_and_types if name in keys)
        value_columns = ','.join(
            name + ' ' + column_type
            for name, column_type in self.values_names_and_types
            if enable_parent or name != 'Parent')
        return key_columns + ', ' + value_columns

    def _build_line_from_row(self, row, names):
        """Join with tabs the values of ``row`` whose column name is in ``names``.

        Values keep the column order of ``self.structure``, not the order of
        ``names``.
        """
        wanted = set(names)  # built once instead of once per column
        return '\t'.join(
            value for value, (name, _) in zip(row, self.names_and_types) if name in wanted)

    def compare_rows_by_keys(self, keys, values, lines, add_not_found_rows=True):
        """Compare received TSV ``lines`` against the stored source rows.

        Args:
            keys: Names of the key columns; each line starts with their values.
            values: Names of the value columns that follow the keys in each line.
            lines: Received rows as tab-separated strings; a trailing newline
                is ignored.
            add_not_found_rows: When true, source rows missing from ``lines``
                count as differences.

        Returns:
            ``None`` when nothing differs, otherwise ``(received, expected)``:
            two equally long tuples of lines with the mismatches first and the
            matching rows after them.  An empty string marks a row that exists
            on one side only.
        """
        expected = {self._build_line_from_row(row, keys): self._build_line_from_row(row, values)
                    for row in self.rows}

        diff = []
        matched = []
        for line in lines:
            fields = line.rstrip('\n').split('\t')
            key = '\t'.join(fields[:len(keys)])
            value = '\t'.join(fields[len(keys):])

            if key not in expected:
                # Unknown key, or a key that was already consumed by an
                # earlier received row.
                diff.append((key + '\t' + value, ''))
                continue

            pattern_value = expected.pop(key)
            pair = (key + '\t' + value, key + '\t' + pattern_value)
            if value == pattern_value:
                matched.append(pair)
            else:
                diff.append(pair)

        if add_not_found_rows:
            for key, value in expected.items():
                diff.append(('', key + '\t' + value))

        if not diff:
            return None

        # Matching rows are appended so the rendered diff shows full context.
        diff += matched
        left_lines = tuple(pair[0] for pair in diff)
        right_lines = tuple(pair[1] for pair in diff)
        return left_lines, right_lines

    def compare_by_keys(self, keys, lines, with_parent_column=True, add_not_found_rows=True):
        """Compare ``lines`` against the source rows over all value columns.

        Same contract as compare_rows_by_keys(); ``with_parent_column``
        controls whether ``Parent`` is among the compared value columns.
        """
        values = [name for name, _ in self.values_names_and_types
                  if with_parent_column or name != 'Parent']
        return self.compare_rows_by_keys(keys, values, lines, add_not_found_rows)

    def process_diff(self, diff):
        """Render a comparison result as a context diff.

        Args:
            diff: ``None`` or the ``(received, expected)`` pair returned by
                the compare methods.

        Returns:
            An empty string for a falsy ``diff``, otherwise the context diff
            (received vs. expected) as one newline-joined string.
        """
        if not diff:
            return ''
        left_lines, right_lines = diff
        return '\n'.join(difflib.context_diff(
            left_lines, right_lines, fromfile='received', tofile='expected', lineterm=''))

View File

@ -0,0 +1,129 @@
import time
import datetime
import pytest
import os
import sys
import difflib
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
from generate_dictionaries import generate_structure, generate_dictionaries, DictionaryTestTable
# Directory containing this test module (symlinks resolved).
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

# Shared test state; assigned in setup_module().
cluster = instance = test_table = None
def setup_module(module):
    """Generate the dictionary configs and build the shared module state.

    Assigns the module-level ``cluster``, ``instance`` and ``test_table``.
    The cluster is only constructed here, not started.
    """
    global cluster, instance, test_table

    configs_dir = os.path.join(SCRIPT_DIR, 'configs')
    dictionaries_dir = os.path.join(SCRIPT_DIR, 'configs/dictionaries')
    source_file = os.path.join(SCRIPT_DIR, 'configs/dictionaries/source.tsv')

    # The generated XML files are handed to the instance as extra configs.
    dictionary_files = generate_dictionaries(dictionaries_dir, generate_structure())

    cluster = ClickHouseCluster(__file__, base_configs_dir=configs_dir)
    instance = cluster.add_instance('instance', dictionary_files)
    test_table = DictionaryTestTable(source_file)
@pytest.fixture(scope="module")
def started_cluster():
    """Start the cluster once per module, load the source table, and shut down afterwards.

    Yields:
        The started module-level ``cluster``.
    """
    try:
        cluster.start()
        test_table.create_clickhouse_source(instance)
        # Diagnostic output only: list the dictionaries known to the server.
        for line in TSV(instance.query('select name from system.dictionaries')).lines:
            # Parenthesised single-argument print is valid on Python 2 and 3;
            # the newline is stripped so each name ends up on its own line.
            print(line.rstrip('\n'))

        yield cluster

    finally:
        # Runs even when start-up or table creation failed.
        cluster.shutdown()
# One (name, keys, use_parent) entry per dictionary under test.
_ALL_DICTIONARY_PARAMS = [
    ('clickhouse_cache', ('id',), True),
    ('clickhouse_hashed', ('id',), True),
    ('clickhouse_flat', ('id',), True),
    ('clickhouse_complex_integers_key_hashed', ('key0', 'key1'), False),
    ('clickhouse_complex_integers_key_cache', ('key0', 'key1'), False),
    ('clickhouse_complex_mixed_key_hashed', ('key0_str', 'key1'), False),
    ('clickhouse_complex_mixed_key_cache', ('key0_str', 'key1'), False),
]


@pytest.fixture(
    params=_ALL_DICTIONARY_PARAMS,
    # The test id is the dictionary name.
    ids=list(param[0] for param in _ALL_DICTIONARY_PARAMS),
)
def dictionary_structure(started_cluster, request):
    """Parametrized fixture yielding ``(name, keys, use_parent)`` for every dictionary.

    Depends on ``started_cluster`` so the cluster is running before use.
    """
    return request.param
def test_select_all(dictionary_structure):
    """Create a Dictionary-engine table over the dictionary and check ``select *``.

    Every returned row has to match the source row with the same key.  Source
    rows that are not returned are tolerated (``add_not_found_rows=False``).
    NOTE(review): presumably because cache layouts only expose keys that were
    already loaded -- confirm.
    """
    name, keys, use_parent = dictionary_structure
    query = instance.query

    structure = test_table.get_structure_for_keys(keys, use_parent)
    query('\nDROP TABLE IF EXISTS test.{0}\n'.format(name))

    # The output of CREATE TABLE is not needed, so it is not parsed.
    query("CREATE TABLE test.{0} ({1}) engine = Dictionary({0})".format(name, structure))

    result = TSV(query('select * from test.{0}'.format(name)))

    diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=False)
    # Parenthesised single-argument print is valid on Python 2 and 3.
    print(test_table.process_diff(diff))
    assert not diff
# One (name, keys, use_parent) entry per cache-layout dictionary.
_CACHED_DICTIONARY_PARAMS = [
    ('clickhouse_cache', ('id',), True),
    ('clickhouse_complex_integers_key_cache', ('key0', 'key1'), False),
    ('clickhouse_complex_mixed_key_cache', ('key0_str', 'key1'), False),
]


@pytest.fixture(
    params=_CACHED_DICTIONARY_PARAMS,
    # The test id is the dictionary name.
    ids=list(param[0] for param in _CACHED_DICTIONARY_PARAMS),
)
def cached_dictionary_structure(started_cluster, request):
    """Parametrized fixture yielding ``(name, keys, use_parent)`` for the cache dictionaries.

    Depends on ``started_cluster`` so the cluster is running before use.
    """
    return request.param
def _dict_get_key_expression(keys, index):
    """Build the key argument of a dictGet* call from ``index`` for the given key columns."""
    if len(keys) == 1:
        return 'toUInt64(' + str(index) + ')'
    # Key columns whose name ends with 'str' take a quoted string literal.
    parts = ["'" + str(index) + "'" if key_name.endswith('str') else str(index)
             for key_name in keys]
    return '(' + ','.join(parts) + ')'


def test_select_from_cached(cached_dictionary_structure):
    """Check ``select *`` on a cache dictionary while its keys are loaded one by one.

    Before each dictGet call the table may only contain rows that match the
    source (missing rows are tolerated).  After keys 0..3 have all been
    requested, the table has to contain every source row.
    """
    name, keys, use_parent = cached_dictionary_structure
    query = instance.query

    structure = test_table.get_structure_for_keys(keys, use_parent)
    query('\nDROP TABLE IF EXISTS test.{0}\n'.format(name))

    create_query = "CREATE TABLE test.{0} ({1}) engine = Dictionary({0})".format(name, structure)
    TSV(query(create_query))

    for i in range(4):
        result = TSV(query('select * from test.{0}'.format(name)))
        diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=False)
        # Parenthesised single-argument print is valid on Python 2 and 3.
        print(test_table.process_diff(diff))
        assert not diff

        # Request key i through dictGet so that it gets loaded into the cache.
        key_expression = _dict_get_key_expression(keys, i)
        query("select dictGetUInt8('{0}', 'UInt8_', {1})".format(name, key_expression))

    # All four keys were requested: now missing rows count as failures too.
    result = TSV(query('select * from test.{0}'.format(name)))
    diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=True)
    print(test_table.process_diff(diff))
    assert not diff