2017-06-07 16:43:07 +00:00
|
|
|
import difflib
|
2020-09-16 04:26:10 +00:00
|
|
|
import os
|
2020-10-02 16:54:07 +00:00
|
|
|
from functools import reduce
|
2017-06-07 16:43:07 +00:00
|
|
|
|
|
|
|
# TSV fixtures holding the source rows for the generated dictionaries.
files = ["key_simple.tsv", "key_complex_integers.tsv", "key_complex_mixed.tsv"]

# Every ClickHouse attribute type exercised by the generated dictionaries.
types = [
    "UInt8", "UInt16", "UInt32", "UInt64",
    "Int8", "Int16", "Int32", "Int64",
    "Float32", "Float64",
    "String",
    "Date", "DateTime",
]

# <null_value> defaults, index-aligned with `types` above.
implicit_defaults = [
    "1", "1", "1", "",
    "-1", "-1", "-1", "-1",
    "2.71828", "2.71828",
    "implicit-default",
    "2015-11-25", "",
]
|
|
|
|
|
|
|
|
|
|
|
|
def generate_structure():
    """Return the list of dictionaries to generate.

    Each entry is [name, key_type, has_parent], where key_type indexes
    into the `keys` list and has_parent selects the hierarchical
    Parent attribute.
    """
    simple = [
        # Simple key dictionaries
        ["clickhouse_flat", 0, True],
        ["clickhouse_hashed", 0, True],
        ["clickhouse_cache", 0, True],
    ]
    complex_integers = [
        # Complex key dictionaries with (UInt8, UInt8) key
        ["clickhouse_complex_integers_key_hashed", 1, False],
        ["clickhouse_complex_integers_key_cache", 1, False],
    ]
    complex_mixed = [
        # Complex key dictionaries with (String, UInt8) key
        ["clickhouse_complex_mixed_key_hashed", 2, False],
        ["clickhouse_complex_mixed_key_cache", 2, False],
    ]
    ranged = [
        # Range hashed dictionary
        ["clickhouse_range_hashed", 3, False],
    ]
    return simple + complex_integers + complex_mixed + ranged
|
|
|
|
|
|
|
|
|
|
|
|
def generate_dictionaries(path, structure):
    """Write one dictionary XML config per entry of `structure` into `path`.

    `structure` is a list of [name, key_index, has_parent] triples (see
    generate_structure()); entries must line up positionally with
    `sources_and_layouts` below, and key_index selects an entry of `keys`.
    Uses the module-level `types` / `implicit_defaults` lists to emit one
    <attribute> element per tested type.

    Returns the list of file names written.
    """
    dictionary_skeleton = """
<clickhouse>
    <dictionary>
        <name>{name}</name>

        <source>
            {source}
        </source>

        <lifetime>
            <min>0</min>
            <max>0</max>
        </lifetime>

        <layout>
            {layout}
        </layout>

        <structure>
            {key}

            %s

            {parent}
        </structure>
    </dictionary>
</clickhouse>"""
    attribute_skeleton = """
<attribute>
    <name>%s_</name>
    <type>%s</type>
    <null_value>%s</null_value>
</attribute>
"""

    # Expand the %s placeholder into one <attribute> block per
    # (type, implicit default) pair; the {curly} placeholders survive
    # for the per-dictionary .format() below.
    dictionary_skeleton = dictionary_skeleton % reduce(
        lambda xml, type_default: xml
        + attribute_skeleton % (type_default[0], type_default[0], type_default[1]),
        list(zip(types, implicit_defaults)),
        "",
    )

    source_clickhouse = """
<clickhouse>
    <host>localhost</host>
    <port>9000</port>
    <user>default</user>
    <password></password>
    <db>test</db>
    <table>dictionary_source</table>
</clickhouse>
"""

    layout_flat = "<flat />"
    layout_hashed = "<hashed />"
    layout_cache = "<cache><size_in_cells>128</size_in_cells></cache>"
    layout_complex_key_hashed = "<complex_key_hashed />"
    layout_complex_key_cache = (
        "<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>"
    )
    layout_range_hashed = "<range_hashed />"

    key_simple = """
<id>
    <name>id</name>
</id>
"""
    key_complex_integers = """
<key>
    <attribute>
        <name>key0</name>
        <type>UInt8</type>
    </attribute>

    <attribute>
        <name>key1</name>
        <type>UInt8</type>
    </attribute>
</key>
"""
    key_complex_mixed = """
<key>
    <attribute>
        <name>key0_str</name>
        <type>String</type>
    </attribute>

    <attribute>
        <name>key1</name>
        <type>UInt8</type>
    </attribute>
</key>
"""

    key_range_hashed = """
<id>
    <name>id</name>
</id>
<range_min>
    <name>StartDate</name>
</range_min>
<range_max>
    <name>EndDate</name>
</range_max>
"""

    # Indexed by the key_type field of each `structure` entry.
    keys = [key_simple, key_complex_integers, key_complex_mixed, key_range_hashed]

    parent_attribute = """
<attribute>
    <name>Parent</name>
    <type>UInt64</type>
    <hierarchical>true</hierarchical>
    <null_value>0</null_value>
</attribute>
"""

    # Positionally aligned with `structure`.
    sources_and_layouts = [
        # Simple key dictionaries
        [source_clickhouse, layout_flat],
        [source_clickhouse, layout_hashed],
        [source_clickhouse, layout_cache],
        # Complex key dictionaries with (UInt8, UInt8) key
        [source_clickhouse, layout_complex_key_hashed],
        [source_clickhouse, layout_complex_key_cache],
        # Complex key dictionaries with (String, UInt8) key
        [source_clickhouse, layout_complex_key_hashed],
        [source_clickhouse, layout_complex_key_cache],
        # Range hashed dictionary
        [source_clickhouse, layout_range_hashed],
    ]

    file_names = []

    # Generate dictionaries.
    for (name, key_idx, has_parent), (source, layout) in zip(
        structure, sources_and_layouts
    ):
        filename = os.path.join(path, "dictionary_%s.xml" % name)
        file_names.append(filename)
        with open(filename, "w") as file:
            # Explicit keyword arguments instead of the original
            # `**locals()`: passing the whole local namespace to .format()
            # was fragile (any future local shadowing a placeholder would
            # silently change the output) and obscured which placeholders
            # the skeleton actually uses.
            dictionary_xml = dictionary_skeleton.format(
                name=name,
                source=source,
                layout=layout,
                key=keys[key_idx],
                parent=parent_attribute if has_parent else "",
            )
            file.write(dictionary_xml)

    return file_names
|
|
|
|
|
|
|
|
|
|
|
|
class DictionaryTestTable:
|
|
|
|
def __init__(self, source_file_name):
|
|
|
|
self.structure = """id UInt64, key0 UInt8, key0_str String, key1 UInt8,
|
2017-12-25 20:53:57 +00:00
|
|
|
StartDate Date, EndDate Date,
|
2017-06-07 16:43:07 +00:00
|
|
|
UInt8_ UInt8, UInt16_ UInt16, UInt32_ UInt32, UInt64_ UInt64,
|
|
|
|
Int8_ Int8, Int16_ Int16, Int32_ Int32, Int64_ Int64,
|
|
|
|
Float32_ Float32, Float64_ Float64,
|
|
|
|
String_ String,
|
|
|
|
Date_ Date, DateTime_ DateTime, Parent UInt64"""
|
|
|
|
|
2020-10-02 16:54:07 +00:00
|
|
|
self.names_and_types = list(map(str.split, self.structure.split(",")))
|
2017-12-25 20:53:57 +00:00
|
|
|
self.keys_names_and_types = self.names_and_types[:6]
|
|
|
|
self.values_names_and_types = self.names_and_types[6:]
|
2017-06-07 16:43:07 +00:00
|
|
|
self.source_file_name = source_file_name
|
|
|
|
self.rows = None
|
|
|
|
|
|
|
|
def create_clickhouse_source(self, instance):
|
|
|
|
query = """
|
|
|
|
create database if not exists test;
|
|
|
|
drop table if exists test.dictionary_source;
|
|
|
|
create table test.dictionary_source (%s) engine=Log; insert into test.dictionary_source values %s ;
|
2022-03-22 16:39:58 +00:00
|
|
|
"""
|
2017-06-07 16:43:07 +00:00
|
|
|
|
|
|
|
types = tuple(pair[1] for pair in self.names_and_types)
|
|
|
|
|
|
|
|
with open(self.source_file_name) as source_file:
|
|
|
|
lines = source_file.read().split("\n")
|
|
|
|
lines = tuple(filter(len, lines))
|
|
|
|
|
|
|
|
self.rows = []
|
|
|
|
|
|
|
|
def wrap_value(pair):
|
|
|
|
value, type = pair
|
|
|
|
return (
|
|
|
|
"'" + value + "'" if type in ("String", "Date", "DateTime") else value
|
2022-03-22 16:39:58 +00:00
|
|
|
)
|
2017-06-07 16:43:07 +00:00
|
|
|
|
|
|
|
def make_tuple(line):
|
|
|
|
row = tuple(line.split("\t"))
|
|
|
|
self.rows.append(row)
|
2020-10-02 16:54:07 +00:00
|
|
|
return "(" + ",".join(map(wrap_value, list(zip(row, types)))) + ")"
|
2017-06-07 16:43:07 +00:00
|
|
|
|
|
|
|
values = ",".join(map(make_tuple, lines))
|
2020-10-02 16:54:07 +00:00
|
|
|
print(query % (self.structure, values))
|
2017-06-07 16:43:07 +00:00
|
|
|
instance.query(query % (self.structure, values))
|
|
|
|
|
|
|
|
def get_structure_for_keys(self, keys, enable_parent=True):
|
|
|
|
structure = ",".join(
|
|
|
|
name + " " + type
|
|
|
|
for name, type in self.keys_names_and_types
|
|
|
|
if name in keys
|
|
|
|
)
|
|
|
|
return (
|
|
|
|
structure
|
|
|
|
+ ", "
|
|
|
|
+ ",".join(
|
|
|
|
name + " " + type
|
|
|
|
for name, type in self.values_names_and_types
|
|
|
|
if enable_parent or name != "Parent"
|
|
|
|
)
|
2022-03-22 16:39:58 +00:00
|
|
|
)
|
2017-06-07 16:43:07 +00:00
|
|
|
|
|
|
|
def _build_line_from_row(self, row, names):
|
|
|
|
return "\t".join(
|
2022-03-22 16:39:58 +00:00
|
|
|
(
|
2017-06-07 16:43:07 +00:00
|
|
|
value
|
|
|
|
for value, (name, type) in zip(row, self.names_and_types)
|
|
|
|
if name in set(names)
|
2022-03-22 16:39:58 +00:00
|
|
|
)
|
|
|
|
)
|
2017-06-07 16:43:07 +00:00
|
|
|
|
|
|
|
def compare_rows_by_keys(self, keys, values, lines, add_not_found_rows=True):
|
|
|
|
rows = [line.rstrip("\n").split("\t") for line in lines]
|
|
|
|
diff = []
|
|
|
|
matched = []
|
|
|
|
lines_map = {
|
|
|
|
self._build_line_from_row(row, keys): self._build_line_from_row(row, values)
|
|
|
|
for row in self.rows
|
|
|
|
}
|
|
|
|
for row in rows:
|
|
|
|
key = "\t".join(row[: len(keys)])
|
|
|
|
value = "\t".join(row[len(keys) :])
|
2020-10-02 16:54:07 +00:00
|
|
|
if key in list(lines_map.keys()):
|
2017-06-07 16:43:07 +00:00
|
|
|
pattern_value = lines_map[key]
|
|
|
|
del lines_map[key]
|
|
|
|
if not value == pattern_value:
|
|
|
|
diff.append((key + "\t" + value, key + "\t" + pattern_value))
|
|
|
|
else:
|
|
|
|
matched.append((key + "\t" + value, key + "\t" + pattern_value))
|
|
|
|
else:
|
|
|
|
diff.append((key + "\t" + value, ""))
|
|
|
|
|
|
|
|
if add_not_found_rows:
|
2020-10-02 16:54:07 +00:00
|
|
|
for key, value in list(lines_map.items()):
|
2017-06-07 16:43:07 +00:00
|
|
|
diff.append(("", key + "\t" + value))
|
|
|
|
|
|
|
|
if not diff:
|
|
|
|
return None
|
|
|
|
|
|
|
|
diff += matched
|
|
|
|
left_lines = tuple(pair[0] for pair in diff)
|
|
|
|
right_lines = tuple(pair[1] for pair in diff)
|
|
|
|
return left_lines, right_lines
|
|
|
|
|
|
|
|
def compare_by_keys(
|
|
|
|
self, keys, lines, with_parent_column=True, add_not_found_rows=True
|
|
|
|
):
|
|
|
|
values = [
|
|
|
|
name
|
|
|
|
for name, type in self.values_names_and_types
|
|
|
|
if with_parent_column or name != "Parent"
|
|
|
|
]
|
|
|
|
return self.compare_rows_by_keys(keys, values, lines, add_not_found_rows)
|
|
|
|
|
|
|
|
def process_diff(self, diff):
|
|
|
|
if not diff:
|
|
|
|
return ""
|
|
|
|
left_lines, right_lines = diff
|
|
|
|
args = {"fromfile": "received", "tofile": "expected", "lineterm": ""}
|
|
|
|
return "\n".join(
|
|
|
|
tuple(difflib.context_diff(left_lines, right_lines, **args))[:]
|
|
|
|
)
|