"""Helpers that generate dictionary config files and compare query output.

The module offers two things:

* ``generate_structure`` / ``generate_dictionaries`` write one
  ``dictionary_<name>.xml`` file per dictionary description.
* ``DictionaryTestTable`` loads a TSV file into ``test.dictionary_source``
  and diffs received rows against the loaded rows.
"""

import difflib
import os
from functools import reduce  # noqa: F401  (kept: may be used outside this view)

files = ["key_simple.tsv", "key_complex_integers.tsv", "key_complex_mixed.tsv"]

# Attribute types; each one becomes an attribute named "<type>_".
types = [
    "UInt8",
    "UInt16",
    "UInt32",
    "UInt64",
    "Int8",
    "Int16",
    "Int32",
    "Int64",
    "Float32",
    "Float64",
    "String",
    "Date",
    "DateTime",
]

# Default value per attribute, positionally aligned with ``types``.
implicit_defaults = [
    "1",
    "1",
    "1",
    "",
    "-1",
    "-1",
    "-1",
    "-1",
    "2.71828",
    "2.71828",
    "implicit-default",
    "2015-11-25",
    "",
]


def generate_structure():
    """Return the dictionary descriptions as ``[name, key_type, has_parent]``.

    ``key_type`` is an index into the key templates defined inside
    ``generate_dictionaries``; ``has_parent`` tells whether the ``Parent``
    attribute is emitted for that dictionary.
    """
    # [ name, key_type, has_parent ]
    return [
        # Simple key dictionaries
        ["clickhouse_flat", 0, True],
        ["clickhouse_hashed", 0, True],
        ["clickhouse_cache", 0, True],
        # Complex key dictionaries with (UInt8, UInt8) key
        ["clickhouse_complex_integers_key_hashed", 1, False],
        ["clickhouse_complex_integers_key_cache", 1, False],
        # Complex key dictionaries with (String, UInt8) key
        ["clickhouse_complex_mixed_key_hashed", 2, False],
        ["clickhouse_complex_mixed_key_cache", 2, False],
        # Range hashed dictionary
        ["clickhouse_range_hashed", 3, False],
    ]


def generate_dictionaries(path, structure):
    """Write one ``dictionary_<name>.xml`` file per entry of *structure*.

    Args:
        path: Directory in which the files are created.
        structure: Sequence of ``[name, key_type, has_parent]`` entries, as
            returned by ``generate_structure``. Entries are paired
            positionally with the built-in source/layout table, so extra
            entries on either side are ignored by ``zip``.

    Returns:
        The list of file names that were written, in order.
    """
    # NOTE(review): the templates below carry no markup in this copy of the
    # file (layouts are "" / "128"); presumably XML tags were lost somewhere
    # upstream of this review -- TODO confirm against the canonical source.
    dictionary_skeleton = """ {name} {source} 0 0 {layout} {key} %s {parent} """

    attribute_skeleton = """ %s_ %s %s """

    # Expand the single %s placeholder with one attribute entry per type;
    # the {..} placeholders are left for str.format below.
    attributes = "".join(
        attribute_skeleton % (type_name, type_name, default)
        for type_name, default in zip(types, implicit_defaults)
    )
    dictionary_skeleton = dictionary_skeleton % attributes

    source_clickhouse = """ localhost 9000 default test dictionary_source
"""

    layout_flat = ""
    layout_hashed = ""
    layout_cache = "128"
    layout_complex_key_hashed = ""
    layout_complex_key_cache = "128"
    layout_range_hashed = ""

    key_simple = """ id """
    key_complex_integers = """ key0 UInt8 key1 UInt8 """
    key_complex_mixed = """ key0_str String key1 UInt8 """
    key_range_hashed = """ id StartDate EndDate """

    # Indexed by the ``key_type`` field of each structure entry.
    keys = [key_simple, key_complex_integers, key_complex_mixed, key_range_hashed]

    parent_attribute = """ Parent UInt64 true 0 """

    # Positionally aligned with the entries of ``generate_structure``.
    sources_and_layouts = [
        # Simple key dictionaries
        [source_clickhouse, layout_flat],
        [source_clickhouse, layout_hashed],
        [source_clickhouse, layout_cache],
        # Complex key dictionaries with (UInt8, UInt8) key
        [source_clickhouse, layout_complex_key_hashed],
        [source_clickhouse, layout_complex_key_cache],
        # Complex key dictionaries with (String, UInt8) key
        [source_clickhouse, layout_complex_key_hashed],
        [source_clickhouse, layout_complex_key_cache],
        # Range hashed dictionary
        [source_clickhouse, layout_range_hashed],
    ]

    file_names = []

    # Generate dictionaries.
    for (name, key_idx, has_parent), (source, layout) in zip(
        structure, sources_and_layouts
    ):
        filename = os.path.join(path, "dictionary_%s.xml" % name)
        file_names.append(filename)
        # Pass the placeholders explicitly instead of via **locals(), so the
        # template cannot silently pick up unrelated local variables.
        dictionary_xml = dictionary_skeleton.format(
            name=name,
            source=source,
            layout=layout,
            key=keys[key_idx],
            parent=parent_attribute if has_parent else "",
        )
        with open(filename, "w") as file:
            file.write(dictionary_xml)

    return file_names


class DictionaryTestTable:
    """Reference table backed by a TSV file, used to check query results.

    The first six columns of ``structure`` are key columns, the rest are
    value columns. ``rows`` stays ``None`` until
    ``create_clickhouse_source`` has loaded the TSV file.
    """

    def __init__(self, source_file_name):
        """Remember the TSV path and derive column metadata.

        Args:
            source_file_name: Path of the tab-separated source file; it is
                only read by ``create_clickhouse_source``.
        """
        self.structure = (
            "id UInt64, key0 UInt8, key0_str String, key1 UInt8, "
            "StartDate Date, EndDate Date, "
            "UInt8_ UInt8, UInt16_ UInt16, UInt32_ UInt32, UInt64_ UInt64, "
            "Int8_ Int8, Int16_ Int16, Int32_ Int32, Int64_ Int64, "
            "Float32_ Float32, Float64_ Float64, "
            "String_ String, Date_ Date, DateTime_ DateTime, "
            "Parent UInt64"
        )

        # Each entry is a [name, type] pair.
        self.names_and_types = list(map(str.split, self.structure.split(",")))
        self.keys_names_and_types = self.names_and_types[:6]
        self.values_names_and_types = self.names_and_types[6:]
        self.source_file_name = source_file_name
        self.rows = None

    def create_clickhouse_source(self, instance):
        """Create and fill ``test.dictionary_source`` from the TSV file.

        Every non-empty line of the file becomes one row; the parsed rows
        are also stored in ``self.rows`` for later comparisons. The final
        query text is printed before it is sent.

        Args:
            instance: Object exposing ``query(sql)``; it receives the whole
                multi-statement script in a single call.
        """
        query = (
            " create database if not exists test;"
            " drop table if exists test.dictionary_source;"
            " create table test.dictionary_source (%s) engine=Log;"
            " insert into test.dictionary_source values %s ; "
        )

        column_types = tuple(pair[1] for pair in self.names_and_types)

        with open(self.source_file_name) as source_file:
            lines = [line for line in source_file.read().split("\n") if line]

        def wrap_value(value, column_type):
            # String-like literals must be quoted in the VALUES list.
            # Values are inserted verbatim (no escaping).
            if column_type in ("String", "Date", "DateTime"):
                return "'" + value + "'"
            return value

        self.rows = []
        value_tuples = []
        for line in lines:
            row = tuple(line.split("\t"))
            self.rows.append(row)
            rendered = ",".join(
                wrap_value(value, column_type)
                for value, column_type in zip(row, column_types)
            )
            value_tuples.append("(" + rendered + ")")

        # Build the statement once and reuse it for logging and execution.
        full_query = query % (self.structure, ",".join(value_tuples))
        print(full_query)
        instance.query(full_query)

    def get_structure_for_keys(self, keys, enable_parent=True):
        """Return a ``name type`` column list for *keys* plus all values.

        Args:
            keys: Names of the key columns to include; they are emitted in
                table order, not in the order given.
            enable_parent: When false, the ``Parent`` column is left out.

        Returns:
            The key columns joined by ``","``, then ``", "``, then the value
            columns joined by ``","``.
        """
        key_columns = ",".join(
            name + " " + column_type
            for name, column_type in self.keys_names_and_types
            if name in keys
        )
        value_columns = ",".join(
            name + " " + column_type
            for name, column_type in self.values_names_and_types
            if enable_parent or name != "Parent"
        )
        return key_columns + ", " + value_columns

    def _build_line_from_row(self, row, names):
        """Join with tabs the cells of *row* whose column name is in *names*.

        Cells are emitted in table column order.
        """
        # Build the lookup set once instead of once per column.
        wanted = set(names)
        return "\t".join(
            value
            for value, (name, _column_type) in zip(row, self.names_and_types)
            if name in wanted
        )

    def compare_rows_by_keys(self, keys, values, lines, add_not_found_rows=True):
        """Compare received *lines* with the loaded rows, matching by key.

        Each received line is split on tabs; the first ``len(keys)`` fields
        form the key and the remainder the value. An expected row is
        consumed by the first received line with its key, so a repeated key
        is reported as unexpected.

        Args:
            keys: Names of the key columns.
            values: Names of the value columns to compare.
            lines: Received tab-separated lines (a trailing newline is
                stripped).
            add_not_found_rows: When true, expected rows that never showed
                up in *lines* are reported too.

        Returns:
            ``None`` when nothing differs, otherwise a pair
            ``(left_lines, right_lines)`` of equally long tuples holding the
            received and expected lines; differing pairs come first,
            followed by the matching ones for context.

        Raises:
            TypeError: If ``self.rows`` is still ``None`` because
                ``create_clickhouse_source`` has not been called.
        """
        rows = [line.rstrip("\n").split("\t") for line in lines]
        diff = []
        matched = []
        lines_map = {
            self._build_line_from_row(row, keys): self._build_line_from_row(row, values)
            for row in self.rows
        }

        key_width = len(keys)
        for row in rows:
            key = "\t".join(row[:key_width])
            value = "\t".join(row[key_width:])
            received = key + "\t" + value

            # Direct dict membership: O(1) instead of scanning a fresh list
            # of all keys for every received row.
            if key not in lines_map:
                diff.append((received, ""))
                continue

            pattern_value = lines_map.pop(key)
            expected = key + "\t" + pattern_value
            if value != pattern_value:
                diff.append((received, expected))
            else:
                matched.append((received, expected))

        if add_not_found_rows:
            # Whatever is left in the map was expected but never received.
            diff.extend(
                ("", key + "\t" + value) for key, value in lines_map.items()
            )

        if not diff:
            return None

        diff += matched
        left_lines = tuple(pair[0] for pair in diff)
        right_lines = tuple(pair[1] for pair in diff)
        return left_lines, right_lines

    def compare_by_keys(
        self, keys, lines, with_parent_column=True, add_not_found_rows=True
    ):
        """Compare *lines* against all value columns, matching by *keys*.

        Args:
            keys: Names of the key columns.
            lines: Received tab-separated lines.
            with_parent_column: When false, ``Parent`` is not compared.
            add_not_found_rows: Forwarded to ``compare_rows_by_keys``.

        Returns:
            Whatever ``compare_rows_by_keys`` returns.
        """
        values = [
            name
            for name, _column_type in self.values_names_and_types
            if with_parent_column or name != "Parent"
        ]
        return self.compare_rows_by_keys(keys, values, lines, add_not_found_rows)

    def process_diff(self, diff):
        """Render a result of ``compare_rows_by_keys`` as a context diff.

        Args:
            diff: ``None``/empty, or a ``(left_lines, right_lines)`` pair.

        Returns:
            ``""`` for a falsy *diff*, otherwise the context diff text with
            ``received`` as the "from" side and ``expected`` as the "to"
            side.
        """
        if not diff:
            return ""
        left_lines, right_lines = diff
        return "\n".join(
            difflib.context_diff(
                left_lines,
                right_lines,
                fromfile="received",
                tofile="expected",
                lineterm="",
            )
        )