# Source: ClickHouse/tests/integration/helpers/dictionary.py
# -*- coding: utf-8 -*-
2019-02-21 16:43:21 +00:00
import copy
2019-02-21 12:04:08 +00:00
class Layout(object):
    """A dictionary layout identified by its name.

    The name selects the XML fragment rendered inside ``<layout>`` (see
    ``LAYOUTS_STR_DICT``) and, by prefix, the layout family: names starting
    with ``complex`` are complex-key layouts, names starting with ``range``
    are ranged layouts, and everything else is treated as simple.
    """

    # Layout name -> XML fragment describing that layout.
    LAYOUTS_STR_DICT = {
        "flat": "<flat/>",
        "hashed": "<hashed/>",
        "cache": "<cache><size_in_cells>128</size_in_cells></cache>",
        "ssd_cache": "<ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></ssd_cache>",
        "complex_key_hashed": "<complex_key_hashed/>",
        "complex_key_hashed_one_key": "<complex_key_hashed/>",
        "complex_key_hashed_two_keys": "<complex_key_hashed/>",
        "complex_key_cache": "<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>",
        "complex_key_ssd_cache": "<complex_key_ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></complex_key_ssd_cache>",
        "range_hashed": "<range_hashed/>",
        "direct": "<direct/>",
        "complex_key_direct": "<complex_key_direct/>",
    }

    def __init__(self, name):
        """Classify the layout by the prefix of *name*.

        Exactly one of ``is_complex``, ``is_ranged`` and ``is_simple`` ends
        up True, and ``layout_type`` holds the matching family name.  The
        name is not checked against ``LAYOUTS_STR_DICT`` here; an unknown
        name only fails later, in ``get_str``.
        """
        self.name = name
        self.is_complex = False
        self.is_simple = False
        self.is_ranged = False

        if name.startswith("complex"):
            self.layout_type = "complex"
            self.is_complex = True
        elif name.startswith("range"):
            self.layout_type = "ranged"
            self.is_ranged = True
        else:
            self.layout_type = "simple"
            self.is_simple = True

    def get_str(self):
        """Return the XML fragment for this layout.

        Raises:
            KeyError: if the layout name is not in ``LAYOUTS_STR_DICT``.
        """
        return self.LAYOUTS_STR_DICT[self.name]

    def get_key_block_name(self):
        """Return the structure element holding the key: ``key`` for complex layouts, ``id`` otherwise."""
        if self.is_complex:
            return "key"
        return "id"
2019-02-21 12:04:08 +00:00
class Row(object):
    """One row of dictionary data, stored as a mapping of field name to value."""

    def __init__(self, fields, values):
        """Pair each field with the value at the same position.

        Args:
            fields: iterable of objects exposing a ``name`` attribute.
            values: iterable of values, positionally matching *fields*.
                ``zip`` is used, so surplus items on either side are ignored.
        """
        self.data = {field.name: value for field, value in zip(fields, values)}

    def has_field(self, name):
        """Return True if the row holds a value for the field called *name*."""
        return name in self.data

    def get_value_by_name(self, name):
        """Return the value stored for *name*; raises ``KeyError`` if it is absent."""
        return self.data[name]

    def set_value(self, name, value):
        """Store *value* under *name*, adding the entry if it does not exist yet."""
        self.data[name] = value
2019-02-21 12:04:08 +00:00
class Field(object):
    """Description of a single dictionary field and its XML rendering."""

    def __init__(
        self,
        name,
        field_type,
        is_key=False,
        is_range_key=False,
        default=None,
        hierarchical=False,
        range_hash_type=None,
        default_value_for_get=None,
    ):
        """Store the field description.

        Args:
            name: field name, rendered as ``<name>``.
            field_type: type name, rendered as ``<type>``.
            is_key: whether the field is part of the dictionary key.
            is_range_key: whether the field is the range key.
            default: value rendered as ``<null_value>``; ``None`` renders
                an empty element.
            hierarchical: rendered as ``<hierarchical>true|false``.
            range_hash_type: suffix of the ``<range_...>`` element; a field
                with a non-None value is considered a range field
                (``is_range``).
            default_value_for_get: stored as-is on the instance.
        """
        self.name = name
        self.field_type = field_type
        self.is_key = is_key
        self.default = default
        self.hierarchical = hierarchical
        self.range_hash_type = range_hash_type
        self.is_range = self.range_hash_type is not None
        self.is_range_key = is_range_key
        self.default_value_for_get = default_value_for_get

    def get_attribute_str(self):
        """Return the ``<attribute>`` XML element describing this field."""
        template = (
            "\n"
            "<attribute>\n"
            "<name>{name}</name>\n"
            "<type>{field_type}</type>\n"
            "<null_value>{default}</null_value>\n"
            "<hierarchical>{hierarchical}</hierarchical>\n"
            "</attribute>"
        )
        return template.format(
            name=self.name,
            field_type=self.field_type,
            # Only a missing default renders as empty; a plain truthiness
            # test would also drop valid falsy defaults such as 0 or False.
            default="" if self.default is None else self.default,
            hierarchical="true" if self.hierarchical else "false",
        )

    def get_simple_index_str(self):
        """Return the bare ``<name>`` element used for a simple (non-complex) key."""
        return "<name>{name}</name>".format(name=self.name)

    def get_range_hash_str(self):
        """Return the ``<range_{type}>`` element for a range field.

        Raises:
            Exception: if the field has no ``range_hash_type``.
        """
        if not self.range_hash_type:
            raise Exception("Field {} is not range hashed".format(self.name))
        template = (
            "\n"
            "<range_{type}>\n"
            "<name>{name}</name>\n"
            "</range_{type}>\n"
        )
        return template.format(type=self.range_hash_type, name=self.name)
class DictionaryStructure(object):
    """Fields of a dictionary grouped by role, plus builders for XML and dict* expressions.

    Fields are split into key fields, range fields and ordinary fields.  The
    class renders the ``<layout>``/``<structure>`` part of a dictionary
    config and builds ``dictGet``/``dictGetOrDefault``/``dictHas``/
    ``dictGetHierarchy``/``dictIsIn`` expression strings for a given row.
    """

    def __init__(self, layout, fields):
        """Sort *fields* into roles and validate them against *layout*.

        Args:
            layout: object exposing ``name``, ``is_complex``, ``is_simple``,
                ``is_ranged``, ``get_str()`` and ``get_key_block_name()``.
            fields: iterable of field objects exposing ``name``,
                ``field_type``, ``is_key``, ``is_range``, ``is_range_key``,
                ``hierarchical`` and ``default_value_for_get``.

        Raises:
            Exception: on a duplicate range key, on more than one key field
                for a non-complex layout, or when a ranged layout lacks a
                range key or does not have exactly two range fields.
        """
        self.layout = layout
        self.keys = []
        self.range_key = None
        self.ordinary_fields = []
        self.range_fields = []
        self.has_hierarchy = False

        for field in fields:
            if field.is_key:
                self.keys.append(field)
            elif field.is_range:
                self.range_fields.append(field)
            else:
                self.ordinary_fields.append(field)

            if field.hierarchical:
                self.has_hierarchy = True

            if field.is_range_key:
                if self.range_key is not None:
                    raise Exception("Duplicate range key {}".format(field.name))
                self.range_key = field

        if not self.layout.is_complex and len(self.keys) > 1:
            raise Exception(
                "More than one key {} field in non complex layout {}".format(
                    len(self.keys), self.layout.name
                )
            )

        if self.layout.is_ranged and (
            not self.range_key or len(self.range_fields) != 2
        ):
            raise Exception("Inconsistent configuration of ranged dictionary")

    def get_structure_str(self):
        """Return the ``<layout>`` and ``<structure>`` XML for this dictionary."""
        fields_strs = [field.get_attribute_str() for field in self.ordinary_fields]

        if self.layout.is_complex:
            key_strs = [key_field.get_attribute_str() for key_field in self.keys]
        else:  # same for simple and ranged
            key_strs = [key_field.get_simple_index_str() for key_field in self.keys]

        ranged_strs = []
        if self.layout.is_ranged:
            ranged_strs = [
                range_field.get_range_hash_str() for range_field in self.range_fields
            ]

        template = (
            "\n"
            "<layout>\n"
            "{layout_str}\n"
            "</layout>\n"
            "<structure>\n"
            "<{key_block_name}>\n"
            "{key_str}\n"
            "</{key_block_name}>\n"
            "{range_strs}\n"
            "{attributes_str}\n"
            "</structure>"
        )
        return template.format(
            layout_str=self.layout.get_str(),
            key_block_name=self.layout.get_key_block_name(),
            key_str="\n".join(key_strs),
            attributes_str="\n".join(fields_strs),
            range_strs="\n".join(ranged_strs),
        )

    def get_ordered_names(self):
        """Return field names ordered as keys, then range fields, then ordinary fields."""
        return [field.name for field in self.get_all_fields()]

    def get_all_fields(self):
        """Return a new list of all fields: keys, then range fields, then ordinary fields."""
        return self.keys + self.range_fields + self.ordinary_fields

    @staticmethod
    def _quote_if_str(val):
        """Wrap string values in single quotes; return any other value unchanged."""
        if isinstance(val, str):
            return "'" + val + "'"
        return val

    def _get_dict_get_common_expression(
        self, dict_name, field, row, or_default, with_type, has
    ):
        """Build a single ``dictGet*``/``dictHas`` expression string.

        Args:
            dict_name: dictionary name placed as the first argument.
            field: attribute field to read (ignored in the output for ``has``).
            row: object whose ``data`` maps field names to values; used for
                the key (unless *or_default*) and for the range key.
            or_default: build the ``OrDefault`` variant; the key is then taken
                from each key field's ``default_value_for_get`` and the
                fallback from ``field.default_value_for_get``.
            with_type: insert the field type into the function name.
            has: build ``dictHas`` instead of ``dictGet``.

        Raises:
            Exception: if *field* is a key field, if *or_default* is requested
                for a ranged layout, or if *or_default* is requested for a
                field without ``default_value_for_get``.
        """
        # NOTE(review): unless the field type defines __eq__, this is an
        # identity check, so copies of a key field are not detected.
        if field in self.keys:
            raise Exception(
                "Trying to receive key field {} from dictionary".format(field.name)
            )

        # Reject the unsupported combination before *row* is touched, so the
        # caller gets this error rather than an unrelated lookup failure.
        if or_default and self.layout.is_ranged:
            raise Exception(
                "Can't create 'dictGetOrDefault' query for ranged dictionary"
            )

        if not self.layout.is_complex:
            if not or_default:
                key_expr = ", toUInt64({})".format(row.data[self.keys[0].name])
            else:
                key_expr = ", toUInt64({})".format(self.keys[0].default_value_for_get)
        else:
            key_exprs_strs = []
            for key in self.keys:
                if not or_default:
                    val = row.data[key.name]
                else:
                    val = key.default_value_for_get
                key_exprs_strs.append(
                    "to{type}({value})".format(
                        type=key.field_type, value=self._quote_if_str(val)
                    )
                )
            key_expr = ", tuple(" + ",".join(key_exprs_strs) + ")"

        date_expr = ""
        if self.layout.is_ranged:
            val = self._quote_if_str(row.data[self.range_key.name])
            date_expr = ", " + "to{type}({val})".format(
                type=self.range_key.field_type, val=val
            )

        if or_default:
            or_default_expr = "OrDefault"
            if field.default_value_for_get is None:
                raise Exception(
                    "Can't create 'dictGetOrDefault' query for field {} without default_value_for_get".format(
                        field.name
                    )
                )
            default_value_for_get = ", to{type}({value})".format(
                type=field.field_type,
                value=self._quote_if_str(field.default_value_for_get),
            )
        else:
            or_default_expr = ""
            default_value_for_get = ""

        if with_type:
            field_type = field.field_type
        else:
            field_type = ""

        field_name = ", '" + field.name + "'"
        if has:
            # dictHas takes only the dictionary name and the key, so every
            # other fragment that is actually formatted below is cleared.
            what = "Has"
            field_type = ""
            or_default_expr = ""
            field_name = ""
            date_expr = ""
            default_value_for_get = ""
        else:
            what = "Get"

        return "dict{what}{field_type}{or_default}('{dict_name}'{field_name}{key_expr}{date_expr}{def_for_get})".format(
            what=what,
            field_type=field_type,
            dict_name=dict_name,
            field_name=field_name,
            key_expr=key_expr,
            date_expr=date_expr,
            or_default=or_default_expr,
            def_for_get=default_value_for_get,
        )

    def get_get_expressions(self, dict_name, field, row):
        """Return the untyped and the typed ``dictGet`` expression for *field* in *row*."""
        return [
            self._get_dict_get_common_expression(
                dict_name, field, row, or_default=False, with_type=False, has=False
            ),
            self._get_dict_get_common_expression(
                dict_name, field, row, or_default=False, with_type=True, has=False
            ),
        ]

    def get_get_or_default_expressions(self, dict_name, field, row):
        """Return untyped and typed ``dictGet...OrDefault`` expressions; empty for ranged layouts."""
        if not self.layout.is_ranged:
            return [
                self._get_dict_get_common_expression(
                    dict_name, field, row, or_default=True, with_type=False, has=False
                ),
                self._get_dict_get_common_expression(
                    dict_name, field, row, or_default=True, with_type=True, has=False
                ),
            ]
        return []

    def get_has_expressions(self, dict_name, field, row):
        """Return the ``dictHas`` expression for the key of *row*; empty for ranged layouts."""
        if not self.layout.is_ranged:
            return [
                self._get_dict_get_common_expression(
                    dict_name, field, row, or_default=False, with_type=False, has=True
                )
            ]
        return []

    def get_hierarchical_expressions(self, dict_name, row):
        """Return the ``dictGetHierarchy`` expression for *row*; empty unless the layout is simple."""
        if self.layout.is_simple:
            key_expr = "toUInt64({})".format(row.data[self.keys[0].name])
            return [
                "dictGetHierarchy('{dict_name}', {key})".format(
                    dict_name=dict_name,
                    key=key_expr,
                ),
            ]
        return []

    def get_is_in_expressions(self, dict_name, row, parent_row):
        """Return the ``dictIsIn`` expression for *row* and *parent_row*; empty unless the layout is simple."""
        if self.layout.is_simple:
            child_key_expr = "toUInt64({})".format(row.data[self.keys[0].name])
            parent_key_expr = "toUInt64({})".format(parent_row.data[self.keys[0].name])
            return [
                "dictIsIn('{dict_name}', {child_key}, {parent_key})".format(
                    dict_name=dict_name,
                    child_key=child_key_expr,
                    parent_key=parent_key_expr,
                )
            ]
        return []
2019-02-21 12:04:08 +00:00
class Dictionary(object):
    """A named dictionary: its structure, its source, and the config file describing it."""

    def __init__(
        self,
        name,
        structure,
        source,
        config_path,
        table_name,
        fields,
        min_lifetime=3,
        max_lifetime=5,
    ):
        """Store the dictionary description.

        Args:
            name: dictionary name, used in the config and in generated queries.
            structure: structure object; a deep copy is kept.
            source: source object; a deep copy is kept.
            config_path: path of the XML config written by ``generate_config``.
            table_name: table name handed to the source.
            fields: field list returned unchanged by ``get_fields``.
            min_lifetime: value rendered as ``<lifetime><min>``.
            max_lifetime: value rendered as ``<lifetime><max>``.
        """
        self.name = name
        self.structure = copy.deepcopy(structure)
        self.source = copy.deepcopy(source)
        self.config_path = config_path
        self.table_name = table_name
        self.fields = fields
        self.min_lifetime = min_lifetime
        self.max_lifetime = max_lifetime

    def generate_config(self):
        """Write the dictionary XML config to ``config_path``.

        A ``<lifetime>`` block is emitted unless the layout's XML fragment
        contains the substring ``direct``.  The config is rendered completely
        before the file is opened, so a failure while rendering does not leave
        a truncated file behind.
        """
        lifetime = ""
        if "direct" not in self.structure.layout.get_str():
            lifetime = (
                "<lifetime>\n"
                "<min>{min_lifetime}</min>\n"
                "<max>{max_lifetime}</max>\n"
                "</lifetime>\n"
            ).format(
                min_lifetime=self.min_lifetime,
                max_lifetime=self.max_lifetime,
            )

        template = (
            "\n"
            "<clickhouse>\n"
            "<dictionary>\n"
            "{lifetime}"
            "<name>{name}</name>\n"
            "{structure}\n"
            "<source>\n"
            "{source}\n"
            "</source>\n"
            "</dictionary>\n"
            "</clickhouse>\n"
        )
        config = template.format(
            lifetime=lifetime,
            name=self.name,
            structure=self.structure.get_structure_str(),
            source=self.source.get_source_str(self.table_name),
        )

        with open(self.config_path, "w") as result:
            result.write(config)

    def prepare_source(self, cluster):
        """Ask the source to prepare itself for this structure and table on *cluster*."""
        self.source.prepare(self.structure, self.table_name, cluster)

    def load_data(self, data):
        """Load *data* into the source table.

        Raises:
            Exception: if the source reports that it is not prepared.
        """
        if not self.source.prepared:
            raise Exception(
                "Cannot load data for dictionary {}, source is not prepared".format(
                    self.name
                )
            )
        self.source.load_data(data, self.table_name)

    def get_select_get_queries(self, field, row):
        """Return ``select`` queries wrapping the structure's get expressions."""
        return [
            "select {}".format(expr)
            for expr in self.structure.get_get_expressions(self.name, field, row)
        ]

    def get_select_get_or_default_queries(self, field, row):
        """Return ``select`` queries wrapping the structure's get-or-default expressions."""
        return [
            "select {}".format(expr)
            for expr in self.structure.get_get_or_default_expressions(
                self.name, field, row
            )
        ]

    def get_select_has_queries(self, field, row):
        """Return ``select`` queries wrapping the structure's has expressions."""
        return [
            "select {}".format(expr)
            for expr in self.structure.get_has_expressions(self.name, field, row)
        ]

    def get_hierarchical_queries(self, row):
        """Return ``select`` queries wrapping the structure's hierarchy expressions."""
        return [
            "select {}".format(expr)
            for expr in self.structure.get_hierarchical_expressions(self.name, row)
        ]

    def get_is_in_queries(self, row, parent_row):
        """Return ``select`` queries wrapping the structure's is-in expressions."""
        return [
            "select {}".format(expr)
            for expr in self.structure.get_is_in_expressions(self.name, row, parent_row)
        ]

    def is_complex(self):
        """Return whether the structure's layout is a complex-key layout."""
        return self.structure.layout.is_complex

    def get_fields(self):
        """Return the field list passed to the constructor."""
        return self.fields