mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-24 02:30:51 +00:00
Merge pull request #1702 from yandex/CLICKHOUSE-3521
Dictionary(range_hashed) fix
This commit is contained in:
commit
1d36d61f8b
@ -54,7 +54,7 @@ Block createSampleBlock(const DictionaryStructure & dict_struct)
|
||||
|
||||
if (dict_struct.id)
|
||||
block.insert(ColumnWithTypeAndName{
|
||||
ColumnUInt64::create(1), std::make_shared<DataTypeUInt64>(), dict_struct.id->name});
|
||||
ColumnUInt64::create(1, 0), std::make_shared<DataTypeUInt64>(), dict_struct.id->name});
|
||||
|
||||
if (dict_struct.key)
|
||||
{
|
||||
@ -70,7 +70,7 @@ Block createSampleBlock(const DictionaryStructure & dict_struct)
|
||||
if (dict_struct.range_min)
|
||||
for (const auto & attribute : { dict_struct.range_min, dict_struct.range_max })
|
||||
block.insert(ColumnWithTypeAndName{
|
||||
ColumnUInt16::create(1), std::make_shared<DataTypeDate>(), attribute->name});
|
||||
ColumnUInt16::create(1, 0), std::make_shared<DataTypeDate>(), attribute->name});
|
||||
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
|
||||
#include <Dictionaries/DictionaryStructure.h>
|
||||
#include <Dictionaries/IDictionary.h>
|
||||
#include <Dictionaries/RangeHashedDictionary.h>
|
||||
#include <ext/range.h>
|
||||
|
||||
namespace DB
|
||||
@ -57,6 +58,9 @@ private:
|
||||
Block fillBlock(const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
|
||||
|
||||
PaddedPODArray<UInt16> makeDateKey(
|
||||
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
|
||||
|
||||
DictionatyPtr dictionary;
|
||||
Names column_names;
|
||||
PaddedPODArray<Key> ids;
|
||||
@ -88,8 +92,8 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t star
|
||||
for (auto idx : ext::range(start, start + length))
|
||||
{
|
||||
block_ids.push_back(ids[idx]);
|
||||
block_start_dates.push_back(block_start_dates[idx]);
|
||||
block_end_dates.push_back(block_end_dates[idx]);
|
||||
block_start_dates.push_back(start_dates[idx]);
|
||||
block_end_dates.push_back(end_dates[idx]);
|
||||
}
|
||||
|
||||
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
||||
@ -144,6 +148,23 @@ void RangeDictionaryBlockInputStream<DictionaryType, Key>::addSpecialColumn(
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds one UInt16 date per row, of the same length as `start_dates`.
/// For each row the start date is taken when RangeHashedDictionary::Range::isCorrectDate
/// accepts it; otherwise the end date of the same row is taken instead.
/// NOTE(review): `end_dates` is indexed with the same `i` as `start_dates`, so it is
/// assumed to be at least as long as `start_dates` - confirm against the callers.
template <typename DictionaryType, typename Key>
|
||||
PaddedPODArray<UInt16> RangeDictionaryBlockInputStream<DictionaryType, Key>::makeDateKey(
|
||||
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const
|
||||
{
|
||||
// The result is sized up front from start_dates; every element is assigned in the loop below.
PaddedPODArray<UInt16> key(start_dates.size());
|
||||
for (size_t i = 0; i < key.size(); ++i)
|
||||
{
|
||||
// Prefer the start date when it passes the isCorrectDate check.
if (RangeHashedDictionary::Range::isCorrectDate(start_dates[i]))
|
||||
key[i] = start_dates[i];
|
||||
else
|
||||
// Fall back to the end date of the same row (it is not validated here).
key[i] = end_dates[i];
|
||||
}
|
||||
|
|
||||
return key;
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
const PaddedPODArray<Key>& ids,
|
||||
@ -158,6 +179,8 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
addSpecialColumn(structure.range_min, std::make_shared<DataTypeDate>(), "Range Start", names, start_dates, columns);
|
||||
addSpecialColumn(structure.range_max, std::make_shared<DataTypeDate>(), "Range End", names, end_dates, columns);
|
||||
|
||||
auto date_key = makeDateKey(start_dates, end_dates);
|
||||
|
||||
for (const auto idx : ext::range(0, structure.attributes.size()))
|
||||
{
|
||||
const DictionaryAttribute& attribute = structure.attributes[idx];
|
||||
@ -165,7 +188,7 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
{
|
||||
ColumnPtr column;
|
||||
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE)\
|
||||
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids, start_dates, attribute, *dictionary)
|
||||
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids, date_key, attribute, *dictionary)
|
||||
switch (attribute.underlying_type)
|
||||
{
|
||||
case AttributeUnderlyingType::UInt8:
|
||||
@ -202,7 +225,7 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
||||
break;
|
||||
case AttributeUnderlyingType::String:
|
||||
column = getColumnFromAttributeString(ids, start_dates, attribute, *dictionary);
|
||||
column = getColumnFromAttributeString(ids, date_key, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -391,10 +391,11 @@ void RangeHashedDictionary::getIdsAndDates(const Attribute& attribute, PaddedPOD
|
||||
start_dates.reserve(attr.size());
|
||||
end_dates.reserve(attr.size());
|
||||
|
||||
for (const auto & key : attr) {
|
||||
ids.push_back(key.first);
|
||||
for (const auto & key : attr)
|
||||
{
|
||||
for (const auto & value : key.second)
|
||||
{
|
||||
ids.push_back(key.first);
|
||||
start_dates.push_back(value.range.first);
|
||||
end_dates.push_back(value.range.second);
|
||||
}
|
||||
|
@ -82,11 +82,12 @@ public:
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
struct Range : std::pair<UInt16, UInt16>
|
||||
{
|
||||
using std::pair<UInt16, UInt16>::pair;
|
||||
|
||||
/// Returns true when `date` is strictly greater than 0 and does not exceed DATE_LUT_MAX_DAY_NUM.
static bool isCorrectDate(const UInt16 date) { return 0 < date && date <= DATE_LUT_MAX_DAY_NUM; }
|
||||
|
||||
bool contains(const UInt16 date) const
|
||||
{
|
||||
const auto & left = first;
|
||||
@ -95,8 +96,8 @@ private:
|
||||
if (left <= date && date <= right)
|
||||
return true;
|
||||
|
||||
const auto has_left_bound = 0 < left && left <= DATE_LUT_MAX_DAY_NUM;
|
||||
const auto has_right_bound = 0 < right && right <= DATE_LUT_MAX_DAY_NUM;
|
||||
const auto has_left_bound = isCorrectDate(left);
|
||||
const auto has_right_bound = isCorrectDate(right);
|
||||
|
||||
if ((!has_left_bound || left <= date) && (!has_right_bound || date <= right))
|
||||
return true;
|
||||
@ -105,6 +106,7 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
struct Value final
|
||||
{
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <sstream>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <Dictionaries/IDictionarySource.h>
|
||||
#include <Dictionaries/DictionaryStructure.h>
|
||||
#include <Dictionaries/CacheDictionary.h>
|
||||
@ -87,9 +88,9 @@ NamesAndTypesList StorageDictionary::getNamesAndTypes(const DictionaryStructure
|
||||
if (dictionary_structure.id)
|
||||
dictionary_names_and_types.emplace_back(dictionary_structure.id->name, std::make_shared<DataTypeUInt64>());
|
||||
if (dictionary_structure.range_min)
|
||||
dictionary_names_and_types.emplace_back(dictionary_structure.range_min->name, std::make_shared<DataTypeUInt16>());
|
||||
dictionary_names_and_types.emplace_back(dictionary_structure.range_min->name, std::make_shared<DataTypeDate>());
|
||||
if (dictionary_structure.range_max)
|
||||
dictionary_names_and_types.emplace_back(dictionary_structure.range_max->name, std::make_shared<DataTypeUInt16>());
|
||||
dictionary_names_and_types.emplace_back(dictionary_structure.range_max->name, std::make_shared<DataTypeDate>());
|
||||
if (dictionary_structure.key)
|
||||
for (const auto & attribute : *dictionary_structure.key)
|
||||
dictionary_names_and_types.emplace_back(attribute.name, attribute.type);
|
||||
|
@ -1,4 +1,4 @@
|
||||
0 0 0 0 50 13874 980694578 980694579 50 13874 980694578 980694579 0 0 4761183170873013810 2007-12-27 1970-01-02 06:51:14 0
|
||||
1 1 1 1 48 57392 4083802160 4083802161 48 -8144 -211165136 -211165135 1.5 1.5 10577349846663553072 2037-06-02 1970-01-02 09:50:24 0
|
||||
2 2 2 2 69 35909 1447922757 1447922758 69 -29627 1447922757 1447922758 3 3 18198135717204167749 1978-08-08 1970-01-02 03:52:21 1
|
||||
3 3 3 3 250 1274 1029309690 1029309691 -6 1274 1029309690 1029309691 4.5 4.5 9624464864560415994 1973-06-28 1970-01-02 03:21:14 2
|
||||
0 0 0 0 0000-00-00 0000-00-00 50 13874 980694578 980694579 50 13874 980694578 980694579 0 0 4761183170873013810 2007-12-27 1970-01-02 06:51:14 0
|
||||
1 1 1 1 1970-01-02 0000-00-00 48 57392 4083802160 4083802161 48 -8144 -211165136 -211165135 1.5 1.5 10577349846663553072 2037-06-02 1970-01-02 09:50:24 0
|
||||
2 2 2 2 0000-00-00 1970-01-02 69 35909 1447922757 1447922758 69 -29627 1447922757 1447922758 3 3 18198135717204167749 1978-08-08 1970-01-02 03:52:21 1
|
||||
3 3 3 3 1990-01-03 1990-01-03 250 1274 1029309690 1029309691 -6 1274 1029309690 1029309691 4.5 4.5 9624464864560415994 1973-06-28 1970-01-02 03:21:14 2
|
|
@ -43,6 +43,9 @@ def generate_structure():
|
||||
# Complex key dictionaries with (String, UInt8) key
|
||||
['clickhouse_complex_mixed_key_hashed', 2, False],
|
||||
['clickhouse_complex_mixed_key_cache', 2, False],
|
||||
|
||||
# Range hashed dictionary
|
||||
['clickhouse_range_hashed', 3, False],
|
||||
]
|
||||
|
||||
|
||||
@ -102,6 +105,7 @@ def generate_dictionaries(path, structure):
|
||||
layout_cache = '<cache><size_in_cells>128</size_in_cells></cache>'
|
||||
layout_complex_key_hashed = '<complex_key_hashed />'
|
||||
layout_complex_key_cache = '<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>'
|
||||
layout_range_hashed = '<range_hashed />'
|
||||
|
||||
key_simple = '''
|
||||
<id>
|
||||
@ -135,7 +139,19 @@ def generate_dictionaries(path, structure):
|
||||
</key>
|
||||
'''
|
||||
|
||||
keys = [key_simple, key_complex_integers, key_complex_mixed]
|
||||
key_range_hashed = '''
|
||||
<id>
|
||||
<name>id</name>
|
||||
</id>
|
||||
<range_min>
|
||||
<name>StartDate</name>
|
||||
</range_min>
|
||||
<range_max>
|
||||
<name>EndDate</name>
|
||||
</range_max>
|
||||
'''
|
||||
|
||||
keys = [key_simple, key_complex_integers, key_complex_mixed, key_range_hashed]
|
||||
|
||||
parent_attribute = '''
|
||||
<attribute>
|
||||
@ -158,6 +174,9 @@ def generate_dictionaries(path, structure):
|
||||
# Complex key dictionaries with (String, UInt8) key
|
||||
[source_clickhouse, layout_complex_key_hashed],
|
||||
[source_clickhouse, layout_complex_key_cache],
|
||||
|
||||
# Range hashed dictionary
|
||||
[source_clickhouse, layout_range_hashed],
|
||||
]
|
||||
|
||||
file_names = []
|
||||
@ -176,6 +195,7 @@ def generate_dictionaries(path, structure):
|
||||
class DictionaryTestTable:
|
||||
def __init__(self, source_file_name):
|
||||
self.structure = '''id UInt64, key0 UInt8, key0_str String, key1 UInt8,
|
||||
StartDate Date, EndDate Date,
|
||||
UInt8_ UInt8, UInt16_ UInt16, UInt32_ UInt32, UInt64_ UInt64,
|
||||
Int8_ Int8, Int16_ Int16, Int32_ Int32, Int64_ Int64,
|
||||
Float32_ Float32, Float64_ Float64,
|
||||
@ -183,8 +203,8 @@ class DictionaryTestTable:
|
||||
Date_ Date, DateTime_ DateTime, Parent UInt64'''
|
||||
|
||||
self.names_and_types = map(str.split, self.structure.split(','))
|
||||
self.keys_names_and_types = self.names_and_types[:4]
|
||||
self.values_names_and_types = self.names_and_types[4:]
|
||||
self.keys_names_and_types = self.names_and_types[:6]
|
||||
self.values_names_and_types = self.names_and_types[6:]
|
||||
self.source_file_name = source_file_name
|
||||
self.rows = None
|
||||
|
||||
@ -213,6 +233,7 @@ class DictionaryTestTable:
|
||||
return '(' + ','.join(map(wrap_value, zip(row, types))) + ')'
|
||||
|
||||
values = ','.join(map(make_tuple, lines))
|
||||
print query % (self.structure, values)
|
||||
instance.query(query % (self.structure, values))
|
||||
|
||||
def get_structure_for_keys(self, keys, enable_parent=True):
|
||||
|
@ -45,10 +45,12 @@ def started_cluster():
|
||||
('clickhouse_flat', ('id',), True),
|
||||
('clickhouse_complex_integers_key_hashed', ('key0', 'key1'), False),
|
||||
('clickhouse_complex_mixed_key_hashed', ('key0_str', 'key1'), False),
|
||||
('clickhouse_range_hashed', ('id', 'StartDate', 'EndDate'), False),
|
||||
],
|
||||
ids=['clickhouse_hashed', 'clickhouse_flat',
|
||||
'clickhouse_complex_integers_key_hashed',
|
||||
'clickhouse_complex_mixed_key_hashed']
|
||||
'clickhouse_complex_mixed_key_hashed',
|
||||
'clickhouse_range_hashed']
|
||||
)
|
||||
def dictionary_structure(started_cluster, request):
|
||||
return request.param
|
||||
|
Loading…
Reference in New Issue
Block a user