mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-24 02:30:51 +00:00
Merge pull request #1702 from yandex/CLICKHOUSE-3521
Dictionary(range_hashed) fix
This commit is contained in:
commit
1d36d61f8b
@ -54,7 +54,7 @@ Block createSampleBlock(const DictionaryStructure & dict_struct)
|
|||||||
|
|
||||||
if (dict_struct.id)
|
if (dict_struct.id)
|
||||||
block.insert(ColumnWithTypeAndName{
|
block.insert(ColumnWithTypeAndName{
|
||||||
ColumnUInt64::create(1), std::make_shared<DataTypeUInt64>(), dict_struct.id->name});
|
ColumnUInt64::create(1, 0), std::make_shared<DataTypeUInt64>(), dict_struct.id->name});
|
||||||
|
|
||||||
if (dict_struct.key)
|
if (dict_struct.key)
|
||||||
{
|
{
|
||||||
@ -70,7 +70,7 @@ Block createSampleBlock(const DictionaryStructure & dict_struct)
|
|||||||
if (dict_struct.range_min)
|
if (dict_struct.range_min)
|
||||||
for (const auto & attribute : { dict_struct.range_min, dict_struct.range_max })
|
for (const auto & attribute : { dict_struct.range_min, dict_struct.range_max })
|
||||||
block.insert(ColumnWithTypeAndName{
|
block.insert(ColumnWithTypeAndName{
|
||||||
ColumnUInt16::create(1), std::make_shared<DataTypeDate>(), attribute->name});
|
ColumnUInt16::create(1, 0), std::make_shared<DataTypeDate>(), attribute->name});
|
||||||
|
|
||||||
for (const auto & attribute : dict_struct.attributes)
|
for (const auto & attribute : dict_struct.attributes)
|
||||||
{
|
{
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
|
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
|
||||||
#include <Dictionaries/DictionaryStructure.h>
|
#include <Dictionaries/DictionaryStructure.h>
|
||||||
#include <Dictionaries/IDictionary.h>
|
#include <Dictionaries/IDictionary.h>
|
||||||
|
#include <Dictionaries/RangeHashedDictionary.h>
|
||||||
#include <ext/range.h>
|
#include <ext/range.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -57,6 +58,9 @@ private:
|
|||||||
Block fillBlock(const PaddedPODArray<Key> & ids,
|
Block fillBlock(const PaddedPODArray<Key> & ids,
|
||||||
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
|
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
|
||||||
|
|
||||||
|
PaddedPODArray<UInt16> makeDateKey(
|
||||||
|
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
|
||||||
|
|
||||||
DictionatyPtr dictionary;
|
DictionatyPtr dictionary;
|
||||||
Names column_names;
|
Names column_names;
|
||||||
PaddedPODArray<Key> ids;
|
PaddedPODArray<Key> ids;
|
||||||
@ -88,8 +92,8 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t star
|
|||||||
for (auto idx : ext::range(start, start + length))
|
for (auto idx : ext::range(start, start + length))
|
||||||
{
|
{
|
||||||
block_ids.push_back(ids[idx]);
|
block_ids.push_back(ids[idx]);
|
||||||
block_start_dates.push_back(block_start_dates[idx]);
|
block_start_dates.push_back(start_dates[idx]);
|
||||||
block_end_dates.push_back(block_end_dates[idx]);
|
block_end_dates.push_back(end_dates[idx]);
|
||||||
}
|
}
|
||||||
|
|
||||||
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
||||||
@ -144,6 +148,23 @@ void RangeDictionaryBlockInputStream<DictionaryType, Key>::addSpecialColumn(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename DictionaryType, typename Key>
|
||||||
|
PaddedPODArray<UInt16> RangeDictionaryBlockInputStream<DictionaryType, Key>::makeDateKey(
|
||||||
|
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const
|
||||||
|
{
|
||||||
|
PaddedPODArray<UInt16> key(start_dates.size());
|
||||||
|
for (size_t i = 0; i < key.size(); ++i)
|
||||||
|
{
|
||||||
|
if (RangeHashedDictionary::Range::isCorrectDate(start_dates[i]))
|
||||||
|
key[i] = start_dates[i];
|
||||||
|
else
|
||||||
|
key[i] = end_dates[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return key;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template <typename DictionaryType, typename Key>
|
template <typename DictionaryType, typename Key>
|
||||||
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||||
const PaddedPODArray<Key>& ids,
|
const PaddedPODArray<Key>& ids,
|
||||||
@ -158,6 +179,8 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
|||||||
addSpecialColumn(structure.range_min, std::make_shared<DataTypeDate>(), "Range Start", names, start_dates, columns);
|
addSpecialColumn(structure.range_min, std::make_shared<DataTypeDate>(), "Range Start", names, start_dates, columns);
|
||||||
addSpecialColumn(structure.range_max, std::make_shared<DataTypeDate>(), "Range End", names, end_dates, columns);
|
addSpecialColumn(structure.range_max, std::make_shared<DataTypeDate>(), "Range End", names, end_dates, columns);
|
||||||
|
|
||||||
|
auto date_key = makeDateKey(start_dates, end_dates);
|
||||||
|
|
||||||
for (const auto idx : ext::range(0, structure.attributes.size()))
|
for (const auto idx : ext::range(0, structure.attributes.size()))
|
||||||
{
|
{
|
||||||
const DictionaryAttribute& attribute = structure.attributes[idx];
|
const DictionaryAttribute& attribute = structure.attributes[idx];
|
||||||
@ -165,7 +188,7 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
|||||||
{
|
{
|
||||||
ColumnPtr column;
|
ColumnPtr column;
|
||||||
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE)\
|
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE)\
|
||||||
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids, start_dates, attribute, *dictionary)
|
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids, date_key, attribute, *dictionary)
|
||||||
switch (attribute.underlying_type)
|
switch (attribute.underlying_type)
|
||||||
{
|
{
|
||||||
case AttributeUnderlyingType::UInt8:
|
case AttributeUnderlyingType::UInt8:
|
||||||
@ -202,7 +225,7 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
|||||||
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
||||||
break;
|
break;
|
||||||
case AttributeUnderlyingType::String:
|
case AttributeUnderlyingType::String:
|
||||||
column = getColumnFromAttributeString(ids, start_dates, attribute, *dictionary);
|
column = getColumnFromAttributeString(ids, date_key, attribute, *dictionary);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -391,10 +391,11 @@ void RangeHashedDictionary::getIdsAndDates(const Attribute& attribute, PaddedPOD
|
|||||||
start_dates.reserve(attr.size());
|
start_dates.reserve(attr.size());
|
||||||
end_dates.reserve(attr.size());
|
end_dates.reserve(attr.size());
|
||||||
|
|
||||||
for (const auto & key : attr) {
|
for (const auto & key : attr)
|
||||||
ids.push_back(key.first);
|
{
|
||||||
for (const auto & value : key.second)
|
for (const auto & value : key.second)
|
||||||
{
|
{
|
||||||
|
ids.push_back(key.first);
|
||||||
start_dates.push_back(value.range.first);
|
start_dates.push_back(value.range.first);
|
||||||
end_dates.push_back(value.range.second);
|
end_dates.push_back(value.range.second);
|
||||||
}
|
}
|
||||||
|
@ -82,11 +82,12 @@ public:
|
|||||||
|
|
||||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||||
|
|
||||||
private:
|
|
||||||
struct Range : std::pair<UInt16, UInt16>
|
struct Range : std::pair<UInt16, UInt16>
|
||||||
{
|
{
|
||||||
using std::pair<UInt16, UInt16>::pair;
|
using std::pair<UInt16, UInt16>::pair;
|
||||||
|
|
||||||
|
static bool isCorrectDate(const UInt16 date) { return 0 < date && date <= DATE_LUT_MAX_DAY_NUM; }
|
||||||
|
|
||||||
bool contains(const UInt16 date) const
|
bool contains(const UInt16 date) const
|
||||||
{
|
{
|
||||||
const auto & left = first;
|
const auto & left = first;
|
||||||
@ -95,8 +96,8 @@ private:
|
|||||||
if (left <= date && date <= right)
|
if (left <= date && date <= right)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
const auto has_left_bound = 0 < left && left <= DATE_LUT_MAX_DAY_NUM;
|
const auto has_left_bound = isCorrectDate(left);
|
||||||
const auto has_right_bound = 0 < right && right <= DATE_LUT_MAX_DAY_NUM;
|
const auto has_right_bound = isCorrectDate(right);
|
||||||
|
|
||||||
if ((!has_left_bound || left <= date) && (!has_right_bound || date <= right))
|
if ((!has_left_bound || left <= date) && (!has_right_bound || date <= right))
|
||||||
return true;
|
return true;
|
||||||
@ -105,6 +106,7 @@ private:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct Value final
|
struct Value final
|
||||||
{
|
{
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <Parsers/ASTCreateQuery.h>
|
#include <Parsers/ASTCreateQuery.h>
|
||||||
#include <DataTypes/DataTypesNumber.h>
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
|
#include <DataTypes/DataTypeDate.h>
|
||||||
#include <Dictionaries/IDictionarySource.h>
|
#include <Dictionaries/IDictionarySource.h>
|
||||||
#include <Dictionaries/DictionaryStructure.h>
|
#include <Dictionaries/DictionaryStructure.h>
|
||||||
#include <Dictionaries/CacheDictionary.h>
|
#include <Dictionaries/CacheDictionary.h>
|
||||||
@ -87,9 +88,9 @@ NamesAndTypesList StorageDictionary::getNamesAndTypes(const DictionaryStructure
|
|||||||
if (dictionary_structure.id)
|
if (dictionary_structure.id)
|
||||||
dictionary_names_and_types.emplace_back(dictionary_structure.id->name, std::make_shared<DataTypeUInt64>());
|
dictionary_names_and_types.emplace_back(dictionary_structure.id->name, std::make_shared<DataTypeUInt64>());
|
||||||
if (dictionary_structure.range_min)
|
if (dictionary_structure.range_min)
|
||||||
dictionary_names_and_types.emplace_back(dictionary_structure.range_min->name, std::make_shared<DataTypeUInt16>());
|
dictionary_names_and_types.emplace_back(dictionary_structure.range_min->name, std::make_shared<DataTypeDate>());
|
||||||
if (dictionary_structure.range_max)
|
if (dictionary_structure.range_max)
|
||||||
dictionary_names_and_types.emplace_back(dictionary_structure.range_max->name, std::make_shared<DataTypeUInt16>());
|
dictionary_names_and_types.emplace_back(dictionary_structure.range_max->name, std::make_shared<DataTypeDate>());
|
||||||
if (dictionary_structure.key)
|
if (dictionary_structure.key)
|
||||||
for (const auto & attribute : *dictionary_structure.key)
|
for (const auto & attribute : *dictionary_structure.key)
|
||||||
dictionary_names_and_types.emplace_back(attribute.name, attribute.type);
|
dictionary_names_and_types.emplace_back(attribute.name, attribute.type);
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
0 0 0 0 50 13874 980694578 980694579 50 13874 980694578 980694579 0 0 4761183170873013810 2007-12-27 1970-01-02 06:51:14 0
|
0 0 0 0 0000-00-00 0000-00-00 50 13874 980694578 980694579 50 13874 980694578 980694579 0 0 4761183170873013810 2007-12-27 1970-01-02 06:51:14 0
|
||||||
1 1 1 1 48 57392 4083802160 4083802161 48 -8144 -211165136 -211165135 1.5 1.5 10577349846663553072 2037-06-02 1970-01-02 09:50:24 0
|
1 1 1 1 1970-01-02 0000-00-00 48 57392 4083802160 4083802161 48 -8144 -211165136 -211165135 1.5 1.5 10577349846663553072 2037-06-02 1970-01-02 09:50:24 0
|
||||||
2 2 2 2 69 35909 1447922757 1447922758 69 -29627 1447922757 1447922758 3 3 18198135717204167749 1978-08-08 1970-01-02 03:52:21 1
|
2 2 2 2 0000-00-00 1970-01-02 69 35909 1447922757 1447922758 69 -29627 1447922757 1447922758 3 3 18198135717204167749 1978-08-08 1970-01-02 03:52:21 1
|
||||||
3 3 3 3 250 1274 1029309690 1029309691 -6 1274 1029309690 1029309691 4.5 4.5 9624464864560415994 1973-06-28 1970-01-02 03:21:14 2
|
3 3 3 3 1990-01-03 1990-01-03 250 1274 1029309690 1029309691 -6 1274 1029309690 1029309691 4.5 4.5 9624464864560415994 1973-06-28 1970-01-02 03:21:14 2
|
|
@ -43,6 +43,9 @@ def generate_structure():
|
|||||||
# Complex key dictionaries with (String, UInt8) key
|
# Complex key dictionaries with (String, UInt8) key
|
||||||
['clickhouse_complex_mixed_key_hashed', 2, False],
|
['clickhouse_complex_mixed_key_hashed', 2, False],
|
||||||
['clickhouse_complex_mixed_key_cache', 2, False],
|
['clickhouse_complex_mixed_key_cache', 2, False],
|
||||||
|
|
||||||
|
# Range hashed dictionary
|
||||||
|
['clickhouse_range_hashed', 3, False],
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -102,6 +105,7 @@ def generate_dictionaries(path, structure):
|
|||||||
layout_cache = '<cache><size_in_cells>128</size_in_cells></cache>'
|
layout_cache = '<cache><size_in_cells>128</size_in_cells></cache>'
|
||||||
layout_complex_key_hashed = '<complex_key_hashed />'
|
layout_complex_key_hashed = '<complex_key_hashed />'
|
||||||
layout_complex_key_cache = '<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>'
|
layout_complex_key_cache = '<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>'
|
||||||
|
layout_range_hashed = '<range_hashed />'
|
||||||
|
|
||||||
key_simple = '''
|
key_simple = '''
|
||||||
<id>
|
<id>
|
||||||
@ -135,7 +139,19 @@ def generate_dictionaries(path, structure):
|
|||||||
</key>
|
</key>
|
||||||
'''
|
'''
|
||||||
|
|
||||||
keys = [key_simple, key_complex_integers, key_complex_mixed]
|
key_range_hashed = '''
|
||||||
|
<id>
|
||||||
|
<name>id</name>
|
||||||
|
</id>
|
||||||
|
<range_min>
|
||||||
|
<name>StartDate</name>
|
||||||
|
</range_min>
|
||||||
|
<range_max>
|
||||||
|
<name>EndDate</name>
|
||||||
|
</range_max>
|
||||||
|
'''
|
||||||
|
|
||||||
|
keys = [key_simple, key_complex_integers, key_complex_mixed, key_range_hashed]
|
||||||
|
|
||||||
parent_attribute = '''
|
parent_attribute = '''
|
||||||
<attribute>
|
<attribute>
|
||||||
@ -158,6 +174,9 @@ def generate_dictionaries(path, structure):
|
|||||||
# Complex key dictionaries with (String, UInt8) key
|
# Complex key dictionaries with (String, UInt8) key
|
||||||
[source_clickhouse, layout_complex_key_hashed],
|
[source_clickhouse, layout_complex_key_hashed],
|
||||||
[source_clickhouse, layout_complex_key_cache],
|
[source_clickhouse, layout_complex_key_cache],
|
||||||
|
|
||||||
|
# Range hashed dictionary
|
||||||
|
[source_clickhouse, layout_range_hashed],
|
||||||
]
|
]
|
||||||
|
|
||||||
file_names = []
|
file_names = []
|
||||||
@ -176,6 +195,7 @@ def generate_dictionaries(path, structure):
|
|||||||
class DictionaryTestTable:
|
class DictionaryTestTable:
|
||||||
def __init__(self, source_file_name):
|
def __init__(self, source_file_name):
|
||||||
self.structure = '''id UInt64, key0 UInt8, key0_str String, key1 UInt8,
|
self.structure = '''id UInt64, key0 UInt8, key0_str String, key1 UInt8,
|
||||||
|
StartDate Date, EndDate Date,
|
||||||
UInt8_ UInt8, UInt16_ UInt16, UInt32_ UInt32, UInt64_ UInt64,
|
UInt8_ UInt8, UInt16_ UInt16, UInt32_ UInt32, UInt64_ UInt64,
|
||||||
Int8_ Int8, Int16_ Int16, Int32_ Int32, Int64_ Int64,
|
Int8_ Int8, Int16_ Int16, Int32_ Int32, Int64_ Int64,
|
||||||
Float32_ Float32, Float64_ Float64,
|
Float32_ Float32, Float64_ Float64,
|
||||||
@ -183,8 +203,8 @@ class DictionaryTestTable:
|
|||||||
Date_ Date, DateTime_ DateTime, Parent UInt64'''
|
Date_ Date, DateTime_ DateTime, Parent UInt64'''
|
||||||
|
|
||||||
self.names_and_types = map(str.split, self.structure.split(','))
|
self.names_and_types = map(str.split, self.structure.split(','))
|
||||||
self.keys_names_and_types = self.names_and_types[:4]
|
self.keys_names_and_types = self.names_and_types[:6]
|
||||||
self.values_names_and_types = self.names_and_types[4:]
|
self.values_names_and_types = self.names_and_types[6:]
|
||||||
self.source_file_name = source_file_name
|
self.source_file_name = source_file_name
|
||||||
self.rows = None
|
self.rows = None
|
||||||
|
|
||||||
@ -213,6 +233,7 @@ class DictionaryTestTable:
|
|||||||
return '(' + ','.join(map(wrap_value, zip(row, types))) + ')'
|
return '(' + ','.join(map(wrap_value, zip(row, types))) + ')'
|
||||||
|
|
||||||
values = ','.join(map(make_tuple, lines))
|
values = ','.join(map(make_tuple, lines))
|
||||||
|
print query % (self.structure, values)
|
||||||
instance.query(query % (self.structure, values))
|
instance.query(query % (self.structure, values))
|
||||||
|
|
||||||
def get_structure_for_keys(self, keys, enable_parent=True):
|
def get_structure_for_keys(self, keys, enable_parent=True):
|
||||||
|
@ -45,10 +45,12 @@ def started_cluster():
|
|||||||
('clickhouse_flat', ('id',), True),
|
('clickhouse_flat', ('id',), True),
|
||||||
('clickhouse_complex_integers_key_hashed', ('key0', 'key1'), False),
|
('clickhouse_complex_integers_key_hashed', ('key0', 'key1'), False),
|
||||||
('clickhouse_complex_mixed_key_hashed', ('key0_str', 'key1'), False),
|
('clickhouse_complex_mixed_key_hashed', ('key0_str', 'key1'), False),
|
||||||
|
('clickhouse_range_hashed', ('id', 'StartDate', 'EndDate'), False),
|
||||||
],
|
],
|
||||||
ids=['clickhouse_hashed', 'clickhouse_flat',
|
ids=['clickhouse_hashed', 'clickhouse_flat',
|
||||||
'clickhouse_complex_integers_key_hashed',
|
'clickhouse_complex_integers_key_hashed',
|
||||||
'clickhouse_complex_mixed_key_hashed']
|
'clickhouse_complex_mixed_key_hashed',
|
||||||
|
'clickhouse_range_hashed']
|
||||||
)
|
)
|
||||||
def dictionary_structure(started_cluster, request):
|
def dictionary_structure(started_cluster, request):
|
||||||
return request.param
|
return request.param
|
||||||
|
Loading…
Reference in New Issue
Block a user