mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
fixed engine Dictionary(range_hashed) for open interval date keys [#CLICKHOUSE-3521]
This commit is contained in:
parent
2ed66515ff
commit
0c082b134d
@ -8,6 +8,7 @@
|
||||
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
|
||||
#include <Dictionaries/DictionaryStructure.h>
|
||||
#include <Dictionaries/IDictionary.h>
|
||||
#include <Dictionaries/RangeHashedDictionary.h>
|
||||
#include <ext/range.h>
|
||||
|
||||
namespace DB
|
||||
@ -57,6 +58,9 @@ private:
|
||||
Block fillBlock(const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
|
||||
|
||||
PaddedPODArray<UInt16> makeDateKey(
|
||||
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
|
||||
|
||||
DictionatyPtr dictionary;
|
||||
Names column_names;
|
||||
PaddedPODArray<Key> ids;
|
||||
@ -88,8 +92,8 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t star
|
||||
for (auto idx : ext::range(start, start + length))
|
||||
{
|
||||
block_ids.push_back(ids[idx]);
|
||||
block_start_dates.push_back(block_start_dates[idx]);
|
||||
block_end_dates.push_back(block_end_dates[idx]);
|
||||
block_start_dates.push_back(start_dates[idx]);
|
||||
block_end_dates.push_back(end_dates[idx]);
|
||||
}
|
||||
|
||||
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
||||
@ -144,6 +148,23 @@ void RangeDictionaryBlockInputStream<DictionaryType, Key>::addSpecialColumn(
|
||||
}
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
PaddedPODArray<UInt16> RangeDictionaryBlockInputStream<DictionaryType, Key>::makeDateKey(
|
||||
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const
|
||||
{
|
||||
PaddedPODArray<UInt16> key(start_dates.size());
|
||||
for (size_t i = 0; i < key.size(); ++i)
|
||||
{
|
||||
if (RangeHashedDictionary::Range::isCorrectDate(start_dates[i]))
|
||||
key[i] = start_dates[i];
|
||||
else
|
||||
key[i] = end_dates[i];
|
||||
}
|
||||
|
||||
return key;
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename Key>
|
||||
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
const PaddedPODArray<Key>& ids,
|
||||
@ -158,6 +179,8 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
addSpecialColumn(structure.range_min, std::make_shared<DataTypeDate>(), "Range Start", names, start_dates, columns);
|
||||
addSpecialColumn(structure.range_max, std::make_shared<DataTypeDate>(), "Range End", names, end_dates, columns);
|
||||
|
||||
auto date_key = makeDateKey(start_dates, end_dates);
|
||||
|
||||
for (const auto idx : ext::range(0, structure.attributes.size()))
|
||||
{
|
||||
const DictionaryAttribute& attribute = structure.attributes[idx];
|
||||
@ -165,7 +188,7 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
{
|
||||
ColumnPtr column;
|
||||
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE)\
|
||||
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids, start_dates, attribute, *dictionary)
|
||||
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids, date_key, attribute, *dictionary)
|
||||
switch (attribute.underlying_type)
|
||||
{
|
||||
case AttributeUnderlyingType::UInt8:
|
||||
@ -202,7 +225,7 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
||||
break;
|
||||
case AttributeUnderlyingType::String:
|
||||
column = getColumnFromAttributeString(ids, start_dates, attribute, *dictionary);
|
||||
column = getColumnFromAttributeString(ids, date_key, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -392,9 +392,9 @@ void RangeHashedDictionary::getIdsAndDates(const Attribute& attribute, PaddedPOD
|
||||
end_dates.reserve(attr.size());
|
||||
|
||||
for (const auto & key : attr) {
|
||||
ids.push_back(key.first);
|
||||
for (const auto & value : key.second)
|
||||
{
|
||||
ids.push_back(key.first);
|
||||
start_dates.push_back(value.range.first);
|
||||
end_dates.push_back(value.range.second);
|
||||
}
|
||||
|
@ -82,11 +82,12 @@ public:
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
struct Range : std::pair<UInt16, UInt16>
|
||||
{
|
||||
using std::pair<UInt16, UInt16>::pair;
|
||||
|
||||
/// Returns true when `date` is strictly greater than zero and not above DATE_LUT_MAX_DAY_NUM.
/// contains() treats a range bound that fails this check as absent.
static bool isCorrectDate(const UInt16 date)
{
    return date > 0 && date <= DATE_LUT_MAX_DAY_NUM;
}
|
||||
|
||||
bool contains(const UInt16 date) const
|
||||
{
|
||||
const auto & left = first;
|
||||
@ -95,8 +96,8 @@ private:
|
||||
if (left <= date && date <= right)
|
||||
return true;
|
||||
|
||||
const auto has_left_bound = 0 < left && left <= DATE_LUT_MAX_DAY_NUM;
|
||||
const auto has_right_bound = 0 < right && right <= DATE_LUT_MAX_DAY_NUM;
|
||||
const auto has_left_bound = isCorrectDate(left);
|
||||
const auto has_right_bound = isCorrectDate(right);
|
||||
|
||||
if ((!has_left_bound || left <= date) && (!has_right_bound || date <= right))
|
||||
return true;
|
||||
@ -105,6 +106,7 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
struct Value final
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user