fixed engine Dictionary(range_hashed) for open interval date keys [#CLICKHOUSE-3521]

This commit is contained in:
Nikolai Kochetov 2017-12-25 22:00:48 +03:00
parent 2ed66515ff
commit 0c082b134d
3 changed files with 33 additions and 8 deletions

View File

@ -8,6 +8,7 @@
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/IDictionary.h>
#include <Dictionaries/RangeHashedDictionary.h>
#include <ext/range.h>
namespace DB
@ -57,6 +58,9 @@ private:
Block fillBlock(const PaddedPODArray<Key> & ids,
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
/// Returns one date per row: start_dates[i] when it passes RangeHashedDictionary::Range::isCorrectDate,
/// otherwise end_dates[i]. The result has start_dates.size() elements; end_dates is indexed with the
/// same positions, so it must be at least as long.
PaddedPODArray<UInt16> makeDateKey(
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
DictionatyPtr dictionary;
Names column_names;
PaddedPODArray<Key> ids;
@ -88,8 +92,8 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t star
for (auto idx : ext::range(start, start + length))
{
block_ids.push_back(ids[idx]);
block_start_dates.push_back(block_start_dates[idx]);
block_end_dates.push_back(block_end_dates[idx]);
block_start_dates.push_back(start_dates[idx]);
block_end_dates.push_back(end_dates[idx]);
}
return fillBlock(block_ids, block_start_dates, block_end_dates);
@ -144,6 +148,23 @@ void RangeDictionaryBlockInputStream<DictionaryType, Key>::addSpecialColumn(
}
}
/// Picks, for every row, the date that is passed to the dictionary getters in fillBlock.
/// The range start is used when it passes RangeHashedDictionary::Range::isCorrectDate;
/// otherwise the range end of the same row is used instead.
/// The result has exactly start_dates.size() elements; end_dates is read at the same indices.
template <typename DictionaryType, typename Key>
PaddedPODArray<UInt16> RangeDictionaryBlockInputStream<DictionaryType, Key>::makeDateKey(
    const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const
{
    const size_t rows = start_dates.size();
    PaddedPODArray<UInt16> date_keys(rows);

    for (size_t row = 0; row != rows; ++row)
    {
        const UInt16 range_start = start_dates[row];

        /// end_dates is only touched for rows whose start date fails the check.
        date_keys[row] = RangeHashedDictionary::Range::isCorrectDate(range_start)
            ? range_start
            : end_dates[row];
    }

    return date_keys;
}
template <typename DictionaryType, typename Key>
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
const PaddedPODArray<Key>& ids,
@ -158,6 +179,8 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
addSpecialColumn(structure.range_min, std::make_shared<DataTypeDate>(), "Range Start", names, start_dates, columns);
addSpecialColumn(structure.range_max, std::make_shared<DataTypeDate>(), "Range End", names, end_dates, columns);
auto date_key = makeDateKey(start_dates, end_dates);
for (const auto idx : ext::range(0, structure.attributes.size()))
{
const DictionaryAttribute& attribute = structure.attributes[idx];
@ -165,7 +188,7 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
{
ColumnPtr column;
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE)\
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids, start_dates, attribute, *dictionary)
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids, date_key, attribute, *dictionary)
switch (attribute.underlying_type)
{
case AttributeUnderlyingType::UInt8:
@ -202,7 +225,7 @@ Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
GET_COLUMN_FORM_ATTRIBUTE(Float64);
break;
case AttributeUnderlyingType::String:
column = getColumnFromAttributeString(ids, start_dates, attribute, *dictionary);
column = getColumnFromAttributeString(ids, date_key, attribute, *dictionary);
break;
}

View File

@ -392,9 +392,9 @@ void RangeHashedDictionary::getIdsAndDates(const Attribute& attribute, PaddedPOD
end_dates.reserve(attr.size());
for (const auto & key : attr) {
ids.push_back(key.first);
for (const auto & value : key.second)
{
ids.push_back(key.first);
start_dates.push_back(value.range.first);
end_dates.push_back(value.range.second);
}

View File

@ -82,11 +82,12 @@ public:
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
struct Range : std::pair<UInt16, UInt16>
{
using std::pair<UInt16, UInt16>::pair;
/// True when `date` is strictly greater than 0 and not greater than DATE_LUT_MAX_DAY_NUM.
/// contains() uses this check to decide whether a range bound is present.
static bool isCorrectDate(const UInt16 date)
{
    if (!(0 < date))
        return false;

    return date <= DATE_LUT_MAX_DAY_NUM;
}
bool contains(const UInt16 date) const
{
const auto & left = first;
@ -95,8 +96,8 @@ private:
if (left <= date && date <= right)
return true;
const auto has_left_bound = 0 < left && left <= DATE_LUT_MAX_DAY_NUM;
const auto has_right_bound = 0 < right && right <= DATE_LUT_MAX_DAY_NUM;
const auto has_left_bound = isCorrectDate(left);
const auto has_right_bound = isCorrectDate(right);
if ((!has_left_bound || left <= date) && (!has_right_bound || date <= right))
return true;
@ -105,6 +106,7 @@ private:
}
};
private:
template <typename T>
struct Value final
{