From 0c082b134d310c6a194feb5a8f3063fcabb8c649 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 25 Dec 2017 22:00:48 +0300 Subject: [PATCH] fixed engine Dictionary(range_hashed) for open interval date keys [#CLICKHOUSE-3521] --- .../RangeDictionaryBlockInputStream.h | 31 ++++++++++++++++--- .../Dictionaries/RangeHashedDictionary.cpp | 2 +- dbms/src/Dictionaries/RangeHashedDictionary.h | 8 +++-- 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h index 9b6382ee851..02c327ed6eb 100644 --- a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace DB @@ -57,6 +58,9 @@ private: Block fillBlock(const PaddedPODArray & ids, const PaddedPODArray & start_dates, const PaddedPODArray & end_dates) const; + PaddedPODArray makeDateKey( + const PaddedPODArray & start_dates, const PaddedPODArray & end_dates) const; + DictionatyPtr dictionary; Names column_names; PaddedPODArray ids; @@ -88,8 +92,8 @@ Block RangeDictionaryBlockInputStream::getBlock(size_t star for (auto idx : ext::range(start, start + length)) { block_ids.push_back(ids[idx]); - block_start_dates.push_back(block_start_dates[idx]); - block_end_dates.push_back(block_end_dates[idx]); + block_start_dates.push_back(start_dates[idx]); + block_end_dates.push_back(end_dates[idx]); } return fillBlock(block_ids, block_start_dates, block_end_dates); @@ -144,6 +148,23 @@ void RangeDictionaryBlockInputStream::addSpecialColumn( } } +template +PaddedPODArray RangeDictionaryBlockInputStream::makeDateKey( + const PaddedPODArray & start_dates, const PaddedPODArray & end_dates) const +{ + PaddedPODArray key(start_dates.size()); + for (size_t i = 0; i < key.size(); ++i) + { + if (RangeHashedDictionary::Range::isCorrectDate(start_dates[i])) + key[i] = start_dates[i]; + else + key[i] = end_dates[i]; + } + + return key; +} + + template Block RangeDictionaryBlockInputStream::fillBlock( const PaddedPODArray& ids, @@ -158,6 +179,8 @@ Block RangeDictionaryBlockInputStream::fillBlock( addSpecialColumn(structure.range_min, std::make_shared(), "Range Start", names, start_dates, columns); addSpecialColumn(structure.range_max, std::make_shared(), "Range End", names, end_dates, columns); + auto date_key = makeDateKey(start_dates, end_dates); + for (const auto idx : ext::range(0, structure.attributes.size())) { const DictionaryAttribute& attribute = structure.attributes[idx]; @@ -165,7 +188,7 @@ Block RangeDictionaryBlockInputStream::fillBlock( { ColumnPtr column; #define GET_COLUMN_FORM_ATTRIBUTE(TYPE)\ - column = getColumnFromAttribute(&DictionaryType::get##TYPE, ids, start_dates, attribute, *dictionary) + column = getColumnFromAttribute(&DictionaryType::get##TYPE, ids, date_key, attribute, *dictionary) switch (attribute.underlying_type) { case AttributeUnderlyingType::UInt8: @@ -202,7 +225,7 @@ Block RangeDictionaryBlockInputStream::fillBlock( GET_COLUMN_FORM_ATTRIBUTE(Float64); break; case AttributeUnderlyingType::String: - column = getColumnFromAttributeString(ids, start_dates, attribute, *dictionary); + column = getColumnFromAttributeString(ids, date_key, attribute, *dictionary); break; } diff --git a/dbms/src/Dictionaries/RangeHashedDictionary.cpp b/dbms/src/Dictionaries/RangeHashedDictionary.cpp index 7afe71f86dc..68c44683515 100644 --- a/dbms/src/Dictionaries/RangeHashedDictionary.cpp +++ b/dbms/src/Dictionaries/RangeHashedDictionary.cpp @@ -392,9 +392,9 @@ void RangeHashedDictionary::getIdsAndDates(const Attribute& attribute, PaddedPOD end_dates.reserve(attr.size()); for (const auto & key : attr) { - ids.push_back(key.first); for (const auto & value : key.second) { + ids.push_back(key.first); start_dates.push_back(value.range.first); end_dates.push_back(value.range.second); } diff --git a/dbms/src/Dictionaries/RangeHashedDictionary.h b/dbms/src/Dictionaries/RangeHashedDictionary.h index 5424cf1a206..30a23a247af 100644 --- a/dbms/src/Dictionaries/RangeHashedDictionary.h +++ b/dbms/src/Dictionaries/RangeHashedDictionary.h @@ -82,11 +82,12 @@ public: BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; -private: struct Range : std::pair { using std::pair::pair; + static bool isCorrectDate(const UInt16 date) { return 0 < date && date <= DATE_LUT_MAX_DAY_NUM; } + bool contains(const UInt16 date) const { const auto & left = first; @@ -95,8 +96,8 @@ private: if (left <= date && date <= right) return true; - const auto has_left_bound = 0 < left && left <= DATE_LUT_MAX_DAY_NUM; - const auto has_right_bound = 0 < right && right <= DATE_LUT_MAX_DAY_NUM; + const auto has_left_bound = isCorrectDate(left); + const auto has_right_bound = isCorrectDate(right); if ((!has_left_bound || left <= date) && (!has_right_bound || date <= right)) return true; @@ -105,6 +106,7 @@ private: } }; +private: template struct Value final {