diff --git a/src/Dictionaries/RangeDictionaryBlockInputStream.h b/src/Dictionaries/RangeDictionaryBlockInputStream.h index ccd77d49e0f..6531f5cba9d 100644 --- a/src/Dictionaries/RangeDictionaryBlockInputStream.h +++ b/src/Dictionaries/RangeDictionaryBlockInputStream.h @@ -17,14 +17,14 @@ namespace DB * BlockInputStream implementation for external dictionaries * read() returns single block consisting of the in-memory contents of the dictionaries */ -template +template class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase { public: - using DictionaryPtr = std::shared_ptr; + using Key = UInt64; RangeDictionaryBlockInputStream( - DictionaryPtr dictionary, + std::shared_ptr dictionary, size_t max_block_size, const Names & column_names, PaddedPODArray && ids_to_fill, @@ -40,35 +40,26 @@ private: template ColumnPtr getColumnFromPODArray(const PaddedPODArray & array) const; - template - void addSpecialColumn( - const std::optional & attribute, - DataTypePtr type, - const std::string & default_name, - const std::unordered_set & column_names_set, - const PaddedPODArray & values, - ColumnsWithTypeAndName & columns, - bool force = false) const; - Block fillBlock( const PaddedPODArray & ids_to_fill, const PaddedPODArray & block_start_dates, const PaddedPODArray & block_end_dates) const; - PaddedPODArray - makeDateKey(const PaddedPODArray & block_start_dates, const PaddedPODArray & block_end_dates) const; + PaddedPODArray makeDateKey( + const PaddedPODArray & block_start_dates, + const PaddedPODArray & block_end_dates) const; - DictionaryPtr dictionary; - Names column_names; + std::shared_ptr dictionary; + NameSet column_names; PaddedPODArray ids; PaddedPODArray start_dates; PaddedPODArray end_dates; }; -template -RangeDictionaryBlockInputStream::RangeDictionaryBlockInputStream( - DictionaryPtr dictionary_, +template +RangeDictionaryBlockInputStream::RangeDictionaryBlockInputStream( + std::shared_ptr dictionary_, size_t max_block_size_, const Names & column_names_, PaddedPODArray && ids_, @@ -76,15 +67,15 @@ RangeDictionaryBlockInputStream::RangeDictionary PaddedPODArray && block_end_dates) : DictionaryBlockInputStreamBase(ids_.size(), max_block_size_) , dictionary(dictionary_) - , column_names(column_names_) + , column_names(column_names_.begin(), column_names_.end()) , ids(std::move(ids_)) , start_dates(std::move(block_start_dates)) , end_dates(std::move(block_end_dates)) { } -template -Block RangeDictionaryBlockInputStream::getBlock(size_t start, size_t length) const +template +Block RangeDictionaryBlockInputStream::getBlock(size_t start, size_t length) const { PaddedPODArray block_ids; PaddedPODArray block_start_dates; @@ -103,38 +94,19 @@ Block RangeDictionaryBlockInputStream::getBlock( return fillBlock(block_ids, block_start_dates, block_end_dates); } -template +template template -ColumnPtr RangeDictionaryBlockInputStream::getColumnFromPODArray(const PaddedPODArray & array) const +ColumnPtr RangeDictionaryBlockInputStream::getColumnFromPODArray(const PaddedPODArray & array) const { auto column_vector = ColumnVector::create(); column_vector->getData().reserve(array.size()); - for (T value : array) - column_vector->insertValue(value); + column_vector->getData().insert(array.begin(), array.end()); + return column_vector; } -template -template -void RangeDictionaryBlockInputStream::addSpecialColumn( - const std::optional & attribute, - DataTypePtr type, - const std::string & default_name, - const std::unordered_set & column_names_set, - const PaddedPODArray & values, - ColumnsWithTypeAndName & columns, - bool force) const -{ - std::string name = default_name; - if (attribute) - name = attribute->name; - - if (force || column_names_set.find(name) != column_names_set.end()) - columns.emplace_back(getColumnFromPODArray(values), type, name); -} - -template -PaddedPODArray RangeDictionaryBlockInputStream::makeDateKey( +template +PaddedPODArray RangeDictionaryBlockInputStream::makeDateKey( const PaddedPODArray & block_start_dates, const PaddedPODArray & block_end_dates) const { PaddedPODArray key(block_start_dates.size()); @@ -150,8 +122,8 @@ PaddedPODArray RangeDictionaryBlockInputStream -Block RangeDictionaryBlockInputStream::fillBlock( +template +Block RangeDictionaryBlockInputStream::fillBlock( const PaddedPODArray & ids_to_fill, const PaddedPODArray & block_start_dates, const PaddedPODArray & block_end_dates) const @@ -159,20 +131,32 @@ Block RangeDictionaryBlockInputStream::fillBlock ColumnsWithTypeAndName columns; const DictionaryStructure & structure = dictionary->getStructure(); - std::unordered_set names(column_names.begin(), column_names.end()); - - addSpecialColumn(structure.id, std::make_shared(), "ID", names, ids_to_fill, columns, true); - auto ids_column = columns.back().column; - addSpecialColumn(structure.range_min, structure.range_max->type, "Range Start", names, block_start_dates, columns); - addSpecialColumn(structure.range_max, structure.range_max->type, "Range End", names, block_end_dates, columns); + auto ids_column = getColumnFromPODArray(ids_to_fill); + const std::string & id_column_name = structure.id->name; + if (column_names.find(id_column_name) != column_names.end()) + columns.emplace_back(ids_column, std::make_shared(), id_column_name); auto date_key = makeDateKey(block_start_dates, block_end_dates); auto date_column = getColumnFromPODArray(date_key); + const std::string & range_min_column_name = structure.range_min->name; + if (column_names.find(range_min_column_name) != column_names.end()) + { + auto range_min_column = getColumnFromPODArray(block_start_dates); + columns.emplace_back(range_min_column, structure.range_max->type, range_min_column_name); + } + + const std::string & range_max_column_name = structure.range_max->name; + if (column_names.find(range_max_column_name) != column_names.end()) + { + auto range_max_column = getColumnFromPODArray(block_end_dates); + columns.emplace_back(range_max_column, structure.range_max->type, range_max_column_name); + } + for (const auto idx : ext::range(0, structure.attributes.size())) { const DictionaryAttribute & attribute = structure.attributes[idx]; - if (names.find(attribute.name) != names.end()) + if (column_names.find(attribute.name) != column_names.end()) { ColumnPtr column = dictionary->getColumn( attribute.name, diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index c87da2c740d..52b6f219bac 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -515,7 +515,9 @@ RangeHashedDictionary::getAttributeWithType(const std::string & attribute_name, template void RangeHashedDictionary::getIdsAndDates( - PaddedPODArray & ids, PaddedPODArray & start_dates, PaddedPODArray & end_dates) const + PaddedPODArray & ids, + PaddedPODArray & start_dates, + PaddedPODArray & end_dates) const { const auto & attribute = attributes.front(); @@ -523,11 +525,9 @@ void RangeHashedDictionary::getIdsAndDates( { using Type = std::decay_t; using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; - if constexpr (std::is_same_v) - getIdsAndDates(attribute, ids, start_dates, end_dates); - else - getIdsAndDates(attribute, ids, start_dates, end_dates); + getIdsAndDates(attribute, ids, start_dates, end_dates); }; callOnDictionaryAttributeType(attribute.type, type_call); @@ -571,13 +571,20 @@ BlockInputStreamPtr RangeHashedDictionary::getBlockInputStreamImpl(const Names & PaddedPODArray end_dates; getIdsAndDates(ids, start_dates, end_dates); - using BlockInputStreamType = RangeDictionaryBlockInputStream; - auto dict_ptr = std::static_pointer_cast(shared_from_this()); - return std::make_shared( - dict_ptr, max_block_size, column_names, std::move(ids), std::move(start_dates), std::move(end_dates)); + using BlockInputStreamType = RangeDictionaryBlockInputStream; + + auto stream = std::make_shared( + shared_from_this(), + max_block_size, + column_names, + std::move(ids), + std::move(start_dates), + std::move(end_dates)); + + return stream; } -struct RangeHashedDIctionaryCallGetBlockInputStreamImpl +struct RangeHashedDictionaryCallGetBlockInputStreamImpl { BlockInputStreamPtr stream; const RangeHashedDictionary * dict; @@ -597,7 +604,7 @@ BlockInputStreamPtr RangeHashedDictionary::getBlockInputStream(const Names & col { using ListType = TypeList; - RangeHashedDIctionaryCallGetBlockInputStreamImpl callable; + RangeHashedDictionaryCallGetBlockInputStreamImpl callable; callable.dict = this; callable.column_names = &column_names; callable.max_block_size = max_block_size; diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 7ac2d33c63b..f2b24e52dfc 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -93,8 +93,6 @@ private: template using Ptr = std::unique_ptr>; - using NullableSet = HashSet>; - struct Attribute final { public: @@ -187,7 +185,7 @@ private: template BlockInputStreamPtr getBlockInputStreamImpl(const Names & column_names, size_t max_block_size) const; - friend struct RangeHashedDIctionaryCallGetBlockInputStreamImpl; + friend struct RangeHashedDictionaryCallGetBlockInputStreamImpl; const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; diff --git a/tests/queries/0_stateless/01676_range_hashed_dictionary.reference b/tests/queries/0_stateless/01676_range_hashed_dictionary.reference index 7b19ddda8cb..23a5180d99c 100644 --- a/tests/queries/0_stateless/01676_range_hashed_dictionary.reference +++ b/tests/queries/0_stateless/01676_range_hashed_dictionary.reference @@ -11,9 +11,22 @@ dictHas 1 0 select columns from dictionary +allColumns 1 2019-05-05 2019-05-20 0.33 1 2019-05-21 2019-05-30 0.42 2 2019-05-21 2019-05-30 0.46 +noColumns +1 +1 +1 +onlySpecificColumns +1 2019-05-05 0.33 +1 2019-05-21 0.42 +2 2019-05-21 0.46 +onlySpecificColumn +0.33 +0.42 +0.46 Dictionary nullable dictGet 0.33 @@ -27,6 +40,19 @@ dictHas 1 0 select columns from dictionary +allColumns 1 2019-05-05 2019-05-20 0.33 1 2019-05-21 2019-05-30 0.42 2 2019-05-21 2019-05-30 \N +noColumns +1 +1 +1 +onlySpecificColumns +1 2019-05-05 0.33 +1 2019-05-21 0.42 +2 2019-05-21 \N +onlySpecificColumn +0.33 +0.42 +\N diff --git a/tests/queries/0_stateless/01676_range_hashed_dictionary.sql b/tests/queries/0_stateless/01676_range_hashed_dictionary.sql index a4d56e188c4..455e850b239 100644 --- a/tests/queries/0_stateless/01676_range_hashed_dictionary.sql +++ b/tests/queries/0_stateless/01676_range_hashed_dictionary.sql @@ -42,7 +42,14 @@ SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(1), toDate(' SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate('2019-05-29')); SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate('2019-05-31')); SELECT 'select columns from dictionary'; +SELECT 'allColumns'; SELECT * FROM database_for_range_dict.range_dictionary; +SELECT 'noColumns'; +SELECT 1 FROM database_for_range_dict.range_dictionary; +SELECT 'onlySpecificColumns'; +SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary; +SELECT 'onlySpecificColumn'; +SELECT Tax FROM database_for_range_dict.range_dictionary; DROP TABLE database_for_range_dict.date_table; DROP DICTIONARY database_for_range_dict.range_dictionary; @@ -87,7 +94,14 @@ SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(1), SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2), toDate('2019-05-29')); SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2), toDate('2019-05-31')); SELECT 'select columns from dictionary'; +SELECT 'allColumns'; SELECT * FROM database_for_range_dict.range_dictionary_nullable; +SELECT 'noColumns'; +SELECT 1 FROM database_for_range_dict.range_dictionary_nullable; +SELECT 'onlySpecificColumns'; +SELECT CountryID, StartDate, Tax FROM database_for_range_dict.range_dictionary_nullable; +SELECT 'onlySpecificColumn'; +SELECT Tax FROM database_for_range_dict.range_dictionary_nullable; DROP TABLE database_for_range_dict.date_table; DROP DICTIONARY database_for_range_dict.range_dictionary_nullable;