diff --git a/src/Dictionaries/DictionaryHelpers.cpp b/src/Dictionaries/DictionaryHelpers.cpp new file mode 100644 index 00000000000..b54b9eabfb6 --- /dev/null +++ b/src/Dictionaries/DictionaryHelpers.cpp @@ -0,0 +1,48 @@ +#include "DictionaryHelpers.h" + +namespace DB +{ + +MutableColumns deserializeColumnsFromKeys( + const DictionaryStructure & dictionary_structure, + const PaddedPODArray & keys, + size_t start, + size_t end) +{ + MutableColumns result_columns; + result_columns.reserve(dictionary_structure.key->size()); + + for (const DictionaryAttribute & attribute : *dictionary_structure.key) + result_columns.emplace_back(attribute.type->createColumn()); + + for (size_t index = start; index < end; ++index) + { + const auto & key = keys[index]; + const auto * ptr = key.data; + + for (auto & result_column : result_columns) + ptr = result_column->deserializeAndInsertFromArena(ptr); + } + + return result_columns; +} + +ColumnsWithTypeAndName deserializeColumnsWithTypeAndNameFromKeys( + const DictionaryStructure & dictionary_structure, + const PaddedPODArray & keys, + size_t start, + size_t end) +{ + ColumnsWithTypeAndName result; + MutableColumns columns = deserializeColumnsFromKeys(dictionary_structure, keys, start, end); + + for (size_t i = 0, num_columns = columns.size(); i < num_columns; ++i) + { + const auto & dictionary_attribute = (*dictionary_structure.key)[i]; + result.emplace_back(ColumnWithTypeAndName{std::move(columns[i]), dictionary_attribute.type, dictionary_attribute.name}); + } + + return result; +} + +} diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index dde41864ddc..3d077414291 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -497,6 +497,20 @@ private: Arena * complex_key_arena; }; +/// Deserialize columns from keys array using dictionary structure +MutableColumns deserializeColumnsFromKeys( + const DictionaryStructure & dictionary_structure, + const 
PaddedPODArray & keys, + size_t start, + size_t end); + +/// Deserialize columns with type and name from keys array using dictionary structure +ColumnsWithTypeAndName deserializeColumnsWithTypeAndNameFromKeys( + const DictionaryStructure & dictionary_structure, + const PaddedPODArray & keys, + size_t start, + size_t end); + /** Merge block with blocks from stream. If there are duplicate keys in block they are filtered out. * In result block_to_update will be merged with blocks from stream. * Note: readPrefix readImpl readSuffix will be called on stream object during function execution. diff --git a/src/Dictionaries/DictionarySource.cpp b/src/Dictionaries/DictionarySource.cpp index 7ba6ea82ca9..fbb03cb00fa 100644 --- a/src/Dictionaries/DictionarySource.cpp +++ b/src/Dictionaries/DictionarySource.cpp @@ -29,7 +29,7 @@ DictionarySourceData::DictionarySourceData( , key_type(DictionaryInputStreamKeyType::ComplexKey) { const DictionaryStructure & dictionary_structure = dictionary->getStructure(); - fillKeyColumns(keys, 0, keys.size(), dictionary_structure, key_columns); + key_columns = deserializeColumnsWithTypeAndNameFromKeys(dictionary_structure, keys, 0, keys.size()); } DictionarySourceData::DictionarySourceData( @@ -158,32 +158,4 @@ Block DictionarySourceData::fillBlock( return Block(block_columns); } -void DictionarySourceData::fillKeyColumns( - const PaddedPODArray & keys, - size_t start, - size_t size, - const DictionaryStructure & dictionary_structure, - ColumnsWithTypeAndName & result) -{ - MutableColumns columns; - columns.reserve(dictionary_structure.key->size()); - - for (const DictionaryAttribute & attribute : *dictionary_structure.key) - columns.emplace_back(attribute.type->createColumn()); - - for (size_t index = start; index < size; ++index) - { - const auto & key = keys[index]; - const auto *ptr = key.data; - for (auto & column : columns) - ptr = column->deserializeAndInsertFromArena(ptr); - } - - for (size_t i = 0, num_columns = columns.size(); i < 
num_columns; ++i) - { - const auto & dictionary_attribute = (*dictionary_structure.key)[i]; - result.emplace_back(ColumnWithTypeAndName{std::move(columns[i]), dictionary_attribute.type, dictionary_attribute.name}); - } -} - } diff --git a/src/Dictionaries/DictionarySource.h b/src/Dictionaries/DictionarySource.h index 195a3c66484..cd4b3120ac0 100644 --- a/src/Dictionaries/DictionarySource.h +++ b/src/Dictionaries/DictionarySource.h @@ -51,13 +51,6 @@ private: const DataTypes & types, ColumnsWithTypeAndName && view) const; - static void fillKeyColumns( - const PaddedPODArray & keys, - size_t start, - size_t size, - const DictionaryStructure & dictionary_structure, - ColumnsWithTypeAndName & result); - const size_t num_rows; std::shared_ptr dictionary; std::unordered_set column_names; diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index 9f46addd912..2fc3a5aa0d0 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -134,42 +134,11 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration if (id->name.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "'id' cannot be empty"); - const char * range_default_type = "Date"; - if (config.has(structure_prefix + ".range_min")) - range_min.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_min", range_default_type)); - - if (config.has(structure_prefix + ".range_max")) - range_max.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_max", range_default_type)); - - if (range_min.has_value() != range_max.has_value()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Dictionary structure should have both 'range_min' and 'range_max' either specified or not."); - } - - if (range_min && range_max && !range_min->type->equals(*range_max->type)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Dictionary structure 'range_min' and 'range_max' should have 
same type, " - "'range_min' type: {}," - "'range_max' type: {}", - range_min->type->getName(), - range_max->type->getName()); - } - - if (range_min) - { - if (!range_min->type->isValueRepresentedByInteger()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum." - " Actual 'range_min' and 'range_max' type is {}", - range_min->type->getName()); - } - - if (!id->expression.empty() || (range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty())) + if (!id->expression.empty()) has_expressions = true; } + parseRangeConfiguration(config, structure_prefix); attributes = getAttributes(config, structure_prefix, /*complex_key_attributes =*/ false); for (size_t i = 0; i < attributes.size(); ++i) @@ -439,4 +408,42 @@ std::vector DictionaryStructure::getAttributes( return res_attributes; } +void DictionaryStructure::parseRangeConfiguration(const Poco::Util::AbstractConfiguration & config, const std::string & structure_prefix) +{ + const char * range_default_type = "Date"; + if (config.has(structure_prefix + ".range_min")) + range_min.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_min", range_default_type)); + + if (config.has(structure_prefix + ".range_max")) + range_max.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".range_max", range_default_type)); + + if (range_min.has_value() != range_max.has_value()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Dictionary structure should have both 'range_min' and 'range_max' either specified or not."); + } + + if (range_min && range_max && !range_min->type->equals(*range_max->type)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Dictionary structure 'range_min' and 'range_max' should have same type, " + "'range_min' type: {}," + "'range_max' type: {}", + range_min->type->getName(), + range_max->type->getName()); + } + + if (range_min) + { + 
if (!range_min->type->isValueRepresentedByInteger()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum." + " Actual 'range_min' and 'range_max' type is {}", + range_min->type->getName()); + } + + if ((range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty())) + has_expressions = true; +} + } diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h index 3ea640d77e8..6ab849d1d89 100644 --- a/src/Dictionaries/DictionaryStructure.h +++ b/src/Dictionaries/DictionaryStructure.h @@ -67,8 +67,9 @@ using DictionaryLifetime = ExternalLoadableLifetime; * - null_value, used as a default value for non-existent entries in the dictionary, * decimal representation for numeric attributes; * - hierarchical, whether this attribute defines a hierarchy; -* - injective, whether the mapping to parent is injective (can be used for optimization of GROUP BY?) 
-* - is_object_id, used in mongo dictionary, converts string key to objectid +* - injective, whether the mapping to parent is injective (can be used for optimization of GROUP BY?); +* - is_object_id, used in mongo dictionary, converts string key to objectid; +* - is_nullable, is attribute nullable; */ struct DictionaryAttribute final { @@ -153,6 +154,10 @@ private: const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, bool complex_key_attributes); + + /// parse range_min and range_max + void parseRangeConfiguration(const Poco::Util::AbstractConfiguration & config, const std::string & structure_prefix); + }; } diff --git a/src/Dictionaries/ExternalQueryBuilder.cpp b/src/Dictionaries/ExternalQueryBuilder.cpp index 10c4f67d809..9ddaaeb573a 100644 --- a/src/Dictionaries/ExternalQueryBuilder.cpp +++ b/src/Dictionaries/ExternalQueryBuilder.cpp @@ -133,6 +133,29 @@ void ExternalQueryBuilder::composeLoadAllQuery(WriteBuffer & out) const writeQuoted(key.name, out); } + + if (dict_struct.range_min && dict_struct.range_max) + { + writeString(", ", out); + + if (!dict_struct.range_min->expression.empty()) + { + writeParenthesisedString(dict_struct.range_min->expression, out); + writeString(" AS ", out); + } + + writeQuoted(dict_struct.range_min->name, out); + + writeString(", ", out); + + if (!dict_struct.range_max->expression.empty()) + { + writeParenthesisedString(dict_struct.range_max->expression, out); + writeString(" AS ", out); + } + + writeQuoted(dict_struct.range_max->name, out); + } } for (const auto & attr : dict_struct.attributes) diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h index 94946e41ff8..af4b77a6ff8 100644 --- a/src/Dictionaries/IPAddressDictionary.h +++ b/src/Dictionaries/IPAddressDictionary.h @@ -64,7 +64,7 @@ public: bool isInjective(const std::string & attribute_name) const override { - return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; + 
return dict_struct.getAttribute(attribute_name).injective; } DictionaryKeyType getKeyType() const override { return DictionaryKeyType::complex; } diff --git a/src/Dictionaries/RangeDictionarySource.h b/src/Dictionaries/RangeDictionarySource.h index d4fce32a54f..4d195137dcc 100644 --- a/src/Dictionaries/RangeDictionarySource.h +++ b/src/Dictionaries/RangeDictionarySource.h @@ -14,170 +14,213 @@ namespace DB { -template +enum class RangeDictionaryType +{ + simple, + complex +}; + +template class RangeDictionarySourceData { public: - using Key = UInt64; + + using KeyType = std::conditional_t; RangeDictionarySourceData( std::shared_ptr dictionary, const Names & column_names, - PaddedPODArray && ids_to_fill, + PaddedPODArray && keys, PaddedPODArray && start_dates, PaddedPODArray && end_dates); Block getBlock(size_t start, size_t length) const; - size_t getNumRows() const { return ids.size(); } + size_t getNumRows() const { return keys.size(); } private: Block fillBlock( - const PaddedPODArray & ids_to_fill, + const PaddedPODArray & keys_to_fill, const PaddedPODArray & block_start_dates, - const PaddedPODArray & block_end_dates) const; + const PaddedPODArray & block_end_dates, + size_t start, + size_t end) const; - PaddedPODArray makeDateKey( + PaddedPODArray makeDateKeys( const PaddedPODArray & block_start_dates, const PaddedPODArray & block_end_dates) const; std::shared_ptr dictionary; NameSet column_names; - PaddedPODArray ids; + PaddedPODArray keys; PaddedPODArray start_dates; PaddedPODArray end_dates; }; -template -RangeDictionarySourceData::RangeDictionarySourceData( +template +RangeDictionarySourceData::RangeDictionarySourceData( std::shared_ptr dictionary_, const Names & column_names_, - PaddedPODArray && ids_, + PaddedPODArray && keys, PaddedPODArray && block_start_dates, PaddedPODArray && block_end_dates) : dictionary(dictionary_) , column_names(column_names_.begin(), column_names_.end()) - , ids(std::move(ids_)) + , keys(std::move(keys)) , 
start_dates(std::move(block_start_dates)) , end_dates(std::move(block_end_dates)) { } -template -Block RangeDictionarySourceData::getBlock(size_t start, size_t length) const +template +Block RangeDictionarySourceData::getBlock(size_t start, size_t length) const { - PaddedPODArray block_ids; + PaddedPODArray block_keys; PaddedPODArray block_start_dates; PaddedPODArray block_end_dates; - block_ids.reserve(length); + block_keys.reserve(length); block_start_dates.reserve(length); block_end_dates.reserve(length); - for (auto idx : collections::range(start, start + length)) + for (size_t index = start; index < start + length; ++index) { - block_ids.push_back(ids[idx]); - block_start_dates.push_back(start_dates[idx]); - block_end_dates.push_back(end_dates[idx]); + block_keys.push_back(keys[index]); /* copy from the stored keys; block_keys is the output being filled */ + block_start_dates.push_back(start_dates[index]); + block_end_dates.push_back(end_dates[index]); } - return fillBlock(block_ids, block_start_dates, block_end_dates); + return fillBlock(block_keys, block_start_dates, block_end_dates, start, start + length); } -template -PaddedPODArray RangeDictionarySourceData::makeDateKey( - const PaddedPODArray & block_start_dates, const PaddedPODArray & block_end_dates) const -{ - PaddedPODArray key(block_start_dates.size()); - for (size_t i = 0; i < key.size(); ++i) - { - if (RangeHashedDictionary::Range::isCorrectDate(block_start_dates[i])) - key[i] = block_start_dates[i]; - else - key[i] = block_end_dates[i]; - } - - return key; -} - - -template -Block RangeDictionarySourceData::fillBlock( - const PaddedPODArray & ids_to_fill, +template +PaddedPODArray RangeDictionarySourceData::makeDateKeys( const PaddedPODArray & block_start_dates, const PaddedPODArray & block_end_dates) const +{ + PaddedPODArray keys(block_start_dates.size()); + + for (size_t i = 0; i < keys.size(); ++i) + { + if (Range::isCorrectDate(block_start_dates[i])) + keys[i] = block_start_dates[i]; + else + keys[i] = block_end_dates[i]; + } + + return keys; +} + + 
+template +Block RangeDictionarySourceData::fillBlock( + const PaddedPODArray & keys_to_fill, + const PaddedPODArray & block_start_dates, + const PaddedPODArray & block_end_dates, + size_t start, + size_t end) const { ColumnsWithTypeAndName columns; - const DictionaryStructure & structure = dictionary->getStructure(); + const DictionaryStructure & dictionary_structure = dictionary->getStructure(); - auto ids_column = getColumnFromPODArray(ids_to_fill); - const std::string & id_column_name = structure.id->name; - if (column_names.find(id_column_name) != column_names.end()) - columns.emplace_back(ids_column, std::make_shared(), id_column_name); + DataTypes keys_types; + Columns keys_columns; + Strings keys_names = dictionary_structure.getKeysNames(); - auto date_key = makeDateKey(block_start_dates, block_end_dates); + if constexpr (range_dictionary_type == RangeDictionaryType::simple) + { + keys_columns = {getColumnFromPODArray(keys_to_fill)}; + keys_types = {std::make_shared()}; + } + else + { + for (const auto & attribute : *dictionary_structure.key) + keys_types.emplace_back(attribute.type); + + auto deserialized_columns = deserializeColumnsFromKeys(dictionary_structure, keys, start, end); + for (auto & deserialized_column : deserialized_columns) + keys_columns.emplace_back(std::move(deserialized_column)); + } + + size_t keys_size = keys_names.size(); + + /// keys_names, keys_columns and keys_types are parallel: one entry per key column. + /// The asserts below check that the branch taken above filled them consistently. + + assert(keys_columns.size() == keys_size); + assert(keys_types.size() == keys_size); + + for (size_t i = 0; i < keys_size; ++i) + { + auto & key_name = keys_names[i]; + + if (column_names.find(key_name) != column_names.end()) + columns.emplace_back(keys_columns[i], keys_types[i], key_name); + } + + auto date_key = makeDateKeys(block_start_dates, block_end_dates); auto date_column = getColumnFromPODArray(date_key); - const std::string & 
range_min_column_name = structure.range_min->name; + keys_columns.emplace_back(std::move(date_column)); + keys_types.emplace_back(std::make_shared()); + + const auto & range_min_column_name = dictionary_structure.range_min->name; if (column_names.find(range_min_column_name) != column_names.end()) { auto range_min_column = getColumnFromPODArray(block_start_dates); - columns.emplace_back(range_min_column, structure.range_max->type, range_min_column_name); + columns.emplace_back(range_min_column, dictionary_structure.range_max->type, range_min_column_name); } - const std::string & range_max_column_name = structure.range_max->name; + const auto & range_max_column_name = dictionary_structure.range_max->name; if (column_names.find(range_max_column_name) != column_names.end()) { auto range_max_column = getColumnFromPODArray(block_end_dates); - columns.emplace_back(range_max_column, structure.range_max->type, range_max_column_name); + columns.emplace_back(range_max_column, dictionary_structure.range_max->type, range_max_column_name); } - for (const auto idx : collections::range(0, structure.attributes.size())) + size_t attributes_size = dictionary_structure.attributes.size(); + for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index) { - const DictionaryAttribute & attribute = structure.attributes[idx]; - if (column_names.find(attribute.name) != column_names.end()) - { - ColumnPtr column = dictionary->getColumn( - attribute.name, - attribute.type, - {ids_column, date_column}, - {std::make_shared(), std::make_shared()}, - nullptr); - columns.emplace_back(column, attribute.type, attribute.name); - } + const auto & attribute = dictionary_structure.attributes[attribute_index]; + if (column_names.find(attribute.name) == column_names.end()) + continue; + + auto column = dictionary->getColumn( + attribute.name, + attribute.type, + keys_columns, + keys_types, + nullptr /* default_values_column*/); + + columns.emplace_back(std::move(column), 
attribute.type, attribute.name); } return Block(columns); } -/* - * BlockInputStream implementation for external dictionaries - * read() returns single block consisting of the in-memory contents of the dictionaries - */ -template +template class RangeDictionarySource : public DictionarySourceBase { public: - using Key = UInt64; - RangeDictionarySource(RangeDictionarySourceData data_, size_t max_block_size); + RangeDictionarySource(RangeDictionarySourceData data_, size_t max_block_size); String getName() const override { return "RangeDictionarySource"; } protected: Block getBlock(size_t start, size_t length) const override; - RangeDictionarySourceData data; + RangeDictionarySourceData data; }; -template -RangeDictionarySource::RangeDictionarySource(RangeDictionarySourceData data_, size_t max_block_size) +template +RangeDictionarySource::RangeDictionarySource(RangeDictionarySourceData data_, size_t max_block_size) : DictionarySourceBase(data_.getBlock(0, 0), data_.getNumRows(), max_block_size) , data(std::move(data_)) { } -template -Block RangeDictionarySource::getBlock(size_t start, size_t length) const +template +Block RangeDictionarySource::getBlock(size_t start, size_t length) const { return data.getBlock(start, length); } diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index bbd70b51437..19a7696765f 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -10,7 +10,8 @@ namespace { -using RangeStorageType = DB::RangeHashedDictionary::RangeStorageType; + +using RangeStorageType = DB::RangeStorageType; // Null values mean that specified boundary, either min or max is not set on range. 
// To simplify comparison, null value of min bound should be bigger than any other value, @@ -25,7 +26,7 @@ RangeStorageType getColumnIntValueOrDefault(const DB::IColumn & column, size_t i return default_value; const RangeStorageType result = static_cast(column.getInt(index)); - if (isDate && !DB::RangeHashedDictionary::Range::isCorrectDate(result)) + if (isDate && !DB::Range::isCorrectDate(result)) return default_value; return result; @@ -54,23 +55,23 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } -bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date) +bool Range::isCorrectDate(const RangeStorageType & date) { return 0 < date && date <= DATE_LUT_MAX_DAY_NUM; } -bool RangeHashedDictionary::Range::contains(const RangeStorageType & value) const +bool Range::contains(const RangeStorageType & value) const { return left <= value && value <= right; } -static bool operator<(const RangeHashedDictionary::Range & left, const RangeHashedDictionary::Range & right) +static bool operator<(const Range & left, const Range & right) { return std::tie(left.left, left.right) < std::tie(right.left, right.right); } - -RangeHashedDictionary::RangeHashedDictionary( +template +RangeHashedDictionary::RangeHashedDictionary( const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, @@ -87,7 +88,8 @@ RangeHashedDictionary::RangeHashedDictionary( calculateBytesAllocated(); } -ColumnPtr RangeHashedDictionary::getColumn( +template +ColumnPtr RangeHashedDictionary::getColumn( const std::string & attribute_name, const DataTypePtr & result_type, const Columns & key_columns, @@ -96,20 +98,18 @@ ColumnPtr RangeHashedDictionary::getColumn( { ColumnPtr result; - const auto & attribute = getAttribute(attribute_name); const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type); - - auto keys_size = key_columns.front()->size(); + const size_t attribute_index = 
dict_struct.attribute_name_to_index.find(attribute_name)->second; + const auto & attribute = attributes[attribute_index]; /// Cast second column to storage type Columns modified_key_columns = key_columns; - - auto range_storage_column = key_columns[1]; - ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""}; - + auto range_storage_column = key_columns.back(); + ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""}; auto range_column_storage_type = std::make_shared(); - modified_key_columns[1] = castColumnAccurate(column_to_cast, range_column_storage_type); + modified_key_columns.back() = castColumnAccurate(column_to_cast, range_column_storage_type); + size_t keys_size = key_columns.front()->size(); bool is_attribute_nullable = attribute.is_nullable; ColumnUInt8::MutablePtr col_null_map_to; @@ -204,24 +204,26 @@ ColumnPtr RangeHashedDictionary::getColumn( return result; } -ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const +template +ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const { - auto range_storage_column = key_columns[1]; - ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""}; - auto range_column_storage_type = std::make_shared(); + auto range_storage_column = key_columns.back(); + ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""}; /* the range column is taken from the back, so its type must come from the back too */ auto range_column_updated = castColumnAccurate(column_to_cast, range_column_storage_type); - - PaddedPODArray key_backup_storage; PaddedPODArray range_backup_storage; - - const PaddedPODArray & ids = getColumnVectorData(this, key_columns[0], key_backup_storage); const PaddedPODArray & dates = getColumnVectorData(this, range_column_updated, range_backup_storage); + auto 
key_columns_copy = key_columns; + key_columns_copy.pop_back(); + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + const auto & attribute = attributes.front(); - ColumnUInt8::Ptr result; - + auto result = ColumnUInt8::create(keys_size); + auto & out = result->getData(); size_t keys_found = 0; auto type_call = [&](const auto & dictionary_attribute_type) @@ -229,58 +231,48 @@ ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns & key_columns, con using Type = std::decay_t; using AttributeType = typename Type::AttributeType; using ValueType = DictionaryValueType; - result = hasKeysImpl(attribute, ids, dates, keys_found); + + const auto & collection = std::get>(attribute.maps); + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + const auto key = keys_extractor.extractCurrentKey(); + const auto it = collection.find(key); + + if (it) + { + const auto date = dates[key_index]; + const auto & ranges_and_values = it->getMapped(); + const auto val_it = std::find_if( + std::begin(ranges_and_values), + std::end(ranges_and_values), + [date](const Value & v) + { + return v.range.contains(date); + }); + + out[key_index] = val_it != std::end(ranges_and_values); + keys_found += out[key_index]; + } + else + { + out[key_index] = false; + } + + keys_extractor.rollbackCurrentKey(); + } }; callOnDictionaryAttributeType(attribute.type, type_call); - query_count.fetch_add(ids.size(), std::memory_order_relaxed); + query_count.fetch_add(keys_size, std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); return result; } -template -ColumnUInt8::Ptr RangeHashedDictionary::hasKeysImpl( - const Attribute & attribute, - const PaddedPODArray & ids, - const PaddedPODArray & dates, - size_t & keys_found) const -{ - auto result = ColumnUInt8::create(ids.size()); - auto& out = result->getData(); - 
- const auto & attr = *std::get>(attribute.maps); - - keys_found = 0; - - for (const auto row : collections::range(0, ids.size())) - { - const auto it = attr.find(ids[row]); - - if (it) - { - const auto date = dates[row]; - const auto & ranges_and_values = it->getMapped(); - const auto val_it = std::find_if( - std::begin(ranges_and_values), - std::end(ranges_and_values), - [date](const Value & v) - { - return v.range.contains(date); - }); - - out[row] = val_it != std::end(ranges_and_values); - keys_found += out[row]; - } - else - out[row] = false; - } - - return result; -} - -void RangeHashedDictionary::createAttributes() +template +void RangeHashedDictionary::createAttributes() { const auto size = dict_struct.attributes.size(); attributes.reserve(size); @@ -296,7 +288,8 @@ void RangeHashedDictionary::createAttributes() } } -void RangeHashedDictionary::loadData() +template +void RangeHashedDictionary::loadData() { QueryPipeline pipeline; pipeline.init(source_ptr->loadAll()); @@ -305,38 +298,57 @@ void RangeHashedDictionary::loadData() Block block; while (executor.pull(block)) { - const auto & id_column = *block.safeGetByPosition(0).column; + size_t skip_keys_size_offset = dict_struct.getKeysSize(); + + Columns key_columns; + key_columns.reserve(skip_keys_size_offset); + + /// Split into keys columns and attribute columns + for (size_t i = 0; i < skip_keys_size_offset; ++i) + key_columns.emplace_back(block.safeGetByPosition(i).column); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + + element_count += keys_size; // Support old behaviour, where invalid date means 'open range'. 
const bool is_date = isDate(dict_struct.range_min->type); - const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(1).column); - const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(2).column); + const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset).column); + const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset + 1).column); - element_count += id_column.size(); + skip_keys_size_offset += 2; - for (const auto attribute_idx : collections::range(0, attributes.size())) + for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) { - const auto & attribute_column = *block.safeGetByPosition(attribute_idx + 3).column; - auto & attribute = attributes[attribute_idx]; + const auto & attribute_column = *block.safeGetByPosition(attribute_index + skip_keys_size_offset).column; + auto & attribute = attributes[attribute_index]; - for (const auto row_idx : collections::range(0, id_column.size())) + for (size_t key_index = 0; key_index < keys_size; ++key_index) { + auto key = keys_extractor.extractCurrentKey(); + RangeStorageType lower_bound; RangeStorageType upper_bound; if (is_date) { - lower_bound = getColumnIntValueOrDefault(min_range_column, row_idx, is_date, 0); - upper_bound = getColumnIntValueOrDefault(max_range_column, row_idx, is_date, DATE_LUT_MAX_DAY_NUM + 1); + lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, 0); + upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, DATE_LUT_MAX_DAY_NUM + 1); } else { - lower_bound = getColumnIntValueOrDefault(min_range_column, row_idx, is_date, RANGE_MIN_NULL_VALUE); - upper_bound = getColumnIntValueOrDefault(max_range_column, row_idx, is_date, RANGE_MAX_NULL_VALUE); + lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, RANGE_MIN_NULL_VALUE); + upper_bound = 
getColumnIntValueOrDefault(max_range_column, key_index, is_date, RANGE_MAX_NULL_VALUE); } - setAttributeValue(attribute, id_column.getUInt(row_idx), Range{lower_bound, upper_bound}, attribute_column[row_idx]); + if constexpr (std::is_same_v) + key = copyKeyInArena(key); + + setAttributeValue(attribute, key, Range{lower_bound, upper_bound}, attribute_column[key_index]); + keys_extractor.rollbackCurrentKey(); } } } @@ -346,22 +358,8 @@ void RangeHashedDictionary::loadData() "{}: dictionary source is empty and 'require_nonempty' property is set."); } -template -void RangeHashedDictionary::addAttributeSize(const Attribute & attribute) -{ - const auto & map_ref = std::get>(attribute.maps); - bytes_allocated += sizeof(Collection) + map_ref->getBufferSizeInBytes(); - bucket_count = map_ref->getBufferSizeInCells(); -} - -template <> -void RangeHashedDictionary::addAttributeSize(const Attribute & attribute) -{ - addAttributeSize(attribute); - bytes_allocated += sizeof(Arena) + attribute.string_arena->size(); -} - -void RangeHashedDictionary::calculateBytesAllocated() +template +void RangeHashedDictionary::calculateBytesAllocated() { bytes_allocated += attributes.size() * sizeof(attributes.front()); @@ -371,14 +369,25 @@ void RangeHashedDictionary::calculateBytesAllocated() { using Type = std::decay_t; using AttributeType = typename Type::AttributeType; - addAttributeSize(attribute); + using ValueType = DictionaryValueType; + + const auto & collection = std::get>(attribute.maps); + bytes_allocated += sizeof(CollectionType) + collection.getBufferSizeInBytes(); + bucket_count = collection.getBufferSizeInCells(); + + if constexpr (std::is_same_v) + bytes_allocated += sizeof(Arena) + attribute.string_arena->size(); }; callOnDictionaryAttributeType(attribute.type, type_call); } + + if constexpr (dictionary_key_type == DictionaryKeyType::complex) + bytes_allocated += complex_key_arena.size(); } -RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const 
DictionaryAttribute & dictionary_attribute) +template +typename RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const DictionaryAttribute & dictionary_attribute) { Attribute attribute{dictionary_attribute.underlying_type, dictionary_attribute.is_nullable, {}, {}}; @@ -391,7 +400,7 @@ RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const Di if constexpr (std::is_same_v) attribute.string_arena = std::make_unique(); - attribute.maps = std::make_unique>(); + attribute.maps = CollectionType(); }; callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call); @@ -399,29 +408,35 @@ RangeHashedDictionary::Attribute RangeHashedDictionary::createAttribute(const Di return attribute; } +template template -void RangeHashedDictionary::getItemsImpl( +void RangeHashedDictionary::getItemsImpl( const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const { - PaddedPODArray key_backup_storage; - PaddedPODArray range_backup_storage; - - const PaddedPODArray & ids = getColumnVectorData(this, key_columns[0], key_backup_storage); - const PaddedPODArray & dates = getColumnVectorData(this, key_columns[1], range_backup_storage); - - const auto & attr = *std::get>(attribute.maps); + const auto & collection = std::get>(attribute.maps); size_t keys_found = 0; - for (const auto row : collections::range(0, ids.size())) + PaddedPODArray range_backup_storage; + const auto & dates = getColumnVectorData(this, key_columns.back(), range_backup_storage); + + auto key_columns_copy = key_columns; + key_columns_copy.pop_back(); + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + + for (size_t key_index = 0; key_index < keys_size; ++key_index) { - const auto it = attr.find(ids[row]); + auto key = 
keys_extractor.extractCurrentKey(); + const auto it = collection.find(key); + if (it) { - const auto date = dates[row]; + const auto date = dates[key_index]; const auto & ranges_and_values = it->getMapped(); const auto val_it = std::find_if( std::begin(ranges_and_values), @@ -439,35 +454,38 @@ void RangeHashedDictionary::getItemsImpl( if constexpr (is_nullable) { if (value.has_value()) - set_value(row, *value, false); + set_value(key_index, *value, false); else - set_value(row, default_value_extractor[row], true); + set_value(key_index, default_value_extractor[key_index], true); } else { - set_value(row, *value, false); + set_value(key_index, *value, false); } + keys_extractor.rollbackCurrentKey(); continue; } } if constexpr (is_nullable) - set_value(row, default_value_extractor[row], default_value_extractor.isNullAt(row)); + set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index)); else - set_value(row, default_value_extractor[row], false); + set_value(key_index, default_value_extractor[key_index], false); + + keys_extractor.rollbackCurrentKey(); } - query_count.fetch_add(ids.size(), std::memory_order_relaxed); + query_count.fetch_add(keys_size, std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); } - +template template -void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const UInt64 id, const Range & range, const Field & value) +void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value) { using ValueType = std::conditional_t, StringRef, T>; - auto & map = *std::get>(attribute.maps); + auto & collection = std::get>(attribute.maps); Value value_to_insert; @@ -490,61 +508,47 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const U } } - const auto it = map.find(id); + const auto it = collection.find(key); if (it) { auto & values = it->getMapped(); - const auto insert_it 
- = std::lower_bound(std::begin(values), std::end(values), range, [](const Value & lhs, const Range & rhs_range) - { - return lhs.range < rhs_range; - }); + const auto insert_it = std::lower_bound( + std::begin(values), + std::end(values), + range, + [](const Value & lhs, const Range & rhs_range) + { + return lhs.range < rhs_range; + }); values.insert(insert_it, std::move(value_to_insert)); } else - map.insert({id, Values{std::move(value_to_insert)}}); + { + collection.insert({key, Values{std::move(value_to_insert)}}); + } } -void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const UInt64 id, const Range & range, const Field & value) +template +void RangeHashedDictionary::setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value) { auto type_call = [&](const auto &dictionary_attribute_type) { using Type = std::decay_t; using AttributeType = typename Type::AttributeType; - setAttributeValueImpl(attribute, id, range, value); + setAttributeValueImpl(attribute, key, range, value); }; callOnDictionaryAttributeType(attribute.type, type_call); } -const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttribute(const std::string & attribute_name) const -{ - const auto it = attribute_index_by_name.find(attribute_name); - if (it == std::end(attribute_index_by_name)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: no such attribute '{}'", full_name, attribute_name); - - return attributes[it->second]; -} - -const RangeHashedDictionary::Attribute & -RangeHashedDictionary::getAttributeWithType(const std::string & attribute_name, const AttributeUnderlyingType type) const -{ - const auto & attribute = getAttribute(attribute_name); - if (attribute.type != type) - throw Exception(ErrorCodes::TYPE_MISMATCH, "attribute {} has type {}", - attribute_name, - toString(attribute.type)); - - return attribute; -} - +template template -void RangeHashedDictionary::getIdsAndDates( - PaddedPODArray & ids, +void 
RangeHashedDictionary::getKeysAndDates( + PaddedPODArray & keys, PaddedPODArray & start_dates, PaddedPODArray & end_dates) const { @@ -556,32 +560,33 @@ void RangeHashedDictionary::getIdsAndDates( using AttributeType = typename Type::AttributeType; using ValueType = DictionaryValueType; - getIdsAndDates(attribute, ids, start_dates, end_dates); + getKeysAndDates(attribute, keys, start_dates, end_dates); }; callOnDictionaryAttributeType(attribute.type, type_call); } +template template -void RangeHashedDictionary::getIdsAndDates( +void RangeHashedDictionary::getKeysAndDates( const Attribute & attribute, - PaddedPODArray & ids, + PaddedPODArray & keys, PaddedPODArray & start_dates, PaddedPODArray & end_dates) const { - const HashMap> & attr = *std::get>(attribute.maps); + const auto & collection = std::get>(attribute.maps); - ids.reserve(attr.size()); - start_dates.reserve(attr.size()); - end_dates.reserve(attr.size()); + keys.reserve(collection.size()); + start_dates.reserve(collection.size()); + end_dates.reserve(collection.size()); const bool is_date = isDate(dict_struct.range_min->type); - for (const auto & key : attr) + for (const auto & key : collection) { for (const auto & value : key.getMapped()) { - ids.push_back(key.getKey()); + keys.push_back(key.getKey()); start_dates.push_back(value.range.left); end_dates.push_back(value.range.right); @@ -592,22 +597,23 @@ void RangeHashedDictionary::getIdsAndDates( } } - +template template -Pipe RangeHashedDictionary::readImpl(const Names & column_names, size_t max_block_size) const +Pipe RangeHashedDictionary::readImpl(const Names & column_names, size_t max_block_size) const { - PaddedPODArray ids; + PaddedPODArray keys; PaddedPODArray start_dates; PaddedPODArray end_dates; - getIdsAndDates(ids, start_dates, end_dates); + getKeysAndDates(keys, start_dates, end_dates); - using RangeDictionarySourceType = RangeDictionarySource; + static constexpr RangeDictionaryType range_dictionary_type = (dictionary_key_type == 
DictionaryKeyType::simple) ? RangeDictionaryType::simple : RangeDictionaryType::complex; + using RangeDictionarySourceType = RangeDictionarySource; auto source = std::make_shared( - RangeDictionarySourceData( + RangeDictionarySourceData( shared_from_this(), column_names, - std::move(ids), + std::move(keys), std::move(start_dates), std::move(end_dates)), max_block_size); @@ -615,10 +621,21 @@ Pipe RangeHashedDictionary::readImpl(const Names & column_names, size_t max_bloc return Pipe(source); } +template +StringRef RangeHashedDictionary::copyKeyInArena(StringRef key) +{ + size_t key_size = key.size; + char * place_for_key = complex_key_arena.alloc(key_size); + memcpy(reinterpret_cast(place_for_key), reinterpret_cast(key.data), key_size); + StringRef updated_key{place_for_key, key_size}; + return updated_key; +} + +template struct RangeHashedDictionaryCallGetSourceImpl { Pipe pipe; - const RangeHashedDictionary * dict; + const RangeHashedDictionary * dict; const Names * column_names; size_t max_block_size; @@ -627,15 +644,16 @@ struct RangeHashedDictionaryCallGetSourceImpl { const auto & type = dict->dict_struct.range_min->type; if (pipe.empty() && dynamic_cast *>(type.get())) - pipe = dict->readImpl(*column_names, max_block_size); + pipe = dict->template readImpl(*column_names, max_block_size); } }; -Pipe RangeHashedDictionary::read(const Names & column_names, size_t max_block_size) const +template +Pipe RangeHashedDictionary::read(const Names & column_names, size_t max_block_size) const { using ListType = TypeList; - RangeHashedDictionaryCallGetSourceImpl callable; + RangeHashedDictionaryCallGetSourceImpl callable; callable.dict = this; callable.column_names = &column_names; callable.max_block_size = max_block_size; @@ -653,7 +671,7 @@ Pipe RangeHashedDictionary::read(const Names & column_names, size_t max_block_si void registerDictionaryRangeHashed(DictionaryFactory & factory) { - auto create_layout = [=](const std::string & full_name, + auto create_layout_simple 
= [=](const std::string & full_name, const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, @@ -672,9 +690,32 @@ void registerDictionaryRangeHashed(DictionaryFactory & factory) const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); - return std::make_unique(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); }; - factory.registerLayout("range_hashed", create_layout, false); + factory.registerLayout("range_hashed", create_layout_simple, false); + + auto create_layout_complex = [=](const std::string & full_name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr, + ContextPtr /* context */, + bool /*created_from_ddl*/) -> DictionaryPtr + { + if (dict_struct.id) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is not supported for dictionary of layout 'complex_key_range_hashed'"); + + if (!dict_struct.range_min || !dict_struct.range_max) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "{}: dictionary of layout 'complex_key_range_hashed' requires .structure.range_min and .structure.range_max", + full_name); + + const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); + }; + factory.registerLayout("complex_key_range_hashed", create_layout_complex, true); } } diff --git 
a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 13fa6ad570f..f9b09189265 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -16,9 +16,25 @@ namespace DB { + +using RangeStorageType = Int64; + +struct Range +{ + RangeStorageType left; + RangeStorageType right; + + static bool isCorrectDate(const RangeStorageType & date); + bool contains(const RangeStorageType & value) const; +}; + +template class RangeHashedDictionary final : public IDictionary { public: + using KeyType = std::conditional_t; + static_assert(dictionary_key_type != DictionaryKeyType::range, "Range key type is not supported by hashed dictionary"); + RangeHashedDictionary( const StorageID & dict_id_, const DictionaryStructure & dict_struct_, @@ -59,7 +75,7 @@ public: bool isInjective(const std::string & attribute_name) const override { - return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; + return dict_struct.getAttribute(attribute_name).injective; } DictionaryKeyType getKeyType() const override { return DictionaryKeyType::range; } @@ -73,19 +89,8 @@ public: ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override; - using RangeStorageType = Int64; - Pipe read(const Names & column_names, size_t max_block_size) const override; - struct Range - { - RangeStorageType left; - RangeStorageType right; - - static bool isCorrectDate(const RangeStorageType & date); - bool contains(const RangeStorageType & value) const; - }; - private: template struct Value final @@ -96,10 +101,12 @@ private: template using Values = std::vector>; - template - using Collection = HashMap>; - template - using Ptr = std::unique_ptr>; + + template + using CollectionType = std::conditional_t< + dictionary_key_type == DictionaryKeyType::simple, + HashMap>, + HashMapWithSavedHash, DefaultHash>>; struct Attribute final { @@ -108,27 +115,27 @@ private: bool 
is_nullable; std::variant< - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr, - Ptr> + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType, + CollectionType> maps; std::unique_ptr string_arena; }; @@ -137,9 +144,6 @@ private: void loadData(); - template - void addAttributeSize(const Attribute & attribute); - void calculateBytesAllocated(); static Attribute createAttribute(const DictionaryAttribute & dictionary_attribute); @@ -151,35 +155,30 @@ private: ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const; - template - ColumnUInt8::Ptr hasKeysImpl( - const Attribute & attribute, - const PaddedPODArray & ids, - const PaddedPODArray & dates, - size_t & keys_found) const; - template - static void setAttributeValueImpl(Attribute & attribute, const UInt64 id, const Range & range, const Field & value); + static void setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value); - static void setAttributeValue(Attribute & attribute, const UInt64 id, const Range & range, const Field & value); - - const Attribute & getAttribute(const std::string & attribute_name) const; - - const Attribute & getAttributeWithType(const std::string & name, const AttributeUnderlyingType type) const; + static void setAttributeValue(Attribute & attribute, KeyType key, const Range & range, const Field & value); template - void getIdsAndDates(PaddedPODArray & ids, PaddedPODArray & start_dates, PaddedPODArray & end_dates) const; + void getKeysAndDates( + PaddedPODArray & keys, + PaddedPODArray & start_dates, + PaddedPODArray & end_dates) const; 
template - void getIdsAndDates( + void getKeysAndDates( const Attribute & attribute, - PaddedPODArray & ids, + PaddedPODArray & keys, PaddedPODArray & start_dates, PaddedPODArray & end_dates) const; template Pipe readImpl(const Names & column_names, size_t max_block_size) const; + StringRef copyKeyInArena(StringRef key); + + template friend struct RangeHashedDictionaryCallGetSourceImpl; const DictionaryStructure dict_struct; @@ -189,6 +188,7 @@ private: std::map attribute_index_by_name; std::vector attributes; + Arena complex_key_arena; size_t bytes_allocated = 0; size_t element_count = 0; diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.h b/src/Dictionaries/getDictionaryConfigurationFromAST.h index de8659e4d7b..b464fdf1d8c 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.h +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.h @@ -6,6 +6,7 @@ namespace DB { + using DictionaryConfigurationPtr = Poco::AutoPtr; /// Convert dictionary AST to Poco::AbstractConfiguration @@ -13,4 +14,5 @@ using DictionaryConfigurationPtr = Poco::AutoPtrgetName()); + /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys. 
+ ColumnPtr key_column = key_column_with_type.column->convertToFullColumnIfConst(); + DataTypePtr key_column_type = key_column_with_type.type; + + Columns key_columns; + DataTypes key_types; + + if (isTuple(key_column_type)) + { + key_columns = assert_cast(*key_column).getColumnsCopy(); + key_types = assert_cast(*key_column_type).getElements(); + } + else + { + key_columns = {key_column, range_col}; + key_types = {std::make_shared(), range_col_type}; + } return dictionary->hasKeys({key_column, range_col}, {std::make_shared(), range_col_type}); } @@ -487,18 +498,29 @@ public: } else if (dictionary_key_type == DictionaryKeyType::range) { - if (!WhichDataType(key_col_with_type.type).isUInt64()) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument of function {} must be UInt64 when dictionary is range. Actual type {}.", - getName(), - key_col_with_type.type->getName()); + /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys. 
+ ColumnPtr key_column = key_col_with_type.column->convertToFullColumnIfConst(); + DataTypePtr key_column_type = key_col_with_type.type; + + Columns key_columns; + DataTypes key_types; + + if (isTuple(key_column_type)) + { + key_columns = assert_cast(*key_column).getColumnsCopy(); + key_types = assert_cast(*key_column_type).getElements(); + } + else + { + key_columns = {key_column, range_col}; + key_types = {std::make_shared(), range_col_type}; + } result = executeDictionaryRequest( dictionary, attribute_names, - {key_column, range_col}, - {std::make_shared(), range_col_type}, + key_columns, + key_types, result_type, default_cols); } diff --git a/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.reference b/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.reference new file mode 100644 index 00000000000..dfa00d0027c --- /dev/null +++ b/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.reference @@ -0,0 +1,58 @@ +Dictionary not nullable +dictGet +0.2 +0.2 +0.2 +0.2 +0.4 +dictHas +1 +1 +1 +0 +select columns from dictionary +allColumns +2019-05-05 2019-05-20 1 1 0.33 +2019-05-21 2019-05-30 1 1 0.42 +2019-05-21 2019-05-30 2 2 0.46 +noColumns +1 +1 +1 +onlySpecificColumns +1 2019-05-05 0.33 +1 2019-05-21 0.42 +2 2019-05-21 0.46 +onlySpecificColumn +0.33 +0.42 +0.46 +Dictionary nullable +dictGet +0.2 +0.2 +0.2 +0.2 +0.4 +dictHas +1 +1 +1 +0 +select columns from dictionary +allColumns +2019-05-05 2019-05-20 1 1 0.33 +2019-05-21 2019-05-30 1 1 0.42 +2019-05-21 2019-05-30 2 2 \N +noColumns +1 +1 +1 +onlySpecificColumns +1 2019-05-05 0.33 +1 2019-05-21 0.42 +2 2019-05-21 \N +onlySpecificColumn +0.33 +0.42 +\N diff --git a/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql b/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql new file mode 100644 index 00000000000..ba6ed30b609 --- /dev/null +++ b/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql @@ -0,0 +1,109 
@@ +DROP TABLE IF EXISTS date_table; +CREATE TABLE date_table +( + CountryID UInt64, + CountryKey String, + StartDate Date, + EndDate Date, + Tax Float64 +) +ENGINE = MergeTree() +ORDER BY CountryID; + +INSERT INTO date_table VALUES(1, '1', toDate('2019-05-05'), toDate('2019-05-20'), 0.33); +INSERT INTO date_table VALUES(1, '1', toDate('2019-05-21'), toDate('2019-05-30'), 0.42); +INSERT INTO date_table VALUES(2, '2', toDate('2019-05-21'), toDate('2019-05-30'), 0.46); + +DROP DICTIONARY IF EXISTS range_dictionary; +CREATE DICTIONARY range_dictionary +( + CountryID UInt64, + CountryKey String, + StartDate Date, + EndDate Date, + Tax Float64 DEFAULT 0.2 +) +PRIMARY KEY CountryID, CountryKey +SOURCE(CLICKHOUSE(TABLE 'date_table')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(COMPLEX_KEY_RANGE_HASHED()) +RANGE(MIN StartDate MAX EndDate); + +SELECT 'Dictionary not nullable'; +SELECT 'dictGet'; +SELECT dictGet('range_dictionary', 'Tax', (toUInt64(1), '1'), toDate('2019-05-15')); +SELECT dictGet('range_dictionary', 'Tax', (toUInt64(1), '1'), toDate('2019-05-29')); +SELECT dictGet('range_dictionary', 'Tax', (toUInt64(2), '2'), toDate('2019-05-29')); +SELECT dictGet('range_dictionary', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31')); +SELECT dictGetOrDefault('range_dictionary', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'), 0.4); +SELECT 'dictHas'; +SELECT dictHas('range_dictionary', (toUInt64(1), '1'), toDate('2019-05-15')); +SELECT dictHas('range_dictionary', (toUInt64(1), '1'), toDate('2019-05-29')); +SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-29')); +SELECT dictHas('range_dictionary', (toUInt64(2), '2'), toDate('2019-05-31')); +SELECT 'select columns from dictionary'; +SELECT 'allColumns'; +SELECT * FROM range_dictionary; +SELECT 'noColumns'; +SELECT 1 FROM range_dictionary; +SELECT 'onlySpecificColumns'; +SELECT CountryID, StartDate, Tax FROM range_dictionary; +SELECT 'onlySpecificColumn'; +SELECT Tax FROM range_dictionary; + +DROP TABLE date_table; 
+DROP DICTIONARY range_dictionary; + +CREATE TABLE date_table +( + CountryID UInt64, + CountryKey String, + StartDate Date, + EndDate Date, + Tax Nullable(Float64) +) +ENGINE = MergeTree() +ORDER BY CountryID; + +INSERT INTO date_table VALUES(1, '1', toDate('2019-05-05'), toDate('2019-05-20'), 0.33); +INSERT INTO date_table VALUES(1, '1', toDate('2019-05-21'), toDate('2019-05-30'), 0.42); +INSERT INTO date_table VALUES(2, '2', toDate('2019-05-21'), toDate('2019-05-30'), NULL); + +CREATE DICTIONARY range_dictionary_nullable +( + CountryID UInt64, + CountryKey String, + StartDate Date, + EndDate Date, + Tax Nullable(Float64) DEFAULT 0.2 +) +PRIMARY KEY CountryID, CountryKey +SOURCE(CLICKHOUSE(TABLE 'date_table')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(COMPLEX_KEY_RANGE_HASHED()) +RANGE(MIN StartDate MAX EndDate); + +SELECT 'Dictionary nullable'; +SELECT 'dictGet'; +SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(1), '1'), toDate('2019-05-15')); +SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(1), '1'), toDate('2019-05-29')); +SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(2), '2'), toDate('2019-05-29')); +SELECT dictGet('range_dictionary_nullable', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31')); +SELECT dictGetOrDefault('range_dictionary_nullable', 'Tax', (toUInt64(2), '2'), toDate('2019-05-31'), 0.4); +SELECT 'dictHas'; +SELECT dictHas('range_dictionary_nullable', (toUInt64(1), '1'), toDate('2019-05-15')); +SELECT dictHas('range_dictionary_nullable', (toUInt64(1), '1'), toDate('2019-05-29')); +SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05-29')); +SELECT dictHas('range_dictionary_nullable', (toUInt64(2), '2'), toDate('2019-05-31')); +SELECT 'select columns from dictionary'; +SELECT 'allColumns'; +SELECT * FROM range_dictionary_nullable; +SELECT 'noColumns'; +SELECT 1 FROM range_dictionary_nullable; +SELECT 'onlySpecificColumns'; +SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable; 
+SELECT 'onlySpecificColumn'; +SELECT Tax FROM range_dictionary_nullable; + +DROP TABLE date_table; +DROP DICTIONARY range_dictionary_nullable;