From 3e2d615e6245f735fcd19bed6f5e32baa22982fa Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sun, 3 Jan 2021 13:07:21 +0300 Subject: [PATCH] Added Nullable support for HashedDictionary --- src/Dictionaries/DictionaryStructure.cpp | 2 + src/Dictionaries/FlatDictionary.cpp | 9 +- src/Dictionaries/FlatDictionary.h | 3 +- src/Dictionaries/HashedDictionary.cpp | 118 ++++++++++++++--------- src/Dictionaries/HashedDictionary.h | 8 +- 5 files changed, 85 insertions(+), 55 deletions(-) diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index 3ccfdd49a27..df785bcb550 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -70,6 +70,8 @@ AttributeUnderlyingType getAttributeUnderlyingType(const DataTypePtr & type) case TypeIndex::DateTime: return AttributeUnderlyingType::utUInt32; case TypeIndex::DateTime64: return AttributeUnderlyingType::utUInt64; + case TypeIndex::UUID: return AttributeUnderlyingType::utUInt128; + case TypeIndex::String: return AttributeUnderlyingType::utString; // Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries. diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index a7346fb0ad7..9cbbbbd4f83 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -235,13 +235,12 @@ ColumnPtr FlatDictionary::getColumn( callOnDictionaryAttributeType(attribute.type, type_call); - /// TODO: Fix if (attribute.is_nullable) { for (size_t row = 0; row < ids.size(); ++row) { auto id = ids[row]; - if (attribute.nullable_set->find(id) != attribute.nullable_set->end()) + if (attribute.nullable_set->find(id) != nullptr) { (*vec_null_map_to)[row] = true; } @@ -542,11 +541,7 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, cons } else { - auto find_iter = attribute.nullable_set->find(id); - if (find_iter != attribute.nullable_set->end()) - { - attribute.nullable_set->erase(find_iter); - } + attribute.nullable_set->erase(id); } } diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index 1796b721129..8de90ca0c15 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -86,7 +87,7 @@ private: template using ContainerType = PaddedPODArray; - using NullableSet = std::set; + using NullableSet = HashSet>; struct Attribute final { diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 7cb116c7bc9..133d9c0db99 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace { @@ -139,8 +140,18 @@ ColumnPtr HashedDictionary::getColumn( PaddedPODArray backup_storage; const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage); + auto size = ids.size(); + const auto & attribute = getAttribute(attribute_name); + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container * vec_null_map_to = nullptr; + if (attribute.is_nullable) + { + col_null_map_to = ColumnUInt8::create(size, false); + vec_null_map_to = &col_null_map_to->getData(); + } + /// TODO: Check that attribute type is same as result type /// TODO: Check if const will work as expected @@ -149,8 +160,6 @@ ColumnPtr HashedDictionary::getColumn( using Type = std::decay_t; using AttributeType = typename Type::AttributeType; - auto size = ids.size(); - if constexpr (std::is_same_v) { auto column_string = ColumnString::create(); @@ -249,6 +258,20 @@ ColumnPtr HashedDictionary::getColumn( callOnDictionaryAttributeType(attribute.type, type_call); + if (attribute.is_nullable) + { + for (size_t row = 0; row < ids.size(); ++row) + { + auto id = ids[row]; + if (attribute.nullable_set->find(id) != nullptr) + { + (*vec_null_map_to)[row] = true; + } + } + + result = ColumnNullable::create(result, std::move(col_null_map_to)); + } + return result; } @@ -286,7 +309,7 @@ void HashedDictionary::createAttributes() for (const auto & attribute : dict_struct.attributes) { attribute_index_by_name.emplace(attribute.name, attributes.size()); - attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); + attributes.push_back(createAttribute(attribute, attribute.null_value)); if (attribute.hierarchical) { @@ -549,9 +572,10 @@ void HashedDictionary::createAttributeImpl(Attribute & attribute, const attribute.sparse_maps = std::make_unique>(); } -HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) +HashedDictionary::Attribute HashedDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value) { - Attribute attr{type, {}, {}, {}, {}}; + auto nullable_set = attribute.is_nullable ? std::make_unique() : nullptr; + Attribute attr{attribute.underlying_type, attribute.is_nullable, std::move(nullable_set), {}, {}, {}, {}}; auto type_call = [&](const auto &dictionary_attribute_type) { @@ -560,7 +584,7 @@ HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const Attr createAttributeImpl(attr, null_value); }; - callOnDictionaryAttributeType(type, type_call); + callOnDictionaryAttributeType(attribute.underlying_type, type_call); return attr; } @@ -605,58 +629,51 @@ bool HashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id } } +template <> +bool HashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const String value) +{ + const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size()); + if (!sparse) + { + auto & map = *std::get>(attribute.maps); + return map.insert({id, StringRef{string_in_arena, value.size()}}).second; + } + else + { + auto & map = *std::get>(attribute.sparse_maps); + return map.insert({id, StringRef{string_in_arena, value.size()}}).second; + } +} + bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value) { - switch (attribute.type) + bool result = false; + + auto type_call = [&](const auto &dictionary_attribute_type) { - case AttributeUnderlyingType::utUInt8: - return setAttributeValueImpl(attribute, id, value.get()); - case AttributeUnderlyingType::utUInt16: - return setAttributeValueImpl(attribute, id, value.get()); - case AttributeUnderlyingType::utUInt32: - return setAttributeValueImpl(attribute, id, value.get()); - case AttributeUnderlyingType::utUInt64: - return setAttributeValueImpl(attribute, id, value.get()); - case AttributeUnderlyingType::utUInt128: - return setAttributeValueImpl(attribute, id, value.get()); - case AttributeUnderlyingType::utInt8: - return setAttributeValueImpl(attribute, id, value.get()); - case AttributeUnderlyingType::utInt16: - return setAttributeValueImpl(attribute, id, value.get()); - case AttributeUnderlyingType::utInt32: - return setAttributeValueImpl(attribute, id, value.get()); - case AttributeUnderlyingType::utInt64: - return setAttributeValueImpl(attribute, id, value.get()); - case AttributeUnderlyingType::utFloat32: - return setAttributeValueImpl(attribute, id, value.get()); - case AttributeUnderlyingType::utFloat64: - return setAttributeValueImpl(attribute, id, value.get()); + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; - case AttributeUnderlyingType::utDecimal32: - return setAttributeValueImpl(attribute, id, value.get()); - case AttributeUnderlyingType::utDecimal64: - return setAttributeValueImpl(attribute, id, value.get()); - case AttributeUnderlyingType::utDecimal128: - return setAttributeValueImpl(attribute, id, value.get()); - - case AttributeUnderlyingType::utString: + if (attribute.is_nullable) { - const auto & string = value.get(); - const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size()); - if (!sparse) + if (value.isNull()) { - auto & map = *std::get>(attribute.maps); - return map.insert({id, StringRef{string_in_arena, string.size()}}).second; + attribute.nullable_set->insert(id); + result = true; + return; } else { - auto & map = *std::get>(attribute.sparse_maps); - return map.insert({id, StringRef{string_in_arena, string.size()}}).second; + attribute.nullable_set->erase(id); } } - } - throw Exception{"Invalid attribute type", ErrorCodes::BAD_ARGUMENTS}; + result = setAttributeValueImpl(attribute, id, value.get>()); + }; + + callOnDictionaryAttributeType(attribute.type, type_call); + + return result; } const HashedDictionary::Attribute & HashedDictionary::getAttribute(const std::string & attribute_name) const @@ -717,7 +734,16 @@ PaddedPODArray HashedDictionary::getIds() const { using Type = std::decay_t; using AttributeType = typename Type::AttributeType; + /// TODO: Check if order is satisfied result = getIds(attribute); + + if (attribute.is_nullable) + { + for (const auto& value: *attribute.nullable_set) + { + result.push_back(value.getKey()); + } + } }; callOnDictionaryAttributeType(attribute.type, type_call); diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index ff64fb29f1f..0f718c8132b 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include "DictionaryStructure.h" @@ -101,9 +102,14 @@ private: template using SparseCollectionPtrType = std::unique_ptr>; + using NullableSet = HashSet>; + struct Attribute final { AttributeUnderlyingType type; + bool is_nullable; + std::unique_ptr nullable_set; + std::variant< UInt8, UInt16, @@ -174,7 +180,7 @@ private: template void createAttributeImpl(Attribute & attribute, const Field & null_value); - Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); + Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value); template void getItemsAttrImpl(