Added Nullable support for HashedDictionary

This commit is contained in:
Maksim Kita 2021-01-03 13:07:21 +03:00 committed by Maksim Kita
parent 7ded8be0d5
commit 3e2d615e62
5 changed files with 85 additions and 55 deletions

View File

@ -70,6 +70,8 @@ AttributeUnderlyingType getAttributeUnderlyingType(const DataTypePtr & type)
case TypeIndex::DateTime: return AttributeUnderlyingType::utUInt32; case TypeIndex::DateTime: return AttributeUnderlyingType::utUInt32;
case TypeIndex::DateTime64: return AttributeUnderlyingType::utUInt64; case TypeIndex::DateTime64: return AttributeUnderlyingType::utUInt64;
case TypeIndex::UUID: return AttributeUnderlyingType::utUInt128;
case TypeIndex::String: return AttributeUnderlyingType::utString; case TypeIndex::String: return AttributeUnderlyingType::utString;
// Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries. // Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.

View File

@ -235,13 +235,12 @@ ColumnPtr FlatDictionary::getColumn(
callOnDictionaryAttributeType(attribute.type, type_call); callOnDictionaryAttributeType(attribute.type, type_call);
/// TODO: Fix
if (attribute.is_nullable) if (attribute.is_nullable)
{ {
for (size_t row = 0; row < ids.size(); ++row) for (size_t row = 0; row < ids.size(); ++row)
{ {
auto id = ids[row]; auto id = ids[row];
if (attribute.nullable_set->find(id) != attribute.nullable_set->end()) if (attribute.nullable_set->find(id) != nullptr)
{ {
(*vec_null_map_to)[row] = true; (*vec_null_map_to)[row] = true;
} }
@ -542,11 +541,7 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, cons
} }
else else
{ {
auto find_iter = attribute.nullable_set->find(id); attribute.nullable_set->erase(id);
if (find_iter != attribute.nullable_set->end())
{
attribute.nullable_set->erase(find_iter);
}
} }
} }

View File

@ -4,6 +4,7 @@
#include <variant> #include <variant>
#include <vector> #include <vector>
#include <optional> #include <optional>
#include <Common/HashTable/HashSet.h>
#include <Columns/ColumnDecimal.h> #include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <Columns/ColumnArray.h> #include <Columns/ColumnArray.h>
@ -86,7 +87,7 @@ private:
template <typename Value> template <typename Value>
using ContainerType = PaddedPODArray<Value>; using ContainerType = PaddedPODArray<Value>;
using NullableSet = std::set<size_t>; using NullableSet = HashSet<Key, DefaultHash<Key>>;
struct Attribute final struct Attribute final
{ {

View File

@ -6,6 +6,7 @@
#include <Core/Defines.h> #include <Core/Defines.h>
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
#include <Columns/ColumnsNumber.h> #include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
namespace namespace
{ {
@ -139,8 +140,18 @@ ColumnPtr HashedDictionary::getColumn(
PaddedPODArray<Key> backup_storage; PaddedPODArray<Key> backup_storage;
const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage); const auto & ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
auto size = ids.size();
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (attribute.is_nullable)
{
col_null_map_to = ColumnUInt8::create(size, false);
vec_null_map_to = &col_null_map_to->getData();
}
/// TODO: Check that attribute type is same as result type /// TODO: Check that attribute type is same as result type
/// TODO: Check if const will work as expected /// TODO: Check if const will work as expected
@ -149,8 +160,6 @@ ColumnPtr HashedDictionary::getColumn(
using Type = std::decay_t<decltype(dictionary_attribute_type)>; using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType; using AttributeType = typename Type::AttributeType;
auto size = ids.size();
if constexpr (std::is_same_v<AttributeType, String>) if constexpr (std::is_same_v<AttributeType, String>)
{ {
auto column_string = ColumnString::create(); auto column_string = ColumnString::create();
@ -249,6 +258,20 @@ ColumnPtr HashedDictionary::getColumn(
callOnDictionaryAttributeType(attribute.type, type_call); callOnDictionaryAttributeType(attribute.type, type_call);
if (attribute.is_nullable)
{
for (size_t row = 0; row < ids.size(); ++row)
{
auto id = ids[row];
if (attribute.nullable_set->find(id) != nullptr)
{
(*vec_null_map_to)[row] = true;
}
}
result = ColumnNullable::create(result, std::move(col_null_map_to));
}
return result; return result;
} }
@ -286,7 +309,7 @@ void HashedDictionary::createAttributes()
for (const auto & attribute : dict_struct.attributes) for (const auto & attribute : dict_struct.attributes)
{ {
attribute_index_by_name.emplace(attribute.name, attributes.size()); attribute_index_by_name.emplace(attribute.name, attributes.size());
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); attributes.push_back(createAttribute(attribute, attribute.null_value));
if (attribute.hierarchical) if (attribute.hierarchical)
{ {
@ -549,9 +572,10 @@ void HashedDictionary::createAttributeImpl<String>(Attribute & attribute, const
attribute.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>(); attribute.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
} }
HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) HashedDictionary::Attribute HashedDictionary::createAttribute(const DictionaryAttribute& attribute, const Field & null_value)
{ {
Attribute attr{type, {}, {}, {}, {}}; auto nullable_set = attribute.is_nullable ? std::make_unique<NullableSet>() : nullptr;
Attribute attr{attribute.underlying_type, attribute.is_nullable, std::move(nullable_set), {}, {}, {}, {}};
auto type_call = [&](const auto &dictionary_attribute_type) auto type_call = [&](const auto &dictionary_attribute_type)
{ {
@ -560,7 +584,7 @@ HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const Attr
createAttributeImpl<AttributeType>(attr, null_value); createAttributeImpl<AttributeType>(attr, null_value);
}; };
callOnDictionaryAttributeType(type, type_call); callOnDictionaryAttributeType(attribute.underlying_type, type_call);
return attr; return attr;
} }
@ -605,58 +629,51 @@ bool HashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id
} }
} }
template <>
bool HashedDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String value)
{
const auto * string_in_arena = attribute.string_arena->insert(value.data(), value.size());
if (!sparse)
{
auto & map = *std::get<CollectionPtrType<StringRef>>(attribute.maps);
return map.insert({id, StringRef{string_in_arena, value.size()}}).second;
}
else
{
auto & map = *std::get<SparseCollectionPtrType<StringRef>>(attribute.sparse_maps);
return map.insert({id, StringRef{string_in_arena, value.size()}}).second;
}
}
bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value) bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
{ {
switch (attribute.type) bool result = false;
auto type_call = [&](const auto &dictionary_attribute_type)
{ {
case AttributeUnderlyingType::utUInt8: using Type = std::decay_t<decltype(dictionary_attribute_type)>;
return setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>()); using AttributeType = typename Type::AttributeType;
case AttributeUnderlyingType::utUInt16:
return setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::utUInt32:
return setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::utUInt64:
return setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::utUInt128:
return setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
case AttributeUnderlyingType::utInt8:
return setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
case AttributeUnderlyingType::utInt16:
return setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
case AttributeUnderlyingType::utInt32:
return setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
case AttributeUnderlyingType::utInt64:
return setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
case AttributeUnderlyingType::utFloat32:
return setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
case AttributeUnderlyingType::utFloat64:
return setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
case AttributeUnderlyingType::utDecimal32: if (attribute.is_nullable)
return setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
case AttributeUnderlyingType::utDecimal64:
return setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
case AttributeUnderlyingType::utDecimal128:
return setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
case AttributeUnderlyingType::utString:
{ {
const auto & string = value.get<String>(); if (value.isNull())
const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
if (!sparse)
{ {
auto & map = *std::get<CollectionPtrType<StringRef>>(attribute.maps); attribute.nullable_set->insert(id);
return map.insert({id, StringRef{string_in_arena, string.size()}}).second; result = true;
return;
} }
else else
{ {
auto & map = *std::get<SparseCollectionPtrType<StringRef>>(attribute.sparse_maps); attribute.nullable_set->erase(id);
return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
} }
} }
}
throw Exception{"Invalid attribute type", ErrorCodes::BAD_ARGUMENTS}; result = setAttributeValueImpl<AttributeType>(attribute, id, value.get<NearestFieldType<AttributeType>>());
};
callOnDictionaryAttributeType(attribute.type, type_call);
return result;
} }
const HashedDictionary::Attribute & HashedDictionary::getAttribute(const std::string & attribute_name) const const HashedDictionary::Attribute & HashedDictionary::getAttribute(const std::string & attribute_name) const
@ -717,7 +734,16 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
{ {
using Type = std::decay_t<decltype(dictionary_attribute_type)>; using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType; using AttributeType = typename Type::AttributeType;
/// TODO: Check if order is satisfied
result = getIds<AttributeType>(attribute); result = getIds<AttributeType>(attribute);
if (attribute.is_nullable)
{
for (const auto& value: *attribute.nullable_set)
{
result.push_back(value.getKey());
}
}
}; };
callOnDictionaryAttributeType(attribute.type, type_call); callOnDictionaryAttributeType(attribute.type, type_call);

View File

@ -7,6 +7,7 @@
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <Core/Block.h> #include <Core/Block.h>
#include <Common/HashTable/HashMap.h> #include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashSet.h>
#include <sparsehash/sparse_hash_map> #include <sparsehash/sparse_hash_map>
#include <ext/range.h> #include <ext/range.h>
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
@ -101,9 +102,14 @@ private:
template <typename Value> template <typename Value>
using SparseCollectionPtrType = std::unique_ptr<SparseCollectionType<Value>>; using SparseCollectionPtrType = std::unique_ptr<SparseCollectionType<Value>>;
using NullableSet = HashSet<Key, DefaultHash<Key>>;
struct Attribute final struct Attribute final
{ {
AttributeUnderlyingType type; AttributeUnderlyingType type;
bool is_nullable;
std::unique_ptr<NullableSet> nullable_set;
std::variant< std::variant<
UInt8, UInt8,
UInt16, UInt16,
@ -174,7 +180,7 @@ private:
template <typename T> template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value); void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); Attribute createAttribute(const DictionaryAttribute& attribute, const Field & null_value);
template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter> template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
void getItemsAttrImpl( void getItemsAttrImpl(