fix nullable case of short circuit in hashed dict

This commit is contained in:
jsc0218 2024-01-24 14:58:27 +00:00
parent a00ce2f2cc
commit 0e178cfab7
3 changed files with 40 additions and 15 deletions

View File

@ -251,11 +251,12 @@ private:
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename AttributeType, typename ValueSetter>
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename NullSetter>
size_t getItemsShortCircuitImpl(
const Attribute & attribute,
DictionaryKeysExtractor<dictionary_key_type> & keys_extractor,
ValueSetter && set_value,
NullSetter && set_null,
IColumn::Filter & default_mask) const;
template <typename GetContainersFunc>
@ -538,10 +539,11 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumnOrDef
{
auto * out = column.get();
keys_found = getItemsShortCircuitImpl<ValueType>(
keys_found = getItemsShortCircuitImpl<ValueType, false>(
attribute,
extractor,
[&](const size_t, const Array & value, bool) { out->insert(value); },
[&](const size_t, const Array & value) { out->insert(value); },
[&](size_t) {},
default_mask);
}
else if constexpr (std::is_same_v<ValueType, StringRef>)
@ -549,20 +551,28 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumnOrDef
auto * out = column.get();
if (is_attribute_nullable)
keys_found = getItemsShortCircuitImpl<ValueType>(
{
keys_found = getItemsShortCircuitImpl<ValueType, true>(
attribute,
extractor,
[&](size_t row, StringRef value, bool is_null)
[&](size_t row, StringRef value)
{
(*vec_null_map_to)[row] = is_null;
(*vec_null_map_to)[row] = false;
out->insertData(value.data, value.size);
},
[&](size_t row)
{
(*vec_null_map_to)[row] = true;
out->insertDefault();
},
default_mask);
}
else
keys_found = getItemsShortCircuitImpl<ValueType>(
keys_found = getItemsShortCircuitImpl<ValueType, false>(
attribute,
extractor,
[&](size_t, StringRef value, bool) { out->insertData(value.data, value.size); },
[&](size_t, StringRef value) { out->insertData(value.data, value.size); },
[&](size_t) {},
default_mask);
}
else
@ -570,20 +580,22 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumnOrDef
auto & out = column->getData();
if (is_attribute_nullable)
keys_found = getItemsShortCircuitImpl<ValueType>(
keys_found = getItemsShortCircuitImpl<ValueType, true>(
attribute,
extractor,
[&](size_t row, const auto value, bool is_null)
[&](size_t row, const auto value)
{
(*vec_null_map_to)[row] = is_null;
(*vec_null_map_to)[row] = false;
out[row] = value;
},
[&](size_t row) { (*vec_null_map_to)[row] = true; },
default_mask);
else
keys_found = getItemsShortCircuitImpl<ValueType>(
keys_found = getItemsShortCircuitImpl<ValueType, false>(
attribute,
extractor,
[&](size_t row, const auto value, bool) { out[row] = value; },
[&](size_t row, const auto value) { out[row] = value; },
[&](size_t) {},
default_mask);
out.resize(keys_found);
@ -1143,11 +1155,12 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsImpl(
}
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
template <typename AttributeType, typename ValueSetter>
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename NullSetter>
size_t HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsShortCircuitImpl(
const Attribute & attribute,
DictionaryKeysExtractor<dictionary_key_type> & keys_extractor,
ValueSetter && set_value,
NullSetter && set_null,
IColumn::Filter & default_mask) const
{
const auto & attribute_containers = std::get<CollectionsHolder<AttributeType>>(attribute.containers);
@ -1165,7 +1178,15 @@ size_t HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsShortCirc
if (it != container.end())
{
set_value(key_index, getValueFromCell(it), false);
set_value(key_index, getValueFromCell(it));
default_mask[key_index] = 1;
++keys_found;
}
// Need to consider items in is_nullable_sets as well, see blockToAttributes()
else if (is_nullable && (*attribute.is_nullable_sets)[shard].find(key) != nullptr)
{
set_null(key_index);
default_mask[key_index] = 1;
++keys_found;

View File

@ -12,6 +12,8 @@ Hashed dictionary
\N
1
\N
\N
\N
Hashed array dictionary
('zero','zero')
('zero','zero')

View File

@ -54,6 +54,8 @@ SELECT dictGetOrDefault('hashed_dictionary', 'v2', id+1, intDiv(NULL, id))
FROM dictionary_source_table;
SELECT dictGetOrDefault('hashed_dictionary', 'v3', id+1, intDiv(NULL, id))
FROM dictionary_source_table;
SELECT dictGetOrDefault('hashed_dictionary', 'v2', 1, intDiv(1, id))
FROM dictionary_source_table;
DROP DICTIONARY hashed_dictionary;