mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Updated hash dictionary nullable attribute implementation
This commit is contained in:
parent
5fd575977a
commit
3f273ef983
@ -2,6 +2,8 @@
|
||||
|
||||
#include <ext/size.h>
|
||||
|
||||
#include <absl/container/flat_hash_map.h>
|
||||
|
||||
#include <Core/Defines.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
@ -155,37 +157,17 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getColumn(
|
||||
const DictionaryAttribute & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
DefaultValueProvider default_value_provider(dictionary_attribute.null_value, default_values_column);
|
||||
|
||||
/// TODO: Check extractor performance
|
||||
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, temporary_complex_key_arena);
|
||||
const auto & requested_keys = extractor.getKeys();
|
||||
|
||||
auto result_column = dictionary_attribute.type->createColumn();
|
||||
auto result_column = dictionary_attribute.nested_type->createColumn();
|
||||
|
||||
size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
|
||||
const auto & attribute = attributes[attribute_index];
|
||||
|
||||
size_t requested_keys_size = requested_keys.size();
|
||||
|
||||
if (unlikely(attribute.is_complex_type))
|
||||
{
|
||||
const auto & attribute_container = std::get<ComplexAttributeCollectionType<Field>>(attribute.container);
|
||||
|
||||
Field row_value_to_insert;
|
||||
|
||||
for (size_t requested_key_index = 0; requested_key_index < requested_keys_size; ++requested_key_index)
|
||||
{
|
||||
auto & requested_key = requested_keys[requested_key_index];
|
||||
auto it = attribute_container.find(requested_key);
|
||||
|
||||
if (it != attribute_container.end())
|
||||
row_value_to_insert = getValueFromCell(it);
|
||||
else
|
||||
row_value_to_insert = default_value_provider.getDefaultValue(requested_key_index);
|
||||
|
||||
result_column->insert(row_value_to_insert);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
@ -247,6 +229,19 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getColumn(
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.is_nullable_set)
|
||||
{
|
||||
ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(requested_keys_size, false);
|
||||
ColumnUInt8::Container& vec_null_map_to = col_null_map_to->getData();
|
||||
|
||||
for (size_t requested_key_index = 0; requested_key_index < requested_keys_size; ++requested_key_index)
|
||||
{
|
||||
auto key = requested_keys[requested_key_index];
|
||||
vec_null_map_to[requested_key_index] = (attribute.is_nullable_set->find(key) != nullptr);
|
||||
}
|
||||
|
||||
result_column = ColumnNullable::create(std::move(result_column), std::move(col_null_map_to));
|
||||
}
|
||||
|
||||
query_count.fetch_add(requested_keys.size(), std::memory_order_relaxed);
|
||||
@ -260,6 +255,7 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::hasKeys(const Co
|
||||
if (dictionary_key_type == DictionaryKeyType::complex)
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
/// TODO: Check performance of extractor
|
||||
Arena complex_keys_arena;
|
||||
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, complex_keys_arena);
|
||||
|
||||
@ -275,14 +271,18 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::hasKeys(const Co
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Container
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
getAttributeContainer(0, [&](const auto & container)
|
||||
{
|
||||
for (size_t requested_key_index = 0; requested_key_index < keys_size; ++requested_key_index)
|
||||
{
|
||||
const auto & requested_key = keys[requested_key_index];
|
||||
|
||||
out[requested_key_index] = container.find(requested_key) != container.end();
|
||||
|
||||
if (unlikely(attribute.is_nullable_set) && !out[requested_key_index])
|
||||
out[requested_key_index] = attribute.is_nullable_set->find(requested_key) != nullptr;
|
||||
}
|
||||
});
|
||||
|
||||
@ -416,26 +416,17 @@ void HashedDictionary<dictionary_key_type, sparse>::createAttributes()
|
||||
|
||||
for (const auto & dictionary_attribute : dict_struct.attributes)
|
||||
{
|
||||
bool is_complex_type = dictionary_attribute.is_nullable || dictionary_attribute.is_array;
|
||||
|
||||
auto type_call = [&, this](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
|
||||
auto is_nullable_set = dictionary_attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{};
|
||||
std::unique_ptr<Arena> string_arena = std::is_same_v<AttributeType, String> ? std::make_unique<Arena>() : nullptr;
|
||||
|
||||
if (is_complex_type)
|
||||
{
|
||||
Attribute attribute{dictionary_attribute.underlying_type, is_complex_type, ComplexAttributeCollectionType<Field>(), std::move(string_arena)};
|
||||
Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_set), CollectionType<ValueType>(), std::move(string_arena)};
|
||||
attributes.emplace_back(std::move(attribute));
|
||||
}
|
||||
else
|
||||
{
|
||||
Attribute attribute{dictionary_attribute.underlying_type, is_complex_type, CollectionType<ValueType>(), std::move(string_arena)};
|
||||
attributes.emplace_back(std::move(attribute));
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call);
|
||||
@ -567,6 +558,9 @@ void HashedDictionary<dictionary_key_type, sparse>::blockToAttributes(const Bloc
|
||||
auto key = keys_extracted_from_block[key_index];
|
||||
auto it = container.find(key);
|
||||
|
||||
if (attribute.is_nullable_set && (attribute.is_nullable_set->find(key) != nullptr))
|
||||
continue;
|
||||
|
||||
if (it != container.end())
|
||||
continue;
|
||||
|
||||
@ -575,11 +569,13 @@ void HashedDictionary<dictionary_key_type, sparse>::blockToAttributes(const Bloc
|
||||
|
||||
attribute_column.get(key_index, column_value_to_insert);
|
||||
|
||||
if constexpr (std::is_same_v<AttributeValueType, Field>)
|
||||
if (attribute.is_nullable_set && column_value_to_insert.isNull())
|
||||
{
|
||||
container.insert({key, column_value_to_insert});
|
||||
attribute.is_nullable_set->insert(key);
|
||||
continue;
|
||||
}
|
||||
else if constexpr (std::is_same_v<AttributeValueType, StringRef>)
|
||||
|
||||
if constexpr (std::is_same_v<AttributeValueType, StringRef>)
|
||||
{
|
||||
String & value_to_insert = column_value_to_insert.get<String>();
|
||||
size_t value_to_insert_size = value_to_insert.size();
|
||||
@ -696,6 +692,9 @@ BlockInputStreamPtr HashedDictionary<dictionary_key_type, sparse>::getBlockInput
|
||||
PaddedPODArray<HashedDictionary::KeyType> keys;
|
||||
|
||||
if (!attributes.empty())
|
||||
{
|
||||
const auto & attribute = attributes.front();
|
||||
|
||||
getAttributeContainer(0, [&](auto & container)
|
||||
{
|
||||
keys.reserve(container.size());
|
||||
@ -705,7 +704,17 @@ BlockInputStreamPtr HashedDictionary<dictionary_key_type, sparse>::getBlockInput
|
||||
(void)(value);
|
||||
keys.emplace_back(key);
|
||||
}
|
||||
|
||||
if (attribute.is_nullable_set)
|
||||
{
|
||||
const auto & is_nullable_set = *attribute.is_nullable_set;
|
||||
keys.reserve(is_nullable_set.size());
|
||||
|
||||
for (auto & node : is_nullable_set)
|
||||
keys.emplace_back(node.getKey());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if constexpr (dictionary_key_type == DictionaryKeyType::simple)
|
||||
return std::make_shared<DictionaryBlockInputStream>(shared_from_this(), max_block_size, std::move(keys), column_names);
|
||||
@ -721,13 +730,6 @@ void HashedDictionary<dictionary_key_type, sparse>::getAttributeContainer(size_t
|
||||
|
||||
auto & attribute = attributes[attribute_index];
|
||||
|
||||
if (unlikely(attribute.is_complex_type))
|
||||
{
|
||||
auto & attribute_container = std::get<ComplexAttributeCollectionType<Field>>(attribute.container);
|
||||
std::forward<GetContainerFunc>(get_container_func)(attribute_container);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
@ -739,7 +741,6 @@ void HashedDictionary<dictionary_key_type, sparse>::getAttributeContainer(size_t
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type, bool sparse>
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <optional>
|
||||
|
||||
#include <sparsehash/sparse_hash_map>
|
||||
#include <absl/container/flat_hash_map.h>
|
||||
#include <ext/range.h>
|
||||
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
@ -116,12 +115,6 @@ private:
|
||||
HashMap<UInt64, Value>,
|
||||
HashMapWithSavedHash<StringRef, Value, DefaultHash<StringRef>>>;
|
||||
|
||||
template <typename Value>
|
||||
using ComplexAttributeCollectionTypeNonSparse = std::conditional_t<
|
||||
dictionary_key_type == DictionaryKeyType::simple,
|
||||
absl::flat_hash_map<UInt64, Value, DefaultHash<UInt64>>,
|
||||
absl::flat_hash_map<StringRef, Value, DefaultHash<StringRef>>>;
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
template <typename Key, typename Value>
|
||||
using SparseHashMap = google::sparse_hash_map<Key, Value, DefaultHash<Key>>;
|
||||
@ -139,16 +132,13 @@ private:
|
||||
template <typename Value>
|
||||
using CollectionType = std::conditional_t<sparse, CollectionTypeSparse<Value>, CollectionTypeNonSparse<Value>>;
|
||||
|
||||
template <typename Value>
|
||||
using ComplexAttributeCollectionType = std::conditional_t<
|
||||
sparse,
|
||||
CollectionTypeSparse<Value>,
|
||||
ComplexAttributeCollectionTypeNonSparse<Value>>;
|
||||
using NullableSet = HashSet<KeyType, DefaultHash<KeyType>>;
|
||||
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
bool is_complex_type;
|
||||
std::optional<NullableSet> is_nullable_set;
|
||||
|
||||
std::variant<
|
||||
CollectionType<UInt8>,
|
||||
CollectionType<UInt16>,
|
||||
@ -164,10 +154,11 @@ private:
|
||||
CollectionType<Decimal128>,
|
||||
CollectionType<Float32>,
|
||||
CollectionType<Float64>,
|
||||
CollectionType<StringRef>,
|
||||
ComplexAttributeCollectionType<Field>>
|
||||
CollectionType<StringRef>>
|
||||
container;
|
||||
|
||||
std::unique_ptr<Arena> string_arena;
|
||||
|
||||
};
|
||||
|
||||
void createAttributes();
|
||||
|
@ -51,21 +51,21 @@ Get is in hierarchy
|
||||
0
|
||||
Get children
|
||||
[1]
|
||||
[2,3]
|
||||
[3,2]
|
||||
[4]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
Get all descendants
|
||||
[1,2,3,4]
|
||||
[2,3,4]
|
||||
[1,3,2,4]
|
||||
[3,2,4]
|
||||
[4]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
Get descendants at first level
|
||||
[1]
|
||||
[2,3]
|
||||
[3,2]
|
||||
[4]
|
||||
[]
|
||||
[]
|
||||
|
@ -1,95 +1,95 @@
|
||||
DROP DATABASE IF EXISTS 01778_db;
|
||||
CREATE DATABASE 01778_db;
|
||||
|
||||
CREATE TABLE 01778_db.simple_key_hierarchy_source_table (id UInt64, parent_id UInt64) ENGINE = TinyLog;
|
||||
INSERT INTO 01778_db.simple_key_hierarchy_source_table VALUES (1, 0), (2, 1), (3, 1), (4, 2);
|
||||
CREATE TABLE 01778_db.hierarchy_source_table (id UInt64, parent_id UInt64) ENGINE = TinyLog;
|
||||
INSERT INTO 01778_db.hierarchy_source_table VALUES (1, 0), (2, 1), (3, 1), (4, 2);
|
||||
|
||||
CREATE DICTIONARY 01778_db.simple_key_hierarchy_flat_dictionary
|
||||
CREATE DICTIONARY 01778_db.hierarchy_flat_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
parent_id UInt64 HIERARCHICAL
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_key_hierarchy_source_table' DB '01778_db'))
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hierarchy_source_table' DB '01778_db'))
|
||||
LAYOUT(FLAT())
|
||||
LIFETIME(MIN 1 MAX 1000);
|
||||
|
||||
SELECT 'Flat dictionary';
|
||||
|
||||
SELECT 'Get hierarchy';
|
||||
SELECT dictGetHierarchy('01778_db.simple_key_hierarchy_flat_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT dictGetHierarchy('01778_db.hierarchy_flat_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT 'Get is in hierarchy';
|
||||
SELECT dictIsIn('01778_db.simple_key_hierarchy_flat_dictionary', number, number) FROM system.numbers LIMIT 6;
|
||||
SELECT dictIsIn('01778_db.hierarchy_flat_dictionary', number, number) FROM system.numbers LIMIT 6;
|
||||
SELECT 'Get children';
|
||||
SELECT dictGetChildren('01778_db.simple_key_hierarchy_flat_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT dictGetChildren('01778_db.hierarchy_flat_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT 'Get all descendants';
|
||||
SELECT dictGetDescendants('01778_db.simple_key_hierarchy_flat_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT dictGetDescendants('01778_db.hierarchy_flat_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT 'Get descendants at first level';
|
||||
SELECT dictGetDescendants('01778_db.simple_key_hierarchy_flat_dictionary', number, 1) FROM system.numbers LIMIT 6;
|
||||
SELECT dictGetDescendants('01778_db.hierarchy_flat_dictionary', number, 1) FROM system.numbers LIMIT 6;
|
||||
|
||||
DROP DICTIONARY 01778_db.simple_key_hierarchy_flat_dictionary;
|
||||
DROP DICTIONARY 01778_db.hierarchy_flat_dictionary;
|
||||
|
||||
CREATE DICTIONARY 01778_db.simple_key_hierarchy_hashed_dictionary
|
||||
CREATE DICTIONARY 01778_db.hierarchy_hashed_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
parent_id UInt64 HIERARCHICAL
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_key_hierarchy_source_table' DB '01778_db'))
|
||||
LAYOUT(FLAT())
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hierarchy_source_table' DB '01778_db'))
|
||||
LAYOUT(HASHED())
|
||||
LIFETIME(MIN 1 MAX 1000);
|
||||
|
||||
SELECT 'Hashed dictionary';
|
||||
|
||||
SELECT 'Get hierarchy';
|
||||
SELECT dictGetHierarchy('01778_db.simple_key_hierarchy_hashed_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT dictGetHierarchy('01778_db.hierarchy_hashed_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT 'Get is in hierarchy';
|
||||
SELECT dictIsIn('01778_db.simple_key_hierarchy_hashed_dictionary', number, number) FROM system.numbers LIMIT 6;
|
||||
SELECT dictIsIn('01778_db.hierarchy_hashed_dictionary', number, number) FROM system.numbers LIMIT 6;
|
||||
SELECT 'Get children';
|
||||
SELECT dictGetChildren('01778_db.simple_key_hierarchy_hashed_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT dictGetChildren('01778_db.hierarchy_hashed_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT 'Get all descendants';
|
||||
SELECT dictGetDescendants('01778_db.simple_key_hierarchy_hashed_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT dictGetDescendants('01778_db.hierarchy_hashed_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT 'Get descendants at first level';
|
||||
SELECT dictGetDescendants('01778_db.simple_key_hierarchy_hashed_dictionary', number, 1) FROM system.numbers LIMIT 6;
|
||||
SELECT dictGetDescendants('01778_db.hierarchy_hashed_dictionary', number, 1) FROM system.numbers LIMIT 6;
|
||||
|
||||
DROP DICTIONARY 01778_db.simple_key_hierarchy_hashed_dictionary;
|
||||
DROP DICTIONARY 01778_db.hierarchy_hashed_dictionary;
|
||||
|
||||
CREATE DICTIONARY 01778_db.simple_key_hierarchy_cache_dictionary
|
||||
CREATE DICTIONARY 01778_db.hierarchy_cache_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
parent_id UInt64 HIERARCHICAL
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_key_hierarchy_source_table' DB '01778_db'))
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hierarchy_source_table' DB '01778_db'))
|
||||
LAYOUT(CACHE(SIZE_IN_CELLS 10))
|
||||
LIFETIME(MIN 1 MAX 1000);
|
||||
|
||||
SELECT 'Cache dictionary';
|
||||
|
||||
SELECT 'Get hierarchy';
|
||||
SELECT dictGetHierarchy('01778_db.simple_key_hierarchy_cache_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT dictGetHierarchy('01778_db.hierarchy_cache_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT 'Get is in hierarchy';
|
||||
SELECT dictIsIn('01778_db.simple_key_hierarchy_cache_dictionary', number, number) FROM system.numbers LIMIT 6;
|
||||
SELECT dictIsIn('01778_db.hierarchy_cache_dictionary', number, number) FROM system.numbers LIMIT 6;
|
||||
|
||||
DROP DICTIONARY 01778_db.simple_key_hierarchy_cache_dictionary;
|
||||
DROP DICTIONARY 01778_db.hierarchy_cache_dictionary;
|
||||
|
||||
CREATE DICTIONARY 01778_db.simple_key_hierarchy_direct_dictionary
|
||||
CREATE DICTIONARY 01778_db.hierarchy_direct_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
parent_id UInt64 HIERARCHICAL
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_key_hierarchy_source_table' DB '01778_db'))
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hierarchy_source_table' DB '01778_db'))
|
||||
LAYOUT(DIRECT());
|
||||
|
||||
SELECT 'Direct dictionary';
|
||||
|
||||
SELECT 'Get hierarchy';
|
||||
SELECT dictGetHierarchy('01778_db.simple_key_hierarchy_direct_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT dictGetHierarchy('01778_db.hierarchy_direct_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT 'Get is in hierarchy';
|
||||
SELECT dictIsIn('01778_db.simple_key_hierarchy_direct_dictionary', number, number) FROM system.numbers LIMIT 6;
|
||||
SELECT dictIsIn('01778_db.hierarchy_direct_dictionary', number, number) FROM system.numbers LIMIT 6;
|
||||
|
||||
DROP DICTIONARY 01778_db.simple_key_hierarchy_direct_dictionary;
|
||||
DROP DICTIONARY 01778_db.hierarchy_direct_dictionary;
|
||||
|
||||
DROP TABLE 01778_db.simple_key_hierarchy_source_table;
|
||||
DROP TABLE 01778_db.hierarchy_source_table;
|
||||
DROP DATABASE 01778_db;
|
||||
|
Loading…
Reference in New Issue
Block a user