#pragma once

// NOTE(review): this copy of the header looks damaged by text extraction.
// The nine bare `#include` directives below have lost their header names, and
// template parameter/argument lists appear to be missing throughout the file
// (e.g. `std::shared_ptr;`, `static_cast(element_count)`, `template using ...`).
// The code tokens are left exactly as found; restore the original from version
// control before attempting to build.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"

/** This dictionary stores all of its content in an in-memory hash table
  * (a separate Key -> Value map for each attribute).
  * Two hash table variants are supported: a fast HashMap and a memory-efficient sparse_hash_map.
  */

namespace DB
{
/// Shared-ownership handle used for `saved_block` below.
/// NOTE(review): the pointee type was stripped from this copy — confirm against the original.
using BlockPtr = std::shared_ptr;

/// Dictionary implementation declared `final`; derives publicly from IDictionary.
/// Only the trivial accessors are defined inline here; everything else is declared
/// and must be defined in another translation unit.
class HashedDictionary final : public IDictionary
{
public:
    /// Constructor (defined outside this header).
    /// The parameter names mirror the data members declared at the bottom of the class;
    /// `saved_block_` is optional and defaults to nullptr.
    HashedDictionary(
        const std::string & database_,
        const std::string & name_,
        const DictionaryStructure & dict_struct_,
        DictionarySourcePtr source_ptr_,
        const DictionaryLifetime dict_lifetime_,
        bool require_nonempty_,
        bool sparse_,
        BlockPtr saved_block_ = nullptr);

    /// Plain accessors returning references to the stored identification strings.
    const std::string & getDatabase() const override { return database; }
    const std::string & getName() const override { return name; }
    const std::string & getFullName() const override { return full_name; }

    /// Returns "SparseHashed" when the `sparse` flag is set, "Hashed" otherwise.
    std::string getTypeName() const override { return sparse ? "SparseHashed" : "Hashed"; }

    /// Returns the cached `bytes_allocated` value.
    size_t getBytesAllocated() const override { return bytes_allocated; }

    /// Reads the query counter with relaxed memory ordering.
    size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }

    /// Always reports a hit rate of 1.0.
    double getHitRate() const override { return 1.0; }

    /// Returns the cached `element_count` value.
    size_t getElementCount() const override { return element_count; }

    /// Ratio of `element_count` to `bucket_count`.
    /// NOTE(review): `bucket_count` is default-initialised to 0 below; confirm it is always
    /// non-zero by the time this is called, otherwise the division does not yield a usable ratio.
    double getLoadFactor() const override { return static_cast(element_count) / bucket_count; }

    /// Builds a new object from this dictionary's configuration: the source is cloned via
    /// `source_ptr->clone()`, and `saved_block` is passed along as-is.
    /// NOTE(review): the return pointee type and the `make_shared` target type were stripped
    /// from this copy — presumably the latter is HashedDictionary; confirm.
    std::shared_ptr clone() const override
    {
        return std::make_shared(database, name, dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, sparse, saved_block);
    }

    /// Non-owning pointer to the dictionary source.
    const IDictionarySource * getSource() const override { return source_ptr.get(); }

    const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }

    const DictionaryStructure & getStructure() const override { return dict_struct; }

    /// Looks the attribute up by name, derives its position inside `attributes` by pointer
    /// arithmetic, and reads the `injective` flag at the same position in `dict_struct.attributes`.
    /// This relies on both sequences being index-aligned.
    bool isInjective(const std::string & attribute_name) const override
    {
        return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
    }

    /// True when `hierarchical_attribute` is non-null.
    bool hasHierarchy() const override { return hierarchical_attribute; }

    void toParent(const PaddedPODArray & ids, PaddedPODArray & out) const override;

    /// Output array type used by the typed getters below; selects between
    /// DecimalPaddedPODArray and PaddedPODArray.
    /// NOTE(review): the selecting condition and template parameter were stripped from this copy.
    template
    using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>;

    /// Typed getters without a default argument: one `get<TYPE>` declaration is generated
    /// for each of the 14 numeric/decimal types listed.
#define DECLARE(TYPE) \
    void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out) const;
    DECLARE(UInt8)
    DECLARE(UInt16)
    DECLARE(UInt32)
    DECLARE(UInt64)
    DECLARE(UInt128)
    DECLARE(Int8)
    DECLARE(Int16)
    DECLARE(Int32)
    DECLARE(Int64)
    DECLARE(Float32)
    DECLARE(Float64)
    DECLARE(Decimal32)
    DECLARE(Decimal64)
    DECLARE(Decimal128)
#undef DECLARE

    /// String counterpart of the getters above; the result goes into the `out` column.
    void getString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out) const;

    /// Typed getters taking defaults as an array argument `def`.
#define DECLARE(TYPE) \
    void get##TYPE( \
        const std::string & attribute_name, \
        const PaddedPODArray & ids, \
        const PaddedPODArray & def, \
        ResultArrayType & out) const;
    DECLARE(UInt8)
    DECLARE(UInt16)
    DECLARE(UInt32)
    DECLARE(UInt64)
    DECLARE(UInt128)
    DECLARE(Int8)
    DECLARE(Int16)
    DECLARE(Int32)
    DECLARE(Int64)
    DECLARE(Float32)
    DECLARE(Float64)
    DECLARE(Decimal32)
    DECLARE(Decimal64)
    DECLARE(Decimal128)
#undef DECLARE

    /// String getter taking defaults as a ColumnString.
    void
    getString(const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) const;

    /// Typed getters taking a single default value `def` of the attribute's type.
#define DECLARE(TYPE) \
    void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, const TYPE & def, ResultArrayType & out) \
        const;
    DECLARE(UInt8)
    DECLARE(UInt16)
    DECLARE(UInt32)
    DECLARE(UInt64)
    DECLARE(UInt128)
    DECLARE(Int8)
    DECLARE(Int16)
    DECLARE(Int32)
    DECLARE(Int64)
    DECLARE(Float32)
    DECLARE(Float64)
    DECLARE(Decimal32)
    DECLARE(Decimal64)
    DECLARE(Decimal128)
#undef DECLARE

    /// String getter taking a single default String.
    void getString(const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const;

    /// The remaining public functions are overrides that are only declared here.
    void has(const PaddedPODArray & ids, PaddedPODArray & out) const override;

    /// Three `isIn*` overrides covering array/array, array/single-key and single-key/array
    /// combinations of child and ancestor arguments.
    void isInVectorVector(
        const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override;
    void isInVectorConstant(const PaddedPODArray & child_ids, const Key ancestor_id, PaddedPODArray & out) const override;
    void isInConstantVector(const Key child_id, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override;

    BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;

private:
    /// Aliases for the two supported container kinds (HashMap and google::sparse_hash_map)
    /// and for owning pointers to them.
    /// NOTE(review): key/value/hash template arguments were stripped from this copy.
    template
    using CollectionType = HashMap;
    template
    using CollectionPtrType = std::unique_ptr>;

    template
    using SparseCollectionType = google::sparse_hash_map>;
    template
    using SparseCollectionPtrType = std::unique_ptr>;

    /// Per-attribute storage.
    struct Attribute final
    {
        AttributeUnderlyingType type;

        /// Holds one value of one of the 15 listed types.
        /// Presumably the attribute's null/default value — confirm against the implementation.
        std::variant<
            UInt8,
            UInt16,
            UInt32,
            UInt64,
            UInt128,
            Int8,
            Int16,
            Int32,
            Int64,
            Decimal32,
            Decimal64,
            Decimal128,
            Float32,
            Float64,
            String>
            null_values;

        /// One alternative per value type (15 in total), each an owning pointer to a HashMap-based collection.
        /// NOTE(review): the per-alternative value types were stripped from this copy.
        std::variant<
            CollectionPtrType,
            CollectionPtrType,
            CollectionPtrType,
            CollectionPtrType,
            CollectionPtrType,
            CollectionPtrType,
            CollectionPtrType,
            CollectionPtrType,
            CollectionPtrType,
            CollectionPtrType,
            CollectionPtrType,
            CollectionPtrType,
            CollectionPtrType,
            CollectionPtrType,
            CollectionPtrType>
            maps;

        /// Same layout as `maps`, but based on the sparse collection type.
        /// Presumably only one of `maps` / `sparse_maps` is populated, depending on the
        /// dictionary's `sparse` flag — confirm against the implementation.
        std::variant<
            SparseCollectionPtrType,
            SparseCollectionPtrType,
            SparseCollectionPtrType,
            SparseCollectionPtrType,
            SparseCollectionPtrType,
            SparseCollectionPtrType,
            SparseCollectionPtrType,
            SparseCollectionPtrType,
            SparseCollectionPtrType,
            SparseCollectionPtrType,
            SparseCollectionPtrType,
            SparseCollectionPtrType,
            SparseCollectionPtrType,
            SparseCollectionPtrType,
            SparseCollectionPtrType>
            sparse_maps;

        /// Owning pointer; presumably backing storage for string values — confirm.
        std::unique_ptr string_arena;
    };

    /// Loading / bookkeeping helpers; all defined outside this header.
    void createAttributes();

    void blockToAttributes(const Block & block);

    void updateData();

    void loadData();

    template
    void addAttributeSize(const Attribute & attribute);

    void calculateBytesAllocated();

    template
    void createAttributeImpl(Attribute & attribute, const Field & null_value);

    Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);

    /// Lookup helpers parameterised on a value setter and a default getter callable.
    template
    void getItemsAttrImpl(
        const AttrType & attr, const PaddedPODArray & ids, ValueSetter && set_value, DefaultGetter && get_default) const;

    template
    void
    getItemsImpl(const Attribute & attribute, const PaddedPODArray & ids, ValueSetter && set_value, DefaultGetter && get_default) const;

    template
    bool setAttributeValueImpl(Attribute & attribute, const Key id, const T value);

    bool setAttributeValue(Attribute & attribute, const Key id, const Field & value);

    /// Resolves an attribute by name; the returned reference is also used by isInjective()
    /// to compute the attribute's index inside `attributes`.
    const Attribute & getAttribute(const std::string & attribute_name) const;

    template
    void has(const Attribute & attribute, const PaddedPODArray & ids, PaddedPODArray & out) const;

    template
    PaddedPODArray getIdsAttrImpl(const AttrType & attr) const;
    template
    PaddedPODArray getIds(const Attribute & attribute) const;

    PaddedPODArray getIds() const;

    template
    void isInAttrImpl(const AttrType & attr, const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const;
    template
    void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const;

    /// Configuration; every member in this group is const and so fixed at construction.
    const std::string database;
    const std::string name;
    const std::string full_name;
    const DictionaryStructure dict_struct;
    const DictionarySourcePtr source_ptr;
    const DictionaryLifetime dict_lifetime;
    const bool require_nonempty;
    const bool sparse;

    std::map attribute_index_by_name;
    /// isInjective() indexes `dict_struct.attributes` with positions taken from this vector.
    std::vector attributes;
    /// Null unless set elsewhere; hasHierarchy() reports whether it is non-null.
    const Attribute * hierarchical_attribute = nullptr;

    /// Cached statistics returned by the accessors above; all start at zero.
    size_t bytes_allocated = 0;
    size_t element_count = 0;
    size_t bucket_count = 0;
    /// Declared mutable so that it can be modified from const member functions.
    mutable std::atomic query_count{0};

    BlockPtr saved_block;
};

}