#pragma once #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int LOGICAL_ERROR; } namespace ColumnsHashing { /// Generic context for HashMethod. Context is shared between multiple threads, all methods must be thread-safe. /// Is used for caching. class HashMethodContext { public: virtual ~HashMethodContext() = default; struct Settings { size_t max_threads; }; }; using HashMethodContextPtr = std::shared_ptr; namespace columns_hashing_impl { template struct LastElementCache { static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_; Value value; bool empty = true; bool found = false; bool check(const Value & value_) { return !empty && value == value_; } template bool check(const Key & key) { return !empty && value.first == key; } }; template struct LastElementCache { static constexpr bool consecutive_keys_optimization = false; }; template class EmplaceResultImpl { Mapped & value; Mapped & cached_value; bool inserted; public: EmplaceResultImpl(Mapped & value_, Mapped & cached_value_, bool inserted_) : value(value_), cached_value(cached_value_), inserted(inserted_) {} bool isInserted() const { return inserted; } auto & getMapped() const { return value; } void setMapped(const Mapped & mapped) { cached_value = mapped; value = mapped; } }; template <> class EmplaceResultImpl { bool inserted; public: explicit EmplaceResultImpl(bool inserted_) : inserted(inserted_) {} bool isInserted() const { return inserted; } }; /// FindResult optionally may contain pointer to value and offset in hashtable buffer. /// Only bool found is required. /// So we will have 4 different specializations for FindResultImpl class FindResultImplBase { bool found; public: explicit FindResultImplBase(bool found_) : found(found_) {} bool isFound() const { return found; } }; template class FindResultImplOffsetBase { public: constexpr static bool has_offset = need_offset; explicit FindResultImplOffsetBase(size_t /* off */) {} }; template <> class FindResultImplOffsetBase { size_t offset; public: constexpr static bool has_offset = true; explicit FindResultImplOffsetBase(size_t off) : offset(off) {} ALWAYS_INLINE size_t getOffset() const { return offset; } }; template class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBase { Mapped * value; public: FindResultImpl() : FindResultImplBase(false), FindResultImplOffsetBase(0) // NOLINT(clang-analyzer-optin.cplusplus.UninitializedObject) intentionally allow uninitialized value here {} FindResultImpl(Mapped * value_, bool found_, size_t off) : FindResultImplBase(found_), FindResultImplOffsetBase(off), value(value_) {} Mapped & getMapped() const { return *value; } }; template class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBase { public: FindResultImpl(bool found_, size_t off) : FindResultImplBase(found_), FindResultImplOffsetBase(off) {} }; template class HashMethodBase { public: using EmplaceResult = EmplaceResultImpl; using FindResult = FindResultImpl; static constexpr bool has_mapped = !std::is_same_v; using Cache = LastElementCache; static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; } template ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool) { auto key_holder = static_cast(*this).getKeyHolder(row, pool); return emplaceImpl(key_holder, data); } template ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool) { auto key_holder = static_cast(*this).getKeyHolder(row, pool); return findKeyImpl(keyHolderGetKey(key_holder), data); } template ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool) { auto key_holder = static_cast(*this).getKeyHolder(row, pool); return data.hash(keyHolderGetKey(key_holder)); } protected: Cache cache; HashMethodBase() { if constexpr (consecutive_keys_optimization) { if constexpr (has_mapped) { /// Init PairNoInit elements. cache.value.second = Mapped(); cache.value.first = {}; } else cache.value = Value(); } } template ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data) { if constexpr (Cache::consecutive_keys_optimization) { if (cache.found && cache.check(keyHolderGetKey(key_holder))) { if constexpr (has_mapped) return EmplaceResult(cache.value.second, cache.value.second, false); else return EmplaceResult(false); } } typename Data::LookupResult it; bool inserted = false; data.emplace(key_holder, it, inserted); [[maybe_unused]] Mapped * cached = nullptr; if constexpr (has_mapped) cached = &it->getMapped(); if (inserted) { if constexpr (has_mapped) { new (&it->getMapped()) Mapped(); } } if constexpr (consecutive_keys_optimization) { cache.found = true; cache.empty = false; if constexpr (has_mapped) { cache.value.first = it->getKey(); cache.value.second = it->getMapped(); cached = &cache.value.second; } else { cache.value = it->getKey(); } } if constexpr (has_mapped) return EmplaceResult(it->getMapped(), *cached, inserted); else return EmplaceResult(inserted); } template ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data) { if constexpr (Cache::consecutive_keys_optimization) { /// It's possible to support such combination, but code will became more complex. /// Now there's not place where we need this options enabled together static_assert(!FindResult::has_offset, "`consecutive_keys_optimization` and `has_offset` are conflicting options"); if (cache.check(key)) { if constexpr (has_mapped) return FindResult(&cache.value.second, cache.found, 0); else return FindResult(cache.found, 0); } } auto it = data.find(key); if constexpr (consecutive_keys_optimization) { cache.found = it != nullptr; cache.empty = false; if constexpr (has_mapped) { cache.value.first = key; if (it) { cache.value.second = it->getMapped(); } } else { cache.value = key; } } size_t offset = 0; if constexpr (FindResult::has_offset) { offset = it ? data.offsetInternal(it) : 0; } if constexpr (has_mapped) return FindResult(it ? &it->getMapped() : nullptr, it != nullptr, offset); else return FindResult(it != nullptr, offset); } }; template struct MappedCache : public PaddedPODArray {}; template <> struct MappedCache {}; /// This class is designed to provide the functionality that is required for /// supporting nullable keys in HashMethodKeysFixed. If there are /// no nullable keys, this class is merely implemented as an empty shell. template class BaseStateKeysFixed; /// Case where nullable keys are supported. template class BaseStateKeysFixed { protected: explicit BaseStateKeysFixed(const ColumnRawPtrs & key_columns) { null_maps.reserve(key_columns.size()); actual_columns.reserve(key_columns.size()); for (const auto & col : key_columns) { if (const auto * nullable_col = checkAndGetColumn(col)) { actual_columns.push_back(&nullable_col->getNestedColumn()); null_maps.push_back(&nullable_col->getNullMapColumn()); } else { actual_columns.push_back(col); null_maps.push_back(nullptr); } } } /// Return the columns which actually contain the values of the keys. /// For a given key column, if it is nullable, we return its nested /// column. Otherwise we return the key column itself. inline const ColumnRawPtrs & getActualColumns() const { return actual_columns; } /// Create a bitmap that indicates whether, for a particular row, /// a key column bears a null value or not. KeysNullMap createBitmap(size_t row) const { KeysNullMap bitmap{}; for (size_t k = 0; k < null_maps.size(); ++k) { if (null_maps[k] != nullptr) { const auto & null_map = assert_cast(*null_maps[k]).getData(); if (null_map[row] == 1) { size_t bucket = k / 8; size_t offset = k % 8; bitmap[bucket] |= UInt8(1) << offset; } } } return bitmap; } private: ColumnRawPtrs actual_columns; ColumnRawPtrs null_maps; }; /// Case where nullable keys are not supported. template class BaseStateKeysFixed { protected: explicit BaseStateKeysFixed(const ColumnRawPtrs & columns) : actual_columns(columns) {} const ColumnRawPtrs & getActualColumns() const { return actual_columns; } KeysNullMap createBitmap(size_t) const { throw Exception{"Internal error: calling createBitmap() for non-nullable keys" " is forbidden", ErrorCodes::LOGICAL_ERROR}; } private: ColumnRawPtrs actual_columns; }; } } }