Merge pull request #4128 from yandex/unified-sets

Unified sets
2024-11-25 17:12:03 +00:00 · 2019-02-06 20:40:11 +03:00 · 2019-02-06 20:40:11 +03:00 · 2686f64233
commit 2686f64233
parent 063565265b 9a4c4964b2
15 changed files with 1437 additions and 1485 deletions
--- a/dbms/src/Common/ColumnsHashing.h
+++ b/dbms/src/Common/ColumnsHashing.h
@ -0,0 +1,557 @@
+#pragma once
+
+
+#include <Common/ColumnsHashingImpl.h>
+#include <Common/Arena.h>
+#include <Common/LRUCache.h>
+#include <common/unaligned.h>
+
+#include <Columns/ColumnString.h>
+#include <Columns/ColumnFixedString.h>
+#include <Columns/ColumnLowCardinality.h>
+
+#include <Core/Defines.h>
+#include <memory>
+
+namespace DB
+{
+
+namespace ColumnsHashing
+{
+
+/// Hash method for a single numeric key column.
+/// FieldType is UInt8/16/32/64; any column type of the matching bit width is read through it.
+template <typename Value, typename Mapped, typename FieldType, bool use_cache = true>
+struct HashMethodOneNumber
+    : public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache>, Value, Mapped, use_cache>
+{
+    using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache>;
+    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+
+    /// Raw bytes of the key column; the value for row i is read at offset i * sizeof(FieldType).
+    const char * vec;
+
+    /// Only the first key column is used. key_sizes (lengths of fixed-length keys, empty otherwise)
+    /// and the context are ignored by this method.
+    HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
+        : vec(key_columns[0]->getRawData().data)
+    {
+    }
+
+    /// Creates the context. Called once; the resulting context is used by all threads.
+    using Base::createContext; /// (const HashMethodContext::Settings &) -> HashMethodContextPtr
+
+    /// Emplaces the key of a column row into a HashTable or HashMap.
+    /// If Data is a HashMap the result gives access to the mapped value.
+    /// For the Serialized method the key may be placed in the pool.
+    using Base::emplaceKey; /// (Data & data, size_t row, Arena & pool) -> EmplaceResult
+
+    /// Looks the key up in a HashTable or HashMap.
+    /// If Data is a HashMap and the key was found, the result gives access to the mapped value.
+    using Base::findKey;  /// (Data & data, size_t row, Arena & pool) -> FindResult
+
+    /// Hash value of the key in the given row.
+    using Base::getHash; /// (const Data & data, size_t row, Arena & pool) -> size_t
+
+    /// Key extraction used by the default implementations in HashMethodBase.
+    FieldType getKey(size_t row, Arena &) const
+    {
+        const char * address = vec + row * sizeof(FieldType);
+        return unalignedLoad<FieldType>(address);
+    }
+
+    /// View of the stored key bytes, suitable for inserting back into a column.
+    static StringRef getValueRef(const Value & value)
+    {
+        const auto * key_bytes = reinterpret_cast<const char *>(&value.first);
+        return StringRef(key_bytes, sizeof(value.first));
+    }
+};
+
+
+/// Hash method for a single ColumnString key.
+/// When place_string_to_arena is set, the bytes of a newly inserted key are copied into the arena.
+template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
+struct HashMethodString
+    : public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache>
+{
+    using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache>;
+    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+
+    /// Offsets and character data of the key column.
+    const IColumn::Offset * offsets;
+    const UInt8 * chars;
+
+    /// Only the first key column is used; it is cast to ColumnString without a type check.
+    HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
+    {
+        const auto & column_string = static_cast<const ColumnString &>(*key_columns[0]);
+        offsets = column_string.getOffsets().data();
+        chars = column_string.getChars().data();
+    }
+
+    /// The string of a row spans [offsets[row - 1], offsets[row]) minus one trailing byte.
+    /// NOTE(review): for row 0 this reads offsets[-1] (hence the signed row type); presumably the
+    /// offsets array is readable there and yields 0 - confirm against the array implementation.
+    auto getKey(ssize_t row, Arena &) const
+    {
+        const auto prev_offset = offsets[row - 1];
+        const auto next_offset = offsets[row];
+        return StringRef(chars + prev_offset, next_offset - prev_offset - 1);
+    }
+
+    /// View of the stored key, suitable for inserting back into a column.
+    static StringRef getValueRef(const Value & value)
+    {
+        const auto & stored_key = value.first;
+        return StringRef(stored_key.data, stored_key.size);
+    }
+
+protected:
+    friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+
+    /// Called for a key that has just been inserted: re-points a non-empty key at a copy in the pool.
+    static ALWAYS_INLINE void onNewKey([[maybe_unused]] StringRef & key, [[maybe_unused]] Arena & pool)
+    {
+        if constexpr (place_string_to_arena)
+        {
+            if (key.size != 0)
+                key.data = pool.insert(key.data, key.size);
+        }
+    }
+};
+
+
+/// Hash method for a single ColumnFixedString key.
+/// When place_string_to_arena is set, the bytes of a newly inserted key are copied into the arena.
+template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
+struct HashMethodFixedString
+    : public columns_hashing_impl::HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache>
+{
+    using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>;
+    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+
+    /// Length of every string in the column.
+    size_t n;
+    /// Character data of the column; row i occupies bytes [i * n, (i + 1) * n).
+    const ColumnFixedString::Chars * chars;
+
+    /// Only the first key column is used; it is cast to ColumnFixedString without a type check.
+    HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
+    {
+        const auto & column_string = static_cast<const ColumnFixedString &>(*key_columns[0]);
+        n = column_string.getN();
+        chars = &column_string.getChars();
+    }
+
+    /// Key of a row: a view of its n bytes inside the column.
+    StringRef getKey(size_t row, Arena &) const
+    {
+        const auto & data = *chars;
+        return StringRef(&data[row * n], n);
+    }
+
+    /// View of the stored key, suitable for inserting back into a column.
+    static StringRef getValueRef(const Value & value)
+    {
+        const auto & stored_key = value.first;
+        return StringRef(stored_key.data, stored_key.size);
+    }
+
+protected:
+    friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+
+    /// Called for a key that has just been inserted: re-points the key at a copy in the pool.
+    static ALWAYS_INLINE void onNewKey([[maybe_unused]] StringRef & key, [[maybe_unused]] Arena & pool)
+    {
+        if constexpr (place_string_to_arena)
+        {
+            key.data = pool.insert(key.data, key.size);
+        }
+    }
+};
+
+
+/// Context that caches, per dictionary, the dictionary itself and its saved hashes.
+class LowCardinalityDictionaryCache : public HashMethodContext
+{
+public:
+    /// Dictionaries with the same hash are assumed to contain the same keys.
+    /// The size is compared as well, just in case.
+    struct DictionaryKey
+    {
+        UInt128 hash;
+        UInt64 size;
+
+        bool operator== (const DictionaryKey & other) const
+        {
+            if (!(hash == other.hash))
+                return false;
+            return size == other.size;
+        }
+    };
+
+    /// Hasher for DictionaryKey: feeds both halves of the hash and the size through SipHash.
+    struct DictionaryKeyHash
+    {
+        size_t operator()(const DictionaryKey & key) const
+        {
+            SipHash sip;
+            sip.update(key.hash.low);
+            sip.update(key.hash.high);
+            sip.update(key.size);
+            return sip.get64();
+        }
+    };
+
+    struct CachedValues
+    {
+        /// Keeps the dictionary alive for as long as the entry exists.
+        ColumnPtr dictionary_holder;
+        /// Hashes of the dictionary keys.
+        const UInt64 * saved_hash = nullptr;
+    };
+
+    using CachedValuesPtr = std::shared_ptr<CachedValues>;
+
+    /// settings.max_threads is passed to the LRU cache constructor.
+    explicit LowCardinalityDictionaryCache(const HashMethodContext::Settings & settings)
+        : cache(settings.max_threads)
+    {
+    }
+
+    /// Returns whatever the underlying cache returns for the key.
+    CachedValuesPtr get(const DictionaryKey & key)
+    {
+        return cache.get(key);
+    }
+
+    /// Stores the values in the underlying cache under the key.
+    void set(const DictionaryKey & key, const CachedValuesPtr & mapped)
+    {
+        cache.set(key, mapped);
+    }
+
+private:
+    using Cache = LRUCache<DictionaryKey, CachedValues, DictionaryKeyHash>;
+    Cache cache;
+};
+
+
+/// Single low cardinality column.
+template <typename SingleColumnMethod, typename Mapped, bool use_cache>
+struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
+{
+    using Base = SingleColumnMethod;
+
+    enum class VisitValue
+    {
+        Empty = 0,
+        Found = 1,
+        NotFound = 2,
+    };
+
+    static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
+    using EmplaceResult = columns_hashing_impl::EmplaceResultImpl<Mapped>;
+    using FindResult = columns_hashing_impl::FindResultImpl<Mapped>;
+
+    /// Creates the shared context for this method: a LowCardinalityDictionaryCache built from the settings.
+    /// Hides the createContext of the underlying single-column method.
+    static HashMethodContextPtr createContext(const HashMethodContext::Settings & settings)
+    {
+        return std::make_shared<LowCardinalityDictionaryCache>(settings);
+    }
+
+    ColumnRawPtrs key_columns;
+    const IColumn * positions = nullptr;
+    size_t size_of_index_type = 0;
+
+    /// saved hash is from current column or from cache.
+    const UInt64 * saved_hash = nullptr;
+    /// Hold dictionary in case saved_hash is from cache to be sure it won't be deleted.
+    ColumnPtr dictionary_holder;
+
+    /// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages.
+    columns_hashing_impl::MappedCache<Mapped> mapped_cache;
+    PaddedPODArray<VisitValue> visit_cache;
+
+    /// If initialized column is nullable.
+    bool is_nullable = false;
+
+    /// Casts a key column to ColumnLowCardinality.
+    /// Throws LOGICAL_ERROR naming the actual column type if the column is of any other type.
+    static const ColumnLowCardinality & getLowCardinalityColumn(const IColumn * low_cardinality_column)
+    {
+        auto column = typeid_cast<const ColumnLowCardinality *>(low_cardinality_column);
+        if (!column)
+        {
+            /// `column` is null on this path, so the name must be taken from the column that was passed in.
+            throw Exception("Invalid aggregation key type for HashMethodSingleLowCardinalityColumn method. "
+                            "Expected LowCardinality, got " + low_cardinality_column->getName(), ErrorCodes::LOGICAL_ERROR);
+        }
+        return *column;
+    }
+
+    /// Builds the state for one LowCardinality key column.
+    /// The underlying single-column method is constructed over the dictionary's nested non-nullable
+    /// column, so its keys are addressed by dictionary position rather than by row number.
+    /// Throws LOGICAL_ERROR if the column is not LowCardinality, if no context was passed
+    /// (checked regardless of use_cache), or, with use_cache, if the context is not a
+    /// LowCardinalityDictionaryCache.
+    HashMethodSingleLowCardinalityColumn(
+        const ColumnRawPtrs & key_columns_low_cardinality, const Sizes & key_sizes, const HashMethodContextPtr & context)
+        : Base({getLowCardinalityColumn(key_columns_low_cardinality[0]).getDictionary().getNestedNotNullableColumn().get()}, key_sizes, context)
+    {
+        auto column = &getLowCardinalityColumn(key_columns_low_cardinality[0]);
+
+        if (!context)
+            throw Exception("Cache wasn't created for HashMethodSingleLowCardinalityColumn",
+                            ErrorCodes::LOGICAL_ERROR);
+
+        /// Only assigned and read inside `if constexpr (use_cache)` branches; initialized so that
+        /// it never holds an indeterminate value and does not trigger warnings when use_cache is false.
+        [[maybe_unused]] LowCardinalityDictionaryCache * cache = nullptr;
+        if constexpr (use_cache)
+        {
+            cache = typeid_cast<LowCardinalityDictionaryCache *>(context.get());
+            if (!cache)
+            {
+                const auto & cached_val = *context;
+                throw Exception("Invalid type for HashMethodSingleLowCardinalityColumn cache: "
+                                + demangle(typeid(cached_val).name()), ErrorCodes::LOGICAL_ERROR);
+            }
+        }
+
+        auto * dict = column->getDictionary().getNestedNotNullableColumn().get();
+        is_nullable = column->getDictionary().nestedColumnIsNullable();
+        key_columns = {dict};
+        bool is_shared_dict = column->isSharedDictionary();
+
+        typename LowCardinalityDictionaryCache::DictionaryKey dictionary_key;
+        typename LowCardinalityDictionaryCache::CachedValuesPtr cached_values;
+
+        /// Only shared dictionaries are looked up in (and later stored to) the cache.
+        if (is_shared_dict)
+        {
+            dictionary_key = {column->getDictionary().getHash(), dict->size()};
+            if constexpr (use_cache)
+                cached_values = cache->get(dictionary_key);
+        }
+
+        if (cached_values)
+        {
+            /// Cache hit: reuse the saved hashes and keep the cached dictionary alive.
+            saved_hash = cached_values->saved_hash;
+            dictionary_holder = cached_values->dictionary_holder;
+        }
+        else
+        {
+            /// Cache miss (or no caching): take the hashes from the column itself; may be nullptr.
+            saved_hash = column->getDictionary().tryGetSavedHash();
+            dictionary_holder = column->getDictionaryPtr();
+
+            if constexpr (use_cache)
+            {
+                if (is_shared_dict)
+                {
+                    cached_values = std::make_shared<typename LowCardinalityDictionaryCache::CachedValues>();
+                    cached_values->saved_hash = saved_hash;
+                    cached_values->dictionary_holder = dictionary_holder;
+
+                    cache->set(dictionary_key, cached_values);
+                }
+            }
+        }
+
+        /// Per-dictionary-position caches: one slot for every key of the dictionary.
+        if constexpr (has_mapped)
+            mapped_cache.resize(key_columns[0]->size());
+
+        VisitValue empty(VisitValue::Empty);
+        visit_cache.assign(key_columns[0]->size(), empty);
+
+        size_of_index_type = column->getSizeOfIndexType();
+        positions = column->getIndexesPtr().get();
+    }
+
+    /// Dictionary position stored for the given row.
+    /// The positions column is read as UInt8/16/32/64 according to size_of_index_type;
+    /// any other size throws LOGICAL_ERROR.
+    ALWAYS_INLINE size_t getIndexAt(size_t row) const
+    {
+        if (size_of_index_type == sizeof(UInt8))
+            return static_cast<const ColumnUInt8 *>(positions)->getElement(row);
+        if (size_of_index_type == sizeof(UInt16))
+            return static_cast<const ColumnUInt16 *>(positions)->getElement(row);
+        if (size_of_index_type == sizeof(UInt32))
+            return static_cast<const ColumnUInt32 *>(positions)->getElement(row);
+        if (size_of_index_type == sizeof(UInt64))
+            return static_cast<const ColumnUInt64 *>(positions)->getElement(row);
+
+        throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR);
+    }
+
+    /// Key for a row of the LowCardinality column: the row is translated to its dictionary
+    /// position and the key is taken from the underlying single-column method at that position.
+    ALWAYS_INLINE auto getKey(size_t row, Arena & pool) const
+    {
+        const size_t dictionary_position = getIndexAt(row);
+        return Base::getKey(dictionary_position, pool);
+    }
+
+    /// Emplaces the key of row `row_` into `data`.
+    /// Work is cached per dictionary position: once a position has been visited, later rows with
+    /// the same position are answered from mapped_cache without touching the hash table.
+    /// For a nullable dictionary, position 0 is the NULL key and is kept in the table's null-key slot.
+    template <typename Data>
+    ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row_, Arena & pool)
+    {
+        size_t row = getIndexAt(row_);
+
+        if (is_nullable && row == 0)
+        {
+            visit_cache[row] = VisitValue::Found;
+            bool has_null_key = data.hasNullKeyData();
+            data.hasNullKeyData() = true;
+
+            if constexpr (has_mapped)
+                return EmplaceResult(data.getNullKeyData(), mapped_cache[0], !has_null_key);
+            else
+                return EmplaceResult(!has_null_key);
+        }
+
+        if (visit_cache[row] == VisitValue::Found)
+        {
+            if constexpr (has_mapped)
+                return EmplaceResult(mapped_cache[row], mapped_cache[row], false);
+            else
+                return EmplaceResult(false);
+        }
+
+        auto key = getKey(row_, pool);
+
+        bool inserted = false;
+        typename Data::iterator it;
+        if (saved_hash)
+            data.emplace(key, it, inserted, saved_hash[row]);
+        else
+            data.emplace(key, it, inserted);
+
+        visit_cache[row] = VisitValue::Found;
+
+        if constexpr (has_mapped)
+        {
+            if (inserted)
+            {
+                new(&it->second) Mapped();
+                Base::onNewKey(it->first, pool);
+            }
+
+            /// The position is now marked Found, so the cached mapped value must be valid even when
+            /// the key already existed in the table and the caller never calls setMapped().
+            /// Otherwise the next row with this position would get an uninitialized value.
+            mapped_cache[row] = it->second;
+            return EmplaceResult(it->second, mapped_cache[row], inserted);
+        }
+        else
+        {
+            if (inserted)
+                Base::onNewKey(*it, pool);
+
+            return EmplaceResult(inserted);
+        }
+    }
+
+    /// True if row i holds NULL: the dictionary is nullable and the row refers to position 0.
+    ALWAYS_INLINE bool isNullAt(size_t i)
+    {
+        return is_nullable && getIndexAt(i) == 0;
+    }
+
+    /// Looks up the key of row `row_` in `data`.
+    /// The outcome (found / not found, and the mapped value if found) is cached per dictionary
+    /// position, so each position is searched in the hash table at most once.
+    /// For a nullable dictionary, position 0 is the NULL key and is answered from the null-key slot.
+    template <typename Data>
+    ALWAYS_INLINE FindResult findFromRow(Data & data, size_t row_, Arena & pool)
+    {
+        size_t row = getIndexAt(row_);
+
+        if (is_nullable && row == 0)
+        {
+            if constexpr (has_mapped)
+                return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData());
+            else
+                return FindResult(data.hasNullKeyData());
+        }
+
+        if (visit_cache[row] != VisitValue::Empty)
+        {
+            if constexpr (has_mapped)
+                return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found);
+            else
+                return FindResult(visit_cache[row] == VisitValue::Found);
+        }
+
+        auto key = getKey(row_, pool);
+
+        typename Data::iterator it;
+        if (saved_hash)
+            it = data.find(key, saved_hash[row]);
+        else
+            it = data.find(key);
+
+        bool found = it != data.end();
+        visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound;
+
+        if constexpr (has_mapped)
+        {
+            if (found)
+                mapped_cache[row] = it->second;
+
+            return FindResult(&mapped_cache[row], found);
+        }
+        else
+            return FindResult(found);
+    }
+
+    /// Lookup under the name used by the other hash methods.
+    /// Without this overload a call to findKey would resolve to the inherited implementation,
+    /// which passes the row number to the underlying method's getKey, and that method is built
+    /// over the dictionary and therefore expects a dictionary position.
+    template <typename Data>
+    ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool)
+    {
+        return findFromRow(data, row, pool);
+    }
+
+    /// Hash of the key in the given row.
+    /// Uses the saved per-position hash when one is available; otherwise asks the underlying
+    /// method to hash the key at the row's dictionary position.
+    template <typename Data>
+    ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool)
+    {
+        const size_t dictionary_position = getIndexAt(row);
+
+        if (!saved_hash)
+            return Base::getHash(data, dictionary_position, pool);
+
+        return saved_hash[dictionary_position];
+    }
+};
+
+
+// Optional mask for low cardinality columns.
+// Filled by HashMethodKeysFixed when has_low_cardinality is set; all three arrays are indexed by key number.
+template <bool has_low_cardinality>
+struct LowCardinalityKeys
+{
+    // For a LowCardinality key: the nested column of its dictionary; otherwise the key column itself.
+    ColumnRawPtrs nested_columns;
+    // For a LowCardinality key: its indexes column; nullptr for any other key.
+    ColumnRawPtrs positions;
+    // For a LowCardinality key: the size of its index type.
+    Sizes position_sizes;
+};
+
+// Without low cardinality keys nothing is stored.
+template <>
+struct LowCardinalityKeys<false> {};
+
+/// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits.
+/// The keys of a row are packed into a single value of type Key.
+///  - has_nullable_keys_: a null bitmap for the row is packed together with the keys.
+///  - has_low_cardinality_: LowCardinality keys are read through their dictionaries.
+///    This path is only taken when has_nullable_keys_ is false.
+template <typename Value, typename Key, typename Mapped, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true>
+struct HashMethodKeysFixed
+    : private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>
+    , public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>, Value, Mapped, use_cache>
+{
+    using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>;
+    using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+    using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>;
+
+    static constexpr bool has_nullable_keys = has_nullable_keys_;
+    static constexpr bool has_low_cardinality = has_low_cardinality_;
+
+    LowCardinalityKeys<has_low_cardinality> low_cardinality_keys;
+    /// Byte sizes of the keys, in key order.
+    Sizes key_sizes;
+    /// Number of key columns.
+    size_t keys_size;
+
+    /// The sizes are copied: the argument is a const reference, so wrapping it in std::move could
+    /// only ever copy and merely suggested a move. The parameter no longer shadows the member.
+    HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes_, const HashMethodContextPtr &)
+        : Base(key_columns), key_sizes(key_sizes_), keys_size(key_columns.size())
+    {
+        if constexpr (has_low_cardinality)
+        {
+            low_cardinality_keys.nested_columns.resize(key_columns.size());
+            low_cardinality_keys.positions.assign(key_columns.size(), nullptr);
+            low_cardinality_keys.position_sizes.resize(key_columns.size());
+            for (size_t i = 0; i < key_columns.size(); ++i)
+            {
+                if (auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(key_columns[i]))
+                {
+                    low_cardinality_keys.nested_columns[i] = low_cardinality_col->getDictionary().getNestedColumn().get();
+                    low_cardinality_keys.positions[i] = &low_cardinality_col->getIndexes();
+                    low_cardinality_keys.position_sizes[i] = low_cardinality_col->getSizeOfIndexType();
+                }
+                else
+                    low_cardinality_keys.nested_columns[i] = key_columns[i];
+            }
+        }
+    }
+
+    /// Packs the keys of the given row into one Key value.
+    ALWAYS_INLINE Key getKey(size_t row, Arena &) const
+    {
+        if constexpr (has_nullable_keys)
+        {
+            auto bitmap = Base::createBitmap(row);
+            return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes, bitmap);
+        }
+        else if constexpr (has_low_cardinality)
+        {
+            return packFixed<Key, true>(row, keys_size, low_cardinality_keys.nested_columns, key_sizes,
+                                        &low_cardinality_keys.positions, &low_cardinality_keys.position_sizes);
+        }
+        else
+        {
+            return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes);
+        }
+    }
+};
+
+/** Hash by the concatenation of the serialized key values.
+  * A serialized value can be deserialized unambiguously knowing only where it starts.
+  * For example, a string is written as its serialized length followed by its bytes.
+  * Hence there is no ambiguity when aggregating by several strings.
+  */
+template <typename Value, typename Mapped>
+struct HashMethodSerialized
+    : public columns_hashing_impl::HashMethodBase<HashMethodSerialized<Value, Mapped>, Value, Mapped, false>
+{
+    using Self = HashMethodSerialized<Value, Mapped>;
+    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
+
+    ColumnRawPtrs key_columns;
+    /// Number of key columns.
+    size_t keys_size;
+
+    /// key_sizes and the context are ignored by this method.
+    HashMethodSerialized(const ColumnRawPtrs & key_columns_, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
+        : key_columns(key_columns_)
+        , keys_size(key_columns_.size())
+    {
+    }
+
+protected:
+    friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
+
+    /// Serializes all keys of the row into the pool and returns a view of the result.
+    ALWAYS_INLINE StringRef getKey(size_t row, Arena & pool) const
+    {
+        return serializeKeysToPoolContiguous(row, keys_size, key_columns, pool);
+    }
+
+    /// The serialized key is not kept (see the call sites in HashMethodBase): release its bytes from the pool.
+    static ALWAYS_INLINE void onExistingKey(StringRef & key, Arena & pool)
+    {
+        pool.rollback(key.size);
+    }
+};
+
+/// Hash method whose key is a UInt128 computed by hash128 from all key columns of a row.
+template <typename Value, typename Mapped, bool use_cache = true>
+struct HashMethodHashed
+    : public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache>, Value, Mapped, use_cache>
+{
+    using Key = UInt128;
+    using Self = HashMethodHashed<Value, Mapped, use_cache>;
+    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+
+    ColumnRawPtrs key_columns;
+
+    /// Takes the list of key columns by value and keeps it; sizes and context are ignored.
+    HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const HashMethodContextPtr &)
+        : key_columns(std::move(key_columns_))
+    {
+    }
+
+    /// Key of a row: the result of hash128 over all key columns.
+    ALWAYS_INLINE Key getKey(size_t row, Arena &) const
+    {
+        const size_t columns_count = key_columns.size();
+        return hash128(row, columns_count, key_columns);
+    }
+
+    /// View of the stored key bytes.
+    static ALWAYS_INLINE StringRef getValueRef(const Value & value)
+    {
+        const auto * key_bytes = reinterpret_cast<const char *>(&value.first);
+        return StringRef(key_bytes, sizeof(value.first));
+    }
+};
+
+}
+}
--- a/dbms/src/Common/ColumnsHashingImpl.h
+++ b/dbms/src/Common/ColumnsHashingImpl.h
@ -0,0 +1,356 @@
+#pragma once
+
+#include <Columns/IColumn.h>
+#include <Interpreters/AggregationCommon.h>
+
+namespace DB
+{
+
+namespace ColumnsHashing
+{
+
+/// Generic context for HashMethod. Context is shared between multiple threads, all methods must be thread-safe.
+/// Is used for caching.
+/// Concrete contexts derive from this class; the virtual destructor lets them be owned through HashMethodContextPtr.
+class HashMethodContext
+{
+public:
+    virtual ~HashMethodContext() = default;
+
+    /// Parameters passed to createContext() of a hash method.
+    struct Settings
+    {
+        /// LowCardinalityDictionaryCache passes this value to the constructor of its LRU cache.
+        size_t max_threads;
+    };
+};
+
+using HashMethodContextPtr = std::shared_ptr<HashMethodContext>;
+
+
+namespace columns_hashing_impl
+{
+
+/// Remembers the last looked-up element so that a run of equal consecutive keys
+/// can be answered without going to the hash table.
+template <typename Value, bool consecutive_keys_optimization_>
+struct LastElementCache
+{
+    static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_;
+
+    /// Last element seen (for maps, a key/mapped pair).
+    Value value;
+    /// True until the first element has been stored.
+    bool empty = true;
+    /// Whether the last element was present in the hash table.
+    bool found = false;
+
+    /// Compares against the whole stored value.
+    bool check(const Value & value_)
+    {
+        if (empty)
+            return false;
+        return value == value_;
+    }
+
+    /// Compares against the key part of the stored value.
+    template <typename Key>
+    bool check(const Key & key)
+    {
+        if (empty)
+            return false;
+        return value.first == key;
+    }
+};
+
+/// Variant with the consecutive keys optimization switched off: stores nothing.
+template <typename Data>
+struct LastElementCache<Data, false>
+{
+    static constexpr bool consecutive_keys_optimization = false;
+};
+
+/// Result of emplacing a key into a hash map.
+/// Refers to the mapped value in the table and to a cached copy of it.
+template <typename Mapped>
+class EmplaceResultImpl
+{
+    Mapped & value;
+    Mapped & cached_value;
+    bool inserted;
+
+public:
+    EmplaceResultImpl(Mapped & value_, Mapped & cached_value_, bool inserted_)
+        : value(value_)
+        , cached_value(cached_value_)
+        , inserted(inserted_)
+    {
+    }
+
+    /// True if the key was not in the table before this emplace.
+    bool isInserted() const
+    {
+        return inserted;
+    }
+
+    Mapped & getMapped() const
+    {
+        return value;
+    }
+
+    /// Writes the mapped value to the cached copy and to the table.
+    void setMapped(const Mapped & mapped)
+    {
+        cached_value = mapped;
+        value = mapped;
+    }
+};
+
+/// Emplace result for containers without a mapped value: only reports whether the key was inserted.
+template <>
+class EmplaceResultImpl<void>
+{
+    bool inserted;
+
+public:
+    explicit EmplaceResultImpl(bool inserted) : inserted(inserted) {}
+    bool isInserted() const { return inserted; }
+};
+
+/// Result of looking a key up in a hash map.
+template <typename Mapped>
+class FindResultImpl
+{
+    Mapped * value;
+    bool found;
+
+public:
+    FindResultImpl(Mapped * value_, bool found_)
+        : value(value_)
+        , found(found_)
+    {
+    }
+
+    bool isFound() const
+    {
+        return found;
+    }
+
+    /// Dereferences the stored pointer without checking it.
+    Mapped & getMapped() const
+    {
+        return *value;
+    }
+};
+
+/// Find result for containers without a mapped value: only reports whether the key was found.
+template <>
+class FindResultImpl<void>
+{
+    bool found;
+
+public:
+    explicit FindResultImpl(bool found) : found(found) {}
+    bool isFound() const { return found; }
+};
+
+/// CRTP base with the default emplace / find / hash implementations shared by the hash methods.
+/// Derived must provide getKey(row, pool); it may provide onNewKey / onExistingKey hooks
+/// (the defaults below do nothing).
+/// With consecutive_keys_optimization the last element is remembered in `cache`, so a row whose key
+/// equals the previous one is answered without accessing the hash table.
+template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization>
+class HashMethodBase
+{
+public:
+    using EmplaceResult = EmplaceResultImpl<Mapped>;
+    using FindResult = FindResultImpl<Mapped>;
+    static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
+    using Cache = LastElementCache<Value, consecutive_keys_optimization>;
+
+    /// By default a method needs no shared context.
+    static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; }
+
+    /// Emplaces the key of the given row into data.
+    template <typename Data>
+    ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool)
+    {
+        auto key = static_cast<Derived &>(*this).getKey(row, pool);
+        return emplaceKeyImpl(key, data, pool);
+    }
+
+    /// Looks up the key of the given row in data.
+    /// onExistingKey is called after the lookup whether or not the key was found.
+    template <typename Data>
+    ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool)
+    {
+        auto key = static_cast<Derived &>(*this).getKey(row, pool);
+        auto res = findKeyImpl(key, data);
+        static_cast<Derived &>(*this).onExistingKey(key, pool);
+        return res;
+    }
+
+    /// Hash of the key of the given row, as computed by data.hash().
+    /// onExistingKey is called afterwards, as in findKey.
+    template <typename Data>
+    ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool)
+    {
+        auto key = static_cast<Derived &>(*this).getKey(row, pool);
+        auto res = data.hash(key);
+        static_cast<Derived &>(*this).onExistingKey(key, pool);
+        return res;
+    }
+
+protected:
+    Cache cache;
+
+    /// Gives the cached element a defined initial value when the cache is enabled.
+    HashMethodBase()
+    {
+        if constexpr (consecutive_keys_optimization)
+        {
+            if constexpr (has_mapped)
+            {
+                /// Init PairNoInit elements.
+                cache.value.second = Mapped();
+                using Key = decltype(cache.value.first);
+                cache.value.first = Key();
+            }
+            else
+                cache.value = Value();
+        }
+    }
+
+    /// Default hooks: do nothing. Derived methods may declare their own versions.
+    template <typename Key>
+    static ALWAYS_INLINE void onNewKey(Key & /*key*/, Arena & /*pool*/) {}
+    template <typename Key>
+    static ALWAYS_INLINE void onExistingKey(Key & /*key*/, Arena & /*pool*/) {}
+
+    template <typename Data, typename Key>
+    ALWAYS_INLINE EmplaceResult emplaceKeyImpl(Key key, Data & data, Arena & pool)
+    {
+        /// Fast path: same key as the previous row, and that key is known to be in the table.
+        if constexpr (Cache::consecutive_keys_optimization)
+        {
+            if (cache.found && cache.check(key))
+            {
+                static_cast<Derived &>(*this).onExistingKey(key, pool);
+
+                if constexpr (has_mapped)
+                    return EmplaceResult(cache.value.second, cache.value.second, false);
+                else
+                    return EmplaceResult(false);
+            }
+        }
+
+        typename Data::iterator it;
+        bool inserted = false;
+        data.emplace(key, it, inserted);
+
+        /// Where the cached copy of the mapped value lives: the table cell itself unless the cache is enabled.
+        [[maybe_unused]] Mapped * cached = nullptr;
+        if constexpr (has_mapped)
+            cached = &it->second;
+
+        if (inserted)
+        {
+            if constexpr (has_mapped)
+            {
+                new(&it->second) Mapped();
+                static_cast<Derived &>(*this).onNewKey(it->first, pool);
+            }
+            else
+                static_cast<Derived &>(*this).onNewKey(*it, pool);
+        }
+        else
+            static_cast<Derived &>(*this).onExistingKey(key, pool);
+
+        /// Remember the element as stored in the table (copied after onNewKey has run).
+        if constexpr (consecutive_keys_optimization)
+        {
+            cache.value = *it;
+            cache.found = true;
+            cache.empty = false;
+
+            if constexpr (has_mapped)
+                cached = &cache.value.second;
+        }
+
+        if constexpr (has_mapped)
+            return EmplaceResult(it->second, *cached, inserted);
+        else
+            return EmplaceResult(inserted);
+    }
+
+    template <typename Data, typename Key>
+    ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data)
+    {
+        /// Fast path: same key as the previous lookup; repeat its outcome (found or not found).
+        if constexpr (Cache::consecutive_keys_optimization)
+        {
+            if (cache.check(key))
+            {
+                if constexpr (has_mapped)
+                    return FindResult(&cache.value.second, cache.found);
+                else
+                    return FindResult(cache.found);
+            }
+        }
+
+        auto it = data.find(key);
+        bool found = it != data.end();
+
+        /// Remember the outcome. For a missing key only the key itself is stored.
+        if constexpr (consecutive_keys_optimization)
+        {
+            cache.found = found;
+            cache.empty = false;
+
+            if (found)
+                cache.value = *it;
+            else
+            {
+                if constexpr (has_mapped)
+                    cache.value.first = key;
+                else
+                    cache.value = key;
+            }
+        }
+
+        if constexpr (has_mapped)
+            return FindResult(found ? &it->second : nullptr, found);
+        else
+            return FindResult(found);
+    }
+};
+
+
+/// Array of mapped values; HashMethodSingleLowCardinalityColumn keeps one per dictionary position.
+template <typename T>
+struct MappedCache : public PaddedPODArray<T> {};
+
+/// Without a mapped type there is nothing to cache.
+template <>
+struct MappedCache<void> {};
+
+
+/// This class is designed to provide the functionality that is required for
+/// supporting nullable keys in HashMethodKeysFixed. If there are
+/// no nullable keys, this class is merely implemented as an empty shell.
+template <typename Key, bool has_nullable_keys>
+class BaseStateKeysFixed;
+
+/// Specialization used when nullable keys are supported.
+template <typename Key>
+class BaseStateKeysFixed<Key, true>
+{
+protected:
+    /// Splits every nullable key column into its nested column and its null map.
+    /// A non-nullable key is kept as is, with a null pointer in place of the null map.
+    BaseStateKeysFixed(const ColumnRawPtrs & key_columns)
+    {
+        const size_t columns_count = key_columns.size();
+        null_maps.reserve(columns_count);
+        actual_columns.reserve(columns_count);
+
+        for (const auto & col : key_columns)
+        {
+            if (!col->isColumnNullable())
+            {
+                actual_columns.push_back(col);
+                null_maps.push_back(nullptr);
+                continue;
+            }
+
+            const auto & nullable_col = static_cast<const ColumnNullable &>(*col);
+            actual_columns.push_back(&nullable_col.getNestedColumn());
+            null_maps.push_back(&nullable_col.getNullMapColumn());
+        }
+    }
+
+    /// The columns that actually hold the key values: the nested column
+    /// for a nullable key, the key column itself otherwise.
+    const ColumnRawPtrs & getActualColumns() const
+    {
+        return actual_columns;
+    }
+
+    /// Builds the bitmap for one row: bit k is set when key column k has a null map
+    /// and its entry for this row equals 1.
+    KeysNullMap<Key> createBitmap(size_t row) const
+    {
+        KeysNullMap<Key> bitmap{};
+
+        for (size_t k = 0; k < null_maps.size(); ++k)
+        {
+            if (null_maps[k] == nullptr)
+                continue;
+
+            const auto & null_map = static_cast<const ColumnUInt8 &>(*null_maps[k]).getData();
+            if (null_map[row] != 1)
+                continue;
+
+            bitmap[k / 8] |= UInt8(1) << (k % 8);
+        }
+
+        return bitmap;
+    }
+
+private:
+    ColumnRawPtrs actual_columns;
+    ColumnRawPtrs null_maps;
+};
+
+/// Specialization used when nullable keys are not supported: the key columns are used directly.
+template <typename Key>
+class BaseStateKeysFixed<Key, false>
+{
+protected:
+    BaseStateKeysFixed(const ColumnRawPtrs & columns)
+        : actual_columns(columns)
+    {
+    }
+
+    const ColumnRawPtrs & getActualColumns() const
+    {
+        return actual_columns;
+    }
+
+    /// There are no null maps in this specialization, so asking for a bitmap is a logical error.
+    KeysNullMap<Key> createBitmap(size_t) const
+    {
+        throw Exception("Internal error: calling createBitmap() for non-nullable keys is forbidden",
+                        ErrorCodes::LOGICAL_ERROR);
+    }
+
+private:
+    ColumnRawPtrs actual_columns;
+};
+
+}
+
+}
+
+}
--- a/dbms/src/Common/SipHash.h
+++ b/dbms/src/Common/SipHash.h
@ -17,6 +17,7 @@
 #include <common/unaligned.h>
 #include <string>
 #include <type_traits>
+#include <Core/Defines.h>

 #define ROTL(x, b) static_cast<UInt64>(((x) << (b)) | ((x) >> (64 - (b))))

@ -49,7 +50,7 @@ private:
        UInt8 current_bytes[8];
    };

-    void finalize()
+    ALWAYS_INLINE void finalize()
    {
        /// In the last free byte, we write the remainder of the division by 256.
        current_bytes[7] = cnt;
@ -156,7 +157,7 @@ public:

    /// template for avoiding 'unsigned long long' vs 'unsigned long' problem on old poco in macos
    template <typename T>
-    void get128(T & lo, T & hi)
+    ALWAYS_INLINE void get128(T & lo, T & hi)
    {
        static_assert(sizeof(T) == 8);
        finalize();
--- a/dbms/src/DataStreams/DistinctBlockInputStream.cpp
+++ b/dbms/src/DataStreams/DistinctBlockInputStream.cpp
@ -85,24 +85,15 @@ void DistinctBlockInputStream::buildFilter(
    size_t rows,
    SetVariants & variants) const
 {
-    typename Method::State state;
-    state.init(columns);
+    typename Method::State state(columns, key_sizes, nullptr);

    for (size_t i = 0; i < rows; ++i)
    {
-        /// Make a key.
-        typename Method::Key key = state.getKey(columns, columns.size(), i, key_sizes);
-
-        typename Method::Data::iterator it;
-        bool inserted;
-        method.data.emplace(key, it, inserted);
-
-        if (inserted)
-            method.onNewKey(*it, columns.size(), variants.string_pool);
+        auto emplace_result = state.emplaceKey(method.data, i, variants.string_pool);

        /// Emit the record if there is no such key in the current set yet.
        /// Skip it otherwise.
-        filter[i] = inserted;
+        filter[i] = emplace_result.isInserted();
    }
 }

--- a/dbms/src/DataStreams/DistinctSortedBlockInputStream.cpp
+++ b/dbms/src/DataStreams/DistinctSortedBlockInputStream.cpp
@ -85,8 +85,7 @@ bool DistinctSortedBlockInputStream::buildFilter(
    size_t rows,
    ClearableSetVariants & variants) const
 {
-    typename Method::State state;
-    state.init(columns);
+    typename Method::State state(columns, key_sizes, nullptr);

    /// Compare last row of previous block and first row of current block,
    /// If rows not equal, we can clear HashSet,
@ -106,21 +105,14 @@ bool DistinctSortedBlockInputStream::buildFilter(
        if (i > 0 && !clearing_hint_columns.empty() && !rowsEqual(clearing_hint_columns, i, clearing_hint_columns, i - 1))
            method.data.clear();

-        /// Make a key.
-        typename Method::Key key = state.getKey(columns, columns.size(), i, key_sizes);
-        typename Method::Data::iterator it = method.data.find(key);
-        bool inserted;
-        method.data.emplace(key, it, inserted);
+        auto emplace_result = state.emplaceKey(method.data, i, variants.string_pool);

-        if (inserted)
-        {
-            method.onNewKey(*it, columns.size(), variants.string_pool);
+        if (emplace_result.isInserted())
            has_new_data = true;
-        }

        /// Emit the record if there is no such key in the current set yet.
        /// Skip it otherwise.
-        filter[i] = inserted;
+        filter[i] = emplace_result.isInserted();
    }
    return has_new_data;
 }
--- a/dbms/src/Functions/arrayEnumerateExtended.h
+++ b/dbms/src/Functions/arrayEnumerateExtended.h
@ -8,6 +8,7 @@
 #include <Columns/ColumnString.h>
 #include <Interpreters/AggregationCommon.h>
 #include <Common/HashTable/ClearableHashMap.h>
+#include <Common/ColumnsHashing.h>


 namespace DB
@ -60,11 +61,56 @@ private:
    /// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess.
    static constexpr size_t INITIAL_SIZE_DEGREE = 9;

+    template <typename T>
+    struct MethodOneNumber /// Hash table type and key-extraction method used by executeNumber<T>.
+    {
+        using Set = ClearableHashMap<T, UInt32, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
+                HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
+        using Method = ColumnsHashing::HashMethodOneNumber<typename Set::value_type, UInt32, T, false>; /// Mapped = UInt32, FieldType = T, use_cache = false.
+    };
+
+    struct MethodString /// Hash table type and key-extraction method used by executeString.
+    {
+        using Set = ClearableHashMap<StringRef, UInt32, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
+                HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
+        using Method = ColumnsHashing::HashMethodString<typename Set::value_type, UInt32, false, false>; /// NOTE(review): the two `false` flags are HashMethodString options declared in ColumnsHashing.h - confirm their meaning there.
+    };
+
+    struct MethodFixedString /// Hash table type and key-extraction method used by executeFixedString.
+    {
+        using Set = ClearableHashMap<StringRef, UInt32, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
+                HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
+        using Method = ColumnsHashing::HashMethodFixedString<typename Set::value_type, UInt32, false, false>; /// NOTE(review): the two `false` flags are HashMethodFixedString options declared in ColumnsHashing.h - confirm their meaning there.
+    };
+
+    struct MethodFixed /// Hash table type and key-extraction method used by execute128bit; the table key is a UInt128.
+    {
+        using Set = ClearableHashMap<UInt128, UInt32, UInt128HashCRC32, HashTableGrower<INITIAL_SIZE_DEGREE>,
+                HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
+        using Method = ColumnsHashing::HashMethodKeysFixed<typename Set::value_type, UInt128, UInt32, false, false, false>; /// NOTE(review): the three `false` flags are HashMethodKeysFixed options declared in ColumnsHashing.h - confirm their meaning there.
+    };
+
+    struct MethodHashed /// Hash table type and key-extraction method used by executeHashed, the fallback path.
+    {
+        using Set =  ClearableHashMap<UInt128, UInt32, UInt128TrivialHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
+                HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
+        using Method = ColumnsHashing::HashMethodHashed<typename Set::value_type, UInt32, false>; /// NOTE(review): the trailing `false` is a HashMethodHashed option declared in ColumnsHashing.h - confirm its meaning there.
+    };
+
+    template <typename Method>
+    void executeMethod(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, const Sizes & key_sizes,
+                       const NullMap * null_map, ColumnUInt32::Container & res_values);
+
+    template <typename Method, bool has_null_map>
+    void executeMethodImpl(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, const Sizes & key_sizes,
+                           const NullMap * null_map, ColumnUInt32::Container & res_values);
+
    template <typename T>
    bool executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values);
    bool executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values);
+    bool executeFixedString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values);
    bool execute128bit(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values);
-    bool executeHashed(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values);
+    void executeHashed(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values);
 };


@ -131,7 +177,7 @@ void FunctionArrayEnumerateExtended<Derived>::executeImpl(Block & block, const C

    if (num_arguments == 1)
    {
-        executeNumber<UInt8>(*offsets, *data_columns[0], null_map, res_values)
+        if (!(executeNumber<UInt8>(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber<UInt16>(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber<UInt32>(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber<UInt64>(*offsets, *data_columns[0], null_map, res_values)
@ -142,47 +188,56 @@ void FunctionArrayEnumerateExtended<Derived>::executeImpl(Block & block, const C
            || executeNumber<Float32>(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber<Float64>(*offsets, *data_columns[0], null_map, res_values)
            || executeString(*offsets, *data_columns[0], null_map, res_values)
-            || executeHashed(*offsets, data_columns, res_values);
+            || executeFixedString(*offsets, *data_columns[0], null_map, res_values)))
+            executeHashed(*offsets, data_columns, res_values);
    }
    else
    {
-        execute128bit(*offsets, data_columns, res_values)
-            || executeHashed(*offsets, data_columns, res_values);
+        if (!execute128bit(*offsets, data_columns, res_values))
+            executeHashed(*offsets, data_columns, res_values);
    }

    block.getByPosition(result).column = ColumnArray::create(std::move(res_nested), offsets_column);
 }

-
 template <typename Derived>
-template <typename T>
-bool FunctionArrayEnumerateExtended<Derived>::executeNumber(
-    const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
+template <typename Method, bool has_null_map> /// has_null_map decides at compile time whether null_map is consulted at all.
+void FunctionArrayEnumerateExtended<Derived>::executeMethodImpl(
+        const ColumnArray::Offsets & offsets,
+        const ColumnRawPtrs & columns,
+        const Sizes & key_sizes,
+        [[maybe_unused]] const NullMap * null_map, /// Dereferenced only when has_null_map is true.
+        ColumnUInt32::Container & res_values)
 {
-    const ColumnVector<T> * data_concrete = checkAndGetColumn<ColumnVector<T>>(&data);
-    if (!data_concrete)
-        return false;
-    const auto & values = data_concrete->getData();
+    typename Method::Set indices; /// One table reused for all arrays; cleared at the start of each array below.
+    typename Method::Method method(columns, key_sizes, nullptr); /// nullptr: no hash-method context is supplied.
+    Arena pool; /// Exists only to satisfy the emplaceKey signature; not expected to be used.

-    using ValuesToIndices = ClearableHashMap<T, UInt32, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
+    ColumnArray::Offset prev_off = 0; /// Index of the first element of the current array in the flattened data.

-    ValuesToIndices indices;
-    size_t prev_off = 0;
    if constexpr (std::is_same_v<Derived, FunctionArrayEnumerateUniq>)
    {
        // Unique
-        for (size_t i = 0; i < offsets.size(); ++i)
+        for (size_t off : offsets) /// off is the end (exclusive) of the current array.
        {
            indices.clear();
            UInt32 null_count = 0;
-            size_t off = offsets[i];
            for (size_t j = prev_off; j < off; ++j)
            {
-                if (null_map && (*null_map)[j])
+                if constexpr (has_null_map)
+                {
+                    if ((*null_map)[j])
+                    {
                        res_values[j] = ++null_count;
-                else
-                    res_values[j] = ++indices[values[j]];
+                        continue; /// NULLs get their own counter and are never put into the table.
+                    }
+                }
+
+                auto emplace_result = method.emplaceKey(indices, j, pool);
+                auto idx = emplace_result.getMapped() + 1; /// Occurrence number; relies on a freshly inserted key having mapped value 0 - presumably guaranteed by emplaceKey, confirm.
+                emplace_result.setMapped(idx);
+
+                res_values[j] = idx;
            }
            prev_off = off;
        }
@ -190,31 +245,67 @@ bool FunctionArrayEnumerateExtended<Derived>::executeNumber(
    else
    {
        // Dense
-        for (size_t i = 0; i < offsets.size(); ++i)
+        for (size_t off : offsets) /// off is the end (exclusive) of the current array.
        {
            indices.clear();
-            size_t rank = 0;
-            UInt32 null_index = 0;
-            size_t off = offsets[i];
+            UInt32 rank = 0; /// Number of distinct values (a NULL counts as one) seen so far in this array.
+            [[maybe_unused]] UInt32 null_index = 0; /// Rank given to NULL; 0 means no NULL has been seen yet.
            for (size_t j = prev_off; j < off; ++j)
            {
-                if (null_map && (*null_map)[j])
+                if constexpr (has_null_map)
+                {
+                    if ((*null_map)[j])
                    {
                        if (!null_index)
                            null_index = ++rank;
+
                        res_values[j] = null_index;
+                        continue; /// NULLs are ranked via null_index and are never put into the table.
                    }
-                else
-                {
-                    auto & idx = indices[values[j]];
+                }
+
+                auto emplace_result = method.emplaceKey(indices, j, pool);
+                auto idx = emplace_result.getMapped(); /// 0 is treated as "no rank assigned yet".
+
                if (!idx)
+                {
                    idx = ++rank;
-                    res_values[j] = idx;
+                    emplace_result.setMapped(idx); /// Store the rank so later occurrences of the value reuse it.
                }
+
+                res_values[j] = idx;
            }
            prev_off = off;
        }
    }
+}
+
+template <typename Derived>
+template <typename Method> /// Method is one of the Method* structs providing the Set and Method types.
+void FunctionArrayEnumerateExtended<Derived>::executeMethod(
+    const ColumnArray::Offsets & offsets,
+    const ColumnRawPtrs & columns,
+    const Sizes & key_sizes,
+    const NullMap * null_map,
+    ColumnUInt32::Container & res_values)
+{
+    if (null_map) /// Turn the run-time presence of a null map into the has_null_map template flag.
+        executeMethodImpl<Method, true>(offsets, columns, key_sizes, null_map, res_values);
+    else
+        executeMethodImpl<Method, false>(offsets, columns, key_sizes, null_map, res_values);
+
+}
+
+template <typename Derived>
+template <typename T>
+bool FunctionArrayEnumerateExtended<Derived>::executeNumber(
+    const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
+{
+    const auto * nested = checkAndGetColumn<ColumnVector<T>>(&data); /// Null when data is not a ColumnVector<T>.
+    if (!nested)
+        return false;
+
+    executeMethod<MethodOneNumber<T>>(offsets, {nested}, {}, null_map, res_values); /// Empty key_sizes: HashMethodOneNumber ignores that argument.
    return true;
 }

@ -222,62 +313,22 @@ template <typename Derived>
 bool FunctionArrayEnumerateExtended<Derived>::executeString(
    const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
 {
-    const ColumnString * values = checkAndGetColumn<ColumnString>(&data);
-    if (!values)
-        return false;
+    const auto * nested = checkAndGetColumn<ColumnString>(&data); /// Null when data is not a ColumnString.
+    if (nested)
+        executeMethod<MethodString>(offsets, {nested}, {}, null_map, res_values); /// The single String column is the only key; key_sizes is left empty.

-    size_t prev_off = 0;
-    using ValuesToIndices = ClearableHashMap<StringRef, UInt32, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
+    return nested; /// Pointer-to-bool conversion: true exactly when the column was handled here.
+}

-    ValuesToIndices indices;
-    if constexpr (std::is_same_v<Derived, FunctionArrayEnumerateUniq>)
-    {
-        // Unique
-        for (size_t i = 0; i < offsets.size(); ++i)
-        {
-            indices.clear();
-            UInt32 null_count = 0;
-            size_t off = offsets[i];
-            for (size_t j = prev_off; j < off; ++j)
-            {
-                if (null_map && (*null_map)[j])
-                    res_values[j] = ++null_count;
-                else
-                    res_values[j] = ++indices[values->getDataAt(j)];
-            }
-            prev_off = off;
-        }
-    }
-    else
-    {
-        // Dense
-        for (size_t i = 0; i < offsets.size(); ++i)
-        {
-            indices.clear();
-            size_t rank = 0;
-            UInt32 null_index = 0;
-            size_t off = offsets[i];
-            for (size_t j = prev_off; j < off; ++j)
-            {
-                if (null_map && (*null_map)[j])
-                {
-                    if (!null_index)
-                        null_index = ++rank;
-                    res_values[j] = null_index;
-                }
-                else
-                {
-                    auto & idx = indices[values->getDataAt(j)];
-                    if (!idx)
-                        idx = ++rank;
-                    res_values[j] = idx;
-                }
-            }
-            prev_off = off;
-        }
-    }
-    return true;
+template <typename Derived> /// Handles a single FixedString argument; returns false without touching res_values for any other column type.
+bool FunctionArrayEnumerateExtended<Derived>::executeFixedString(
+        const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
+{
+    const auto * nested = checkAndGetColumn<ColumnFixedString>(&data); /// Must test for ColumnFixedString: ColumnString is already consumed by executeString, so testing for it here made this path unreachable.
+    if (nested)
+        executeMethod<MethodFixedString>(offsets, {nested}, {}, null_map, res_values); /// The single FixedString column is the only key; key_sizes is left empty.
+
+    return nested; /// Pointer-to-bool conversion: true exactly when the column was handled here.
 }

 template <typename Derived>
@ -298,95 +349,20 @@ bool FunctionArrayEnumerateExtended<Derived>::execute128bit(
        keys_bytes += key_sizes[j];
    }

    if (keys_bytes > 16)
        return false;

-    using ValuesToIndices = ClearableHashMap<UInt128, UInt32, UInt128HashCRC32, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
-
-    ValuesToIndices indices;
-    size_t prev_off = 0;
-    if constexpr (std::is_same_v<Derived, FunctionArrayEnumerateUniq>)
-    {
-        // Unique
-        for (size_t i = 0; i < offsets.size(); ++i)
-        {
-            indices.clear();
-            size_t off = offsets[i];
-            for (size_t j = prev_off; j < off; ++j)
-                res_values[j] = ++indices[packFixed<UInt128>(j, count, columns, key_sizes)];
-            prev_off = off;
-        }
-    }
-    else
-    {
-        // Dense
-        for (size_t i = 0; i < offsets.size(); ++i)
-        {
-            indices.clear();
-            size_t off = offsets[i];
-            size_t rank = 0;
-            for (size_t j = prev_off; j < off; ++j)
-            {
-                auto & idx = indices[packFixed<UInt128>(j, count, columns, key_sizes)];
-                if (!idx)
-                    idx = ++rank;
-                res_values[j] = idx;
-            }
-            prev_off = off;
-        }
-    }
-
+    executeMethod<MethodFixed>(offsets, columns, key_sizes, nullptr, res_values); /// Reached only when all keys together fit into the 16 bytes of the UInt128 table key; wider key sets return false above and go to executeHashed.
    return true;
 }

 template <typename Derived>
-bool FunctionArrayEnumerateExtended<Derived>::executeHashed(
+void FunctionArrayEnumerateExtended<Derived>::executeHashed( /// Fallback path: it has no column-type check, so it no longer reports success through a bool.
    const ColumnArray::Offsets & offsets,
    const ColumnRawPtrs & columns,
    ColumnUInt32::Container & res_values)
 {
-    size_t count = columns.size();
-
-    using ValuesToIndices = ClearableHashMap<UInt128, UInt32, UInt128TrivialHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
-
-    ValuesToIndices indices;
-    size_t prev_off = 0;
-    if constexpr (std::is_same_v<Derived, FunctionArrayEnumerateUniq>)
-    {
-        // Unique
-        for (size_t i = 0; i < offsets.size(); ++i)
-        {
-            indices.clear();
-            size_t off = offsets[i];
-            for (size_t j = prev_off; j < off; ++j)
-            {
-                res_values[j] = ++indices[hash128(j, count, columns)];
-            }
-            prev_off = off;
-        }
-    }
-    else
-    {
-        // Dense
-        for (size_t i = 0; i < offsets.size(); ++i)
-        {
-            indices.clear();
-            size_t off = offsets[i];
-            size_t rank = 0;
-            for (size_t j = prev_off; j < off; ++j)
-            {
-                auto & idx = indices[hash128(j, count, columns)];
-                if (!idx)
-                    idx = ++rank;
-                res_values[j] = idx;
-            }
-            prev_off = off;
-        }
-    }
-
-    return true;
+    executeMethod<MethodHashed>(offsets, columns, {}, nullptr, res_values); /// Empty key_sizes and no null map are passed on this path.
 }

 }
--- a/dbms/src/Functions/arrayUniq.cpp
+++ b/dbms/src/Functions/arrayUniq.cpp
@ -10,6 +10,7 @@
 #include <Common/HashTable/ClearableHashSet.h>
 #include <Interpreters/AggregationCommon.h>
 #include <IO/WriteHelpers.h>
+#include <Common/ColumnsHashing.h>


 namespace DB
@ -62,11 +63,56 @@ private:
    /// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess.
    static constexpr size_t INITIAL_SIZE_DEGREE = 9;

+    template <typename T>
+    struct MethodOneNumber /// Hash set type and key-extraction method used by executeNumber<T>.
+    {
+        using Set = ClearableHashSet<T, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
+                HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
+        using Method = ColumnsHashing::HashMethodOneNumber<typename Set::value_type, void, T, false>; /// Mapped = void (plain set), FieldType = T, use_cache = false.
+    };
+
+    struct MethodString /// Hash set type and key-extraction method used by executeString.
+    {
+        using Set = ClearableHashSet<StringRef, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
+                HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
+        using Method = ColumnsHashing::HashMethodString<typename Set::value_type, void, false, false>; /// NOTE(review): the two `false` flags are HashMethodString options declared in ColumnsHashing.h - confirm their meaning there.
+    };
+
+    struct MethodFixedString /// Hash set type and key-extraction method used by executeFixedString.
+    {
+        using Set = ClearableHashSet<StringRef, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
+                HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
+        using Method = ColumnsHashing::HashMethodFixedString<typename Set::value_type, void, false, false>; /// NOTE(review): the two `false` flags are HashMethodFixedString options declared in ColumnsHashing.h - confirm their meaning there.
+    };
+
+    struct MethodFixed /// Hash set type and key-extraction method used by execute128bit; the set key is a UInt128.
+    {
+        using Set = ClearableHashSet<UInt128, UInt128HashCRC32, HashTableGrower<INITIAL_SIZE_DEGREE>,
+                HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
+        using Method = ColumnsHashing::HashMethodKeysFixed<typename Set::value_type, UInt128, void, false, false, false>; /// NOTE(review): the three `false` flags are HashMethodKeysFixed options declared in ColumnsHashing.h - confirm their meaning there.
+    };
+
+    struct MethodHashed /// Hash set type and key-extraction method used by executeHashed, the fallback path.
+    {
+        using Set = ClearableHashSet<UInt128, UInt128TrivialHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
+                HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
+        using Method = ColumnsHashing::HashMethodHashed<typename Set::value_type, void, false>; /// NOTE(review): the trailing `false` is a HashMethodHashed option declared in ColumnsHashing.h - confirm its meaning there.
+    };
+
+    template <typename Method>
+    void executeMethod(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, const Sizes & key_sizes,
+            const NullMap * null_map, ColumnUInt32::Container & res_values);
+
+    template <typename Method, bool has_null_map>
+    void executeMethodImpl(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, const Sizes & key_sizes,
+            const NullMap * null_map, ColumnUInt32::Container & res_values);
+
    template <typename T>
    bool executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values);
    bool executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values);
+    bool executeFixedString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values);
    bool execute128bit(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values);
-    bool executeHashed(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values);
+    void executeHashed(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values);
 };


@ -126,7 +172,7 @@ void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & argumen

    if (num_arguments == 1)
    {
-        executeNumber<UInt8>(*offsets, *data_columns[0], null_map, res_values)
+        if (!(executeNumber<UInt8>(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber<UInt16>(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber<UInt32>(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber<UInt64>(*offsets, *data_columns[0], null_map, res_values)
@ -136,30 +182,31 @@ void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & argumen
            || executeNumber<Int64>(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber<Float32>(*offsets, *data_columns[0], null_map, res_values)
            || executeNumber<Float64>(*offsets, *data_columns[0], null_map, res_values)
-            || executeString(*offsets, *data_columns[0], null_map, res_values)
-            || executeHashed(*offsets, data_columns, res_values);
+            || executeFixedString(*offsets, *data_columns[0], null_map, res_values)
+            || executeString(*offsets, *data_columns[0], null_map, res_values)))
+            executeHashed(*offsets, data_columns, res_values);
    }
    else
    {
-        execute128bit(*offsets, data_columns, res_values)
-            || executeHashed(*offsets, data_columns, res_values);
+        if (!execute128bit(*offsets, data_columns, res_values))
+            executeHashed(*offsets, data_columns, res_values);
    }

    block.getByPosition(result).column = std::move(res);
 }

-template <typename T>
-bool FunctionArrayUniq::executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
+template <typename Method, bool has_null_map> /// has_null_map decides at compile time whether null_map is consulted at all.
+void FunctionArrayUniq::executeMethodImpl(
+    const ColumnArray::Offsets & offsets,
+    const ColumnRawPtrs & columns,
+    const Sizes & key_sizes,
+    [[maybe_unused]] const NullMap * null_map, /// Dereferenced only when has_null_map is true.
+    ColumnUInt32::Container & res_values)
 {
-    const ColumnVector<T> * nested = checkAndGetColumn<ColumnVector<T>>(&data);
-    if (!nested)
-        return false;
-    const auto & values = nested->getData();
+    typename Method::Set set; /// Holds the non-NULL keys of the current array; presumably cleared per array in the lines this diff elides - confirm.
+    typename Method::Method method(columns, key_sizes, nullptr); /// nullptr: no hash-method context is supplied.
+    Arena pool; /// Exists only to satisfy the emplaceKey signature; not expected to be used.

-    using Set = ClearableHashSet<T, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
-
-    Set set;
    ColumnArray::Offset prev_off = 0;
    for (size_t i = 0; i < offsets.size(); ++i)
    {
@ -168,48 +215,66 @@ bool FunctionArrayUniq::executeNumber(const ColumnArray::Offsets & offsets, cons
        ColumnArray::Offset off = offsets[i];
        for (ColumnArray::Offset j = prev_off; j < off; ++j)
        {
-            if (null_map && (*null_map)[j])
+            if constexpr (has_null_map)
+            {
+                if ((*null_map)[j])
+                {
                    found_null = true;
-            else
-                set.insert(values[j]);
+                    continue; /// A NULL is counted once through found_null and is not stored in the set.
+                }
+            }
+
+            method.emplaceKey(set, j, pool); /// Result is ignored: only the final set size is used.
        }

        res_values[i] = set.size() + found_null;
        prev_off = off;
    }
+}
+
+template <typename Method> /// Method is one of the Method* structs providing the Set and Method types.
+void FunctionArrayUniq::executeMethod(
+    const ColumnArray::Offsets & offsets,
+    const ColumnRawPtrs & columns,
+    const Sizes & key_sizes,
+    const NullMap * null_map,
+    ColumnUInt32::Container & res_values)
+{
+    if (null_map) /// Turn the run-time presence of a null map into the has_null_map template flag.
+        executeMethodImpl<Method, true>(offsets, columns, key_sizes, null_map, res_values);
+    else
+        executeMethodImpl<Method, false>(offsets, columns, key_sizes, null_map, res_values);
+
+}
+
+template <typename T>
+bool FunctionArrayUniq::executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
+{
+    const auto * nested = checkAndGetColumn<ColumnVector<T>>(&data); /// Null when data is not a ColumnVector<T>.
+    if (!nested)
+        return false;
+
+    executeMethod<MethodOneNumber<T>>(offsets, {nested}, {}, null_map, res_values); /// Empty key_sizes: HashMethodOneNumber ignores that argument.
    return true;
 }

 bool FunctionArrayUniq::executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
 {
-    const ColumnString * nested = checkAndGetColumn<ColumnString>(&data);
-    if (!nested)
-        return false;
+    const auto * nested = checkAndGetColumn<ColumnString>(&data); /// Null when data is not a ColumnString.
+    if (nested)
+        executeMethod<MethodString>(offsets, {nested}, {}, null_map, res_values); /// The single String column is the only key; key_sizes is left empty.

-    using Set = ClearableHashSet<StringRef, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
-
-    Set set;
-    ColumnArray::Offset prev_off = 0;
-    for (size_t i = 0; i < offsets.size(); ++i)
-    {
-        set.clear();
-        bool found_null = false;
-        ColumnArray::Offset off = offsets[i];
-        for (ColumnArray::Offset j = prev_off; j < off; ++j)
-        {
-            if (null_map && (*null_map)[j])
-                found_null = true;
-            else
-                set.insert(nested->getDataAt(j));
-        }
-
-        res_values[i] = set.size() + found_null;
-        prev_off = off;
-    }
-    return true;
+    return nested; /// Pointer-to-bool conversion: true exactly when the column was handled here.
 }

+bool FunctionArrayUniq::executeFixedString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
+{
+    const auto * nested = checkAndGetColumn<ColumnFixedString>(&data); /// Null when data is not a ColumnFixedString.
+    if (nested)
+        executeMethod<MethodFixedString>(offsets, {nested}, {}, null_map, res_values); /// The single FixedString column is the only key; key_sizes is left empty.
+
+    return nested; /// Pointer-to-bool conversion: true exactly when the column was handled here.
+}

 bool FunctionArrayUniq::execute128bit(
        const ColumnArray::Offsets & offsets,
@ -231,49 +296,16 @@ bool FunctionArrayUniq::execute128bit(
    if (keys_bytes > 16)
        return false;

-    using Set = ClearableHashSet<UInt128, UInt128HashCRC32, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
-
-    Set set;
-    ColumnArray::Offset prev_off = 0;
-    for (ColumnArray::Offset i = 0; i < offsets.size(); ++i)
-    {
-        set.clear();
-        ColumnArray::Offset off = offsets[i];
-        for (ColumnArray::Offset j = prev_off; j < off; ++j)
-            set.insert(packFixed<UInt128>(j, count, columns, key_sizes));
-
-        res_values[i] = set.size();
-        prev_off = off;
-    }
-
+    executeMethod<MethodFixed>(offsets, columns, key_sizes, nullptr, res_values);
    return true;
 }

-bool FunctionArrayUniq::executeHashed(
+void FunctionArrayUniq::executeHashed( /// Fallback path: it has no column-type check, so it no longer reports success through a bool.
        const ColumnArray::Offsets & offsets,
        const ColumnRawPtrs & columns,
        ColumnUInt32::Container & res_values)
 {
-    size_t count = columns.size();
-
-    using Set = ClearableHashSet<UInt128, UInt128TrivialHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
-        HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
-
-    Set set;
-    ColumnArray::Offset prev_off = 0;
-    for (ColumnArray::Offset i = 0; i < offsets.size(); ++i)
-    {
-        set.clear();
-        ColumnArray::Offset off = offsets[i];
-        for (ColumnArray::Offset j = prev_off; j < off; ++j)
-            set.insert(hash128(j, count, columns));
-
-        res_values[i] = set.size();
-        prev_off = off;
-    }
-
-    return true;
+    executeMethod<MethodHashed>(offsets, columns, {}, nullptr, res_values); /// Empty key_sizes and no null map are passed on this path.
 }


--- a/dbms/src/Interpreters/Aggregator.cpp
+++ b/dbms/src/Interpreters/Aggregator.cpp
@ -6,9 +6,11 @@
 #include <Common/setThreadName.h>
 #include <DataTypes/DataTypeAggregateFunction.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeLowCardinality.h>
 #include <Columns/ColumnsNumber.h>
 #include <Columns/ColumnArray.h>
 #include <Columns/ColumnTuple.h>
+#include <Columns/ColumnLowCardinality.h>
 #include <AggregateFunctions/AggregateFunctionCount.h>
 #include <DataStreams/IBlockInputStream.h>
 #include <DataStreams/NativeBlockOutputStream.h>
@ -22,11 +24,9 @@
 #include <Common/CurrentThread.h>
 #include <Common/typeid_cast.h>
 #include <common/demangle.h>
+
 #if __has_include(<Interpreters/config_compile.h>)
 #include <Interpreters/config_compile.h>
-#include <Columns/ColumnLowCardinality.h>
-#include <DataTypes/DataTypeLowCardinality.h>
-
 #endif


@ -188,7 +188,7 @@ Aggregator::Aggregator(const Params & params_)
    }

    method_chosen = chooseAggregationMethod();
-    AggregationStateCache::Settings cache_settings;
+    HashMethodContext::Settings cache_settings;
    cache_settings.max_threads = params.max_threads;
    aggregation_state_cache = AggregatedDataVariants::createCache(method_chosen, cache_settings);
 }
@ -586,11 +586,7 @@ void NO_INLINE Aggregator::executeImpl(
    bool no_more_keys,
    AggregateDataPtr overflow_row) const
 {
-    typename Method::State state;
-    if constexpr (Method::low_cardinality_optimization)
-        state.init(key_columns, aggregation_state_cache);
-    else
-        state.init(key_columns);
+    typename Method::State state(key_columns, key_sizes, aggregation_state_cache);

    if (!no_more_keys)
        executeImplCase<false>(method, state, aggregates_pool, rows, key_columns, aggregate_instructions, keys, overflow_row);
@ -605,97 +601,51 @@ void NO_INLINE Aggregator::executeImplCase(
    typename Method::State & state,
    Arena * aggregates_pool,
    size_t rows,
-    ColumnRawPtrs & key_columns,
+    ColumnRawPtrs & /*key_columns*/,
    AggregateFunctionInstruction * aggregate_instructions,
-    StringRefs & keys,
+    StringRefs & /*keys*/,
    AggregateDataPtr overflow_row) const
 {
    /// NOTE When editing this code, also pay attention to SpecializedAggregator.h.

    /// For all rows.
-    typename Method::Key prev_key{};
-    AggregateDataPtr value = nullptr;
    for (size_t i = 0; i < rows; ++i)
    {
-        bool inserted = false; /// Inserted a new key, or was this key already?
+        AggregateDataPtr aggregate_data = nullptr;

-        /// Get the key to insert into the hash table.
-        typename Method::Key key;
-        if constexpr (!Method::low_cardinality_optimization)
-            key = state.getKey(key_columns, params.keys_size, i, key_sizes, keys, *aggregates_pool);
-
-        AggregateDataPtr * aggregate_data = nullptr;
-        typename Method::iterator it; /// Is not used if Method::low_cardinality_optimization
-
-        if (!no_more_keys)  /// Insert.
+        if constexpr (!no_more_keys)  /// Insert.
        {
-            /// Optimization for consecutive identical keys.
-            if (!Method::no_consecutive_keys_optimization)
-            {
-                if (i != 0 && key == prev_key)
-                {
-                    /// Add values to the aggregate functions.
-                    for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst)
-                        (*inst->func)(inst->that, value + inst->state_offset, inst->arguments, i, aggregates_pool);
+            auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool);

-                    method.onExistingKey(key, keys, *aggregates_pool);
-                    continue;
+            /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key.
+            if (emplace_result.isInserted())
+            {
+                /// Exception safety: if memory cannot be allocated or the states cannot be created, then destructors will not be called.
+                emplace_result.setMapped(nullptr);
+
+                aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
+                createAggregateStates(aggregate_data);
+
+                emplace_result.setMapped(aggregate_data);
            }
            else
-                    prev_key = key;
-            }
-
-            if constexpr (Method::low_cardinality_optimization)
-                aggregate_data = state.emplaceKeyFromRow(method.data, i, inserted, params.keys_size, keys, *aggregates_pool);
-            else
-            {
-                method.data.emplace(key, it, inserted);
-                aggregate_data = &Method::getAggregateData(it->second);
-            }
+                aggregate_data = emplace_result.getMapped();
        }
        else
        {
            /// Add only if the key already exists.
-
-            if constexpr (Method::low_cardinality_optimization)
-                aggregate_data = state.findFromRow(method.data, i);
-            else
-            {
-                it = method.data.find(key);
-                if (method.data.end() != it)
-                    aggregate_data = &Method::getAggregateData(it->second);
-            }
+            auto find_result = state.findKey(method.data, i, *aggregates_pool);
+            if (find_result.isFound())
+                aggregate_data = find_result.getMapped();
        }

        /// aggregate_date == nullptr means that the new key did not fit in the hash table because of no_more_keys.

        /// If the key does not fit, and the data does not need to be aggregated in a separate row, then there's nothing to do.
        if (!aggregate_data && !overflow_row)
-        {
-            method.onExistingKey(key, keys, *aggregates_pool);
            continue;
-        }

-        /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key.
-        if (inserted)
-        {
-            /// exception-safety - if you can not allocate memory or create states, then destructors will not be called.
-            *aggregate_data = nullptr;
-
-            if constexpr (!Method::low_cardinality_optimization)
-                method.onNewKey(*it, params.keys_size, keys, *aggregates_pool);
-
-            AggregateDataPtr place = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
-            createAggregateStates(place);
-            *aggregate_data = place;
-
-            if constexpr (Method::low_cardinality_optimization)
-                state.cacheAggregateData(i, place);
-        }
-        else
-            method.onExistingKey(key, keys, *aggregates_pool);
-
-        value = aggregate_data ? *aggregate_data : overflow_row;
+        AggregateDataPtr value = aggregate_data ? aggregate_data : overflow_row;

        /// Add values to the aggregate functions.
        for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst)
@ -1174,7 +1124,7 @@ void NO_INLINE Aggregator::convertToBlockImplFinal(

        for (size_t i = 0; i < params.aggregates_size; ++i)
            aggregate_functions[i]->insertResultInto(
-                Method::getAggregateData(value.second) + offsets_of_aggregate_states[i],
+                value.second + offsets_of_aggregate_states[i],
                *final_aggregate_columns[i]);
    }

@ -1205,9 +1155,9 @@ void NO_INLINE Aggregator::convertToBlockImplNotFinal(

        /// reserved, so push_back does not throw exceptions
        for (size_t i = 0; i < params.aggregates_size; ++i)
-            aggregate_columns[i]->push_back(Method::getAggregateData(value.second) + offsets_of_aggregate_states[i]);
+            aggregate_columns[i]->push_back(value.second + offsets_of_aggregate_states[i]);

-        Method::getAggregateData(value.second) = nullptr;
+        value.second = nullptr;
    }
 }

@ -1551,20 +1501,20 @@ void NO_INLINE Aggregator::mergeDataImpl(
        {
            for (size_t i = 0; i < params.aggregates_size; ++i)
                aggregate_functions[i]->merge(
-                    Method::getAggregateData(res_it->second) + offsets_of_aggregate_states[i],
-                    Method::getAggregateData(it->second) + offsets_of_aggregate_states[i],
+                    res_it->second + offsets_of_aggregate_states[i],
+                    it->second + offsets_of_aggregate_states[i],
                    arena);

            for (size_t i = 0; i < params.aggregates_size; ++i)
                aggregate_functions[i]->destroy(
-                    Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]);
+                    it->second + offsets_of_aggregate_states[i]);
        }
        else
        {
            res_it->second = it->second;
        }

-        Method::getAggregateData(it->second) = nullptr;
+        it->second = nullptr;
    }

    table_src.clearAndShrink();
@ -1588,19 +1538,18 @@ void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl(

        AggregateDataPtr res_data = table_dst.end() == res_it
            ? overflows
-            : Method::getAggregateData(res_it->second);
+            : res_it->second;

        for (size_t i = 0; i < params.aggregates_size; ++i)
            aggregate_functions[i]->merge(
                res_data + offsets_of_aggregate_states[i],
-                Method::getAggregateData(it->second) + offsets_of_aggregate_states[i],
+                it->second + offsets_of_aggregate_states[i],
                arena);

        for (size_t i = 0; i < params.aggregates_size; ++i)
-            aggregate_functions[i]->destroy(
-                Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]);
+            aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]);

-        Method::getAggregateData(it->second) = nullptr;
+        it->second = nullptr;
    }

    table_src.clearAndShrink();
@ -1623,19 +1572,18 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl(
        if (table_dst.end() == res_it)
            continue;

-        AggregateDataPtr res_data = Method::getAggregateData(res_it->second);
+        AggregateDataPtr res_data = res_it->second;

        for (size_t i = 0; i < params.aggregates_size; ++i)
            aggregate_functions[i]->merge(
                res_data + offsets_of_aggregate_states[i],
-                Method::getAggregateData(it->second) + offsets_of_aggregate_states[i],
+                it->second + offsets_of_aggregate_states[i],
                arena);

        for (size_t i = 0; i < params.aggregates_size; ++i)
-            aggregate_functions[i]->destroy(
-                Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]);
+            aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]);

-        Method::getAggregateData(it->second) = nullptr;
+        it->second = nullptr;
    }

    table_src.clearAndShrink();
@ -1986,7 +1934,7 @@ template <bool no_more_keys, typename Method, typename Table>
 void NO_INLINE Aggregator::mergeStreamsImplCase(
    Block & block,
    Arena * aggregates_pool,
-    Method & method,
+    Method & method [[maybe_unused]],
    Table & data,
    AggregateDataPtr overflow_row) const
 {
@ -2000,77 +1948,43 @@ void NO_INLINE Aggregator::mergeStreamsImplCase(
    for (size_t i = 0; i < params.aggregates_size; ++i)
        aggregate_columns[i] = &typeid_cast<const ColumnAggregateFunction &>(*block.safeGetByPosition(params.keys_size + i).column).getData();

-    typename Method::State state;
-    if constexpr (Method::low_cardinality_optimization)
-        state.init(key_columns, aggregation_state_cache);
-    else
-        state.init(key_columns);
+    typename Method::State state(key_columns, key_sizes, aggregation_state_cache);

    /// For all rows.
-    StringRefs keys(params.keys_size);
    size_t rows = block.rows();
    for (size_t i = 0; i < rows; ++i)
    {
-        typename Table::iterator it;
-        AggregateDataPtr * aggregate_data = nullptr;
-
-        bool inserted = false; /// Inserted a new key, or was this key already?
-
-        /// Get the key to insert into the hash table.
-        typename Method::Key key;
-        if constexpr (!Method::low_cardinality_optimization)
-            key = state.getKey(key_columns, params.keys_size, i, key_sizes, keys, *aggregates_pool);
+        AggregateDataPtr aggregate_data = nullptr;

        if (!no_more_keys)
        {
-            if constexpr (Method::low_cardinality_optimization)
-                aggregate_data = state.emplaceKeyFromRow(data, i, inserted, params.keys_size, keys, *aggregates_pool);
-            else
+            auto emplace_result = state.emplaceKey(data, i, *aggregates_pool);
+            if (emplace_result.isInserted())
            {
-                data.emplace(key, it, inserted);
-                aggregate_data = &Method::getAggregateData(it->second);
+                emplace_result.setMapped(nullptr);
+
+                aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
+                createAggregateStates(aggregate_data);
+
+                emplace_result.setMapped(aggregate_data);
            }
+            else
+                aggregate_data = emplace_result.getMapped();
        }
        else
        {
-            if constexpr (Method::low_cardinality_optimization)
-                aggregate_data = state.findFromRow(data, i);
-            else
-            {
-                it = data.find(key);
-                if (data.end() != it)
-                    aggregate_data = &Method::getAggregateData(it->second);
-            }
+            auto find_result = state.findKey(data, i, *aggregates_pool);
+            if (find_result.isFound())
+                aggregate_data = find_result.getMapped();
        }

        /// aggregate_date == nullptr means that the new key did not fit in the hash table because of no_more_keys.

        /// If the key does not fit, and the data does not need to be aggregated into a separate row, then there's nothing to do.
        if (!aggregate_data && !overflow_row)
-        {
-            method.onExistingKey(key, keys, *aggregates_pool);
            continue;
-        }

-        /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key.
-        if (inserted)
-        {
-            *aggregate_data = nullptr;
-
-            if constexpr (!Method::low_cardinality_optimization)
-                method.onNewKey(*it, params.keys_size, keys, *aggregates_pool);
-
-            AggregateDataPtr place = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
-            createAggregateStates(place);
-            *aggregate_data = place;
-
-            if constexpr (Method::low_cardinality_optimization)
-                state.cacheAggregateData(i, place);
-        }
-        else
-            method.onExistingKey(key, keys, *aggregates_pool);
-
-        AggregateDataPtr value = aggregate_data ? *aggregate_data : overflow_row;
+        AggregateDataPtr value = aggregate_data ? aggregate_data : overflow_row;

        /// Merge state of aggregate functions.
        for (size_t j = 0; j < params.aggregates_size; ++j)
@ -2165,7 +2079,7 @@ void Aggregator::mergeStream(const BlockInputStreamPtr & stream, AggregatedDataV
      * If there is at least one block with a bucket number greater or equal than zero, then there was a two-level aggregation.
      */
    auto max_bucket = bucket_to_blocks.rbegin()->first;
-    size_t has_two_level = max_bucket >= 0;
+    bool has_two_level = max_bucket >= 0;

    if (has_two_level)
    {
@ -2395,15 +2309,11 @@ void NO_INLINE Aggregator::convertBlockToTwoLevelImpl(
    Method & method,
    Arena * pool,
    ColumnRawPtrs & key_columns,
-    StringRefs & keys,
+    StringRefs & keys [[maybe_unused]],
    const Block & source,
    std::vector<Block> & destinations) const
 {
-    typename Method::State state;
-    if constexpr (Method::low_cardinality_optimization)
-        state.init(key_columns, aggregation_state_cache);
-    else
-        state.init(key_columns);
+    typename Method::State state(key_columns, key_sizes, aggregation_state_cache);

    size_t rows = source.rows();
    size_t columns = source.columns();
@ -2423,16 +2333,11 @@ void NO_INLINE Aggregator::convertBlockToTwoLevelImpl(
            }
        }

-        /// Obtain a key. Calculate bucket number from it.
-        typename Method::Key key = state.getKey(key_columns, params.keys_size, i, key_sizes, keys, *pool);
-
-        auto hash = method.data.hash(key);
+        /// Calculate bucket number from row hash.
+        auto hash = state.getHash(method.data, i, *pool);
        auto bucket = method.data.getBucketFromHash(hash);

        selector[i] = bucket;
-
-        /// We don't need to store this key in pool.
-        method.onExistingKey(key, keys, *pool);
    }

    size_t num_buckets = destinations.size();
@ -2523,7 +2428,7 @@ void NO_INLINE Aggregator::destroyImpl(Table & table) const
 {
    for (auto elem : table)
    {
-        AggregateDataPtr & data = Method::getAggregateData(elem.second);
+        AggregateDataPtr & data = elem.second;

        /** If an exception (usually a lack of memory, the MemoryTracker throws) arose
          *  after inserting the key into a hash table, but before creating all states of aggregate functions,
--- a/dbms/src/Interpreters/Aggregator.h
+++ b/dbms/src/Interpreters/Aggregator.h
@ -15,6 +15,7 @@
 #include <Common/ThreadPool.h>
 #include <Common/UInt128.h>
 #include <Common/LRUCache.h>
+#include <Common/ColumnsHashing.h>

 #include <DataStreams/IBlockInputStream.h>
 #include <DataStreams/SizeLimits.h>
@ -138,18 +139,6 @@ using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTw
        TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr, DefaultHash<StringRef>,
        TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;

-/// Cache which can be used by aggregations method's states. Object is shared in all threads.
-struct AggregationStateCache
-{
-    virtual ~AggregationStateCache() = default;
-
-    struct Settings
-    {
-        size_t max_threads;
-    };
-};
-
-using AggregationStateCachePtr = std::shared_ptr<AggregationStateCache>;

 /// For the case where there is one numeric key.
 template <typename FieldType, typename TData>    /// UInt8/16/32/64 for any type with corresponding bit width.
@ -169,65 +158,16 @@ struct AggregationMethodOneNumber
    AggregationMethodOneNumber(const Other & other) : data(other.data) {}

    /// To use one `Method` in different threads, use different `State`.
-    struct State
-    {
-        const char * vec;
+    using State = ColumnsHashing::HashMethodOneNumber<typename Data::value_type, Mapped, FieldType>;

-        /** Called at the start of each block processing.
-          * Sets the variables needed for the other methods called in inner loops.
-          */
-        void init(ColumnRawPtrs & key_columns)
-        {
-            vec = key_columns[0]->getRawData().data;
-        }
-
-        /// Get the key from the key columns for insertion into the hash table.
-        ALWAYS_INLINE Key getKey(
-            const ColumnRawPtrs & /*key_columns*/,
-            size_t /*keys_size*/,         /// Number of key columns.
-            size_t i,                     /// From which row of the block, get the key.
-            const Sizes & /*key_sizes*/,  /// If the keys of a fixed length - their lengths. It is not used in aggregation methods for variable length keys.
-            StringRefs & /*keys*/,        /// Here references to key data in columns can be written. They can be used in the future.
-            Arena & /*pool*/) const
-        {
-            return unalignedLoad<FieldType>(vec + i * sizeof(FieldType));
-        }
-    };
-
-    /// From the value in the hash table, get AggregateDataPtr.
-    static AggregateDataPtr & getAggregateData(Mapped & value)                { return value; }
-    static const AggregateDataPtr & getAggregateData(const Mapped & value)    { return value; }
-
-    /** Place additional data, if necessary, in case a new key was inserted into the hash table.
-      */
-    static ALWAYS_INLINE void onNewKey(typename Data::value_type & /*value*/, size_t /*keys_size*/, StringRefs & /*keys*/, Arena & /*pool*/)
-    {
-    }
-
-    /** The action to be taken if the key is not new. For example, roll back the memory allocation in the pool.
-      */
-    static ALWAYS_INLINE void onExistingKey(const Key & /*key*/, StringRefs & /*keys*/, Arena & /*pool*/) {}
-
-    /** Do not use optimization for consecutive keys.
-      */
-    static const bool no_consecutive_keys_optimization = false;
    /// Use optimization for low cardinality.
    static const bool low_cardinality_optimization = false;

-    /** Insert the key from the hash table into columns.
-      */
+    /// Insert the key from the hash table into columns.
    static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes & /*key_sizes*/)
    {
        static_cast<ColumnVectorHelper *>(key_columns[0].get())->insertRawData<sizeof(FieldType)>(reinterpret_cast<const char *>(&value.first));
    }
-
-    /// Get StringRef from value which can be inserted into column.
-    static StringRef getValueRef(const typename Data::value_type & value)
-    {
-        return StringRef(reinterpret_cast<const char *>(&value.first), sizeof(value.first));
-    }
-
-    static AggregationStateCachePtr createCache(const AggregationStateCache::Settings & /*settings*/) { return nullptr; }
 };


@ -248,58 +188,14 @@ struct AggregationMethodString
    template <typename Other>
    AggregationMethodString(const Other & other) : data(other.data) {}

-    struct State
-    {
-        const IColumn::Offset * offsets;
-        const UInt8 * chars;
+    using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped>;

-        void init(ColumnRawPtrs & key_columns)
-        {
-            const IColumn & column = *key_columns[0];
-            const ColumnString & column_string = static_cast<const ColumnString &>(column);
-            offsets = column_string.getOffsets().data();
-            chars = column_string.getChars().data();
-        }
-
-        ALWAYS_INLINE Key getKey(
-            const ColumnRawPtrs & /*key_columns*/,
-            size_t /*keys_size*/,
-            ssize_t i,
-            const Sizes & /*key_sizes*/,
-            StringRefs & /*keys*/,
-            Arena & /*pool*/) const
-        {
-            return StringRef(
-                chars + offsets[i - 1],
-                offsets[i] - offsets[i - 1] - 1);
-        }
-    };
-
-    static AggregateDataPtr & getAggregateData(Mapped & value)                { return value; }
-    static const AggregateDataPtr & getAggregateData(const Mapped & value)    { return value; }
-
-    static ALWAYS_INLINE void onNewKey(typename Data::value_type & value, size_t /*keys_size*/, StringRefs & /*keys*/, Arena & pool)
-    {
-        if (value.first.size)
-            value.first.data = pool.insert(value.first.data, value.first.size);
-    }
-
-    static ALWAYS_INLINE void onExistingKey(const Key & /*key*/, StringRefs & /*keys*/, Arena & /*pool*/) {}
-
-    static const bool no_consecutive_keys_optimization = false;
    static const bool low_cardinality_optimization = false;

-    static StringRef getValueRef(const typename Data::value_type & value)
-    {
-        return StringRef(value.first.data, value.first.size);
-    }
-
    static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &)
    {
        key_columns[0]->insertData(value.first.data, value.first.size);
    }
-
-    static AggregationStateCachePtr createCache(const AggregationStateCache::Settings & /*settings*/) { return nullptr; }
 };


@ -320,101 +216,14 @@ struct AggregationMethodFixedString
    template <typename Other>
    AggregationMethodFixedString(const Other & other) : data(other.data) {}

-    struct State
-    {
-        size_t n;
-        const ColumnFixedString::Chars * chars;
+    using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped>;

-        void init(ColumnRawPtrs & key_columns)
-        {
-            const IColumn & column = *key_columns[0];
-            const ColumnFixedString & column_string = static_cast<const ColumnFixedString &>(column);
-            n = column_string.getN();
-            chars = &column_string.getChars();
-        }
-
-        ALWAYS_INLINE Key getKey(
-            const ColumnRawPtrs &,
-            size_t,
-            size_t i,
-            const Sizes &,
-            StringRefs &,
-            Arena &) const
-        {
-            return StringRef(&(*chars)[i * n], n);
-        }
-    };
-
-    static AggregateDataPtr & getAggregateData(Mapped & value)                { return value; }
-    static const AggregateDataPtr & getAggregateData(const Mapped & value)    { return value; }
-
-    static ALWAYS_INLINE void onNewKey(typename Data::value_type & value, size_t, StringRefs &, Arena & pool)
-    {
-        value.first.data = pool.insert(value.first.data, value.first.size);
-    }
-
-    static ALWAYS_INLINE void onExistingKey(const Key &, StringRefs &, Arena &) {}
-
-    static const bool no_consecutive_keys_optimization = false;
    static const bool low_cardinality_optimization = false;

-    static StringRef getValueRef(const typename Data::value_type & value)
-    {
-        return StringRef(value.first.data, value.first.size);
-    }
-
    static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &)
    {
        key_columns[0]->insertData(value.first.data, value.first.size);
    }
-
-    static AggregationStateCachePtr createCache(const AggregationStateCache::Settings & /*settings*/) { return nullptr; }
-};
-
-/// Cache stores dictionaries and saved_hash per dictionary key.
-class LowCardinalityDictionaryCache : public AggregationStateCache
-{
-public:
-    /// Will assume that dictionaries with same hash has the same keys.
-    /// Just in case, check that they have also the same size.
-    struct DictionaryKey
-    {
-        UInt128 hash;
-        UInt64 size;
-
-        bool operator== (const DictionaryKey & other) const { return hash == other.hash && size == other.size; }
-    };
-
-    struct DictionaryKeyHash
-    {
-        size_t operator()(const DictionaryKey & key) const
-        {
-            SipHash hash;
-            hash.update(key.hash.low);
-            hash.update(key.hash.high);
-            hash.update(key.size);
-            return hash.get64();
-        }
-    };
-
-    struct CachedValues
-    {
-        /// Store ptr to dictionary to be sure it won't be deleted.
-        ColumnPtr dictionary_holder;
-        /// Hashes for dictionary keys.
-        const UInt64 * saved_hash = nullptr;
-    };
-
-    using CachedValuesPtr = std::shared_ptr<CachedValues>;
-
-    explicit LowCardinalityDictionaryCache(const AggregationStateCache::Settings & settings) : cache(settings.max_threads) {}
-
-    CachedValuesPtr get(const DictionaryKey & key) { return cache.get(key); }
-    void set(const DictionaryKey & key, const CachedValuesPtr & mapped) { cache.set(key, mapped); }
-
-private:
-    using Cache = LRUCache<DictionaryKey, CachedValues, DictionaryKeyHash>;
-    Cache cache;
 };

 /// Single low cardinality column.
@ -432,342 +241,23 @@ struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod

    using Base::data;

-    static AggregationStateCachePtr createCache(const AggregationStateCache::Settings & settings)
-    {
-        return std::make_shared<LowCardinalityDictionaryCache>(settings);
-    }
-
    AggregationMethodSingleLowCardinalityColumn() = default;

    template <typename Other>
    explicit AggregationMethodSingleLowCardinalityColumn(const Other & other) : Base(other) {}

-    struct State : public BaseState
-    {
-        ColumnRawPtrs key_columns;
-        const IColumn * positions = nullptr;
-        size_t size_of_index_type = 0;
+    using State = ColumnsHashing::HashMethodSingleLowCardinalityColumn<BaseState, Mapped, true>; /// Takes the place of the hand-written State struct removed here. NOTE(review): the trailing `true` is presumably use_cache — confirm against ColumnsHashing.h.

-        /// saved hash is from current column or from cache.
-        const UInt64 * saved_hash = nullptr;
-        /// Hold dictionary in case saved_hash is from cache to be sure it won't be deleted.
-        ColumnPtr dictionary_holder;
-
-        /// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages.
-        PaddedPODArray<AggregateDataPtr> aggregate_data_cache;
-
-        /// If initialized column is nullable.
-        bool is_nullable = false;
-
-        void init(ColumnRawPtrs &)
-        {
-            throw Exception("Expected cache for AggregationMethodSingleLowCardinalityColumn::init", ErrorCodes::LOGICAL_ERROR);
-        }
-
-        void init(ColumnRawPtrs & key_columns_low_cardinality, const AggregationStateCachePtr & cache_ptr)
-        {
-            auto column = typeid_cast<const ColumnLowCardinality *>(key_columns_low_cardinality[0]);
-            if (!column)
-                throw Exception("Invalid aggregation key type for AggregationMethodSingleLowCardinalityColumn method. "
-                                "Excepted LowCardinality, got " + key_columns_low_cardinality[0]->getName(), ErrorCodes::LOGICAL_ERROR);
-
-            if (!cache_ptr)
-                throw Exception("Cache wasn't created for AggregationMethodSingleLowCardinalityColumn", ErrorCodes::LOGICAL_ERROR);
-
-            auto cache = typeid_cast<LowCardinalityDictionaryCache *>(cache_ptr.get());
-            if (!cache)
-            {
-                const auto & cached_val = *cache_ptr;
-                throw Exception("Invalid type for AggregationMethodSingleLowCardinalityColumn cache: "
-                                + demangle(typeid(cached_val).name()), ErrorCodes::LOGICAL_ERROR);
-            }
-
-            auto * dict = column->getDictionary().getNestedNotNullableColumn().get();
-            is_nullable = column->getDictionary().nestedColumnIsNullable();
-            key_columns = {dict};
-            bool is_shared_dict = column->isSharedDictionary();
-
-            typename LowCardinalityDictionaryCache::DictionaryKey dictionary_key;
-            typename LowCardinalityDictionaryCache::CachedValuesPtr cached_values;
-
-            if (is_shared_dict)
-            {
-                dictionary_key = {column->getDictionary().getHash(), dict->size()};
-                cached_values = cache->get(dictionary_key);
-            }
-
-            if (cached_values)
-            {
-                saved_hash = cached_values->saved_hash;
-                dictionary_holder = cached_values->dictionary_holder;
-            }
-            else
-            {
-                saved_hash = column->getDictionary().tryGetSavedHash();
-                dictionary_holder = column->getDictionaryPtr();
-
-                if (is_shared_dict)
-                {
-                    cached_values = std::make_shared<typename LowCardinalityDictionaryCache::CachedValues>();
-                    cached_values->saved_hash = saved_hash;
-                    cached_values->dictionary_holder = dictionary_holder;
-
-                    cache->set(dictionary_key, cached_values);
-                }
-            }
-
-            AggregateDataPtr default_data = nullptr;
-            aggregate_data_cache.assign(key_columns[0]->size(), default_data);
-
-            size_of_index_type = column->getSizeOfIndexType();
-            positions = column->getIndexesPtr().get();
-
-            BaseState::init(key_columns);
-        }
-
-        ALWAYS_INLINE size_t getIndexAt(size_t row) const
-        {
-            switch (size_of_index_type)
-            {
-                case sizeof(UInt8): return static_cast<const ColumnUInt8 *>(positions)->getElement(row);
-                case sizeof(UInt16): return static_cast<const ColumnUInt16 *>(positions)->getElement(row);
-                case sizeof(UInt32): return static_cast<const ColumnUInt32 *>(positions)->getElement(row);
-                case sizeof(UInt64): return static_cast<const ColumnUInt64 *>(positions)->getElement(row);
-                default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR);
-            }
-        }
-
-        /// Get the key from the key columns for insertion into the hash table.
-        ALWAYS_INLINE Key getKey(
-            const ColumnRawPtrs & /*key_columns*/,
-            size_t /*keys_size*/,
-            size_t i,
-            const Sizes & key_sizes,
-            StringRefs & keys,
-            Arena & pool) const
-        {
-            size_t row = getIndexAt(i);
-            return BaseState::getKey(key_columns, 1, row, key_sizes, keys, pool);
-        }
-
-        template <typename D>
-        ALWAYS_INLINE AggregateDataPtr * emplaceKeyFromRow(
-            D & data,
-            size_t i,
-            bool & inserted,
-            size_t keys_size,
-            StringRefs & keys,
-            Arena & pool)
-        {
-            size_t row = getIndexAt(i);
-
-            if (is_nullable && row == 0)
-            {
-                inserted = !data.hasNullKeyData();
-                data.hasNullKeyData() = true;
-                return &data.getNullKeyData();
-            }
-
-            if (aggregate_data_cache[row])
-            {
-                inserted = false;
-                return &aggregate_data_cache[row];
-            }
-            else
-            {
-                Sizes key_sizes;
-                auto key = getKey({}, 0, i, key_sizes, keys, pool);
-
-                typename D::iterator it;
-                if (saved_hash)
-                    data.emplace(key, it, inserted, saved_hash[row]);
-                else
-                    data.emplace(key, it, inserted);
-
-                if (inserted)
-                    Base::onNewKey(*it, keys_size, keys, pool);
-                else
-                    aggregate_data_cache[row] = Base::getAggregateData(it->second);
-
-                return &Base::getAggregateData(it->second);
-            }
-        }
-
-        ALWAYS_INLINE bool isNullAt(size_t i)
-        {
-            if (!is_nullable)
-                return false;
-
-            return getIndexAt(i) == 0;
-        }
-
-        ALWAYS_INLINE void cacheAggregateData(size_t i, AggregateDataPtr data)
-        {
-            size_t row = getIndexAt(i);
-            aggregate_data_cache[row] = data;
-        }
-
-        template <typename D>
-        ALWAYS_INLINE AggregateDataPtr * findFromRow(D & data, size_t i)
-        {
-            size_t row = getIndexAt(i);
-
-            if (is_nullable && row == 0)
-                return data.hasNullKeyData() ? &data.getNullKeyData() : nullptr;
-
-            if (!aggregate_data_cache[row])
-            {
-                Sizes key_sizes;
-                StringRefs keys;
-                Arena pool;
-                auto key = getKey({}, 0, i, key_sizes, keys, pool);
-
-                typename D::iterator it;
-                if (saved_hash)
-                    it = data.find(key, saved_hash[row]);
-                else
-                    it = data.find(key);
-
-                if (it != data.end())
-                    aggregate_data_cache[row] = Base::getAggregateData(it->second);
-            }
-            return &aggregate_data_cache[row];
-        }
-    };
-
-    static AggregateDataPtr & getAggregateData(Mapped & value)                { return Base::getAggregateData(value); }
-    static const AggregateDataPtr & getAggregateData(const Mapped & value)    { return Base::getAggregateData(value); }
-
-    static void onNewKey(typename Data::value_type & value, size_t keys_size, StringRefs & keys, Arena & pool)
-    {
-        return Base::onNewKey(value, keys_size, keys, pool);
-    }
-
-    static void onExistingKey(const Key & key, StringRefs & keys, Arena & pool)
-    {
-        return Base::onExistingKey(key, keys, pool);
-    }
-
-    static const bool no_consecutive_keys_optimization = true;
    static const bool low_cardinality_optimization = true;

    static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns_low_cardinality, const Sizes & /*key_sizes*/)
    {
-        auto ref = Base::getValueRef(value);
+        auto ref = BaseState::getValueRef(value); /// ref exposes the key bytes as (data, size); both are passed to insertData below.
        static_cast<ColumnLowCardinality *>(key_columns_low_cardinality[0].get())->insertData(ref.data, ref.size);
    }
 };


-namespace aggregator_impl
-{
-
-/// This class is designed to provide the functionality that is required for
-/// supporting nullable keys in AggregationMethodKeysFixed. If there are
-/// no nullable keys, this class is merely implemented as an empty shell.
-template <typename Key, bool has_nullable_keys>
-class BaseStateKeysFixed;
-
-/// Case where nullable keys are supported.
-template <typename Key>
-class BaseStateKeysFixed<Key, true>
-{
-protected:
-    void init(const ColumnRawPtrs & key_columns)
-    {
-        null_maps.reserve(key_columns.size());
-        actual_columns.reserve(key_columns.size());
-
-        for (const auto & col : key_columns)
-        {
-            if (col->isColumnNullable())
-            {
-                const auto & nullable_col = static_cast<const ColumnNullable &>(*col);
-                actual_columns.push_back(&nullable_col.getNestedColumn());
-                null_maps.push_back(&nullable_col.getNullMapColumn());
-            }
-            else
-            {
-                actual_columns.push_back(col);
-                null_maps.push_back(nullptr);
-            }
-        }
-    }
-
-    /// Return the columns which actually contain the values of the keys.
-    /// For a given key column, if it is nullable, we return its nested
-    /// column. Otherwise we return the key column itself.
-    inline const ColumnRawPtrs & getActualColumns() const
-    {
-        return actual_columns;
-    }
-
-    /// Create a bitmap that indicates whether, for a particular row,
-    /// a key column bears a null value or not.
-    KeysNullMap<Key> createBitmap(size_t row) const
-    {
-        KeysNullMap<Key> bitmap{};
-
-        for (size_t k = 0; k < null_maps.size(); ++k)
-        {
-            if (null_maps[k] != nullptr)
-            {
-                const auto & null_map = static_cast<const ColumnUInt8 &>(*null_maps[k]).getData();
-                if (null_map[row] == 1)
-                {
-                    size_t bucket = k / 8;
-                    size_t offset = k % 8;
-                    bitmap[bucket] |= UInt8(1) << offset;
-                }
-            }
-        }
-
-        return bitmap;
-    }
-
-private:
-    ColumnRawPtrs actual_columns;
-    ColumnRawPtrs null_maps;
-};
-
-/// Case where nullable keys are not supported.
-template <typename Key>
-class BaseStateKeysFixed<Key, false>
-{
-protected:
-    void init(const ColumnRawPtrs &)
-    {
-        throw Exception{"Internal error: calling init() for non-nullable"
-            " keys is forbidden", ErrorCodes::LOGICAL_ERROR};
-    }
-
-    const ColumnRawPtrs & getActualColumns() const
-    {
-        throw Exception{"Internal error: calling getActualColumns() for non-nullable"
-            " keys is forbidden", ErrorCodes::LOGICAL_ERROR};
-    }
-
-    KeysNullMap<Key> createBitmap(size_t) const
-    {
-        throw Exception{"Internal error: calling createBitmap() for non-nullable keys"
-            " is forbidden", ErrorCodes::LOGICAL_ERROR};
-    }
-};
-
-}
-
-// Oprional mask for low cardinality columns.
-template <bool has_low_cardinality>
-struct LowCardinalityKeys
-{
-    ColumnRawPtrs nested_columns;
-    ColumnRawPtrs positions;
-    Sizes position_sizes;
-};
-
-template <>
-struct LowCardinalityKeys<false> {};
-
 /// For the case where all keys are of fixed length, and they fit in N (for example, 128) bits.
 template <typename TData, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false>
 struct AggregationMethodKeysFixed
@ -787,71 +277,8 @@ struct AggregationMethodKeysFixed
    template <typename Other>
    AggregationMethodKeysFixed(const Other & other) : data(other.data) {}

-    class State final : private aggregator_impl::BaseStateKeysFixed<Key, has_nullable_keys>
-    {
-        LowCardinalityKeys<has_low_cardinality> low_cardinality_keys;
+    using State = ColumnsHashing::HashMethodKeysFixed<typename Data::value_type, Key, Mapped, has_nullable_keys, has_low_cardinality>; /// Takes the place of the removed State class; the same two flags (nullable keys, low-cardinality keys) are forwarded to the hashing method.

-    public:
-        using Base = aggregator_impl::BaseStateKeysFixed<Key, has_nullable_keys>;
-
-        void init(ColumnRawPtrs & key_columns)
-        {
-            if constexpr (has_low_cardinality)
-            {
-                low_cardinality_keys.nested_columns.resize(key_columns.size());
-                low_cardinality_keys.positions.assign(key_columns.size(), nullptr);
-                low_cardinality_keys.position_sizes.resize(key_columns.size());
-                for (size_t i = 0; i < key_columns.size(); ++i)
-                {
-                    if (auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(key_columns[i]))
-                    {
-                        low_cardinality_keys.nested_columns[i] = low_cardinality_col->getDictionary().getNestedColumn().get();
-                        low_cardinality_keys.positions[i] = &low_cardinality_col->getIndexes();
-                        low_cardinality_keys.position_sizes[i] = low_cardinality_col->getSizeOfIndexType();
-                    }
-                    else
-                        low_cardinality_keys.nested_columns[i] = key_columns[i];
-                }
-            }
-
-            if (has_nullable_keys)
-                Base::init(key_columns);
-        }
-
-        ALWAYS_INLINE Key getKey(
-            const ColumnRawPtrs & key_columns,
-            size_t keys_size,
-            size_t i,
-            const Sizes & key_sizes,
-            StringRefs &,
-            Arena &) const
-        {
-            if (has_nullable_keys)
-            {
-                auto bitmap = Base::createBitmap(i);
-                return packFixed<Key>(i, keys_size, Base::getActualColumns(), key_sizes, bitmap);
-            }
-            else
-            {
-                if constexpr (has_low_cardinality)
-                    return packFixed<Key, true>(i, keys_size, low_cardinality_keys.nested_columns, key_sizes,
-                                                &low_cardinality_keys.positions, &low_cardinality_keys.position_sizes);
-
-                return packFixed<Key>(i, keys_size, key_columns, key_sizes);
-            }
-        }
-    };
-
-    static AggregateDataPtr & getAggregateData(Mapped & value)                { return value; }
-    static const AggregateDataPtr & getAggregateData(const Mapped & value)    { return value; }
-
-    static ALWAYS_INLINE void onNewKey(typename Data::value_type &, size_t, StringRefs &, Arena &)
-    {
-    }
-
-    static ALWAYS_INLINE void onExistingKey(const Key &, StringRefs &, Arena &) {}
-
-    static const bool no_consecutive_keys_optimization = false;
    static const bool low_cardinality_optimization = false;

    static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes & key_sizes)
@ -904,8 +331,6 @@ struct AggregationMethodKeysFixed
            }
        }
    }
-
-    static AggregationStateCachePtr createCache(const AggregationStateCache::Settings & /*settings*/) { return nullptr; }
 };


@ -930,53 +355,24 @@ struct AggregationMethodSerialized
    template <typename Other>
    AggregationMethodSerialized(const Other & other) : data(other.data) {}

-    struct State
-    {
-        void init(ColumnRawPtrs &)
-        {
-        }
+    using State = ColumnsHashing::HashMethodSerialized<typename Data::value_type, Mapped>; /// Takes the place of the removed State struct. NOTE(review): presumably still serializes the keys into the arena pool, as the removed getKey() did — confirm.

-        ALWAYS_INLINE Key getKey(
-            const ColumnRawPtrs & key_columns,
-            size_t keys_size,
-            size_t i,
-            const Sizes &,
-            StringRefs &,
-            Arena & pool) const
-        {
-            return serializeKeysToPoolContiguous(i, keys_size, key_columns, pool);
-        }
-    };
-
-    static AggregateDataPtr & getAggregateData(Mapped & value)                { return value; }
-    static const AggregateDataPtr & getAggregateData(const Mapped & value)    { return value; }
-
-    static ALWAYS_INLINE void onNewKey(typename Data::value_type &, size_t, StringRefs &, Arena &)
-    {
-    }
-
-    static ALWAYS_INLINE void onExistingKey(const Key & key, StringRefs &, Arena & pool)
-    {
-        pool.rollback(key.size);
-    }
-
-    /// If the key already was, it is removed from the pool (overwritten), and the next key can not be compared with it.
-    static const bool no_consecutive_keys_optimization = true;
    static const bool low_cardinality_optimization = false;

    static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &)
    {
        auto pos = value.first.data;
-        for (size_t i = 0; i < key_columns.size(); ++i)
-            pos = key_columns[i]->deserializeAndInsertFromArena(pos);
+        for (auto & column : key_columns) /// Walk the key columns in order, starting from the beginning of the stored key bytes.
+            pos = column->deserializeAndInsertFromArena(pos); /// Each call returns the position the next column continues reading from.
    }
-
-    static AggregationStateCachePtr createCache(const AggregationStateCache::Settings & /*settings*/) { return nullptr; }
 };


 class Aggregator;

+using ColumnsHashing::HashMethodContext;
+using ColumnsHashing::HashMethodContextPtr;
+
 struct AggregatedDataVariants : private boost::noncopyable
 {
    /** Working with states of aggregate functions in the pool is arranged in the following (inconvenient) way:
@ -1298,7 +694,7 @@ struct AggregatedDataVariants : private boost::noncopyable
        }
    }

-    static AggregationStateCachePtr createCache(Type type, const AggregationStateCache::Settings & settings)
+    static HashMethodContextPtr createCache(Type type, const HashMethodContext::Settings & settings)
    {
        switch (type)
        {
@ -1309,7 +705,7 @@ struct AggregatedDataVariants : private boost::noncopyable
            { \
                using TPtr ## NAME = decltype(AggregatedDataVariants::NAME); \
                using T ## NAME = typename TPtr ## NAME ::element_type; \
-                return T ## NAME ::createCache(settings); \
+                return T ## NAME ::State::createContext(settings); \
            }

            APPLY_FOR_AGGREGATED_VARIANTS(M)
@ -1496,7 +892,7 @@ protected:
    AggregatedDataVariants::Type method_chosen;
    Sizes key_sizes;

-    AggregationStateCachePtr aggregation_state_cache;
+    HashMethodContextPtr aggregation_state_cache;

    AggregateFunctionsPlainPtrs aggregate_functions;

--- a/dbms/src/Interpreters/Join.cpp
+++ b/dbms/src/Interpreters/Join.cpp
@ -170,18 +170,54 @@ static size_t getTotalByteCountImpl(const Maps & maps, Join::Type type)
 }


-template <Join::Type type>
-struct KeyGetterForType;
+template <Join::Type type, typename Value, typename Mapped>
+struct KeyGetterForTypeImpl; /// Selects the ColumnsHashing method for one Join::Type; specialized below per variant.

-template <> struct KeyGetterForType<Join::Type::key8> { using Type = JoinKeyGetterOneNumber<UInt8>; };
-template <> struct KeyGetterForType<Join::Type::key16> { using Type = JoinKeyGetterOneNumber<UInt16>; };
-template <> struct KeyGetterForType<Join::Type::key32> { using Type = JoinKeyGetterOneNumber<UInt32>; };
-template <> struct KeyGetterForType<Join::Type::key64> { using Type = JoinKeyGetterOneNumber<UInt64>; };
-template <> struct KeyGetterForType<Join::Type::key_string> { using Type = JoinKeyGetterString; };
-template <> struct KeyGetterForType<Join::Type::key_fixed_string> { using Type = JoinKeyGetterFixedString; };
-template <> struct KeyGetterForType<Join::Type::keys128> { using Type = JoinKeyGetterFixed<UInt128>; };
-template <> struct KeyGetterForType<Join::Type::keys256> { using Type = JoinKeyGetterFixed<UInt256>; };
-template <> struct KeyGetterForType<Join::Type::hashed> { using Type = JoinKeyGetterHashed; };
+template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::key8, Value, Mapped>
+{
+    using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt8, false>; /// Single UInt8 key; the 4th argument is use_cache = false.
+};
+template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::key16, Value, Mapped>
+{
+    using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt16, false>; /// Single UInt16 key; use_cache = false.
+};
+template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::key32, Value, Mapped>
+{
+    using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt32, false>; /// Single UInt32 key; use_cache = false.
+};
+template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::key64, Value, Mapped>
+{
+    using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt64, false>; /// Single UInt64 key; use_cache = false.
+};
+template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::key_string, Value, Mapped>
+{
+    using Type = ColumnsHashing::HashMethodString<Value, Mapped, true, false>; /// NOTE(review): the meaning of the trailing <true, false> flags is not visible here — confirm against HashMethodString.
+};
+template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::key_fixed_string, Value, Mapped>
+{
+    using Type = ColumnsHashing::HashMethodFixedString<Value, Mapped, true, false>; /// NOTE(review): the meaning of the trailing <true, false> flags is not visible here — confirm against HashMethodFixedString.
+};
+template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::keys128, Value, Mapped>
+{
+    using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt128, Mapped, false, false, false>; /// Keys packed into UInt128; nullable-keys and low-cardinality flags are off. NOTE(review): the last flag is presumably use_cache — confirm.
+};
+template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::keys256, Value, Mapped>
+{
+    using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt256, Mapped, false, false, false>; /// Keys packed into UInt256; nullable-keys and low-cardinality flags are off. NOTE(review): the last flag is presumably use_cache — confirm.
+};
+template <typename Value, typename Mapped> struct KeyGetterForTypeImpl<Join::Type::hashed, Value, Mapped>
+{
+    using Type = ColumnsHashing::HashMethodHashed<Value, Mapped, false>; /// NOTE(review): the trailing flag is presumably use_cache — confirm.
+};
+
+template <Join::Type type, typename Data>
+struct KeyGetterForType /// Front end: derives Value and Mapped from the map type Data, then defers to KeyGetterForTypeImpl.
+{
+    using Value = typename Data::value_type;
+    using Mapped_t = typename Data::mapped_type;
+    using Mapped = std::conditional_t<std::is_const_v<Data>, const Mapped_t, Mapped_t>; /// A const map yields a const mapped type.
+    using Type = typename KeyGetterForTypeImpl<type, Value, Mapped>::Type;
+};


 /// Do I need to use the hash table maps_*_full, in which we remember whether the row was joined.
@ -316,42 +352,30 @@ namespace
    template <ASTTableJoin::Strictness STRICTNESS, typename Map, typename KeyGetter>
    struct Inserter
    {
-        static void insert(Map & map, const typename Map::key_type & key, Block * stored_block, size_t i, Arena & pool);
+        static void insert(Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool); /// Receives the key getter rather than a ready key; the specializations call key_getter.emplaceKey(map, i, pool) themselves.
    };

    template <typename Map, typename KeyGetter>
    struct Inserter<ASTTableJoin::Strictness::Any, Map, KeyGetter>
    {
-        static void insert(Map & map, const typename Map::key_type & key, Block * stored_block, size_t i, Arena & pool)
+        static ALWAYS_INLINE void insert(Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) /// ANY strictness: one (stored_block, i) entry is kept per key.
        {
-            typename Map::iterator it;
-            bool inserted;
-            map.emplace(key, it, inserted);
+            auto emplace_result = key_getter.emplaceKey(map, i, pool); /// The key for row i is extracted and emplaced by the key getter.

-            if (inserted)
-            {
-                KeyGetter::onNewKey(it->first, pool);
-                new (&it->second) typename Map::mapped_type(stored_block, i);
-            }
-            else if (it->second.overwrite)
-                new (&it->second) typename Map::mapped_type(stored_block, i);
+            if (emplace_result.isInserted() || emplace_result.getMapped().overwrite) /// isInserted() is tested first, so the mapped value of a fresh key is not read before it is constructed.
+                new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i); /// Construct in place: new key, or existing value flagged overwrite.
        }
    };

    template <typename Map, typename KeyGetter>
    struct Inserter<ASTTableJoin::Strictness::All, Map, KeyGetter>
    {
-        static void insert(Map & map, const typename Map::key_type & key, Block * stored_block, size_t i, Arena & pool)
+        static ALWAYS_INLINE void insert(Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool)
        {
-            typename Map::iterator it;
-            bool inserted;
-            map.emplace(key, it, inserted);
+            auto emplace_result = key_getter.emplaceKey(map, i, pool);

-            if (inserted)
-            {
-                KeyGetter::onNewKey(it->first, pool);
-                new (&it->second) typename Map::mapped_type(stored_block, i);
-            }
+            if (emplace_result.isInserted())
+                new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i);
            else
            {
                /** The first element of the list is stored in the value of the hash table, the rest in the pool.
@ -359,9 +383,10 @@ namespace
                 * That is, the former second element, if it was, will be the third, and so on.
                 */
                auto elem = pool.alloc<typename Map::mapped_type>();
+                auto & mapped = emplace_result.getMapped();

-                elem->next = it->second.next;
-                it->second.next = elem;
+                elem->next = mapped.next;
+                mapped.next = elem;
                elem->block = stored_block;
                elem->row_num = i;
            }
@ -372,17 +397,16 @@ namespace
    template <ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map, bool has_null_map>
    void NO_INLINE insertFromBlockImplTypeCase(
        Map & map, size_t rows, const ColumnRawPtrs & key_columns,
-        size_t keys_size, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
+        const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) /// keys_size is no longer a parameter; the key getter is built from key_columns and key_sizes.
    {
-        KeyGetter key_getter(key_columns);
+        KeyGetter key_getter(key_columns, key_sizes, nullptr); /// The third argument is the HashMethodContextPtr; no context is passed here.

        for (size_t i = 0; i < rows; ++i)
        {
            if (has_null_map && (*null_map)[i])
                continue;

-            auto key = key_getter.getKey(key_columns, keys_size, i, key_sizes);
-            Inserter<STRICTNESS, Map, KeyGetter>::insert(map, key, stored_block, i, pool);
+            Inserter<STRICTNESS, Map, KeyGetter>::insert(map, key_getter, stored_block, i, pool); /// Key extraction for row i now happens inside insert().
        }
    }

@ -390,19 +414,19 @@ namespace
    template <ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map>
    void insertFromBlockImplType(
        Map & map, size_t rows, const ColumnRawPtrs & key_columns,
-        size_t keys_size, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
+        const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) /// Turns the run-time null_map check into the compile-time has_null_map flag of insertFromBlockImplTypeCase.
    {
        if (null_map)
-            insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, true>(map, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool);
+            insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, true>(map, rows, key_columns, key_sizes, stored_block, null_map, pool); /// has_null_map = true: rows flagged in null_map are skipped by the callee.
        else
-            insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, false>(map, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool);
+            insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, false>(map, rows, key_columns, key_sizes, stored_block, null_map, pool); /// has_null_map = false: null_map is never dereferenced.
    }


    template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
    void insertFromBlockImpl(
        Join::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns,
-        size_t keys_size, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
+        const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
    {
        switch (type)
        {
@ -411,8 +435,8 @@ namespace

        #define M(TYPE) \
            case Join::Type::TYPE: \
-                insertFromBlockImplType<STRICTNESS, typename KeyGetterForType<Join::Type::TYPE>::Type>(\
-                    *maps.TYPE, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); \
+                insertFromBlockImplType<STRICTNESS, typename KeyGetterForType<Join::Type::TYPE, std::remove_reference_t<decltype(*maps.TYPE)>>::Type>(\
+                    *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, pool); \
                    break;
            APPLY_FOR_JOIN_VARIANTS(M)
        #undef M
@ -499,7 +523,7 @@ bool Join::insertFromBlock(const Block & block)
    {
        dispatch([&](auto, auto strictness_, auto & map)
        {
-            insertFromBlockImpl<strictness_>(type, map, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool);
+            insertFromBlockImpl<strictness_>(type, map, rows, key_columns, key_sizes, stored_block, null_map, pool);
        });
    }

@ -515,14 +539,14 @@ namespace
    template <typename Map>
    struct Adder<true, ASTTableJoin::Strictness::Any, Map>
    {
-        static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns,
+        static void addFound(const typename Map::mapped_type & mapped, size_t num_columns_to_add, MutableColumns & added_columns, /// Takes the found mapped value (stored block + row number) directly instead of a map iterator.
            size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/,
            const std::vector<size_t> & right_indexes)
        {
            filter[i] = 1;

            for (size_t j = 0; j < num_columns_to_add; ++j)
-                added_columns[j]->insertFrom(*it->second.block->getByPosition(right_indexes[j]).column.get(), it->second.row_num);
+                added_columns[j]->insertFrom(*mapped.block->getByPosition(right_indexes[j]).column, mapped.row_num); /// Appends the value at mapped.row_num of stored-block column right_indexes[j] to added column j.
        }

        static void addNotFound(size_t num_columns_to_add, MutableColumns & added_columns,
@ -538,14 +562,14 @@ namespace
    template <typename Map>
    struct Adder<false, ASTTableJoin::Strictness::Any, Map>
    {
-        static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns,
+        static void addFound(const typename Map::mapped_type & mapped, size_t num_columns_to_add, MutableColumns & added_columns, /// The caller passes the mapped value of the matched key; row i is marked in filter and one joined row is appended.
            size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/,
            const std::vector<size_t> & right_indexes)
        {
            filter[i] = 1;

            for (size_t j = 0; j < num_columns_to_add; ++j)
-                added_columns[j]->insertFrom(*it->second.block->getByPosition(right_indexes[j]).column.get(), it->second.row_num);
+                added_columns[j]->insertFrom(*mapped.block->getByPosition(right_indexes[j]).column, mapped.row_num); /// Source column is picked by right_indexes[j]; the source row is mapped.row_num.
        }

        static void addNotFound(size_t /*num_columns_to_add*/, MutableColumns & /*added_columns*/,
@ -558,14 +582,14 @@ namespace
    template <bool fill_left, typename Map>
    struct Adder<fill_left, ASTTableJoin::Strictness::All, Map>
    {
-        static void addFound(const typename Map::const_iterator & it, size_t num_columns_to_add, MutableColumns & added_columns,
+        static void addFound(const typename Map::mapped_type & mapped, size_t num_columns_to_add, MutableColumns & added_columns,
            size_t i, IColumn::Filter & filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets,
            const std::vector<size_t> & right_indexes)
        {
            filter[i] = 1;

            size_t rows_joined = 0;
-            for (auto current = &static_cast<const typename Map::mapped_type::Base_t &>(it->second); current != nullptr; current = current->next)
+            for (auto current = &static_cast<const typename Map::mapped_type::Base_t &>(mapped); current != nullptr; current = current->next)
            {
                for (size_t j = 0; j < num_columns_to_add; ++j)
                    added_columns[j]->insertFrom(*current->block->getByPosition(right_indexes[j]).column.get(), current->row_num);
@ -605,10 +629,10 @@ namespace
        const std::vector<size_t> & right_indexes)
    {
        IColumn::Offset current_offset = 0;
-        size_t keys_size = key_columns.size();
        size_t num_columns_to_add = right_indexes.size();

-        KeyGetter key_getter(key_columns);
+        Arena pool;
+        KeyGetter key_getter(key_columns, key_sizes, nullptr);

        for (size_t i = 0; i < rows; ++i)
        {
@ -619,14 +643,14 @@ namespace
            }
            else
            {
-                auto key = key_getter.getKey(key_columns, keys_size, i, key_sizes);
-                typename Map::const_iterator it = map.find(key);
+                auto find_result = key_getter.findKey(map, i, pool);

-                if (it != map.end())
+                if (find_result.isFound())
                {
-                    it->second.setUsed();
+                    auto & mapped = find_result.getMapped();
+                    mapped.setUsed();
                    Adder<Join::KindTrait<KIND>::fill_left, STRICTNESS, Map>::addFound(
-                        it, num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get(), right_indexes);
+                        mapped, num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get(), right_indexes);
                }
                else
                    Adder<Join::KindTrait<KIND>::fill_left, STRICTNESS, Map>::addNotFound(
@ -753,7 +777,7 @@ void Join::joinBlockImpl(
    #define M(TYPE) \
        case Join::Type::TYPE: \
            std::tie(filter, offsets_to_replicate) = \
-                joinBlockImplType<KIND, STRICTNESS, typename KeyGetterForType<Join::Type::TYPE>::Type>(\
+                joinBlockImplType<KIND, STRICTNESS, typename KeyGetterForType<Join::Type::TYPE, const std::remove_reference_t<decltype(*maps_.TYPE)>>::Type>(\
                *maps_.TYPE, block.rows(), key_columns, key_sizes, added_columns, null_map, right_indexes); \
            break;
        APPLY_FOR_JOIN_VARIANTS(M)
--- a/dbms/src/Interpreters/Join.h
+++ b/dbms/src/Interpreters/Join.h
@ -8,6 +8,7 @@
 #include <Interpreters/SettingsCommon.h>

 #include <Common/Arena.h>
+#include <Common/ColumnsHashing.h>
 #include <Common/HashTable/HashMap.h>

 #include <Columns/ColumnString.h>
@ -21,148 +22,6 @@

 namespace DB
 {
-
-/// Helpers to obtain keys (to use in a hash table or similar data structure) for various equi-JOINs.
-
-/// UInt8/16/32/64 or another types with same number of bits.
-template <typename FieldType>
-struct JoinKeyGetterOneNumber
-{
-    using Key = FieldType;
-
-    const char * vec;
-
-    /** Created before processing of each block.
-      * Initialize some members, used in another methods, called in inner loops.
-      */
-    JoinKeyGetterOneNumber(const ColumnRawPtrs & key_columns)
-    {
-        vec = key_columns[0]->getRawData().data;
-    }
-
-    Key getKey(
-        const ColumnRawPtrs & /*key_columns*/,
-        size_t /*keys_size*/,                 /// number of key columns.
-        size_t i,                             /// row number to get key from.
-        const Sizes & /*key_sizes*/) const    /// If keys are of fixed size - their sizes. Not used for methods with variable-length keys.
-    {
-        return unalignedLoad<FieldType>(vec + i * sizeof(FieldType));
-    }
-
-    /// Place additional data into memory pool, if needed, when new key was inserted into hash table.
-    static void onNewKey(Key & /*key*/, Arena & /*pool*/) {}
-};
-
-/// For single String key.
-struct JoinKeyGetterString
-{
-    using Key = StringRef;
-
-    const IColumn::Offset * offsets;
-    const UInt8 * chars;
-
-    JoinKeyGetterString(const ColumnRawPtrs & key_columns)
-    {
-        const IColumn & column = *key_columns[0];
-        const ColumnString & column_string = static_cast<const ColumnString &>(column);
-        offsets = column_string.getOffsets().data();
-        chars = column_string.getChars().data();
-    }
-
-    Key getKey(
-        const ColumnRawPtrs &,
-        size_t,
-        ssize_t i,
-        const Sizes &) const
-    {
-        return StringRef(
-            chars + offsets[i - 1],
-            offsets[i] - offsets[i - 1] - 1);
-    }
-
-    static void onNewKey(Key & key, Arena & pool)
-    {
-        if (key.size)
-            key.data = pool.insert(key.data, key.size);
-    }
-};
-
-/// For single FixedString key.
-struct JoinKeyGetterFixedString
-{
-    using Key = StringRef;
-
-    size_t n;
-    const ColumnFixedString::Chars * chars;
-
-    JoinKeyGetterFixedString(const ColumnRawPtrs & key_columns)
-    {
-        const IColumn & column = *key_columns[0];
-        const ColumnFixedString & column_string = static_cast<const ColumnFixedString &>(column);
-        n = column_string.getN();
-        chars = &column_string.getChars();
-    }
-
-    Key getKey(
-        const ColumnRawPtrs &,
-        size_t,
-        size_t i,
-        const Sizes &) const
-    {
-        return StringRef(&(*chars)[i * n], n);
-    }
-
-    static void onNewKey(Key & key, Arena & pool)
-    {
-        key.data = pool.insert(key.data, key.size);
-    }
-};
-
-/// For keys of fixed size, that could be packed in sizeof TKey width.
-template <typename TKey>
-struct JoinKeyGetterFixed
-{
-    using Key = TKey;
-
-    JoinKeyGetterFixed(const ColumnRawPtrs &)
-    {
-    }
-
-    Key getKey(
-        const ColumnRawPtrs & key_columns,
-        size_t keys_size,
-        size_t i,
-        const Sizes & key_sizes) const
-    {
-        return packFixed<Key>(i, keys_size, key_columns, key_sizes);
-    }
-
-    static void onNewKey(Key &, Arena &) {}
-};
-
-/// Generic method, use crypto hash function.
-struct JoinKeyGetterHashed
-{
-    using Key = UInt128;
-
-    JoinKeyGetterHashed(const ColumnRawPtrs &)
-    {
-    }
-
-    Key getKey(
-        const ColumnRawPtrs & key_columns,
-        size_t keys_size,
-        size_t i,
-        const Sizes &) const
-    {
-        return hash128(i, keys_size, key_columns);
-    }
-
-    static void onNewKey(Key &, Arena &) {}
-};
-
-
-
 /** Data structure for implementation of JOIN.
  * It is just a hash table: keys -> rows of joined ("right") table.
  * Additionally, CROSS JOIN is supported: instead of hash table, it use just set of blocks without keys.
--- a/dbms/src/Interpreters/Set.cpp
+++ b/dbms/src/Interpreters/Set.cpp
@ -75,30 +75,22 @@ void NO_INLINE Set::insertFromBlockImplCase(
    const ColumnRawPtrs & key_columns,
    size_t rows,
    SetVariants & variants,
-    ConstNullMapPtr null_map,
-    ColumnUInt8::Container * out_filter)
+    [[maybe_unused]] ConstNullMapPtr null_map,
+    [[maybe_unused]] ColumnUInt8::Container * out_filter)
 {
-    typename Method::State state;
-    state.init(key_columns);
+    typename Method::State state(key_columns, key_sizes, nullptr);

    /// For all rows
    for (size_t i = 0; i < rows; ++i)
    {
-        if (has_null_map && (*null_map)[i])
+        if constexpr (has_null_map)
+            if ((*null_map)[i])
                continue;

-        /// Obtain a key to insert to the set
-        typename Method::Key key = state.getKey(key_columns, keys_size, i, key_sizes);
+        [[maybe_unused]] auto emplace_result = state.emplaceKey(method.data, i, variants.string_pool);

-        typename Method::Data::iterator it;
-        bool inserted;
-        method.data.emplace(key, it, inserted);
-
-        if (inserted)
-            method.onNewKey(*it, keys_size, variants.string_pool);
-
-        if (build_filter)
-            (*out_filter)[i] = inserted;
+        if constexpr (build_filter)
+            (*out_filter)[i] = emplace_result.isInserted();
    }
 }

@ -392,10 +384,10 @@ void NO_INLINE Set::executeImplCase(
    size_t rows,
    ConstNullMapPtr null_map) const
 {
-    typename Method::State state;
-    state.init(key_columns);
+    Arena pool;
+    typename Method::State state(key_columns, key_sizes, nullptr);

-    /// NOTE Optimization is not used for consecutive identical values.
+    /// NOTE Optimization is not used for consecutive identical strings.

    /// For all rows
    for (size_t i = 0; i < rows; ++i)
@ -404,9 +396,8 @@ void NO_INLINE Set::executeImplCase(
            vec_res[i] = negative;
        else
        {
-            /// Build the key
-            typename Method::Key key = state.getKey(key_columns, keys_size, i, key_sizes);
-            vec_res[i] = negative ^ method.data.has(key);
+            auto find_result = state.findKey(method.data, i, pool);
+            vec_res[i] = negative ^ find_result.isFound();
        }
    }
 }
--- a/dbms/src/Interpreters/SetVariants.h
+++ b/dbms/src/Interpreters/SetVariants.h
@ -3,6 +3,7 @@
 #include <Columns/ColumnNullable.h>
 #include <Columns/ColumnString.h>
 #include <Interpreters/AggregationCommon.h>
+#include <Common/ColumnsHashing.h>

 #include <Common/Arena.h>
 #include <Common/HashTable/HashSet.h>
@ -27,33 +28,7 @@ struct SetMethodOneNumber

    Data data;

-    /// To use one `Method` in different threads, use different `State`.
-    struct State
-    {
-        const char * vec;
-
-        /** Called at the start of each block processing.
-          * Sets the variables required for the other methods called in inner loops.
-          */
-        void init(const ColumnRawPtrs & key_columns)
-        {
-           vec = key_columns[0]->getRawData().data;
-        }
-
-        /// Get key from key columns for insertion into hash table.
-        Key getKey(
-            const ColumnRawPtrs & /*key_columns*/,
-            size_t /*keys_size*/,                 /// Number of key columns.
-            size_t i,                             /// From what row of the block I get the key.
-            const Sizes & /*key_sizes*/) const    /// If keys of a fixed length - their lengths. Not used in methods for variable length keys.
-        {
-            return unalignedLoad<FieldType>(vec + i * sizeof(FieldType));
-        }
-    };
-
-    /** Place additional data, if necessary, in case a new key was inserted into the hash table.
-      */
-    static void onNewKey(typename Data::value_type & /*value*/, size_t /*keys_size*/, Arena & /*pool*/) {}
+    using State = ColumnsHashing::HashMethodOneNumber<typename Data::value_type, void, FieldType>;
 };

 /// For the case where there is one string key.
@ -65,36 +40,7 @@ struct SetMethodString

    Data data;

-    struct State
-    {
-        const IColumn::Offset * offsets;
-        const UInt8 * chars;
-
-        void init(const ColumnRawPtrs & key_columns)
-        {
-            const IColumn & column = *key_columns[0];
-            const ColumnString & column_string = static_cast<const ColumnString &>(column);
-            offsets = column_string.getOffsets().data();
-            chars = column_string.getChars().data();
-        }
-
-        Key getKey(
-            const ColumnRawPtrs &,
-            size_t,
-            ssize_t i,
-            const Sizes &) const
-        {
-            return StringRef(
-                chars + offsets[i - 1],
-                offsets[i] - offsets[i - 1] - 1);
-        }
-    };
-
-    static void onNewKey(typename Data::value_type & value, size_t, Arena & pool)
-    {
-        if (value.size)
-            value.data = pool.insert(value.data, value.size);
-    }
+    using State = ColumnsHashing::HashMethodString<typename Data::value_type, void, true, false>;
 };

 /// For the case when there is one fixed-length string key.
@ -106,33 +52,7 @@ struct SetMethodFixedString

    Data data;

-    struct State
-    {
-        size_t n;
-        const ColumnFixedString::Chars * chars;
-
-        void init(const ColumnRawPtrs & key_columns)
-        {
-            const IColumn & column = *key_columns[0];
-            const ColumnFixedString & column_string = static_cast<const ColumnFixedString &>(column);
-            n = column_string.getN();
-            chars = &column_string.getChars();
-        }
-
-        Key getKey(
-            const ColumnRawPtrs &,
-            size_t,
-            size_t i,
-            const Sizes &) const
-        {
-            return StringRef(&(*chars)[i * n], n);
-        }
-    };
-
-    static void onNewKey(typename Data::value_type & value, size_t, Arena & pool)
-    {
-        value.data = pool.insert(value.data, value.size);
-    }
+    using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, void, true, false>;
 };

 namespace set_impl
@ -242,34 +162,7 @@ struct SetMethodKeysFixed

    Data data;

-    class State : private set_impl::BaseStateKeysFixed<Key, has_nullable_keys>
-    {
-    public:
-        using Base = set_impl::BaseStateKeysFixed<Key, has_nullable_keys>;
-
-        void init(const ColumnRawPtrs & key_columns)
-        {
-            if (has_nullable_keys)
-                Base::init(key_columns);
-        }
-
-        Key getKey(
-            const ColumnRawPtrs & key_columns,
-            size_t keys_size,
-            size_t i,
-            const Sizes & key_sizes) const
-        {
-            if (has_nullable_keys)
-            {
-                auto bitmap = Base::createBitmap(i);
-                return packFixed<Key>(i, keys_size, Base::getActualColumns(), key_sizes, bitmap);
-            }
-            else
-                return packFixed<Key>(i, keys_size, key_columns, key_sizes);
-        }
-    };
-
-    static void onNewKey(typename Data::value_type &, size_t, Arena &) {}
+    using State = ColumnsHashing::HashMethodKeysFixed<typename Data::value_type, Key, void, has_nullable_keys, false>;
 };

 /// For other cases. 128 bit hash from the key.
@ -281,23 +174,7 @@ struct SetMethodHashed

    Data data;

-    struct State
-    {
-        void init(const ColumnRawPtrs &)
-        {
-        }
-
-        Key getKey(
-            const ColumnRawPtrs & key_columns,
-            size_t keys_size,
-            size_t i,
-            const Sizes &) const
-        {
-            return hash128(i, keys_size, key_columns);
-        }
-    };
-
-    static void onNewKey(typename Data::value_type &, size_t, Arena &) {}
+    using State = ColumnsHashing::HashMethodHashed<typename Data::value_type, void>;
 };


--- a/dbms/src/Interpreters/SpecializedAggregator.h
+++ b/dbms/src/Interpreters/SpecializedAggregator.h
@ -107,11 +107,7 @@ void NO_INLINE Aggregator::executeSpecialized(
    bool no_more_keys,
    AggregateDataPtr overflow_row) const
 {
-    typename Method::State state;
-    if constexpr (Method::low_cardinality_optimization)
-        state.init(key_columns, aggregation_state_cache);
-    else
-        state.init(key_columns);
+    typename Method::State state(key_columns, key_sizes, aggregation_state_cache);

    if (!no_more_keys)
        executeSpecializedCase<false, Method, AggregateFunctionsList>(
@ -130,94 +126,48 @@ void NO_INLINE Aggregator::executeSpecializedCase(
    typename Method::State & state,
    Arena * aggregates_pool,
    size_t rows,
-    ColumnRawPtrs & key_columns,
+    ColumnRawPtrs & /*key_columns*/,
    AggregateColumns & aggregate_columns,
-    StringRefs & keys,
+    StringRefs & /*keys*/,
    AggregateDataPtr overflow_row) const
 {
    /// For all rows.
-    typename Method::Key prev_key{};
-    AggregateDataPtr value = nullptr;
    for (size_t i = 0; i < rows; ++i)
    {
-        bool inserted = false;            /// Inserted a new key, or was this key already?
-
-        /// Get the key to insert into the hash table.
-        typename Method::Key key;
-        if constexpr (!Method::low_cardinality_optimization)
-            key = state.getKey(key_columns, params.keys_size, i, key_sizes, keys, *aggregates_pool);
-
-        AggregateDataPtr * aggregate_data = nullptr;
-        typename Method::iterator it; /// Is not used if Method::low_cardinality_optimization
+        AggregateDataPtr aggregate_data = nullptr;

        if (!no_more_keys)    /// Insert.
        {
-            /// Optimization for frequently repeating keys.
-            if (!Method::no_consecutive_keys_optimization)
-            {
-                if (i != 0 && key == prev_key)
-                {
-                    /// Add values into aggregate functions.
-                    AggregateFunctionsList::forEach(AggregateFunctionsUpdater(
-                        aggregate_functions, offsets_of_aggregate_states, aggregate_columns, value, i, aggregates_pool));
+            auto emplace_result = state.emplaceKey(method.data, i, *aggregates_pool);

-                    method.onExistingKey(key, keys, *aggregates_pool);
-                    continue;
+            /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key.
+            if (emplace_result.isInserted())
+            {
+                /// Exception safety: if memory allocation or state creation fails, destructors will not be called.
+                emplace_result.setMapped(nullptr);
+
+                aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
+                AggregateFunctionsList::forEach(AggregateFunctionsCreator(
+                    aggregate_functions, offsets_of_aggregate_states, aggregate_data));
+
+                emplace_result.setMapped(aggregate_data);
            }
            else
-                    prev_key = key;
-            }
-
-            if constexpr (Method::low_cardinality_optimization)
-                aggregate_data = state.emplaceKeyFromRow(method.data, i, inserted, params.keys_size, keys, *aggregates_pool);
-            else
-            {
-                method.data.emplace(key, it, inserted);
-                aggregate_data = &Method::getAggregateData(it->second);
-            }
+                aggregate_data = emplace_result.getMapped();
        }
        else
        {
            /// Add only if the key already exists.
-            if constexpr (Method::low_cardinality_optimization)
-                aggregate_data = state.findFromRow(method.data, i);
-            else
-            {
-                it = method.data.find(key);
-                if (method.data.end() != it)
-                    aggregate_data = &Method::getAggregateData(it->second);
-            }
+            auto find_result = state.findKey(method.data, i, *aggregates_pool);
+            if (find_result.isFound())
+                aggregate_data = find_result.getMapped();
        }

        /// If the key does not fit, and the data does not need to be aggregated in a separate row, then there's nothing to do.
        if (!aggregate_data && !overflow_row)
-        {
-            method.onExistingKey(key, keys, *aggregates_pool);
            continue;
-        }

-        /// If a new key is inserted, initialize the states of the aggregate functions, and possibly some stuff related to the key.
-        if (inserted)
-        {
-            *aggregate_data = nullptr;
-
-            if constexpr (!Method::low_cardinality_optimization)
-                method.onNewKey(*it, params.keys_size, keys, *aggregates_pool);
-
-            AggregateDataPtr place = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
-
-            AggregateFunctionsList::forEach(AggregateFunctionsCreator(
-                aggregate_functions, offsets_of_aggregate_states, place));
-
-            *aggregate_data = place;
-
-            if constexpr (Method::low_cardinality_optimization)
-                state.cacheAggregateData(i, place);
-        }
-        else
-            method.onExistingKey(key, keys, *aggregates_pool);
-
-        value = aggregate_data ? *aggregate_data : overflow_row;
+        auto value = aggregate_data ? aggregate_data : overflow_row;

        /// Add values into the aggregate functions.
        AggregateFunctionsList::forEach(AggregateFunctionsUpdater(
--- a/dbms/tests/performance/columns_hashing/columns_hashing.xml
+++ b/dbms/tests/performance/columns_hashing/columns_hashing.xml
@ -0,0 +1,48 @@
+<test>
+    <name>Benchmark</name>
+
+    <tags>
+        <tag>columns_hashing</tag>
+    </tags>
+
+    <preconditions>
+        <table_exists>hits_100m_single</table_exists>
+        <table_exists>hits_1000m_single</table_exists>
+    </preconditions>
+
+    <type>loop</type>
+
+    <stop_conditions>
+        <all_of>
+            <iterations>5</iterations>
+            <min_time_not_changing_for_ms>60000</min_time_not_changing_for_ms>
+        </all_of>
+        <any_of>
+            <iterations>10</iterations>
+            <total_time_ms>150000</total_time_ms>
+        </any_of>
+    </stop_conditions>
+
+<!--
+    <query><![CDATA[select count() from hits_100m_single any left join  hits_100m_single using (UserID, RegionID)]]></query>
+    <query><![CDATA[select count() from hits_100m_single any left join  hits_100m_single using (UserID)]]></query>
+    <query><![CDATA[select count() from hits_100m_single any left join  hits_100m_single using URL where URL != '']]></query>
+    <query><![CDATA[select count() from hits_1000m_single any left join  hits_1000m_single using MobilePhoneModel where MobilePhoneModel != '']]></query>
+    <query><![CDATA[select count() from hits_100m_single any left join  hits_100m_single using (MobilePhoneModel, UserID) where MobilePhoneModel != '']]></query>
+
+    <query><![CDATA[select count() from (select count() from hits_1000m_single group by (UserID))]]></query>
+    <query><![CDATA[select count() from (select count() from hits_100m_single group by (UserID, RegionID))]]></query>
+    <query><![CDATA[select count() from (select count() from hits_100m_single where URL != '' group by URL)]]></query>
+    <query><![CDATA[select count() from (select count() from hits_1000m_single where MobilePhoneModel != '' group by MobilePhoneModel)]]></query>
+    <query><![CDATA[select count() from (select count() from hits_1000m_single  where MobilePhoneModel != '' group by (MobilePhoneModel, UserID))]]></query>
+-->
+    <query><![CDATA[select sum(UserID + 1 in (select UserID from hits_100m_single)) from hits_100m_single]]></query>
+    <query><![CDATA[select sum((UserID + 1, RegionID) in (select UserID, RegionID from hits_100m_single)) from hits_100m_single]]></query>
+    <query><![CDATA[select sum(URL in (select URL from hits_100m_single where URL != '')) from hits_100m_single]]></query>
+    <query><![CDATA[select sum(MobilePhoneModel in (select MobilePhoneModel from hits_1000m_single where MobilePhoneModel != '')) from hits_1000m_single]]></query>
+    <query><![CDATA[select sum((MobilePhoneModel, UserID + 1) in (select MobilePhoneModel, UserID from hits_1000m_single where MobilePhoneModel != '')) from hits_1000m_single]]></query>
+
+    <main_metric>
+        <min_time/>
+    </main_metric>
+</test>