Merge branch 'master' of github.com:yandex/ClickHouse
Commit: 98d999f95f
@@ -24,7 +24,7 @@ if (OS_LINUX AND COMPILER_CLANG)
     endif ()

     if (LIBCXX_PATH)
-        # include_directories (SYSTEM BEFORE "${LIBCXX_PATH}/include" "${LIBCXX_PATH}/include/c++/v1")
+        include_directories (SYSTEM BEFORE "${LIBCXX_PATH}/include" "${LIBCXX_PATH}/include/c++/v1")
         link_directories ("${LIBCXX_PATH}/lib")
     endif ()
 endif ()
@@ -51,10 +51,6 @@ set(SRCS
     ${RDKAFKA_SOURCE_DIR}/snappy.c
     ${RDKAFKA_SOURCE_DIR}/tinycthread.c
     ${RDKAFKA_SOURCE_DIR}/tinycthread_extra.c
-    #${RDKAFKA_SOURCE_DIR}/xxhash.c
-    #${RDKAFKA_SOURCE_DIR}/lz4.c
-    #${RDKAFKA_SOURCE_DIR}/lz4frame.c
-    #${RDKAFKA_SOURCE_DIR}/lz4hc.c
     ${RDKAFKA_SOURCE_DIR}/rdgz.c
 )

@@ -33,6 +33,7 @@
 #include <Common/CurrentThread.h>
 #include <Common/escapeForFileName.h>
 #include <Common/getNumberOfPhysicalCPUCores.h>
+#include <Common/ThreadStatus.h>
 #include <Client/Connection.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/Cluster.h>
@@ -2121,6 +2122,7 @@ void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options)
 void ClusterCopierApp::mainImpl()
 {
     StatusFile status_file(process_path + "/status");
+    ThreadStatus thread_status;

     auto log = &logger();
     LOG_INFO(log, "Starting clickhouse-copier ("
@@ -577,7 +577,7 @@ public:
     {
         for (auto & elem : table)
         {
-            Histogram & histogram = elem.second;
+            Histogram & histogram = elem.getSecond();

             if (histogram.buckets.size() < params.num_buckets_cutoff)
             {
@@ -591,7 +591,7 @@ public:
     {
         for (auto & elem : table)
         {
-            Histogram & histogram = elem.second;
+            Histogram & histogram = elem.getSecond();
             if (!histogram.total)
                 continue;

@@ -623,7 +623,7 @@ public:
     {
         for (auto & elem : table)
         {
-            Histogram & histogram = elem.second;
+            Histogram & histogram = elem.getSecond();
             if (!histogram.total)
                 continue;

@@ -639,7 +639,7 @@ public:
     {
         for (auto & elem : table)
         {
-            Histogram & histogram = elem.second;
+            Histogram & histogram = elem.getSecond();
             if (!histogram.total)
                 continue;

@@ -674,7 +674,7 @@ public:
         while (true)
         {
             it = table.find(hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size()));
-            if (table.end() != it && it->second.total + it->second.count_end != 0)
+            if (table.end() != it && it->getSecond().total + it->getSecond().count_end != 0)
                 break;

             if (context_size == 0)
@@ -708,7 +708,7 @@ public:
             if (num_bytes_after_desired_size > 0)
                 end_probability_multiplier = std::pow(1.25, num_bytes_after_desired_size);

-            CodePoint code = it->second.sample(determinator, end_probability_multiplier);
+            CodePoint code = it->getSecond().sample(determinator, end_probability_multiplier);

             if (code == END)
                 break;
@@ -724,7 +724,7 @@ bool TCPHandler::receiveData()
             query_context.addExternalTable(external_table_name, storage);
         }
         /// The data will be written directly to the table.
-        state.io.out = storage->write(ASTPtr(), query_context.getSettingsRef());
+        state.io.out = storage->write(ASTPtr(), query_context);
     }
     if (block)
         state.io.out->write(block);
@@ -54,7 +54,7 @@ struct EntropyData
     void merge(const EntropyData & rhs)
     {
         for (const auto & pair : rhs.map)
-            map[pair.first] += pair.second;
+            map[pair.getFirst()] += pair.getSecond();
     }

     void serialize(WriteBuffer & buf) const
@@ -68,7 +68,7 @@ struct EntropyData
         while (reader.next())
         {
             const auto & pair = reader.get();
-            map[pair.first] = pair.second;
+            map[pair.getFirst()] = pair.getSecond();
         }
     }

@@ -76,12 +76,12 @@ struct EntropyData
     {
         UInt64 total_value = 0;
         for (const auto & pair : map)
-            total_value += pair.second;
+            total_value += pair.getSecond();

         Float64 shannon_entropy = 0;
         for (const auto & pair : map)
         {
-            Float64 frequency = Float64(pair.second) / total_value;
+            Float64 frequency = Float64(pair.getSecond()) / total_value;
             shannon_entropy -= frequency * log2(frequency);
         }
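The loop above accumulates the Shannon entropy H = -sum(p_i * log2(p_i)) over the observed value frequencies. For reference, a minimal standalone sketch of the same computation, using a plain std::unordered_map rather than ClickHouse's HashMap (so field access stays pair.first / pair.second; the function name is hypothetical):

    #include <cmath>
    #include <cstdint>
    #include <iostream>
    #include <unordered_map>

    /// Shannon entropy in bits: H = -sum(p_i * log2(p_i)) over observed frequencies.
    double shannonEntropy(const std::unordered_map<uint64_t, uint64_t> & counts)
    {
        uint64_t total = 0;
        for (const auto & pair : counts)
            total += pair.second;

        if (total == 0)
            return 0.0;

        double entropy = 0.0;
        for (const auto & pair : counts)
        {
            double frequency = double(pair.second) / total;
            entropy -= frequency * std::log2(frequency);
        }
        return entropy;
    }

    int main()
    {
        /// Two equiprobable values -> exactly 1 bit of entropy.
        std::unordered_map<uint64_t, uint64_t> counts{{0, 5}, {1, 5}};
        std::cout << shannonEntropy(counts) << '\n'; // prints 1
    }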
@@ -94,7 +94,7 @@ public:

         size_t i = 0;
         for (auto it = set.begin(); it != set.end(); ++it, ++i)
-            data_to[old_size + i] = *it;
+            data_to[old_size + i] = it->getValue();
     }

     const char * getHeaderFilePath() const override { return __FILE__; }
@@ -150,7 +150,7 @@ public:

         for (const auto & elem : set)
         {
-            writeStringBinary(elem, buf);
+            writeStringBinary(elem.getValue(), buf);
         }
     }

@@ -185,7 +185,7 @@ public:
         else
         {
             if (inserted)
-                it->data = arena->insert(str_serialized.data, str_serialized.size);
+                it->getValueMutable().data = arena->insert(str_serialized.data, str_serialized.size);
         }
     }

@@ -198,9 +198,9 @@ public:
         State::Set::iterator it;
         for (auto & rhs_elem : rhs_set)
         {
-            cur_set.emplace(rhs_elem, it, inserted);
-            if (inserted && it->size)
-                it->data = arena->insert(it->data, it->size);
+            cur_set.emplace(rhs_elem.getValue(), it, inserted);
+            if (inserted && it->getValue().size)
+                it->getValueMutable().data = arena->insert(it->getValue().data, it->getValue().size);
         }
     }

@@ -215,7 +215,7 @@ public:

         for (auto & elem : set)
         {
-            deserializeAndInsert(elem, data_to);
+            deserializeAndInsert(elem.getValue(), data_to);
         }
     }
@@ -33,6 +33,8 @@ namespace ErrorCodes
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
 }

+template <typename> class QuantileTiming;
+

 /** Generic aggregate function for calculation of quantiles.
   * It depends on quantile calculation data structure. Look at Quantile*.h for various implementations.
@@ -82,6 +84,14 @@ public:
     {
         if (!returns_many && levels.size() > 1)
             throw Exception("Aggregate function " + getName() + " require one parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+        if constexpr (std::is_same_v<Data, QuantileTiming<Value>>)
+        {
+            /// QuantileTiming only supports integers (it works only for unsigned integers but signed are also accepted for convenience).
+            if (!isInteger(argument_type))
+                throw Exception("Argument for function " + std::string(Name::name) + " must be integer, but it has type "
+                    + argument_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        }
     }

     String getName() const override { return Name::name; }
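The added check uses if constexpr, so the integer-type validation is compiled only when the aggregate function is instantiated with QuantileTiming as its Data structure; every other quantile implementation pays nothing. A generic sketch of this pattern, with hypothetical stand-in types (not the ClickHouse classes):

    #include <stdexcept>
    #include <string>
    #include <type_traits>

    /// Stand-in for a data structure that only works with integer arguments.
    template <typename Value> struct QuantileTimingLike {};

    /// A validation branch that exists for only one template instantiation.
    /// For any other Data type the branch is discarded at compile time.
    template <typename Data, typename Value>
    void checkArgumentType(bool argument_is_integer, const std::string & type_name)
    {
        if constexpr (std::is_same_v<Data, QuantileTimingLike<Value>>)
        {
            if (!argument_is_integer)
                throw std::invalid_argument("argument must be integer, but it has type " + type_name);
        }
    }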
@@ -48,7 +48,7 @@ struct QuantileExactWeighted
     void merge(const QuantileExactWeighted & rhs)
     {
         for (const auto & pair : rhs.map)
-            map[pair.first] += pair.second;
+            map[pair.getFirst()] += pair.getSecond();
     }

     void serialize(WriteBuffer & buf) const
@@ -62,7 +62,7 @@ struct QuantileExactWeighted
         while (reader.next())
         {
             const auto & pair = reader.get();
-            map[pair.first] = pair.second;
+            map[pair.getFirst()] = pair.getSecond();
         }
     }

@@ -83,12 +83,12 @@ struct QuantileExactWeighted
         UInt64 sum_weight = 0;
         for (const auto & pair : map)
         {
-            sum_weight += pair.second;
-            array[i] = pair;
+            sum_weight += pair.getSecond();
+            array[i] = pair.getValue();
             ++i;
         }

-        std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
+        std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.getFirst() < b.getFirst(); });

         UInt64 threshold = std::ceil(sum_weight * level);
         UInt64 accumulated = 0;
@@ -97,7 +97,7 @@ struct QuantileExactWeighted
         const Pair * end = array + size;
         while (it < end)
         {
-            accumulated += it->second;
+            accumulated += it->getSecond();

             if (accumulated >= threshold)
                 break;
@@ -108,7 +108,7 @@ struct QuantileExactWeighted
         if (it == end)
             --it;

-        return it->first;
+        return it->getFirst();
     }

     /// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
@@ -133,12 +133,12 @@ struct QuantileExactWeighted
         UInt64 sum_weight = 0;
         for (const auto & pair : map)
         {
-            sum_weight += pair.second;
-            array[i] = pair;
+            sum_weight += pair.getSecond();
+            array[i] = pair.getValue();
             ++i;
         }

-        std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
+        std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.getFirst() < b.getFirst(); });

         UInt64 accumulated = 0;

@@ -150,11 +150,11 @@ struct QuantileExactWeighted
         while (it < end)
         {
-            accumulated += it->second;
+            accumulated += it->getSecond();

             while (accumulated >= threshold)
             {
-                result[indices[level_index]] = it->first;
+                result[indices[level_index]] = it->getFirst();
                 ++level_index;

                 if (level_index == num_levels)
@@ -168,7 +168,7 @@ struct QuantileExactWeighted
         while (level_index < num_levels)
         {
-            result[indices[level_index]] = array[size - 1].first;
+            result[indices[level_index]] = array[size - 1].getFirst();
             ++level_index;
         }
     }
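The weighted-quantile algorithm these hunks touch works by sorting (value, weight) pairs by value, then scanning until the accumulated weight reaches ceil(sum_weight * level). A self-contained sketch with std::vector and std::pair (hypothetical helper, not ClickHouse code; assumes a non-empty input):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <iostream>
    #include <utility>
    #include <vector>

    /// Exact weighted quantile, same scheme as the diff above: sort by value,
    /// then walk until the running weight crosses ceil(sum_weight * level).
    uint64_t weightedQuantile(std::vector<std::pair<uint64_t, uint64_t>> pairs, double level)
    {
        std::sort(pairs.begin(), pairs.end()); // sorts by value (pair.first)

        uint64_t sum_weight = 0;
        for (const auto & p : pairs)
            sum_weight += p.second;

        uint64_t threshold = uint64_t(std::ceil(sum_weight * level));
        uint64_t accumulated = 0;
        for (const auto & p : pairs)
        {
            accumulated += p.second;
            if (accumulated >= threshold)
                return p.first;
        }
        return pairs.back().first; // level == 1 edge case
    }

    int main()
    {
        /// Values 1, 2, 3 with weights 1, 1, 8: the heavy weight pulls the median to 3.
        std::cout << weightedQuantile({{1, 1}, {2, 1}, {3, 8}}, 0.5) << '\n'; // prints 3
    }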
@@ -33,7 +33,7 @@ namespace

         data.resize(hash_map.size());
         for (auto val : hash_map)
-            data[val.second] = val.first;
+            data[val.getSecond()] = val.getFirst();

         for (auto & ind : index)
             ind = hash_map[ind];
@@ -414,7 +414,7 @@ UInt64 ReverseIndex<IndexType, ColumnType>::insert(const StringRef & data)
         column->popBack(1);
     }

-    return *iterator;
+    return iterator->getValue();
 }

 template <typename IndexType, typename ColumnType>
@@ -429,7 +429,7 @@ UInt64 ReverseIndex<IndexType, ColumnType>::getInsertionPoint(const StringRef &
     auto hash = getHash(data);
     iterator = index->find(data, hash);

-    return iterator == index->end() ? size() + base_index : *iterator;
+    return iterator == index->end() ? size() + base_index : iterator->getValue();
 }

 }
@@ -56,7 +56,7 @@ struct HashMethodOneNumber
     /// Get StringRef from value which can be inserted into column.
     static StringRef getValueRef(const Value & value)
     {
-        return StringRef(reinterpret_cast<const char *>(&value.first), sizeof(value.first));
+        return StringRef(reinterpret_cast<const char *>(&value.getFirst()), sizeof(value.getFirst()));
     }
 };

@@ -85,7 +85,7 @@ struct HashMethodString
         return StringRef(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1);
     }

-    static StringRef getValueRef(const Value & value) { return StringRef(value.first.data, value.first.size); }
+    static StringRef getValueRef(const Value & value) { return StringRef(value.getFirst().data, value.getFirst().size); }

 protected:
     friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
@@ -122,7 +122,7 @@ struct HashMethodFixedString

     StringRef getKey(size_t row, Arena &) const { return StringRef(&(*chars)[row * n], n); }

-    static StringRef getValueRef(const Value & value) { return StringRef(value.first.data, value.first.size); }
+    static StringRef getValueRef(const Value & value) { return StringRef(value.getFirst().data, value.getFirst().size); }

 protected:
     friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
@@ -356,8 +356,8 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
     {
         if constexpr (has_mapped)
         {
-            new(&it->second) Mapped();
-            Base::onNewKey(it->first, pool);
+            new(&it->getSecond()) Mapped();
+            Base::onNewKey(it->getFirstMutable(), pool);
         }
         else
             Base::onNewKey(*it, pool);
@@ -365,8 +365,8 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod

         if constexpr (has_mapped)
         {
-            mapped_cache[row] = it->second;
-            return EmplaceResult(it->second, mapped_cache[row], inserted);
+            mapped_cache[row] = it->getSecond();
+            return EmplaceResult(it->getSecond(), mapped_cache[row], inserted);
         }
         else
             return EmplaceResult(inserted);
@@ -39,7 +39,7 @@ struct LastElementCache
     bool check(const Value & value_) { return !empty && value == value_; }

     template <typename Key>
-    bool check(const Key & key) { return !empty && value.first == key; }
+    bool check(const Key & key) { return !empty && value.getFirst() == key; }
 };

 template <typename Data>
@@ -147,9 +147,8 @@ protected:
         if constexpr (has_mapped)
         {
             /// Init PairNoInit elements.
-            cache.value.second = Mapped();
-            using Key = decltype(cache.value.first);
-            cache.value.first = Key();
+            cache.value.getSecond() = Mapped();
+            cache.value.getFirstMutable() = {};
         }
         else
             cache.value = Value();
@@ -171,7 +170,7 @@ protected:
         static_cast<Derived &>(*this).onExistingKey(key, pool);

         if constexpr (has_mapped)
-            return EmplaceResult(cache.value.second, cache.value.second, false);
+            return EmplaceResult(cache.value.getSecond(), cache.value.getSecond(), false);
         else
             return EmplaceResult(false);
     }
@@ -183,33 +182,33 @@ protected:

         [[maybe_unused]] Mapped * cached = nullptr;
         if constexpr (has_mapped)
-            cached = &it->second;
+            cached = &it->getSecond();

         if (inserted)
         {
             if constexpr (has_mapped)
             {
-                new(&it->second) Mapped();
-                static_cast<Derived &>(*this).onNewKey(it->first, pool);
+                new(&it->getSecond()) Mapped();
+                static_cast<Derived &>(*this).onNewKey(it->getFirstMutable(), pool);
             }
             else
-                static_cast<Derived &>(*this).onNewKey(*it, pool);
+                static_cast<Derived &>(*this).onNewKey(it->getValueMutable(), pool);
         }
         else
             static_cast<Derived &>(*this).onExistingKey(key, pool);

         if constexpr (consecutive_keys_optimization)
         {
-            cache.value = *it;
+            cache.value = it->getValue();
             cache.found = true;
             cache.empty = false;

             if constexpr (has_mapped)
-                cached = &cache.value.second;
+                cached = &cache.value.getSecond();
         }

         if constexpr (has_mapped)
-            return EmplaceResult(it->second, *cached, inserted);
+            return EmplaceResult(it->getSecond(), *cached, inserted);
         else
             return EmplaceResult(inserted);
     }
@@ -222,7 +221,7 @@ protected:
         if (cache.check(key))
         {
             if constexpr (has_mapped)
-                return FindResult(&cache.value.second, cache.found);
+                return FindResult(&cache.value.getSecond(), cache.found);
             else
                 return FindResult(cache.found);
         }
@@ -237,18 +236,18 @@ protected:
             cache.empty = false;

             if (found)
-                cache.value = *it;
+                cache.value = it->getValue();
             else
             {
                 if constexpr (has_mapped)
-                    cache.value.first = key;
+                    cache.value.getFirstMutable() = key;
                 else
                     cache.value = key;
             }
         }

         if constexpr (has_mapped)
-            return FindResult(found ? &it->second : nullptr, found);
+            return FindResult(found ? &it->getSecond() : nullptr, found);
         else
             return FindResult(found);
     }
@@ -137,12 +137,12 @@ public:
         if (rhs.getContainerType() == details::ContainerType::SMALL)
         {
             for (const auto & x : rhs.small)
-                insert(x);
+                insert(x.getValue());
         }
         else if (rhs.getContainerType() == details::ContainerType::MEDIUM)
         {
             for (const auto & x : rhs.getContainer<Medium>())
-                insert(x);
+                insert(x.getValue());
         }
         else if (rhs.getContainerType() == details::ContainerType::LARGE)
             getContainer<Large>().merge(rhs.getContainer<Large>());
@@ -234,7 +234,7 @@ private:
         auto tmp_medium = std::make_unique<Medium>();

         for (const auto & x : small)
-            tmp_medium->insert(x);
+            tmp_medium->insert(x.getValue());

         medium = tmp_medium.release();
         setContainerType(details::ContainerType::MEDIUM);
@@ -253,12 +253,12 @@ private:
         if (container_type == details::ContainerType::SMALL)
         {
             for (const auto & x : small)
-                tmp_large->insert(x);
+                tmp_large->insert(x.getValue());
         }
         else if (container_type == details::ContainerType::MEDIUM)
         {
             for (const auto & x : getContainer<Medium>())
-                tmp_large->insert(x);
+                tmp_large->insert(x.getValue());

             destroy();
         }
@@ -37,9 +37,9 @@ public:
         this->emplace(x, it, inserted);

         if (inserted)
-            new(&it->second) mapped_type();
+            new(&it->getSecond()) mapped_type();

-        return it->second;
+        return it->getSecond();
     }

     void clear()
dbms/src/Common/HashTable/FixedClearableHashMap.h (new file, 73 lines)
@@ -0,0 +1,73 @@
#pragma once

#include <Common/HashTable/ClearableHashMap.h>
#include <Common/HashTable/FixedHashMap.h>


template <typename Key, typename TMapped>
struct FixedClearableHashMapCell
{
    using Mapped = TMapped;
    using State = ClearableHashSetState;

    using value_type = PairNoInit<Key, Mapped>;
    UInt32 version;
    Mapped mapped;

    FixedClearableHashMapCell() {}
    FixedClearableHashMapCell(const Key &, const State & state) : version(state.version) {}
    FixedClearableHashMapCell(const value_type & value_, const State & state) : version(state.version), mapped(value_.second) {}

    Mapped & getSecond() { return mapped; }
    const Mapped & getSecond() const { return mapped; }
    bool isZero(const State & state) const { return version != state.version; }
    void setZero() { version = 0; }
    static constexpr bool need_zero_value_storage = false;
    void setMapped(const value_type & value) { mapped = value.getSecond(); }

    struct CellExt
    {
        CellExt() {}
        CellExt(Key && key_, FixedClearableHashMapCell * ptr_) : key(key_), ptr(ptr_) {}
        void update(Key && key_, FixedClearableHashMapCell * ptr_)
        {
            key = key_;
            ptr = ptr_;
        }
        Key key;
        FixedClearableHashMapCell * ptr;
        Key & getFirstMutable() { return key; }
        const Key & getFirst() const { return key; }
        Mapped & getSecond() { return ptr->mapped; }
        const Mapped & getSecond() const { return ptr->mapped; }
        const value_type getValue() const { return {key, ptr->mapped}; }
    };
};


template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
class FixedClearableHashMap : public FixedHashMap<Key, FixedClearableHashMapCell<Key, Mapped>, Allocator>
{
public:
    using key_type = Key;
    using mapped_type = Mapped;
    using value_type = typename FixedClearableHashMap::cell_type::value_type;

    mapped_type & operator[](Key x)
    {
        typename FixedClearableHashMap::iterator it;
        bool inserted;
        this->emplace(x, it, inserted);

        if (inserted)
            new (&it->getSecond()) mapped_type();

        return it->getSecond();
    }

    void clear()
    {
        ++this->version;
        this->m_size = 0;
    }
};
dbms/src/Common/HashTable/FixedClearableHashSet.h (new file, 45 lines)
@@ -0,0 +1,45 @@
#pragma once

#include <Common/HashTable/ClearableHashSet.h>
#include <Common/HashTable/FixedHashTable.h>


template <typename Key>
struct FixedClearableHashTableCell
{
    using State = ClearableHashSetState;

    using value_type = Key;
    UInt32 version;

    FixedClearableHashTableCell() {}
    FixedClearableHashTableCell(const Key &, const State & state) : version(state.version) {}

    bool isZero(const State & state) const { return version != state.version; }
    void setZero() { version = 0; }
    static constexpr bool need_zero_value_storage = false;
    void setMapped(const value_type & /*value*/) {}

    struct CellExt
    {
        Key key;
        value_type & getValueMutable() { return key; }
        const value_type & getValue() const { return key; }
        void update(Key && key_, FixedClearableHashTableCell *) { key = key_; }
    };
};


template <typename Key, typename Allocator = HashTableAllocator>
class FixedClearableHashSet : public FixedHashTable<Key, FixedClearableHashTableCell<Key>, Allocator>
{
public:
    using key_type = Key;
    using value_type = typename FixedClearableHashSet::cell_type::value_type;

    void clear()
    {
        ++this->version;
        this->m_size = 0;
    }
};
dbms/src/Common/HashTable/FixedHashMap.h (new file, 72 lines)
@@ -0,0 +1,72 @@
#pragma once

#include <Common/HashTable/FixedHashTable.h>
#include <Common/HashTable/HashMap.h>


template <typename Key, typename TMapped, typename TState = HashTableNoState>
struct FixedHashMapCell
{
    using Mapped = TMapped;
    using State = TState;

    using value_type = PairNoInit<Key, Mapped>;
    bool full;
    Mapped mapped;

    FixedHashMapCell() {}
    FixedHashMapCell(const Key &, const State &) : full(true) {}
    FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {}

    Mapped & getSecond() { return mapped; }
    const Mapped & getSecond() const { return mapped; }
    bool isZero(const State &) const { return !full; }
    void setZero() { full = false; }
    static constexpr bool need_zero_value_storage = false;
    void setMapped(const value_type & value) { mapped = value.getSecond(); }

    /// Similar to FixedHashSetCell except that we need to contain a pointer to the Mapped field.
    /// Note that we have to assemble a continuous layout for the value_type on each call of getValue().
    struct CellExt
    {
        CellExt() {}
        CellExt(Key && key_, const FixedHashMapCell * ptr_) : key(key_), ptr(const_cast<FixedHashMapCell *>(ptr_)) {}
        void update(Key && key_, const FixedHashMapCell * ptr_)
        {
            key = key_;
            ptr = const_cast<FixedHashMapCell *>(ptr_);
        }
        Key key;
        FixedHashMapCell * ptr;

        Key & getFirstMutable() { return key; }
        const Key & getFirst() const { return key; }
        Mapped & getSecond() { return ptr->mapped; }
        const Mapped & getSecond() const { return ptr->mapped; }
        const value_type getValue() const { return {key, ptr->mapped}; }
    };
};


template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
class FixedHashMap : public FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>
{
public:
    using Base = FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>;
    using key_type = Key;
    using mapped_type = Mapped;
    using value_type = typename Base::cell_type::value_type;

    using Base::Base;

    mapped_type & ALWAYS_INLINE operator[](Key x)
    {
        typename Base::iterator it;
        bool inserted;
        this->emplace(x, it, inserted);
        if (inserted)
            new (&it->getSecond()) mapped_type();

        return it->getSecond();
    }
};
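A hedged usage sketch for the new map (not part of the diff; assumes the ClickHouse source tree and its UInt8/UInt64 typedefs): for a UInt8 key the table is just 256 cells, and operator[] default-constructs the mapped value on first access, so byte-frequency counting becomes a single array-indexed increment per input byte.

    #include <cstdio>
    #include <Common/HashTable/FixedHashMap.h>

    /// Hypothetical example: count byte frequencies with the new map.
    void countBytes(const unsigned char * data, size_t size)
    {
        FixedHashMap<UInt8, UInt64> counts;
        for (size_t i = 0; i < size; ++i)
            ++counts[data[i]]; // direct addressing, no hashing

        /// Iteration yields CellExt, hence getFirst()/getSecond() rather than .first/.second.
        for (const auto & cell : counts)
            std::printf("%u -> %llu\n", unsigned(cell.getFirst()), (unsigned long long)cell.getSecond());
    }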
dbms/src/Common/HashTable/FixedHashSet.h (new file, 25 lines)
@@ -0,0 +1,25 @@
#pragma once

#include <Common/HashTable/FixedHashTable.h>

template <typename Key, typename Allocator = HashTableAllocator>
class FixedHashSet : public FixedHashTable<Key, FixedHashTableCell<Key>, Allocator>
{
public:
    using Base = FixedHashTable<Key, FixedHashTableCell<Key>, Allocator>;
    using Self = FixedHashSet;

    void merge(const Self & rhs)
    {
        for (size_t i = 0; i < Base::BUFFER_SIZE; ++i)
            if (Base::buf[i].isZero(*this) && !rhs.buf[i].isZero(*this))
                Base::buf[i] = rhs.buf[i];
    }

    /// NOTE: Currently this method isn't used. When it does, the ReadBuffer should
    /// contain the Key explicitly.
    // void readAndMerge(DB::ReadBuffer & rb)
    // {

    // }
};
dbms/src/Common/HashTable/FixedHashTable.h (new file, 420 lines)
@@ -0,0 +1,420 @@
#pragma once

#include <Common/HashTable/HashTable.h>

template <typename Key, typename TState = HashTableNoState>
struct FixedHashTableCell
{
    using State = TState;

    using value_type = Key;
    bool full;

    FixedHashTableCell() {}
    FixedHashTableCell(const Key &, const State &) : full(true) {}

    bool isZero(const State &) const { return !full; }
    void setZero() { full = false; }
    static constexpr bool need_zero_value_storage = false;
    void setMapped(const value_type & /*value*/) {}

    /// This Cell is only stored inside an iterator. It's used to accommodate the fact
    /// that the iterator-based API always provides a reference to a continuous memory
    /// containing the Key. As a result, we have to instantiate a real Key field.
    /// All methods that return a mutable reference to the Key field are named with
    /// a -Mutable suffix, indicating this is uncommon usage. As this is only for lookup
    /// tables, it's totally fine to discard the Key mutations.
    struct CellExt
    {
        Key key;

        value_type & getValueMutable() { return key; }
        const value_type & getValue() const { return key; }
        void update(Key && key_, FixedHashTableCell *) { key = key_; }
    };
};


/** Used as a lookup table for small keys such as UInt8, UInt16. It's different
  * from a HashTable in that keys are not stored in the Cell buf, but inferred
  * inside each iterator. There are a bunch of changes to make it faster than
  * using HashTable: a) It doesn't have a conflict chain; b) There is no key
  * comparison; c) The number of cycles for checking whether a cell is empty is
  * halved; d) Memory layout is tighter, especially the Clearable variants.
  *
  * NOTE: For Set variants this should always be better. For Map variants
  * however, as we need to assemble the real cell inside each iterator, there
  * might be some cases we fall short.
  *
  * TODO: Deprecate the cell API so that end users don't rely on the structure
  * of cell. Instead iterators should be used for operations such as cell
  * transfer, key updates (e.g. StringRef) and serde. This will allow
  * TwoLevelHashSet(Map) to contain different types of sets (maps).
  */
template <typename Key, typename Cell, typename Allocator>
class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State
{
    static constexpr size_t BUFFER_SIZE = 1ULL << (sizeof(Key) * 8);

protected:
    friend class const_iterator;
    friend class iterator;
    friend class Reader;

    using Self = FixedHashTable;
    using cell_type = Cell;

    size_t m_size = 0; /// Amount of elements
    Cell * buf;        /// A piece of memory for all elements except the element with zero key.

    void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(BUFFER_SIZE * sizeof(Cell))); }

    void free()
    {
        if (buf)
        {
            Allocator::free(buf, getBufferSizeInBytes());
            buf = nullptr;
        }
    }

    void destroyElements()
    {
        if (!std::is_trivially_destructible_v<Cell>)
            for (iterator it = begin(), it_end = end(); it != it_end; ++it)
                it.ptr->~Cell();
    }


    template <typename Derived, bool is_const>
    class iterator_base
    {
        using Container = std::conditional_t<is_const, const Self, Self>;
        using cell_type = std::conditional_t<is_const, const Cell, Cell>;

        Container * container;
        cell_type * ptr;

        friend class FixedHashTable;

    public:
        iterator_base() {}
        iterator_base(Container * container_, cell_type * ptr_) : container(container_), ptr(ptr_)
        {
            cell.update(ptr - container->buf, ptr);
        }

        bool operator==(const iterator_base & rhs) const { return ptr == rhs.ptr; }
        bool operator!=(const iterator_base & rhs) const { return ptr != rhs.ptr; }

        Derived & operator++()
        {
            ++ptr;

            /// Skip empty cells in the main buffer.
            auto buf_end = container->buf + container->BUFFER_SIZE;
            while (ptr < buf_end && ptr->isZero(*container))
                ++ptr;

            return static_cast<Derived &>(*this);
        }

        auto & operator*()
        {
            if (cell.key != ptr - container->buf)
                cell.update(ptr - container->buf, ptr);
            return cell;
        }
        auto * operator->()
        {
            if (cell.key != ptr - container->buf)
                cell.update(ptr - container->buf, ptr);
            return &cell;
        }

        auto getPtr() const { return ptr; }
        size_t getHash() const { return ptr - container->buf; }
        size_t getCollisionChainLength() const { return 0; }
        typename cell_type::CellExt cell;
    };


public:
    using key_type = Key;
    using value_type = typename Cell::value_type;

    size_t hash(const Key & x) const { return x; }

    FixedHashTable() { alloc(); }

    FixedHashTable(FixedHashTable && rhs) : buf(nullptr) { *this = std::move(rhs); }

    ~FixedHashTable()
    {
        destroyElements();
        free();
    }

    FixedHashTable & operator=(FixedHashTable && rhs)
    {
        destroyElements();
        free();

        std::swap(buf, rhs.buf);
        std::swap(m_size, rhs.m_size);

        Allocator::operator=(std::move(rhs));
        Cell::State::operator=(std::move(rhs));

        return *this;
    }

    class Reader final : private Cell::State
    {
    public:
        Reader(DB::ReadBuffer & in_) : in(in_) {}

        Reader(const Reader &) = delete;
        Reader & operator=(const Reader &) = delete;

        bool next()
        {
            if (!is_initialized)
            {
                Cell::State::read(in);
                DB::readVarUInt(size, in);
                is_initialized = true;
            }

            if (read_count == size)
            {
                is_eof = true;
                return false;
            }

            cell.read(in);
            ++read_count;

            return true;
        }

        inline const value_type & get() const
        {
            if (!is_initialized || is_eof)
                throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA);

            return cell.getValue();
        }

    private:
        DB::ReadBuffer & in;
        Cell cell;
        size_t read_count = 0;
        size_t size;
        bool is_eof = false;
        bool is_initialized = false;
    };


    class iterator : public iterator_base<iterator, false>
    {
    public:
        using iterator_base<iterator, false>::iterator_base;
    };

    class const_iterator : public iterator_base<const_iterator, true>
    {
    public:
        using iterator_base<const_iterator, true>::iterator_base;
    };


    const_iterator begin() const
    {
        if (!buf)
            return end();

        const Cell * ptr = buf;
        auto buf_end = buf + BUFFER_SIZE;
        while (ptr < buf_end && ptr->isZero(*this))
            ++ptr;

        return const_iterator(this, ptr);
    }

    const_iterator cbegin() const { return begin(); }

    iterator begin()
    {
        if (!buf)
            return end();

        Cell * ptr = buf;
        auto buf_end = buf + BUFFER_SIZE;
        while (ptr < buf_end && ptr->isZero(*this))
            ++ptr;

        return iterator(this, ptr);
    }

    const_iterator end() const { return const_iterator(this, buf + BUFFER_SIZE); }
    const_iterator cend() const { return end(); }
    iterator end() { return iterator(this, buf + BUFFER_SIZE); }


protected:
    void ALWAYS_INLINE emplaceImpl(Key x, iterator & it, bool & inserted)
    {
        it = iterator(this, &buf[x]);

        if (!buf[x].isZero(*this))
        {
            inserted = false;
            return;
        }

        new (&buf[x]) Cell(x, *this);
        inserted = true;
        ++m_size;
    }


public:
    std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
    {
        std::pair<iterator, bool> res;
        emplaceImpl(Cell::getKey(x), res.first, res.second);
        if (res.second)
            res.first.ptr->setMapped(x);

        return res;
    }


    void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted) { emplaceImpl(x, it, inserted); }
    void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t) { emplaceImpl(x, it, inserted); }

    template <typename ObjectToCompareWith>
    iterator ALWAYS_INLINE find(ObjectToCompareWith x)
    {
        return !buf[x].isZero(*this) ? iterator(this, &buf[x]) : end();
    }

    template <typename ObjectToCompareWith>
    const_iterator ALWAYS_INLINE find(ObjectToCompareWith x) const
    {
        return !buf[x].isZero(*this) ? const_iterator(this, &buf[x]) : end();
    }

    template <typename ObjectToCompareWith>
    iterator ALWAYS_INLINE find(ObjectToCompareWith, size_t hash_value)
    {
        return !buf[hash_value].isZero(*this) ? iterator(this, &buf[hash_value]) : end();
    }

    template <typename ObjectToCompareWith>
    const_iterator ALWAYS_INLINE find(ObjectToCompareWith, size_t hash_value) const
    {
        return !buf[hash_value].isZero(*this) ? const_iterator(this, &buf[hash_value]) : end();
    }

    bool ALWAYS_INLINE has(Key x) const { return !buf[x].isZero(*this); }
    bool ALWAYS_INLINE has(Key, size_t hash_value) const { return !buf[hash_value].isZero(*this); }

    void write(DB::WriteBuffer & wb) const
    {
        Cell::State::write(wb);
        DB::writeVarUInt(m_size, wb);

        for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr)
            if (!ptr->isZero(*this))
            {
                DB::writeVarUInt(ptr - buf, wb);
                ptr->write(wb);
            }
    }

    void writeText(DB::WriteBuffer & wb) const
    {
        Cell::State::writeText(wb);
        DB::writeText(m_size, wb);

        for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr)
        {
            if (!ptr->isZero(*this))
            {
                DB::writeChar(',', wb);
                DB::writeText(ptr - buf, wb);
                DB::writeChar(',', wb);
                ptr->writeText(wb);
            }
        }
    }

    void read(DB::ReadBuffer & rb)
    {
        Cell::State::read(rb);
        destroyElements();
        DB::readVarUInt(m_size, rb);
        free();
        alloc();

        for (size_t i = 0; i < m_size; ++i)
        {
            size_t place_value = 0;
            DB::readVarUInt(place_value, rb);
            Cell x;
            x.read(rb);
            new (&buf[place_value]) Cell(x, *this);
        }
    }

    void readText(DB::ReadBuffer & rb)
    {
        Cell::State::readText(rb);
        destroyElements();
        DB::readText(m_size, rb);
        free();
        alloc();

        for (size_t i = 0; i < m_size; ++i)
        {
            size_t place_value = 0;
            DB::assertChar(',', rb);
            DB::readText(place_value, rb);
            Cell x;
            DB::assertChar(',', rb);
            x.readText(rb);
            new (&buf[place_value]) Cell(x, *this);
        }
    }

    size_t size() const { return m_size; }

    bool empty() const { return 0 == m_size; }

    void clear()
    {
        destroyElements();
        m_size = 0;

        memset(static_cast<void *>(buf), 0, BUFFER_SIZE * sizeof(*buf));
    }

    /// After executing this function, the table can only be destroyed,
    /// and also you can use the methods `size`, `empty`, `begin`, `end`.
    void clearAndShrink()
    {
        destroyElements();
        m_size = 0;
        free();
    }

    size_t getBufferSizeInBytes() const { return BUFFER_SIZE * sizeof(Cell); }

    size_t getBufferSizeInCells() const { return BUFFER_SIZE; }

#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
    size_t getCollisions() const { return 0; }
#endif
};
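The idea behind FixedHashTable is direct addressing: for a UInt8 or UInt16 key, the key itself is the bucket index, so there is no hash function, no probing, and no stored key at all. A minimal self-contained illustration, independent of the classes above (hypothetical names, compilable on its own):

    #include <array>
    #include <cstdint>
    #include <cstdio>

    /// Minimal FixedHashTable idea for uint8_t keys: the key *is* the index,
    /// so the "table" is a flat array of 256 cells, each holding only an
    /// occupancy flag plus the mapped value. Keys are never stored; an
    /// iterator can recompute the key as (cell - buf).
    struct FixedMapU8
    {
        struct Cell { bool full = false; uint64_t mapped = 0; };
        std::array<Cell, 256> buf{}; // 1 << (sizeof(uint8_t) * 8) cells

        uint64_t & operator[](uint8_t key)
        {
            Cell & cell = buf[key]; // direct addressing: no hash, no probing
            cell.full = true;
            return cell.mapped;
        }

        bool has(uint8_t key) const { return buf[key].full; }
    };

    int main()
    {
        FixedMapU8 counts;
        for (uint8_t byte : {1, 1, 2})
            ++counts[byte];
        std::printf("%d %llu\n", counts.has(1), (unsigned long long)counts[1]); // 1 2
    }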
@@ -15,11 +15,14 @@ struct NoInitTag {}

 /// A pair that does not initialize the elements, if not needed.
 template <typename First, typename Second>
-struct PairNoInit
+class PairNoInit
 {
     First first;
     Second second;

+    template <typename, typename, typename, typename>
+    friend class HashMapCell;
+
+public:
     PairNoInit() {}

     template <typename First_>
@@ -29,6 +32,11 @@ struct PairNoInit
     template <typename First_, typename Second_>
     PairNoInit(First_ && first_, Second_ && second_)
         : first(std::forward<First_>(first_)), second(std::forward<Second_>(second_)) {}
+
+    First & getFirstMutable() { return first; }
+    const First & getFirst() const { return first; }
+    Second & getSecond() { return second; }
+    const Second & getSecond() const { return second; }
 };


@@ -45,10 +53,14 @@ struct HashMapCell
     HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {}
     HashMapCell(const value_type & value_, const State &) : value(value_) {}

-    value_type & getValue() { return value; }
+    Key & getFirstMutable() { return value.first; }
+    const Key & getFirst() const { return value.first; }
+    Mapped & getSecond() { return value.second; }
+    const Mapped & getSecond() const { return value.second; }
+
+    value_type & getValueMutable() { return value; }
     const value_type & getValue() const { return value; }

     static Key & getKey(value_type & value) { return value.first; }
     static const Key & getKey(const value_type & value) { return value.first; }

     bool keyEquals(const Key & key_) const { return value.first == key_; }
@@ -111,8 +123,8 @@ struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState>

     using Base::Base;

-    bool keyEquals(const Key & key_) const { return this->value.first == key_; }
-    bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.first == key_; }
+    bool keyEquals(const Key & key_) const { return this->value.getFirst() == key_; }
+    bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.getFirst() == key_; }
     bool keyEquals(const Key & key_, size_t hash_, const typename Base::State &) const { return keyEquals(key_, hash_); }

     void setHash(size_t hash_value) { saved_hash = hash_value; }
@@ -158,9 +170,9 @@ public:
       * the compiler can not guess about this, and generates the `load`, `increment`, `store` code.
       */
         if (inserted)
-            new(&it->second) mapped_type();
+            new(&it->getSecond()) mapped_type();

-        return it->second;
+        return it->getSecond();
     }
 };
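These hunks explain the .first/.second to getFirst()/getSecond() churn throughout the rest of the diff: a hash-table cell is no longer guaranteed to physically store a pair (FixedHashMap cells store only the mapped value and reconstruct the key from the bucket index), so generic code must go through accessors that each cell type implements for itself. A simplified sketch of the two cell shapes behind one interface (hypothetical types, not the ClickHouse classes):

    #include <cstdint>

    /// Classic HashMap-style cell: key and value stored together.
    struct PairCell
    {
        uint32_t first;
        uint64_t second;
        const uint32_t & getFirst() const { return first; }
        uint64_t & getSecond() { return second; }
    };

    /// FixedHashMap-style cell view: the key is the bucket index, not stored.
    struct FixedCellRef
    {
        uint32_t key;      /// materialized by the iterator from (cell - buf)
        uint64_t * mapped; /// points into the table's real cell
        const uint32_t & getFirst() const { return key; }
        uint64_t & getSecond() { return *mapped; }
    };

    /// Generic code compiles against either cell shape; with .first/.second
    /// it could only ever work with the first one.
    template <typename Cell>
    uint64_t bump(Cell & cell) { return ++cell.getSecond(); }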
@@ -98,11 +98,10 @@ struct HashTableCell
     /// HashTableCell(const value_type & value_, const State & state) : key(value_) {}

     /// Get what the value_type of the container will be.
-    value_type & getValue() { return key; }
+    value_type & getValueMutable() { return key; }
     const value_type & getValue() const { return key; }

     /// Get the key.
     static Key & getKey(value_type & value) { return value; }
     static const Key & getKey(const value_type & value) { return value; }

     /// Are the keys at the cells equal?
@@ -459,8 +458,8 @@ protected:
         return static_cast<Derived &>(*this);
     }

-    auto & operator* () const { return ptr->getValue(); }
-    auto * operator->() const { return &ptr->getValue(); }
+    auto & operator* () const { return *ptr; }
+    auto * operator->() const { return ptr; }

     auto getPtr() const { return ptr; }
     size_t getHash() const { return ptr->getHash(*container); }
@@ -148,8 +148,8 @@ public:
         return *this;
     }

-    value_type & operator* () const { return ptr->getValue(); }
-    value_type * operator->() const { return &ptr->getValue(); }
+    Cell & operator* () const { return *ptr; }
+    Cell * operator->() const { return ptr; }

     Cell * getPtr() const { return ptr; }
 };
@@ -176,8 +176,8 @@ public:
         return *this;
     }

-    const value_type & operator* () const { return ptr->getValue(); }
-    const value_type * operator->() const { return &ptr->getValue(); }
+    const Cell & operator* () const { return *ptr; }
+    const Cell * operator->() const { return ptr; }

     const Cell * getPtr() const { return ptr; }
 };
@@ -399,8 +399,8 @@ public:
         typename SmallMapTable::iterator it;
         bool inserted;
         this->emplace(x, it, inserted);
-        new(&it->second) mapped_type();
-        return it->second;
+        new(&it->getSecond()) mapped_type();
+        return it->getSecond();
     }
 };
@@ -29,9 +29,9 @@ public:
         this->emplace(x, it, inserted);

         if (inserted)
-            new(&it->second) mapped_type();
+            new(&it->getSecond()) mapped_type();

-        return it->second;
+        return it->getSecond();
     }
 };
@@ -98,7 +98,7 @@ public:
         /// It is assumed that the zero key (stored separately) is first in iteration order.
         if (it != src.end() && it.getPtr()->isZero(src))
         {
-            insert(*it);
+            insert(it->getValue());
             ++it;
         }

@@ -141,8 +141,8 @@ public:
         return *this;
     }

-    value_type & operator* () const { return *current_it; }
-    value_type * operator->() const { return &*current_it; }
+    Cell & operator* () const { return *current_it; }
+    Cell * operator->() const { return current_it.getPtr(); }

     Cell * getPtr() const { return current_it.getPtr(); }
     size_t getHash() const { return current_it.getHash(); }
@@ -179,8 +179,8 @@ public:
         return *this;
     }

-    const value_type & operator* () const { return *current_it; }
-    const value_type * operator->() const { return &*current_it; }
+    const Cell & operator* () const { return *current_it; }
+    const Cell * operator->() const { return current_it->getPtr(); }

     const Cell * getPtr() const { return current_it.getPtr(); }
     size_t getHash() const { return current_it.getHash(); }
@@ -45,7 +45,7 @@ private:
         Large * tmp_large = new Large;

         for (const auto & x : small)
-            tmp_large->insert(x);
+            tmp_large->insert(x.getValue());

         large = tmp_large;
     }
@@ -99,7 +99,7 @@ public:
         else
         {
             for (const auto & x : rhs.small)
-                insert(x);
+                insert(x.getValue());
         }
     }
@@ -33,27 +33,28 @@ namespace ErrorCodes
 }


-class RWLockImpl::LockHandlerImpl
+class RWLockImpl::LockHolderImpl
 {
     RWLock parent;
     GroupsContainer::iterator it_group;
     ClientsContainer::iterator it_client;
-    ThreadToHandler::iterator it_handler;
+    ThreadToHolder::iterator it_thread;
+    QueryIdToHolder::iterator it_query;
     CurrentMetrics::Increment active_client_increment;

-    LockHandlerImpl(RWLock && parent, GroupsContainer::iterator it_group, ClientsContainer::iterator it_client);
+    LockHolderImpl(RWLock && parent, GroupsContainer::iterator it_group, ClientsContainer::iterator it_client);

 public:

-    LockHandlerImpl(const LockHandlerImpl & other) = delete;
+    LockHolderImpl(const LockHolderImpl & other) = delete;

-    ~LockHandlerImpl();
+    ~LockHolderImpl();

     friend class RWLockImpl;
 };


-RWLockImpl::LockHandler RWLockImpl::getLock(RWLockImpl::Type type)
+RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id)
 {
     Stopwatch watch(CLOCK_MONOTONIC_COARSE);
     CurrentMetrics::Increment waiting_client_increment((type == Read) ? CurrentMetrics::RWLockWaitingReaders
@@ -66,28 +67,33 @@ RWLockImpl::LockHandler RWLockImpl::getLock(RWLockImpl::Type type)
                        : ProfileEvents::RWLockWritersWaitMilliseconds, watch.elapsedMilliseconds());
     };

-    auto this_thread_id = std::this_thread::get_id();
     GroupsContainer::iterator it_group;
     ClientsContainer::iterator it_client;

     std::unique_lock lock(mutex);

-    /// Check if the same thread is acquiring previously acquired lock
-    auto it_handler = thread_to_handler.find(this_thread_id);
-    if (it_handler != thread_to_handler.end())
+    /// Check if the same query is acquiring previously acquired lock
+    LockHolder existing_holder_ptr;
+
+    auto this_thread_id = std::this_thread::get_id();
+    auto it_thread = thread_to_holder.find(this_thread_id);
+
+    auto it_query = query_id_to_holder.end();
+    if (query_id != RWLockImpl::NO_QUERY)
+        it_query = query_id_to_holder.find(query_id);
+
+    if (it_thread != thread_to_holder.end())
+        existing_holder_ptr = it_thread->second.lock();
+    else if (it_query != query_id_to_holder.end())
+        existing_holder_ptr = it_query->second.lock();
+
+    if (existing_holder_ptr)
     {
-        auto handler_ptr = it_handler->second.lock();
-
-        /// Lock may be released in another thread, but not yet deleted inside |~LogHandlerImpl()|
-
-        if (handler_ptr)
-        {
-            /// XXX: it means we can't upgrade lock from read to write - with proper waiting!
-            if (type != Read || handler_ptr->it_group->type != Read)
-                throw Exception("Attempt to acquire exclusive lock recursively", ErrorCodes::LOGICAL_ERROR);
-
-            return handler_ptr;
-        }
+        /// XXX: it means we can't upgrade lock from read to write - with proper waiting!
+        if (type != Read || existing_holder_ptr->it_group->type != Read)
+            throw Exception("Attempt to acquire exclusive lock recursively", ErrorCodes::LOGICAL_ERROR);
+
+        return existing_holder_ptr;
     }

     if (type == Type::Write || queue.empty() || queue.back().type == Type::Write)
@@ -115,11 +121,15 @@ RWLockImpl::LockHandler RWLockImpl::getLock(RWLockImpl::Type type)
         throw;
     }

-    LockHandler res(new LockHandlerImpl(shared_from_this(), it_group, it_client));
+    LockHolder res(new LockHolderImpl(shared_from_this(), it_group, it_client));

-    /// Insert myself (weak_ptr to the handler) to threads set to implement recursive lock
-    it_handler = thread_to_handler.emplace(this_thread_id, res).first;
-    res->it_handler = it_handler;
+    /// Insert myself (weak_ptr to the holder) to threads set to implement recursive lock
+    it_thread = thread_to_holder.emplace(this_thread_id, res).first;
+    res->it_thread = it_thread;
+
+    if (query_id != RWLockImpl::NO_QUERY)
+        it_query = query_id_to_holder.emplace(query_id, res).first;
+    res->it_query = it_query;

     /// We are first, we should not wait anything
     /// If we are not the first client in the group, a notification could be already sent
@@ -137,12 +147,15 @@ RWLockImpl::LockHandler RWLockImpl::getLock(RWLockImpl::Type type)
 }


-RWLockImpl::LockHandlerImpl::~LockHandlerImpl()
+RWLockImpl::LockHolderImpl::~LockHolderImpl()
 {
     std::unique_lock lock(parent->mutex);

-    /// Remove weak_ptr to the handler, since there are no owners of the current lock
-    parent->thread_to_handler.erase(it_handler);
+    /// Remove weak_ptrs to the holder, since there are no owners of the current lock
+    parent->thread_to_holder.erase(it_thread);
+
+    if (it_query != parent->query_id_to_holder.end())
+        parent->query_id_to_holder.erase(it_query);

     /// Removes myself from client list of our group
     it_group->clients.erase(it_client);
@@ -161,7 +174,7 @@ RWLockImpl::LockHandlerImpl::~LockHandlerImpl()
 }


-RWLockImpl::LockHandlerImpl::LockHandlerImpl(RWLock && parent, RWLockImpl::GroupsContainer::iterator it_group,
+RWLockImpl::LockHolderImpl::LockHolderImpl(RWLock && parent, RWLockImpl::GroupsContainer::iterator it_group,
                                              RWLockImpl::ClientsContainer::iterator it_client)
     : parent{std::move(parent)}, it_group{it_group}, it_client{it_client},
       active_client_increment{(*it_client == RWLockImpl::Read) ? CurrentMetrics::RWLockActiveReaders
@@ -1,4 +1,5 @@
 #pragma once
+#include <Core/Types.h>
 #include <boost/core/noncopyable.hpp>
 #include <list>
 #include <vector>
@@ -17,7 +18,13 @@ using RWLock = std::shared_ptr<RWLockImpl>;


 /// Implements shared lock with FIFO service
-/// Can be acquired recursively (several calls from the same thread) in Read mode
+/// Can be acquired recursively (several calls for the same query or the same OS thread) in Read mode
+///
+/// NOTE: it is important to allow acquiring the same lock in Read mode without waiting if it is already
+/// acquired by another thread of the same query. Otherwise the following deadlock is possible:
+/// - SELECT thread 1 locks in the Read mode
+/// - ALTER tries to lock in the Write mode (waits for SELECT thread 1)
+/// - SELECT thread 2 tries to lock in the Read mode (waits for ALTER)
 class RWLockImpl : public std::enable_shared_from_this<RWLockImpl>
 {
 public:
@@ -29,14 +36,17 @@ public:

     static RWLock create() { return RWLock(new RWLockImpl); }

-    /// Just use LockHandler::reset() to release the lock
-    class LockHandlerImpl;
-    friend class LockHandlerImpl;
-    using LockHandler = std::shared_ptr<LockHandlerImpl>;
+    /// Just use LockHolder::reset() to release the lock
+    class LockHolderImpl;
+    friend class LockHolderImpl;
+    using LockHolder = std::shared_ptr<LockHolderImpl>;

     /// Waits in the queue and returns appropriate lock
-    LockHandler getLock(Type type);
+    /// Empty query_id means the lock is acquired out of the query context (e.g. in a background thread).
+    LockHolder getLock(Type type, const String & query_id);
+
+    /// Use as query_id to acquire a lock outside the query context.
+    inline static const String NO_QUERY = String();

 private:
     RWLockImpl() = default;
@@ -44,7 +54,8 @@ private:
     struct Group;
     using GroupsContainer = std::list<Group>;
     using ClientsContainer = std::list<Type>;
-    using ThreadToHandler = std::map<std::thread::id, std::weak_ptr<LockHandlerImpl>>;
+    using ThreadToHolder = std::map<std::thread::id, std::weak_ptr<LockHolderImpl>>;
+    using QueryIdToHolder = std::map<String, std::weak_ptr<LockHolderImpl>>;

     /// Group of clients that should be executed concurrently
     /// i.e. a group could contain several readers, but only one writer
@@ -61,7 +72,8 @@ private:

     mutable std::mutex mutex;
     GroupsContainer queue;
-    ThreadToHandler thread_to_handler;
+    ThreadToHolder thread_to_holder;
+    QueryIdToHolder query_id_to_holder;
 };
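With query_id threaded through getLock, any thread working on the same query can re-acquire the Read lock without queueing behind a pending Write request, which is exactly the deadlock described in the new header comment. A hypothetical caller sketch (the include path, function name, and surrounding code are assumptions, not from the diff; namespace qualifiers are omitted as in the tests below):

    #include <Common/RWLock.h>

    /// Two threads of one query can both hold the table lock in Read mode,
    /// keyed by the query id; a background task passes RWLockImpl::NO_QUERY
    /// and only gets thread-scoped recursion.
    void processPart(RWLock table_lock, const String & query_id)
    {
        auto read_lock = table_lock->getLock(RWLockImpl::Read, query_id);
        /// ... read from the table. A second getLock(Read, query_id) from any
        /// thread of this query returns the existing holder instead of waiting.
        read_lock.reset(); /// release explicitly, or let the holder go out of scope
    }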
@@ -152,7 +152,7 @@ public:
         auto it = counter_map.find(key, hash);
         if (it != counter_map.end())
         {
-            auto c = it->second;
+            auto c = it->getSecond();
             c->count += increment;
             c->error += error;
             percolate(c);
@@ -189,8 +189,8 @@ public:
             min->error = alpha + error;
             percolate(min);

-            it->second = min;
-            it->first = min->key;
+            it->getSecond() = min;
+            it->getFirstMutable() = min->key;
             counter_map.reinsert(it, hash);
         }
     }
@@ -137,8 +137,8 @@ struct TasksStatsCounters
         profile_events.increment(ProfileEvents::OSCPUVirtualTimeMicroseconds,
             safeDiff(prev.stat.cpu_run_virtual_total, curr.stat.cpu_run_virtual_total) / 1000U);

-        /// Too old struct version, do not read new fields
-        if (curr.stat.version < TASKSTATS_VERSION)
+        /// Since TASKSTATS_VERSION = 3 extended accounting and IO accounting is available.
+        if (curr.stat.version < 3)
             return;

         profile_events.increment(ProfileEvents::OSReadChars, safeDiff(prev.stat.read_char, curr.stat.read_char));
@@ -155,10 +155,10 @@ int main(int argc, char ** argv)
             map.emplace(rand(), it, inserted);
             if (inserted)
             {
-                new(&it->second) Arr(n);
+                new(&it->getSecond()) Arr(n);

                 for (size_t j = 0; j < n; ++j)
-                    it->second[j] = field;
+                    it->getSecond()[j] = field;
             }
         }
@ -39,7 +39,7 @@ TEST(Common, RWLock_1)
auto type = (std::uniform_int_distribution<>(0, 9)(gen) >= round) ? RWLockImpl::Read : RWLockImpl::Write;
auto sleep_for = std::chrono::duration<int, std::micro>(std::uniform_int_distribution<>(1, 100)(gen));

auto lock = fifo_lock->getLock(type);
auto lock = fifo_lock->getLock(type, RWLockImpl::NO_QUERY);

if (type == RWLockImpl::Write)
{
@ -99,7 +99,7 @@ TEST(Common, RWLock_Recursive)
{
for (int i = 0; i < 2 * cycles; ++i)
{
auto lock = fifo_lock->getLock(RWLockImpl::Write);
auto lock = fifo_lock->getLock(RWLockImpl::Write, RWLockImpl::NO_QUERY);

auto sleep_for = std::chrono::duration<int, std::micro>(std::uniform_int_distribution<>(1, 100)(gen));
std::this_thread::sleep_for(sleep_for);
@ -110,17 +110,17 @@ TEST(Common, RWLock_Recursive)
{
for (int i = 0; i < cycles; ++i)
{
auto lock1 = fifo_lock->getLock(RWLockImpl::Read);
auto lock1 = fifo_lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY);

auto sleep_for = std::chrono::duration<int, std::micro>(std::uniform_int_distribution<>(1, 100)(gen));
std::this_thread::sleep_for(sleep_for);

auto lock2 = fifo_lock->getLock(RWLockImpl::Read);
auto lock2 = fifo_lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY);

EXPECT_ANY_THROW({fifo_lock->getLock(RWLockImpl::Write);});
EXPECT_ANY_THROW({fifo_lock->getLock(RWLockImpl::Write, RWLockImpl::NO_QUERY);});
}

fifo_lock->getLock(RWLockImpl::Write);
fifo_lock->getLock(RWLockImpl::Write, RWLockImpl::NO_QUERY);
});

t1.join();
@ -143,7 +143,7 @@ TEST(Common, RWLock_PerfTest_Readers)
{
for (auto i = 0; i < cycles; ++i)
{
auto lock = fifo_lock->getLock(RWLockImpl::Read);
auto lock = fifo_lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY);
}
};

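As the updated tests show, RWLockImpl::getLock() now takes the id of the query acquiring the lock, with RWLockImpl::NO_QUERY for callers that run outside of any query. A hedged sketch of the usage, assuming the same create() factory that produces the tests' fifo_lock:

auto lock = RWLockImpl::create();
{
    /// Shared (read) lock; several readers may hold it concurrently.
    auto read_holder = lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY);
    /// The holder releases the lock when it is destroyed.
}
auto write_holder = lock->getLock(RWLockImpl::Write, "some-query-id"); /// exclusive; the query id here is illustrative
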
@ -21,13 +21,13 @@ int main(int, char **)
bool inserted;

cont.emplace(3, it, inserted);
std::cerr << inserted << ", " << *it << std::endl;
std::cerr << inserted << ", " << it->getValue() << std::endl;

cont.emplace(3, it, inserted);
std::cerr << inserted << ", " << *it << std::endl;
std::cerr << inserted << ", " << it->getValue() << std::endl;

for (auto x : cont)
std::cerr << x << std::endl;
std::cerr << x.getValue() << std::endl;

DB::WriteBufferFromOwnString wb;
cont.writeText(wb);

|
||||
{
|
||||
if (*it == *prev_it)
|
||||
{
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
continue;
|
||||
}
|
||||
prev_it = it;
|
||||
|
||||
bool inserted;
|
||||
map.emplace(*it, found, inserted);
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
}
|
||||
}
|
||||
|
||||
@ -107,14 +107,14 @@ void aggregate22(MapTwoLevel & map, Source::const_iterator begin, Source::const_
|
||||
{
|
||||
if (*it == *prev_it)
|
||||
{
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
continue;
|
||||
}
|
||||
prev_it = it;
|
||||
|
||||
bool inserted;
|
||||
map.emplace(*it, found, inserted);
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
}
|
||||
}
|
||||
|
||||
@ -126,7 +126,7 @@ void merge2(MapTwoLevel * maps, size_t num_threads, size_t bucket)
|
||||
{
|
||||
for (size_t i = 1; i < num_threads; ++i)
|
||||
for (auto it = maps[i].impls[bucket].begin(); it != maps[i].impls[bucket].end(); ++it)
|
||||
maps[0].impls[bucket][it->first] += it->second;
|
||||
maps[0].impls[bucket][it->getFirst()] += it->getSecond();
|
||||
}
|
||||
|
||||
void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_iterator begin, Source::const_iterator end)
|
||||
@ -138,7 +138,7 @@ void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_
|
||||
Map::iterator found = local_map.find(*it);
|
||||
|
||||
if (found != local_map.end())
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
else if (local_map.size() < threshold)
|
||||
++local_map[*it]; /// TODO You could do one lookup, not two.
|
||||
else
|
||||
@ -163,13 +163,13 @@ void aggregate33(Map & local_map, Map & global_map, Mutex & mutex, Source::const
|
||||
Map::iterator found;
|
||||
bool inserted;
|
||||
local_map.emplace(*it, found, inserted);
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
|
||||
if (inserted && local_map.size() == threshold)
|
||||
{
|
||||
std::lock_guard<Mutex> lock(mutex);
|
||||
for (auto & value_type : local_map)
|
||||
global_map[value_type.first] += value_type.second;
|
||||
global_map[value_type.getFirst()] += value_type.getSecond();
|
||||
|
||||
local_map.clear();
|
||||
}
|
||||
@ -198,7 +198,7 @@ void aggregate4(Map & local_map, MapTwoLevel & global_map, Mutex * mutexes, Sour
|
||||
Map::iterator found = local_map.find(*it);
|
||||
|
||||
if (found != local_map.end())
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
else
|
||||
{
|
||||
size_t hash_value = global_map.hash(*it);
|
||||
@ -311,7 +311,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
for (size_t i = 1; i < num_threads; ++i)
|
||||
for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
|
||||
maps[0][it->first] += it->second;
|
||||
maps[0][it->getFirst()] += it->getSecond();
|
||||
|
||||
watch.stop();
|
||||
double time_merged = watch.elapsedSeconds();
|
||||
@ -365,7 +365,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
for (size_t i = 1; i < num_threads; ++i)
|
||||
for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
|
||||
maps[0][it->first] += it->second;
|
||||
maps[0][it->getFirst()] += it->getSecond();
|
||||
|
||||
watch.stop();
|
||||
|
||||
@ -435,7 +435,7 @@ int main(int argc, char ** argv)
|
||||
continue;
|
||||
|
||||
finish = false;
|
||||
maps[0][iterators[i]->first] += iterators[i]->second;
|
||||
maps[0][iterators[i]->getFirst()] += iterators[i]->getSecond();
|
||||
++iterators[i];
|
||||
}
|
||||
|
||||
@ -623,7 +623,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
for (size_t i = 0; i < num_threads; ++i)
|
||||
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
|
||||
global_map[it->first] += it->second;
|
||||
global_map[it->getFirst()] += it->getSecond();
|
||||
|
||||
pool.wait();
|
||||
|
||||
@ -689,7 +689,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
for (size_t i = 0; i < num_threads; ++i)
|
||||
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
|
||||
global_map[it->first] += it->second;
|
||||
global_map[it->getFirst()] += it->getSecond();
|
||||
|
||||
pool.wait();
|
||||
|
||||
@ -760,7 +760,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
for (size_t i = 0; i < num_threads; ++i)
|
||||
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
|
||||
global_map[it->first] += it->second;
|
||||
global_map[it->getFirst()] += it->getSecond();
|
||||
|
||||
pool.wait();
|
||||
|
||||
|
@ -51,9 +51,9 @@ struct AggregateIndependent
map.emplace(*it, place, inserted);

if (inserted)
creator(place->second);
creator(place->getSecond());
else
updater(place->second);
updater(place->getSecond());
}
});
}
@ -93,7 +93,7 @@ struct AggregateIndependentWithSequentialKeysOptimization
{
if (it != begin && *it == prev_key)
{
updater(place->second);
updater(place->getSecond());
continue;
}
prev_key = *it;
@ -102,9 +102,9 @@ struct AggregateIndependentWithSequentialKeysOptimization
map.emplace(*it, place, inserted);

if (inserted)
creator(place->second);
creator(place->getSecond());
else
updater(place->second);
updater(place->getSecond());
}
});
}
@ -131,7 +131,7 @@ struct MergeSequential
auto begin = source_maps[i]->begin();
auto end = source_maps[i]->end();
for (auto it = begin; it != end; ++it)
merger((*source_maps[0])[it->first], it->second);
merger((*source_maps[0])[it->getFirst()], it->getSecond());
}

result_map = source_maps[0];
@ -161,7 +161,7 @@ struct MergeSequentialTransposed /// In practice not better than usual.
continue;

finish = false;
merger((*result_map)[iterators[i]->first], iterators[i]->second);
merger((*result_map)[iterators[i]->getFirst()], iterators[i]->getSecond());
++iterators[i];
}

@ -20,13 +20,13 @@ int main(int, char **)
bool inserted;

cont.emplace(3, it, inserted);
std::cerr << inserted << ", " << *it << std::endl;
std::cerr << inserted << ", " << it->getValue() << std::endl;

cont.emplace(3, it, inserted);
std::cerr << inserted << ", " << *it << std::endl;
std::cerr << inserted << ", " << it->getValue() << std::endl;

for (auto x : cont)
std::cerr << x << std::endl;
std::cerr << x.getValue() << std::endl;

DB::WriteBufferFromOwnString wb;
cont.writeText(wb);
@ -42,7 +42,7 @@ int main(int, char **)
cont[1] = "Goodbye.";

for (auto x : cont)
std::cerr << x.first << " -> " << x.second << std::endl;
std::cerr << x.getFirst() << " -> " << x.getSecond() << std::endl;

DB::WriteBufferFromOwnString wb;
cont.writeText(wb);

@ -163,7 +163,7 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header,
StoragePtr storage = StorageMemory::create(data.second, ColumnsDescription{columns});
storage->startup();
context.addExternalTable(data.second, storage);
BlockOutputStreamPtr output = storage->write(ASTPtr(), settings);
BlockOutputStreamPtr output = storage->write(ASTPtr(), context);

/// Write data
data.first->readPrefix();

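The write() change above is one instance of a signature change applied across this commit: IStorage::write() now receives the Context rather than just the Settings, so implementations can reach query-scoped state and pull the settings themselves. Sketch of the call shape, with names as in the surrounding code:

/// Before: BlockOutputStreamPtr output = storage->write(ASTPtr(), settings);
/// After: pass the whole context; implementations can still obtain settings via context.getSettingsRef().
BlockOutputStreamPtr output = storage->write(ASTPtr(), context);
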
@ -10,7 +10,7 @@

namespace DB
{
inline void readBinary(Array & x, ReadBuffer & buf)
void readBinary(Array & x, ReadBuffer & buf)
{
size_t size;
UInt8 type;
@ -151,12 +151,8 @@ namespace DB
DB::String res = applyVisitor(DB::FieldVisitorToString(), DB::Field(x));
buf.write(res.data(), res.size());
}
}


namespace DB
{
inline void readBinary(Tuple & x_def, ReadBuffer & buf)
void readBinary(Tuple & x_def, ReadBuffer & buf)
{
auto & x = x_def.toUnderType();
size_t size;

@ -222,7 +222,7 @@ int main(int argc, char ** argv)
size_t i = 0;
for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
{
devnull.write(it->first.data, it->first.size);
devnull.write(it->getFirst().data, it->getFirst().size);
devnull << std::endl;
}

@ -249,7 +249,7 @@ int main(int argc, char ** argv)
size_t i = 0;
for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
{
devnull.write(it->first.data, it->first.size);
devnull.write(it->getFirst().data, it->getFirst().size);
devnull << std::endl;
}
}

@ -19,10 +19,14 @@ namespace ErrorCodes
CreatingSetsBlockInputStream::CreatingSetsBlockInputStream(
const BlockInputStreamPtr & input,
const SubqueriesForSets & subqueries_for_sets_,
const SizeLimits & network_transfer_limits)
: subqueries_for_sets(subqueries_for_sets_),
network_transfer_limits(network_transfer_limits)
const Context & context_)
: subqueries_for_sets(subqueries_for_sets_)
, context(context_)
{
const Settings & settings = context.getSettingsRef();
network_transfer_limits = SizeLimits(
settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode);

for (auto & elem : subqueries_for_sets)
{
if (elem.second.source)
@ -92,7 +96,7 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery)

BlockOutputStreamPtr table_out;
if (subquery.table)
table_out = subquery.table->write({}, {});
table_out = subquery.table->write({}, context);

bool done_with_set = !subquery.set;
bool done_with_join = !subquery.join;

@ -20,7 +20,7 @@ public:
CreatingSetsBlockInputStream(
const BlockInputStreamPtr & input,
const SubqueriesForSets & subqueries_for_sets_,
const SizeLimits & network_transfer_limits);
const Context & context_);

String getName() const override { return "CreatingSets"; }

@ -35,6 +35,7 @@ protected:

private:
SubqueriesForSets subqueries_for_sets;
const Context & context;
bool created = false;

SizeLimits network_transfer_limits;

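With the constructor change above, callers no longer compute the transfer limits; the stream derives them from the Context it is given. A sketch of the simplified caller-side construction (the caller's change is not shown in this diff, so the shape is assumed):

/// Previously the caller built the limits itself:
/// SizeLimits limits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode);
/// Now passing the context is enough:
auto stream = std::make_shared<CreatingSetsBlockInputStream>(input, subqueries_for_sets, context);
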
@ -247,6 +247,26 @@ void IBlockInputStream::checkQuota(Block & block)
}
}

static void limitProgressingSpeed(size_t total_progress_size, size_t max_speed_in_seconds, UInt64 total_elapsed_microseconds)
{
/// How much time to wait for the average speed to become `max_speed_in_seconds`.
UInt64 desired_microseconds = total_progress_size * 1000000 / max_speed_in_seconds;

if (desired_microseconds > total_elapsed_microseconds)
{
UInt64 sleep_microseconds = desired_microseconds - total_elapsed_microseconds;
::timespec sleep_ts;
sleep_ts.tv_sec = sleep_microseconds / 1000000;
sleep_ts.tv_nsec = sleep_microseconds % 1000000 * 1000;

/// NOTE: Returns early in case of a signal. This is considered normal.
/// NOTE: This behavior also affects killing of queries.
::nanosleep(&sleep_ts, nullptr);

ProfileEvents::increment(ProfileEvents::ThrottlerSleepMicroseconds, sleep_microseconds);
}
}


void IBlockInputStream::progressImpl(const Progress & value)
{
@ -313,8 +333,9 @@ void IBlockInputStream::progressImpl(const Progress & value)
last_profile_events_update_time = total_elapsed_microseconds;
}

if ((limits.min_execution_speed || (total_rows && limits.timeout_before_checking_execution_speed != 0))
&& (static_cast<Int64>(total_elapsed_microseconds) > limits.timeout_before_checking_execution_speed.totalMicroseconds()))
if ((limits.min_execution_speed || limits.max_execution_speed || limits.min_execution_speed_bytes ||
limits.max_execution_speed_bytes || (total_rows && limits.timeout_before_checking_execution_speed != 0)) &&
(static_cast<Int64>(total_elapsed_microseconds) > limits.timeout_before_checking_execution_speed.totalMicroseconds()))
{
/// Do not count sleeps in throttlers
UInt64 throttler_sleep_microseconds = CurrentThread::getProfileEvents()[ProfileEvents::ThrottlerSleepMicroseconds];
@ -328,6 +349,11 @@ void IBlockInputStream::progressImpl(const Progress & value)
+ " rows/sec., minimum: " + toString(limits.min_execution_speed),
ErrorCodes::TOO_SLOW);

if (limits.min_execution_speed_bytes && progress.bytes / elapsed_seconds < limits.min_execution_speed_bytes)
throw Exception("Query is executing too slow: " + toString(progress.bytes / elapsed_seconds)
+ " bytes/sec., minimum: " + toString(limits.min_execution_speed_bytes),
ErrorCodes::TOO_SLOW);

/// If the predicted execution time is longer than `max_execution_time`.
if (limits.max_execution_time != 0 && total_rows)
{
@ -339,6 +365,12 @@ void IBlockInputStream::progressImpl(const Progress & value)
+ ". Estimated rows to process: " + toString(total_rows),
ErrorCodes::TOO_SLOW);
}

if (limits.max_execution_speed && progress.rows / elapsed_seconds >= limits.max_execution_speed)
limitProgressingSpeed(progress.rows, limits.max_execution_speed, total_elapsed_microseconds);

if (limits.max_execution_speed_bytes && progress.bytes / elapsed_seconds >= limits.max_execution_speed_bytes)
limitProgressingSpeed(progress.bytes, limits.max_execution_speed_bytes, total_elapsed_microseconds);
}
}

@ -212,6 +212,9 @@ public:

/// in rows per second
size_t min_execution_speed = 0;
size_t max_execution_speed = 0;
size_t min_execution_speed_bytes = 0;
size_t max_execution_speed_bytes = 0;
/// Verify that the speed is not too low after the specified time has elapsed.
Poco::Timespan timeout_before_checking_execution_speed = 0;
};

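A worked example of the throttling arithmetic in limitProgressingSpeed() above: with max_execution_speed = 1000000 rows/sec, after reading 5000000 rows in 3 seconds (3000000 microseconds) the desired elapsed time is 5000000 * 1000000 / 1000000 = 5000000 microseconds, so the thread sleeps for the remaining 2000000 microseconds, pulling the average speed back down to the cap. The same computation applies to max_execution_speed_bytes with bytes in place of rows.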
@ -20,7 +20,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
* Although any insertion into the table is now done via PushingToViewsBlockOutputStream,
* it's clear that this is not the best place for this functionality.
*/
addTableLock(storage->lockStructure(true));
addTableLock(storage->lockStructure(true, context.getCurrentQueryId()));

/// If the "root" table deduplicates blocks, there is no need to make deduplication for children
/// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks
@ -45,7 +45,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
auto & materialized_view = dynamic_cast<const StorageMaterializedView &>(*dependent_table);

if (StoragePtr inner_table = materialized_view.tryGetTargetTable())
addTableLock(inner_table->lockStructure(true));
addTableLock(inner_table->lockStructure(true, context.getCurrentQueryId()));

auto query = materialized_view.getInnerQuery();
BlockOutputStreamPtr out = std::make_shared<PushingToViewsBlockOutputStream>(
@ -57,7 +57,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
/* Do not push to destination table if the flag is set */
if (!no_destination)
{
output = storage->write(query_ptr, context.getSettingsRef());
output = storage->write(query_ptr, context);
replicated_output = dynamic_cast<ReplicatedMergeTreeBlockOutputStream *>(output.get());
}
}

@ -74,7 +74,7 @@ void DataTypeEnum<Type>::fillMaps()

if (!name_to_value_pair.second)
throw Exception{"Duplicate names in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second)
+ " and '" + name_to_value_pair.first->first.toString() + "' = " + toString(name_to_value_pair.first->second),
+ " and '" + name_to_value_pair.first->getFirst().toString() + "' = " + toString(name_to_value_pair.first->getSecond()),
ErrorCodes::SYNTAX_ERROR};

const auto value_to_name_pair = value_to_name_map.insert(

@ -81,7 +81,7 @@ public:
if (it == std::end(name_to_value_map))
throw Exception{"Unknown element '" + field_name.toString() + "' for type " + getName(), ErrorCodes::LOGICAL_ERROR};

return it->second;
return it->getSecond();
}

Field castToName(const Field & value_or_name) const override;

@ -222,7 +222,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes

std::vector<size_t> required_rows(outdated_keys.size());
std::transform(
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); });
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getSecond().front(); });

/// request new values
update(

@ -342,7 +342,7 @@ private:

std::vector<size_t> required_rows(outdated_keys.size());
std::transform(
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); });
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getSecond().front(); });

/// request new values
update(
@ -468,7 +468,7 @@ private:
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
{
return pair.second.front();
return pair.getSecond().front();
});

update(
@ -500,7 +500,7 @@ private:
{
const StringRef key = keys_array[row];
const auto it = map.find(key);
const auto string_ref = it != std::end(map) ? it->second : get_default(row);
const auto string_ref = it != std::end(map) ? it->getSecond() : get_default(row);
out->insertData(string_ref.data, string_ref.size);
}
}
@ -607,7 +607,7 @@ private:
/// Check which ids have not been found and require setting null_value
for (const auto & key_found_pair : remaining_keys)
{
if (key_found_pair.second)
if (key_found_pair.getSecond())
{
++found_num;
continue;
@ -615,7 +615,7 @@ private:

++not_found_num;

auto key = key_found_pair.first;
auto key = key_found_pair.getFirst();
const auto hash = StringRefHash{}(key);
const auto find_result = findCellIdx(key, now, hash);
const auto & cell_idx = find_result.cell_idx;

@ -611,7 +611,7 @@ void ComplexKeyHashedDictionary::getItemsImpl(
const auto key = placeKeysInPool(i, key_columns, keys, temporary_keys_pool);

const auto it = attr.find(key);
set_value(i, it != attr.end() ? static_cast<OutputType>(it->second) : get_default(i));
set_value(i, it != attr.end() ? static_cast<OutputType>(it->getSecond()) : get_default(i));

/// free memory allocated for the key
temporary_keys_pool.rollback(key.size);
@ -779,7 +779,7 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & att
std::vector<StringRef> keys;
keys.reserve(attr.size());
for (const auto & key : attr)
keys.push_back(key.first);
keys.push_back(key.getFirst());

return keys;
}

@ -2,12 +2,15 @@

#include <future>
#include <thread>
#include <ext/scope_guard.h>
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/OwningBlockInputStream.h>
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
#include <Common/ShellCommand.h>
#include <Common/ThreadPool.h>
#include <common/logger_useful.h>
#include <common/LocalDateTime.h>
#include "DictionarySourceFactory.h"
#include "DictionarySourceHelpers.h"
#include "DictionaryStructure.h"
@ -46,7 +49,6 @@ ExecutableDictionarySource::ExecutableDictionarySource(
Block & sample_block,
const Context & context)
: log(&Logger::get("ExecutableDictionarySource"))
, update_time{std::chrono::system_clock::from_time_t(0)}
, dict_struct{dict_struct_}
, command{config.getString(config_prefix + ".command")}
, update_field{config.getString(config_prefix + ".update_field", "")}
@ -68,31 +70,6 @@ ExecutableDictionarySource::ExecutableDictionarySource(const ExecutableDictionar
{
}

std::string ExecutableDictionarySource::getUpdateFieldAndDate()
{
if (update_time != std::chrono::system_clock::from_time_t(0))
{
auto tmp_time = update_time;
update_time = std::chrono::system_clock::now();
time_t hr_time = std::chrono::system_clock::to_time_t(tmp_time) - 1;
char buffer[80];
struct tm * timeinfo;
timeinfo = localtime(&hr_time);
strftime(buffer, 80, "\"%Y-%m-%d %H:%M:%S\"", timeinfo);
std::string str_time(buffer);
return command + " " + update_field + " " + str_time;
///Example case: command -T "2018-02-12 12:44:04"
///should return all entries after mentioned date
///if executable is eligible to return entries according to date.
///Where "-T" is passed as update_field.
}
else
{
std::string str_time("\"0000-00-00 00:00:00\""); ///for initial load
return command + " " + update_field + " " + str_time;
}
}

BlockInputStreamPtr ExecutableDictionarySource::loadAll()
{
LOG_TRACE(log, "loadAll " + toString());
@ -103,9 +80,15 @@ BlockInputStreamPtr ExecutableDictionarySource::loadAll()

BlockInputStreamPtr ExecutableDictionarySource::loadUpdatedAll()
{
std::string command_update = getUpdateFieldAndDate();
LOG_TRACE(log, "loadUpdatedAll " + command_update);
auto process = ShellCommand::execute(command_update);
time_t new_update_time = time(nullptr);
SCOPE_EXIT(update_time = new_update_time);

std::string command_with_update_field = command;
if (update_time)
command_with_update_field += " " + update_field + " " + DB::toString(LocalDateTime(update_time - 1));

LOG_TRACE(log, "loadUpdatedAll " + command_with_update_field);
auto process = ShellCommand::execute(command_with_update_field);
auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size);
return std::make_shared<ShellCommandOwningBlockInputStream>(input_stream, std::move(process));
}

@ -12,6 +12,7 @@ class Logger;

namespace DB
{

/// Allows loading dictionaries from executable
class ExecutableDictionarySource final : public IDictionarySource
{
@ -27,6 +28,10 @@ public:

BlockInputStreamPtr loadAll() override;

/** The logic of this method is flawed, absolutely incorrect and ignorant.
* It may lead to skipping some values due to clock sync or timezone changes.
* The intended usage of "update_field" is totally different.
*/
BlockInputStreamPtr loadUpdatedAll() override;

BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override;
@ -43,13 +48,10 @@ public:

std::string toString() const override;


private:
std::string getUpdateFieldAndDate();

Poco::Logger * log;

std::chrono::time_point<std::chrono::system_clock> update_time;
time_t update_time = 0;
const DictionaryStructure dict_struct;
const std::string command;
const std::string update_field;

|
||||
{
|
||||
auto it = attr.find(id);
|
||||
if (it != std::end(attr))
|
||||
id = it->second;
|
||||
id = it->getSecond();
|
||||
else
|
||||
break;
|
||||
}
|
||||
@ -605,7 +605,7 @@ void HashedDictionary::getItemsImpl(
|
||||
for (const auto i : ext::range(0, rows))
|
||||
{
|
||||
const auto it = attr.find(ids[i]);
|
||||
set_value(i, it != attr.end() ? static_cast<OutputType>(it->second) : get_default(i));
|
||||
set_value(i, it != attr.end() ? static_cast<OutputType>(it->getSecond()) : get_default(i));
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
@ -707,7 +707,7 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute &
|
||||
PaddedPODArray<Key> ids;
|
||||
ids.reserve(attr.size());
|
||||
for (const auto & value : attr)
|
||||
ids.push_back(value.first);
|
||||
ids.push_back(value.getFirst());
|
||||
|
||||
return ids;
|
||||
}
|
||||
|
@ -137,7 +137,7 @@ void RangeHashedDictionary::getString(
if (it != std::end(attr))
{
const auto date = dates[i];
const auto & ranges_and_values = it->second;
const auto & ranges_and_values = it->getSecond();
const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v)
{
@ -408,7 +408,7 @@ void RangeHashedDictionary::getItemsImpl(
if (it != std::end(attr))
{
const auto date = dates[i];
const auto & ranges_and_values = it->second;
const auto & ranges_and_values = it->getSecond();
const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
{
@ -435,7 +435,7 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K

if (it != map.end())
{
auto & values = it->second;
auto & values = it->getSecond();

const auto insert_it
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<T> & lhs, const Range & rhs_range)
@ -508,7 +508,7 @@ void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key i

if (it != map.end())
{
auto & values = it->second;
auto & values = it->getSecond();

const auto insert_it = std::lower_bound(
std::begin(values), std::end(values), range, [](const Value<StringRef> & lhs, const Range & rhs_range)
@ -620,9 +620,9 @@ void RangeHashedDictionary::getIdsAndDates(

for (const auto & key : attr)
{
for (const auto & value : key.second)
for (const auto & value : key.getSecond())
{
ids.push_back(key.first);
ids.push_back(key.getFirst());
start_dates.push_back(value.range.left);
end_dates.push_back(value.range.right);

@ -64,9 +64,9 @@ inline size_t JSONEachRowRowInputStream::columnIndex(const StringRef & name, siz

if (prev_positions.size() > key_index
&& prev_positions[key_index] != name_map.end()
&& name == prev_positions[key_index]->first)
&& name == prev_positions[key_index]->getFirst())
{
return prev_positions[key_index]->second;
return prev_positions[key_index]->getSecond();
}
else
{
@ -77,7 +77,7 @@ inline size_t JSONEachRowRowInputStream::columnIndex(const StringRef & name, siz
if (key_index < prev_positions.size())
prev_positions[key_index] = it;

return it->second;
return it->getSecond();
}
else
return UNKNOWN_FIELD;

@ -128,7 +128,7 @@ bool TSKVRowInputStream::read(MutableColumns & columns, RowReadExtension & ext)
}
else
{
index = it->second;
index = it->getSecond();

if (read_columns[index])
throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);

@ -65,7 +65,7 @@ FunctionBasePtr FunctionBuilderJoinGet::buildImpl(const ColumnsWithTypeAndName &
auto join = storage_join->getJoin();
DataTypes data_types(arguments.size());

auto table_lock = storage_join->lockStructure(false);
auto table_lock = storage_join->lockStructure(false, context.getCurrentQueryId());
for (size_t i = 0; i < arguments.size(); ++i)
data_types[i] = arguments[i].type;

@ -1142,9 +1142,30 @@ public:
const auto & col_with_type_and_name_right = block.getByPosition(arguments[1]);
const IColumn * col_left_untyped = col_with_type_and_name_left.column.get();
const IColumn * col_right_untyped = col_with_type_and_name_right.column.get();

const DataTypePtr & left_type = col_with_type_and_name_left.type;
const DataTypePtr & right_type = col_with_type_and_name_right.type;

/// The case when arguments are the same (tautological comparison). Return constant.
/// NOTE: Nullable types are a special case. (BTW, this function uses the default implementation for Nullable, so Nullable types cannot be here. Check just in case.)
/// NOTE: We consider NaN comparison to be implementation specific (and in our implementation NaNs are sometimes equal, sometimes not).
if (left_type->equals(*right_type) && !left_type->isNullable() && col_left_untyped == col_right_untyped)
{
/// Always true: =, <=, >=
if constexpr (std::is_same_v<Op<int, int>, EqualsOp<int, int>>
|| std::is_same_v<Op<int, int>, LessOrEqualsOp<int, int>>
|| std::is_same_v<Op<int, int>, GreaterOrEqualsOp<int, int>>)
{
block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, 1u);
return;
}
else
{
block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, 0u);
return;
}
}

WhichDataType which_left{left_type};
WhichDataType which_right{right_type};

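In SQL terms, the short-circuit added above means that for a non-Nullable column x, a self-comparison such as x = x, x <= x or x >= x is now folded to a constant 1 column without reading any data, and x != x, x < x, x > x to a constant 0 column, because both arguments are literally the same column object.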
@ -425,15 +425,15 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable

for (const auto & pair : map)
{
if (pair.second == args)
if (pair.getSecond() == args)
{
++result_offset;
if constexpr (is_numeric_column)
result_data.insertValue(pair.first);
result_data.insertValue(pair.getFirst());
else if constexpr (std::is_same<ColumnType, ColumnString>::value || std::is_same<ColumnType, ColumnFixedString>::value)
result_data.insertData(pair.first.data, pair.first.size);
result_data.insertData(pair.getFirst().data, pair.getFirst().size);
else
result_data.deserializeAndInsertFromArena(pair.first.data);
result_data.deserializeAndInsertFromArena(pair.getFirst().data);

if (all_nullable)
null_map.push_back(0);

dbms/src/Functions/flatten.cpp (new file, 121 lines)
@ -0,0 +1,121 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypeArray.h>
#include <Columns/ColumnArray.h>

namespace DB
{

/// flatten([[1, 2, 3], [4, 5]]) = [1, 2, 3, 4, 5] - flatten array.
class FunctionFlatten : public IFunction
{
public:
static constexpr auto name = "flatten";

static FunctionPtr create(const Context &) { return std::make_shared<FunctionFlatten>(); }

size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForConstants() const override { return true; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isArray(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() +
" of argument of function " + getName() +
", expected Array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

DataTypePtr nested_type = arguments[0];
while (isArray(nested_type))
nested_type = checkAndGetDataType<DataTypeArray>(nested_type.get())->getNestedType();

return std::make_shared<DataTypeArray>(nested_type);
}

void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
{
/** We create an array column whose elements are the deepest elements of the nested arrays,
* and construct offsets by selecting elements of the deepest offsets by values of ancestor offsets.
*
Example 1:

Source column: Array(Array(UInt8)):
Row 1: [[1, 2, 3], [4, 5]], Row 2: [[6], [7, 8]]
data: [1, 2, 3], [4, 5], [6], [7, 8]
offsets: 2, 4
data.data: 1 2 3 4 5 6 7 8
data.offsets: 3 5 6 8

Result column: Array(UInt8):
Row 1: [1, 2, 3, 4, 5], Row 2: [6, 7, 8]
data: 1 2 3 4 5 6 7 8
offsets: 5 8

Result offsets are selected from the deepest offsets (data.offsets) by the previous level's offsets (indices are decremented by one):
3 5 6 8
^ ^

Example 2:

Source column: Array(Array(Array(UInt8))):
Row 1: [[], [[1], [], [2, 3]]], Row 2: [[[4]]]

most deep data: 1 2 3 4

offsets1: 2 3
offsets2: 0 3 4
- ^ ^ - select by prev offsets
offsets3: 1 1 3 4
- ^ ^ - select by prev offsets

result offsets: 3, 4
result: Row 1: [1, 2, 3], Row 2: [4]
*/

const ColumnArray * src_col = checkAndGetColumn<ColumnArray>(block.getByPosition(arguments[0]).column.get());

if (!src_col)
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " in argument of function 'flatten'",
ErrorCodes::ILLEGAL_COLUMN);

const IColumn::Offsets & src_offsets = src_col->getOffsets();

ColumnArray::ColumnOffsets::MutablePtr result_offsets_column;
const IColumn::Offsets * prev_offsets = &src_offsets;
const IColumn * prev_data = &src_col->getData();

while (const ColumnArray * next_col = checkAndGetColumn<ColumnArray>(prev_data))
{
if (!result_offsets_column)
result_offsets_column = ColumnArray::ColumnOffsets::create(input_rows_count);

IColumn::Offsets & result_offsets = result_offsets_column->getData();

const IColumn::Offsets * next_offsets = &next_col->getOffsets();

for (size_t i = 0; i < input_rows_count; ++i)
result_offsets[i] = (*next_offsets)[(*prev_offsets)[i] - 1]; /// -1 array subscript is Ok, see PaddedPODArray

prev_offsets = &result_offsets;
prev_data = &next_col->getData();
}

block.getByPosition(result).column = ColumnArray::create(
prev_data->getPtr(),
result_offsets_column ? std::move(result_offsets_column) : src_col->getOffsetsPtr());
}

private:
String getName() const override
{
return name;
}
};


void registerFunctionFlatten(FunctionFactory & factory)
{
factory.registerFunction<FunctionFlatten>();
}

}

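Usage follows the comment at the top of the class: SELECT flatten([[1, 2, 3], [4, 5]]) returns [1, 2, 3, 4, 5], and deeper nesting collapses all the way down, e.g. flatten([[[1], [], [2, 3]], [[4]]]) returns [1, 2, 3, 4]. As in the examples above, only the offsets are recomputed level by level; the deepest data column is reused as-is without copying elements.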
@ -30,6 +30,7 @@ void registerFunctionArrayEnumerateUniqRanked(FunctionFactory &);
void registerFunctionArrayEnumerateDenseRanked(FunctionFactory &);
void registerFunctionArrayUniq(FunctionFactory &);
void registerFunctionArrayDistinct(FunctionFactory &);
void registerFunctionFlatten(FunctionFactory &);
void registerFunctionArrayWithConstant(FunctionFactory &);

void registerFunctionsArray(FunctionFactory & factory)
@ -61,6 +62,7 @@ void registerFunctionsArray(FunctionFactory & factory)
registerFunctionArrayEnumerateDenseRanked(factory);
registerFunctionArrayUniq(factory);
registerFunctionArrayDistinct(factory);
registerFunctionFlatten(factory);
registerFunctionArrayWithConstant(factory);
}

@ -508,7 +508,7 @@ private:
{
auto it = table.find(src[i]);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i])); /// little endian.
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i])); /// little endian.
else
dst[i] = dst_default;
}
@ -524,7 +524,7 @@ private:
{
auto it = table.find(src[i]);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i])); /// little endian.
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i])); /// little endian.
else
dst[i] = dst_default[i];
}
@ -540,7 +540,7 @@ private:
{
auto it = table.find(src[i]);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i]));
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i]));
else
dst[i] = src[i];
}
@ -557,7 +557,7 @@ private:
for (size_t i = 0; i < size; ++i)
{
auto it = table.find(src[i]);
StringRef ref = it != table.end() ? it->second : dst_default;
StringRef ref = it != table.end() ? it->getSecond() : dst_default;
dst_data.resize(current_dst_offset + ref.size);
memcpy(&dst_data[current_dst_offset], ref.data, ref.size);
current_dst_offset += ref.size;
@ -581,7 +581,7 @@ private:
StringRef ref;

if (it != table.end())
ref = it->second;
ref = it->getSecond();
else
{
ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]);
@ -611,7 +611,7 @@ private:
current_src_offset = src_offsets[i];
auto it = table.find(ref);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i]));
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i]));
else
dst[i] = dst_default;
}
@ -632,7 +632,7 @@ private:
current_src_offset = src_offsets[i];
auto it = table.find(ref);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i]));
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i]));
else
dst[i] = dst_default[i];
}
@ -655,7 +655,7 @@ private:

auto it = table.find(src_ref);

StringRef dst_ref = it != table.end() ? it->second : (with_default ? dst_default : src_ref);
StringRef dst_ref = it != table.end() ? it->getSecond() : (with_default ? dst_default : src_ref);
dst_data.resize(current_dst_offset + dst_ref.size);
memcpy(&dst_data[current_dst_offset], dst_ref.data, dst_ref.size);
current_dst_offset += dst_ref.size;
@ -697,7 +697,7 @@ private:
StringRef dst_ref;

if (it != table.end())
dst_ref = it->second;
dst_ref = it->getSecond();
else
{
dst_ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]);

@ -54,7 +54,10 @@ add_check (hashing_read_buffer)
add_executable (io_operators io_operators.cpp)
target_link_libraries (io_operators PRIVATE clickhouse_common_io)

if (OS_LINUX)
add_executable (write_int write_int.cpp)
target_link_libraries (write_int PRIVATE clickhouse_common_io)

if (OS_LINUX OR OS_FREEBSD)
add_executable(write_buffer_aio write_buffer_aio.cpp)
target_link_libraries (write_buffer_aio PRIVATE clickhouse_common_io ${Boost_FILESYSTEM_LIBRARY})

@ -1120,11 +1120,11 @@ void NO_INLINE Aggregator::convertToBlockImplFinal(

for (const auto & value : data)
{
method.insertKeyIntoColumns(value, key_columns, key_sizes);
method.insertKeyIntoColumns(value.getValue(), key_columns, key_sizes);

for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->insertResultInto(
value.second + offsets_of_aggregate_states[i],
value.getSecond() + offsets_of_aggregate_states[i],
*final_aggregate_columns[i]);
}

@ -1151,13 +1151,13 @@ void NO_INLINE Aggregator::convertToBlockImplNotFinal(

for (auto & value : data)
{
method.insertKeyIntoColumns(value, key_columns, key_sizes);
method.insertKeyIntoColumns(value.getValue(), key_columns, key_sizes);

/// reserved, so push_back does not throw exceptions
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_columns[i]->push_back(value.second + offsets_of_aggregate_states[i]);
aggregate_columns[i]->push_back(value.getSecond() + offsets_of_aggregate_states[i]);

value.second = nullptr;
value.getSecond() = nullptr;
}
}

@ -1495,26 +1495,26 @@ void NO_INLINE Aggregator::mergeDataImpl(
{
typename Table::iterator res_it;
bool inserted;
table_dst.emplace(it->first, res_it, inserted, it.getHash());
table_dst.emplace(it->getFirst(), res_it, inserted, it.getHash());

if (!inserted)
{
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->merge(
res_it->second + offsets_of_aggregate_states[i],
it->second + offsets_of_aggregate_states[i],
res_it->getSecond() + offsets_of_aggregate_states[i],
it->getSecond() + offsets_of_aggregate_states[i],
arena);

for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->destroy(
it->second + offsets_of_aggregate_states[i]);
it->getSecond() + offsets_of_aggregate_states[i]);
}
else
{
res_it->second = it->second;
res_it->getSecond() = it->getSecond();
}

it->second = nullptr;
it->getSecond() = nullptr;
}

table_src.clearAndShrink();
@ -1534,22 +1534,22 @@ void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl(

for (auto it = table_src.begin(), end = table_src.end(); it != end; ++it)
{
typename Table::iterator res_it = table_dst.find(it->first, it.getHash());
typename Table::iterator res_it = table_dst.find(it->getFirst(), it.getHash());

AggregateDataPtr res_data = table_dst.end() == res_it
? overflows
: res_it->second;
: res_it->getSecond();

for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->merge(
res_data + offsets_of_aggregate_states[i],
it->second + offsets_of_aggregate_states[i],
it->getSecond() + offsets_of_aggregate_states[i],
arena);

for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]);
aggregate_functions[i]->destroy(it->getSecond() + offsets_of_aggregate_states[i]);

it->second = nullptr;
it->getSecond() = nullptr;
}

table_src.clearAndShrink();
@ -1567,23 +1567,23 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl(

for (auto it = table_src.begin(); it != table_src.end(); ++it)
{
decltype(it) res_it = table_dst.find(it->first, it.getHash());
decltype(it) res_it = table_dst.find(it->getFirst(), it.getHash());

if (table_dst.end() == res_it)
continue;

AggregateDataPtr res_data = res_it->second;
AggregateDataPtr res_data = res_it->getSecond();

for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->merge(
res_data + offsets_of_aggregate_states[i],
it->second + offsets_of_aggregate_states[i],
it->getSecond() + offsets_of_aggregate_states[i],
arena);

for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]);
aggregate_functions[i]->destroy(it->getSecond() + offsets_of_aggregate_states[i]);

it->second = nullptr;
it->getSecond() = nullptr;
}

table_src.clearAndShrink();
@ -2428,7 +2428,7 @@ void NO_INLINE Aggregator::destroyImpl(Table & table) const
{
for (auto elem : table)
{
AggregateDataPtr & data = elem.second;
AggregateDataPtr & data = elem.getSecond();

/** If an exception (usually a lack of memory, the MemoryTracker throws) arose
* after inserting the key into a hash table, but before creating all states of aggregate functions,

@ -10,6 +10,7 @@

#include <common/StringRef.h>
#include <Common/Arena.h>
#include <Common/HashTable/FixedHashMap.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/TwoLevelHashMap.h>
#include <Common/ThreadPool.h>
@ -64,8 +65,8 @@ class IBlockOutputStream;

using AggregatedDataWithoutKey = AggregateDataPtr;

using AggregatedDataWithUInt8Key = HashMap<UInt64, AggregateDataPtr, TrivialHash, HashTableFixedGrower<8>>;
using AggregatedDataWithUInt16Key = HashMap<UInt64, AggregateDataPtr, TrivialHash, HashTableFixedGrower<16>>;
using AggregatedDataWithUInt8Key = FixedHashMap<UInt8, AggregateDataPtr>;
using AggregatedDataWithUInt16Key = FixedHashMap<UInt16, AggregateDataPtr>;

using AggregatedDataWithUInt64Key = HashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
using AggregatedDataWithStringKey = HashMapWithSavedHash<StringRef, AggregateDataPtr>;
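The FixedHashMap switch above is the substantive optimization in this hunk: for UInt8/UInt16 keys the table degenerates to a flat array of 256 or 65536 cells, so emplace/find become direct indexing with no hash function or collision probing. Conceptually (a sketch of the idea, not the in-tree implementation):

/// FixedHashMap<UInt8, AggregateDataPtr> behaves roughly like:
AggregateDataPtr cells[256] = {};      /// one slot per possible key value
AggregateDataPtr & slot = cells[key];  /// "lookup" is a plain array subscript
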
@ -178,7 +179,7 @@ struct AggregationMethodOneNumber
// Insert the key from the hash table into columns.
static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes & /*key_sizes*/)
{
static_cast<ColumnVectorHelper *>(key_columns[0].get())->insertRawData<sizeof(FieldType)>(reinterpret_cast<const char *>(&value.first));
static_cast<ColumnVectorHelper *>(key_columns[0].get())->insertRawData<sizeof(FieldType)>(reinterpret_cast<const char *>(&value.getFirst()));
}
};

@ -206,7 +207,7 @@ struct AggregationMethodString

static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &)
{
key_columns[0]->insertData(value.first.data, value.first.size);
key_columns[0]->insertData(value.getFirst().data, value.getFirst().size);
}
};

@ -234,7 +235,7 @@ struct AggregationMethodFixedString

static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &)
{
key_columns[0]->insertData(value.first.data, value.first.size);
key_columns[0]->insertData(value.getFirst().data, value.getFirst().size);
}
};

@ -326,7 +327,7 @@ struct AggregationMethodKeysFixed
/// corresponding key is nullable. Update the null map accordingly.
size_t bucket = i / 8;
size_t offset = i % 8;
UInt8 val = (reinterpret_cast<const UInt8 *>(&value.first)[bucket] >> offset) & 1;
UInt8 val = (reinterpret_cast<const UInt8 *>(&value.getFirst())[bucket] >> offset) & 1;
null_map->insertValue(val);
is_null = val == 1;
}
@ -338,7 +339,7 @@ struct AggregationMethodKeysFixed
else
{
size_t size = key_sizes[i];
observed_column->insertData(reinterpret_cast<const char *>(&value.first) + pos, size);
observed_column->insertData(reinterpret_cast<const char *>(&value.getFirst()) + pos, size);
pos += size;
}
}
@ -373,7 +374,7 @@ struct AggregationMethodSerialized

static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &)
{
auto pos = value.first.data;
auto pos = value.getFirst().data;
for (auto & column : key_columns)
pos = column->deserializeAndInsertFromArena(pos);
}

@ -295,7 +295,7 @@ bool DDLWorker::initAndCheckTask(const String & entry_name, String & out_reason,
{
/// What should we do if we even cannot parse host name and therefore cannot properly submit execution status?
/// We can try to create a fail node using the FQDN; if it equals the host name in the cluster config, the attempt will be successful.
/// Otherwise, that node will be ignored by DDLQueryStatusInputSream.
/// Otherwise, that node will be ignored by DDLQueryStatusInputStream.

tryLogCurrentException(log, "Cannot parse DDL task " + entry_name + ", will try to send error status");

@ -1020,12 +1020,12 @@ void DDLWorker::runCleanupThread()
}


class DDLQueryStatusInputSream : public IBlockInputStream
class DDLQueryStatusInputStream : public IBlockInputStream
{
public:

DDLQueryStatusInputSream(const String & zk_node_path, const DDLLogEntry & entry, const Context & context)
: node_path(zk_node_path), context(context), watch(CLOCK_MONOTONIC_COARSE), log(&Logger::get("DDLQueryStatusInputSream"))
DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, const Context & context)
: node_path(zk_node_path), context(context), watch(CLOCK_MONOTONIC_COARSE), log(&Logger::get("DDLQueryStatusInputStream"))
{
sample = Block{
{std::make_shared<DataTypeString>(), "host"},
@ -1046,7 +1046,7 @@ public:

String getName() const override
{
return "DDLQueryStatusInputSream";
return "DDLQueryStatusInputStream";
}

Block getHeader() const override { return sample; }
@ -1146,7 +1146,7 @@ public:
return sample.cloneEmpty();
}

~DDLQueryStatusInputSream() override = default;
~DDLQueryStatusInputStream() override = default;

private:

@ -1289,7 +1289,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & cont
if (context.getSettingsRef().distributed_ddl_task_timeout == 0)
return io;

auto stream = std::make_shared<DDLQueryStatusInputSream>(node_path, entry, context);
auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, context);
io.in = std::move(stream);
return io;
}

@ -115,7 +115,7 @@ private:

ThreadGroupStatusPtr thread_group;

friend class DDLQueryStatusInputSream;
friend class DDLQueryStatusInputStream;
friend struct DDLTask;
};

@ -304,7 +304,7 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node)
{
const IAST & args = *func->arguments;

if (storage && storage->mayBenefitFromIndexForIn(args.children.at(0)))
if (storage && storage->mayBenefitFromIndexForIn(args.children.at(0), context))
{
const ASTPtr & arg = args.children.at(1);
if (typeid_cast<ASTSubquery *>(arg.get()) || isIdentifier(arg))

@ -5,7 +5,7 @@
namespace DB
{

FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector<DatabaseAndTableWithAlias> & tables_)
FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector<TableWithColumnNames> & tables_)
: tables(tables_)
{
}
@ -16,13 +16,21 @@ void FindIdentifierBestTableData::visit(ASTIdentifier & identifier, ASTPtr &)

if (!identifier.compound())
{
if (!tables.empty())
best_table = &tables[0];
for (const auto & [table, names] : tables)
{
if (std::find(names.begin(), names.end(), identifier.name) != names.end())
{
// TODO: make sure no collision ever happens
if (!best_table)
best_table = &table;
}
}
}
else
{
// FIXME: make a better matcher using `names`?
size_t best_match = 0;
for (const DatabaseAndTableWithAlias & table : tables)
for (const auto & [table, names] : tables)
{
if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, table))
if (match > best_match)

@ -3,6 +3,7 @@
#include <Parsers/IAST.h>
#include <Parsers/ASTIdentifier.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>

namespace DB
{
@ -12,10 +13,10 @@ struct FindIdentifierBestTableData
using TypeToVisit = ASTIdentifier;
using IdentifierWithTable = std::pair<ASTIdentifier *, const DatabaseAndTableWithAlias *>;

const std::vector<DatabaseAndTableWithAlias> & tables;
const std::vector<TableWithColumnNames> & tables;
std::vector<IdentifierWithTable> identifier_table;

FindIdentifierBestTableData(const std::vector<DatabaseAndTableWithAlias> & tables_);
FindIdentifierBestTableData(const std::vector<TableWithColumnNames> & tables_);

void visit(ASTIdentifier & identifier, ASTPtr &);
};

@ -591,7 +591,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
if (!as_table_name.empty())
{
as_storage = context.getTable(as_database_name, as_table_name);
as_storage_lock = as_storage->lockStructure(false);
as_storage_lock = as_storage->lockStructure(false, context.getCurrentQueryId());
}

/// Set and retrieve list of columns.

@ -93,7 +93,7 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl()
table = context.getTable(database_name, table_name);
}

auto table_lock = table->lockStructure(false);
auto table_lock = table->lockStructure(false, context.getCurrentQueryId());
columns = table->getColumns().getAll();
column_defaults = table->getColumns().defaults;
column_comments = table->getColumns().comments;

@ -69,7 +69,7 @@ BlockIO InterpreterDropQuery::executeToTable(String & database_name_, String & t
|
||||
{
|
||||
database_and_table.second->shutdown();
|
||||
/// If table was already dropped by anyone, an exception will be thrown
|
||||
auto table_lock = database_and_table.second->lockForAlter();
|
||||
auto table_lock = database_and_table.second->lockForAlter(context.getCurrentQueryId());
|
||||
/// Drop table from memory, don't touch data and metadata
|
||||
database_and_table.first->detachTable(database_and_table.second->getTableName());
|
||||
}
|
||||
@ -78,7 +78,7 @@ BlockIO InterpreterDropQuery::executeToTable(String & database_name_, String & t
|
||||
database_and_table.second->checkTableCanBeDropped();
|
||||
|
||||
/// If table was already dropped by anyone, an exception will be thrown
|
||||
auto table_lock = database_and_table.second->lockForAlter();
|
||||
auto table_lock = database_and_table.second->lockForAlter(context.getCurrentQueryId());
|
||||
/// Drop table data, don't touch metadata
|
||||
database_and_table.second->truncate(query_ptr, context);
|
||||
}
|
||||
@ -89,7 +89,7 @@ BlockIO InterpreterDropQuery::executeToTable(String & database_name_, String & t
|
||||
database_and_table.second->shutdown();
|
||||
/// If table was already dropped by anyone, an exception will be thrown
|
||||
|
||||
auto table_lock = database_and_table.second->lockForAlter();
|
||||
auto table_lock = database_and_table.second->lockForAlter(context.getCurrentQueryId());
|
||||
/// Delete table metadata and table itself from memory
|
||||
|
||||
database_and_table.first->removeTable(context, database_and_table.second->getTableName());
|
||||
@ -126,7 +126,7 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(String & table_name, ASTDr
|
||||
if (kind == ASTDropQuery::Kind::Truncate)
|
||||
{
|
||||
/// If table was already dropped by anyone, an exception will be thrown
|
||||
auto table_lock = table->lockForAlter();
|
||||
auto table_lock = table->lockForAlter(context.getCurrentQueryId());
|
||||
/// Drop table data, don't touch metadata
|
||||
table->truncate(query_ptr, context);
|
||||
}
|
||||
@ -135,7 +135,7 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(String & table_name, ASTDr
|
||||
context_handle.tryRemoveExternalTable(table_name);
|
||||
table->shutdown();
|
||||
/// If table was already dropped by anyone, an exception will be thrown
|
||||
auto table_lock = table->lockForAlter();
|
||||
auto table_lock = table->lockForAlter(context.getCurrentQueryId());
|
||||
/// Delete table data
|
||||
table->drop();
|
||||
table->is_dropped = true;
|
||||
|
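A recurring change throughout this commit: every lockStructure()/lockForAlter() call site now passes context.getCurrentQueryId(), so a table lock records which query holds it. A hedged sketch of the idea (hypothetical types, not the actual IStorage or RWLock API):

#include <shared_mutex>
#include <string>

struct TableLockHolder
{
    std::shared_lock<std::shared_timed_mutex> lock;
    std::string owner_query_id;   /// recorded so a stuck lock can be traced back to a query
};

struct Storage
{
    std::shared_timed_mutex structure_mutex;

    /// Analogue of lockStructure(false, query_id): take a shared lock and remember the owner.
    TableLockHolder lockStructure(const std::string & query_id)
    {
        return {std::shared_lock<std::shared_timed_mutex>(structure_mutex), query_id};
    }
};

With the owner id attached, diagnostics such as "table is locked by query X" become possible when a DROP or ALTER waits on a long-running SELECT.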
@@ -1,13 +1,15 @@
-#include <sstream>
-
-#include <DataStreams/OneBlockInputStream.h>
-#include <DataStreams/BlockIO.h>
-#include <DataTypes/DataTypeString.h>
-#include <Parsers/queryToString.h>
-#include <Common/typeid_cast.h>
 #include <Interpreters/InterpreterExplainQuery.h>
 
+#include <DataStreams/BlockIO.h>
+#include <DataStreams/OneBlockInputStream.h>
+#include <DataTypes/DataTypeString.h>
+#include <Interpreters/InterpreterSelectWithUnionQuery.h>
+#include <Parsers/ASTExplainQuery.h>
+#include <Parsers/DumpASTNode.h>
+#include <Parsers/queryToString.h>
+#include <Common/typeid_cast.h>
+
+#include <sstream>
 
 
 namespace DB
@@ -26,7 +28,7 @@ Block InterpreterExplainQuery::getSampleBlock()
     Block block;
 
     ColumnWithTypeAndName col;
-    col.name = "ast";
+    col.name = "explain";
     col.type = std::make_shared<DataTypeString>();
     col.column = col.type->createColumn();
     block.insert(col);
@@ -38,12 +40,21 @@ Block InterpreterExplainQuery::getSampleBlock()
 BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
 {
     const ASTExplainQuery & ast = typeid_cast<const ASTExplainQuery &>(*query);
 
-    std::stringstream ss;
-    dumpAST(ast, ss);
-
     Block sample_block = getSampleBlock();
     MutableColumns res_columns = sample_block.cloneEmptyColumns();
 
+    std::stringstream ss;
+
+    if (ast.getKind() == ASTExplainQuery::ParsedAST)
+    {
+        dumpAST(ast, ss);
+    }
+    else if (ast.getKind() == ASTExplainQuery::AnalyzedSyntax)
+    {
+        InterpreterSelectWithUnionQuery interpreter(ast.children.at(0), context, {}, QueryProcessingStage::FetchColumns, 0, true, true);
+        interpreter.getQuery()->format(IAST::FormatSettings(ss, false));
+    }
+
     res_columns[0]->insert(ss.str());
 
     return std::make_shared<OneBlockInputStream>(sample_block.cloneWithColumns(std::move(res_columns)));
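The new AnalyzedSyntax kind makes EXPLAIN print the query after the syntax analyzer's rewrites rather than a raw AST dump. A condensed sketch of that flow, using only calls that appear in this diff and assuming the surrounding ClickHouse headers (the two trailing bools in the constructor are only_analyze and modify_inplace):

#include <sstream>

/// Sketch: run the interpreter in analyze-only mode so the rewrites happen,
/// let it modify the AST in place, then pretty-print the rewritten query.
std::string explainSyntax(const ASTPtr & select_query, const Context & context)
{
    InterpreterSelectWithUnionQuery interpreter(
        select_query, context, {}, QueryProcessingStage::FetchColumns,
        /* subquery_depth = */ 0, /* only_analyze = */ true, /* modify_inplace = */ true);

    std::stringstream ss;
    interpreter.getQuery()->format(IAST::FormatSettings(ss, false));
    return ss.str();
}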
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <Interpreters/Context.h>
 #include <Interpreters/IInterpreter.h>
 
 
@@ -14,8 +15,8 @@ using ASTPtr = std::shared_ptr<IAST>;
 class InterpreterExplainQuery : public IInterpreter
 {
 public:
-    InterpreterExplainQuery(const ASTPtr & query_, const Context &)
-        : query(query_)
+    InterpreterExplainQuery(const ASTPtr & query_, const Context & context_)
+        : query(query_), context(context_)
     {}
 
     BlockIO execute() override;
@@ -24,6 +25,7 @@ public:
 
 private:
     ASTPtr query;
+    Context context;
 
     BlockInputStreamPtr executeImpl();
 };
@@ -96,7 +96,7 @@ BlockIO InterpreterInsertQuery::execute()
     checkAccess(query);
     StoragePtr table = getTable(query);
 
-    auto table_lock = table->lockStructure(true);
+    auto table_lock = table->lockStructure(true, context.getCurrentQueryId());
 
     /// We create a pipeline of several streams, into which we will write data.
     BlockOutputStreamPtr out;
@@ -23,7 +23,7 @@ BlockIO InterpreterOptimizeQuery::execute()
         return executeDDLQueryOnCluster(query_ptr, context, {ast.database});
 
     StoragePtr table = context.getTable(ast.database, ast.table);
-    auto table_lock = table->lockStructure(true);
+    auto table_lock = table->lockStructure(true, context.getCurrentQueryId());
     table->optimize(query_ptr, ast.partition, ast.final, ast.deduplicate, context);
     return {};
 }
@@ -101,7 +101,7 @@ BlockIO InterpreterRenameQuery::execute()
 
     for (const auto & names : unique_tables_from)
         if (auto table = context.tryGetTable(names.database_name, names.table_name))
-            locks.emplace_back(table->lockForAlter());
+            locks.emplace_back(table->lockForAlter(context.getCurrentQueryId()));
 
     /** All tables are locked. If there are more than one rename in chain,
       * we need to hold global lock while doing all renames. Order matters to avoid deadlocks.
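The comment above is the classic lock-ordering rule. A small self-contained illustration of why RENAME acquires its table locks in a fixed order (hypothetical types; the real code locks IStorage objects):

#include <algorithm>
#include <mutex>
#include <string>
#include <vector>

struct NamedMutex
{
    std::string name;
    std::mutex m;
};

/// Acquire all table locks in one canonical order (here: sorted by name), so two
/// concurrent rename chains touching the same tables can never wait on each other
/// in a cycle -- the cheapest way to rule out deadlock.
void lockAllInOrder(std::vector<NamedMutex *> & tables)
{
    std::sort(tables.begin(), tables.end(),
              [](const NamedMutex * a, const NamedMutex * b) { return a->name < b->name; });
    for (auto * t : tables)
        t->m.lock();   /// release in reverse order when finished
}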
@@ -80,8 +80,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     const Names & required_result_column_names,
     QueryProcessingStage::Enum to_stage_,
     size_t subquery_depth_,
-    bool only_analyze_)
-    : InterpreterSelectQuery(query_ptr_, context_, nullptr, nullptr, required_result_column_names, to_stage_, subquery_depth_, only_analyze_)
+    bool only_analyze_,
+    bool modify_inplace)
+    : InterpreterSelectQuery(
+        query_ptr_, context_, nullptr, nullptr, required_result_column_names, to_stage_, subquery_depth_, only_analyze_, modify_inplace)
 {
 }
 
@@ -90,8 +92,9 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     const Context & context_,
     const BlockInputStreamPtr & input_,
     QueryProcessingStage::Enum to_stage_,
-    bool only_analyze_)
-    : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, Names{}, to_stage_, 0, only_analyze_)
+    bool only_analyze_,
+    bool modify_inplace)
+    : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, Names{}, to_stage_, 0, only_analyze_, modify_inplace)
 {
 }
 
@@ -100,8 +103,9 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     const Context & context_,
     const StoragePtr & storage_,
     QueryProcessingStage::Enum to_stage_,
-    bool only_analyze_)
-    : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, Names{}, to_stage_, 0, only_analyze_)
+    bool only_analyze_,
+    bool modify_inplace)
+    : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, Names{}, to_stage_, 0, only_analyze_, modify_inplace)
 {
 }
 
@@ -131,8 +135,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     const Names & required_result_column_names,
     QueryProcessingStage::Enum to_stage_,
     size_t subquery_depth_,
-    bool only_analyze_)
-    : query_ptr(query_ptr_->clone()) /// Note: the query is cloned because it will be modified during analysis.
+    bool only_analyze_,
+    bool modify_inplace)
+    /// NOTE: the query almost always should be cloned because it will be modified during analysis.
+    : query_ptr(modify_inplace ? query_ptr_ : query_ptr_->clone())
     , query(typeid_cast<ASTSelectQuery &>(*query_ptr))
     , context(context_)
     , to_stage(to_stage_)
@@ -170,7 +176,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     {
         /// Read from subquery.
         interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
-            table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze);
+            table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze, modify_inplace);
 
         source_header = interpreter_subquery->getSampleBlock();
     }
@@ -194,7 +200,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     }
 
     if (storage)
-        table_lock = storage->lockStructure(false);
+        table_lock = storage->lockStructure(false, context.getCurrentQueryId());
 
     syntax_analyzer_result = SyntaxAnalyzer(context, subquery_depth).analyze(
         query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage);
@@ -217,16 +223,23 @@ InterpreterSelectQuery::InterpreterSelectQuery(
         for (const auto & it : query_analyzer->getExternalTables())
             if (!context.tryGetExternalTable(it.first))
                 context.addExternalTable(it.first, it.second);
     }
 
+    if (!only_analyze || modify_inplace)
     {
         if (query_analyzer->isRewriteSubqueriesPredicate())
         {
            /// remake interpreter_subquery when PredicateOptimizer is rewrite subqueries and main table is subquery
            if (is_subquery)
                interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
-                    table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1,
-                    only_analyze);
+                    table_expression,
+                    getSubqueryContext(context),
+                    required_columns,
+                    QueryProcessingStage::Complete,
+                    subquery_depth + 1,
+                    only_analyze,
+                    modify_inplace);
        }
 
     }
 
     if (interpreter_subquery)
@@ -1026,6 +1039,9 @@ void InterpreterSelectQuery::executeFetchColumns(
     if (to_stage == QueryProcessingStage::Complete)
     {
         limits.min_execution_speed = settings.min_execution_speed;
+        limits.max_execution_speed = settings.max_execution_speed;
+        limits.min_execution_speed_bytes = settings.min_execution_speed_bytes;
+        limits.max_execution_speed_bytes = settings.max_execution_speed_bytes;
         limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed;
     }
 
@@ -1474,12 +1490,9 @@ void InterpreterSelectQuery::executeExtremes(Pipeline & pipeline)
 
 void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(Pipeline & pipeline, SubqueriesForSets & subqueries_for_sets)
 {
-    const Settings & settings = context.getSettingsRef();
-
     executeUnion(pipeline);
     pipeline.firstStream() = std::make_shared<CreatingSetsBlockInputStream>(
-        pipeline.firstStream(), subqueries_for_sets,
-        SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode));
+        pipeline.firstStream(), subqueries_for_sets, context);
 }
 
 void InterpreterSelectQuery::unifyStreams(Pipeline & pipeline)
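The new modify_inplace flag threads through every InterpreterSelectQuery constructor and decides whether analysis works on a defensive clone of the AST or mutates the caller's tree. A tiny sketch of the copy-on-analyze choice, with a minimal stand-in node type (hypothetical, not DB::IAST):

#include <memory>
#include <string>

struct Node
{
    std::string text;
    std::shared_ptr<Node> clone() const { return std::make_shared<Node>(*this); }
};

/// Mirrors `query_ptr(modify_inplace ? query_ptr_ : query_ptr_->clone())` above:
/// analysis rewrites the tree, so by default the interpreter clones it; EXPLAIN-style
/// callers pass modify_inplace = true because they want the rewrites to stay visible
/// on their own AST, retrievable afterwards via getQuery().
std::shared_ptr<Node> astForAnalysis(const std::shared_ptr<Node> & query, bool modify_inplace)
{
    return modify_inplace ? query : query->clone();
}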
@@ -52,7 +52,8 @@ public:
         const Names & required_result_column_names = Names{},
         QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
         size_t subquery_depth_ = 0,
-        bool only_analyze_ = false);
+        bool only_analyze_ = false,
+        bool modify_inplace = false);
 
     /// Read data not from the table specified in the query, but from the prepared source `input`.
     InterpreterSelectQuery(
@@ -60,7 +61,8 @@ public:
         const Context & context_,
         const BlockInputStreamPtr & input_,
         QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
-        bool only_analyze_ = false);
+        bool only_analyze_ = false,
+        bool modify_inplace = false);
 
     /// Read data not from the table specified in the query, but from the specified `storage_`.
     InterpreterSelectQuery(
@@ -68,7 +70,8 @@ public:
         const Context & context_,
         const StoragePtr & storage_,
         QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
-        bool only_analyze_ = false);
+        bool only_analyze_ = false,
+        bool modify_inplace = false);
 
     ~InterpreterSelectQuery() override;
 
@@ -82,6 +85,8 @@ public:
 
     void ignoreWithTotals();
 
+    ASTPtr getQuery() const { return query_ptr; }
+
 private:
     InterpreterSelectQuery(
         const ASTPtr & query_ptr_,
@@ -91,7 +96,8 @@ private:
         const Names & required_result_column_names,
         QueryProcessingStage::Enum to_stage_,
         size_t subquery_depth_,
-        bool only_analyze_);
+        bool only_analyze_,
+        bool modify_inplace);
 
 
     struct Pipeline
@@ -10,6 +10,7 @@
 #include <Columns/ColumnConst.h>
 #include <Common/typeid_cast.h>
 #include <Parsers/queryToString.h>
+#include <Parsers/ASTExpressionList.h>
 
 
 namespace DB
@@ -28,7 +29,8 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
     const Names & required_result_column_names,
     QueryProcessingStage::Enum to_stage_,
     size_t subquery_depth_,
-    bool only_analyze)
+    bool only_analyze,
+    bool modify_inplace)
     : query_ptr(query_ptr_),
     context(context_),
     to_stage(to_stage_),
@@ -81,12 +83,17 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
 
     for (size_t query_num = 0; query_num < num_selects; ++query_num)
     {
-        const Names & current_required_result_column_names = query_num == 0
-            ? required_result_column_names
-            : required_result_column_names_for_other_selects[query_num];
+        const Names & current_required_result_column_names
+            = query_num == 0 ? required_result_column_names : required_result_column_names_for_other_selects[query_num];
 
         nested_interpreters.emplace_back(std::make_unique<InterpreterSelectQuery>(
-            ast.list_of_selects->children.at(query_num), context, current_required_result_column_names, to_stage, subquery_depth, only_analyze));
+            ast.list_of_selects->children.at(query_num),
+            context,
+            current_required_result_column_names,
+            to_stage,
+            subquery_depth,
+            only_analyze,
+            modify_inplace));
     }
 
     /// Determine structure of the result.
@@ -22,7 +22,8 @@ public:
         const Names & required_result_column_names = Names{},
         QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
         size_t subquery_depth_ = 0,
-        bool only_analyze = false);
+        bool only_analyze = false,
+        bool modify_inplace = false);
 
     ~InterpreterSelectWithUnionQuery() override;
 
@@ -39,6 +40,8 @@ public:
 
     void ignoreWithTotals();
 
+    ASTPtr getQuery() const { return query_ptr; }
+
 private:
     ASTPtr query_ptr;
     Context context;
@@ -239,7 +239,7 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const String & database_nam
     table->shutdown();
 
     /// If table was already dropped by anyone, an exception will be thrown
-    auto table_lock = table->lockForAlter();
+    auto table_lock = table->lockForAlter(context.getCurrentQueryId());
     create_ast = system_context.getCreateTableQuery(database_name, table_name);
 
     database->detachTable(table_name);
@@ -300,7 +300,7 @@ void Join::setSampleBlock(const Block & block)
         if (column.get() != column_no_lc.get())
         {
             materialized_columns.emplace_back(std::move(column_no_lc));
-            key_columns[i] = materialized_columns[i].get();
+            key_columns[i] = materialized_columns.back().get();
         }
 
     /// We will join only keys, where all components are not NULL.
@@ -1317,10 +1317,10 @@ private:
 
         for (; it != end; ++it)
        {
-            if (it->second.getUsed())
+            if (it->getSecond().getUsed())
                 continue;
 
-            AdderNonJoined<STRICTNESS, typename Map::mapped_type>::add(it->second, rows_added, columns_left, columns_keys_and_right);
+            AdderNonJoined<STRICTNESS, typename Map::mapped_type>::add(it->getSecond(), rows_added, columns_left, columns_keys_and_right);
 
             if (rows_added >= max_block_size)
             {
@@ -10,6 +10,7 @@
 #include <Common/Arena.h>
 #include <Common/ColumnsHashing.h>
 #include <Common/HashTable/HashMap.h>
+#include <Common/HashTable/FixedHashMap.h>
 
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnFixedString.h>
@@ -218,8 +219,8 @@
     template <typename Mapped>
     struct MapsTemplate
     {
-        std::unique_ptr<HashMap<UInt8, Mapped, TrivialHash, HashTableFixedGrower<8>>> key8;
-        std::unique_ptr<HashMap<UInt16, Mapped, TrivialHash, HashTableFixedGrower<16>>> key16;
+        std::unique_ptr<FixedHashMap<UInt8, Mapped>> key8;
+        std::unique_ptr<FixedHashMap<UInt16, Mapped>> key16;
         std::unique_ptr<HashMap<UInt32, Mapped, HashCRC32<UInt32>>> key32;
         std::unique_ptr<HashMap<UInt64, Mapped, HashCRC32<UInt64>>> key64;
         std::unique_ptr<HashMapWithSavedHash<StringRef, Mapped>> key_string;
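For 8- and 16-bit join keys, the generic HashMap (TrivialHash plus a fixed grower) is replaced by FixedHashMap: the key space is so small that the whole table can be a flat array indexed by the key itself. A hedged sketch of the data structure (illustrative only, not the real ClickHouse template):

#include <array>
#include <cstddef>

template <typename Key, typename Mapped>
struct FixedArrayMap
{
    /// One slot per possible key value: 256 for UInt8, 65536 for UInt16.
    static constexpr size_t SIZE = size_t(1) << (sizeof(Key) * 8);

    std::array<Mapped, SIZE> values{};
    std::array<bool, SIZE> occupied{};

    /// No hashing, no probing, no resizes: emplace is a single indexed store.
    Mapped * emplace(Key key, bool & inserted)
    {
        inserted = !occupied[key];
        occupied[key] = true;
        return &values[key];
    }

    Mapped * find(Key key) { return occupied[key] ? &values[key] : nullptr; }
};

Lookup degenerates to one indexed load, which is exactly what the new hash_map_lookup benchmark further down compares against the old scheme.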
@@ -394,11 +394,7 @@ BlockInputStreamPtr MutationsInterpreter::addStreamsForLaterStages(BlockInputStr
 
         const SubqueriesForSets & subqueries_for_sets = stage.analyzer->getSubqueriesForSets();
         if (!subqueries_for_sets.empty())
-        {
-            const auto & settings = context.getSettingsRef();
-            in = std::make_shared<CreatingSetsBlockInputStream>(in, subqueries_for_sets,
-                SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode));
-        }
+            in = std::make_shared<CreatingSetsBlockInputStream>(in, subqueries_for_sets, context);
     }
 
     in = std::make_shared<MaterializingBlockInputStream>(in);
@@ -59,17 +59,17 @@ bool PredicateExpressionsOptimizer::optimize()
         is_rewrite_subqueries |= optimizeImpl(ast_select->where_expression, all_subquery_projection_columns, OptimizeKind::PUSH_TO_WHERE);
         is_rewrite_subqueries |= optimizeImpl(ast_select->prewhere_expression, all_subquery_projection_columns, OptimizeKind::PUSH_TO_PREWHERE);
     }
 
     return is_rewrite_subqueries;
 }
 
 bool PredicateExpressionsOptimizer::optimizeImpl(
-    ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind expression_kind)
+    ASTPtr & outer_expression, const SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind expression_kind)
 {
     /// split predicate with `and`
     std::vector<ASTPtr> outer_predicate_expressions = splitConjunctionPredicate(outer_expression);
 
-    std::vector<DatabaseAndTableWithAlias> database_and_table_with_aliases =
-        getDatabaseAndTables(*ast_select, context.getCurrentDatabase());
+    std::vector<TableWithColumnNames> tables_with_columns = getDatabaseAndTablesWithColumnNames(*ast_select, context);
 
     bool is_rewrite_subquery = false;
     for (auto & outer_predicate : outer_predicate_expressions)
@@ -77,7 +77,7 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
         if (isArrayJoinFunction(outer_predicate))
             continue;
 
-        auto outer_predicate_dependencies = getDependenciesAndQualifiers(outer_predicate, database_and_table_with_aliases);
+        auto outer_predicate_dependencies = getDependenciesAndQualifiers(outer_predicate, tables_with_columns);
 
         /// TODO: remove origin expression
         for (const auto & [subquery, projection_columns] : subqueries_projection_columns)
@@ -92,7 +92,7 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
             cleanExpressionAlias(inner_predicate); /// clears the alias name contained in the outer predicate
 
             std::vector<IdentifierWithQualifier> inner_predicate_dependencies =
-                getDependenciesAndQualifiers(inner_predicate, database_and_table_with_aliases);
+                getDependenciesAndQualifiers(inner_predicate, tables_with_columns);
 
             setNewAliasesForInnerPredicate(projection_columns, inner_predicate_dependencies);
 
@@ -169,7 +169,7 @@ std::vector<ASTPtr> PredicateExpressionsOptimizer::splitConjunctionPredicate(AST
 }
 
 std::vector<PredicateExpressionsOptimizer::IdentifierWithQualifier>
-PredicateExpressionsOptimizer::getDependenciesAndQualifiers(ASTPtr & expression, std::vector<DatabaseAndTableWithAlias> & tables)
+PredicateExpressionsOptimizer::getDependenciesAndQualifiers(ASTPtr & expression, std::vector<TableWithColumnNames> & tables)
 {
     FindIdentifierBestTableVisitor::Data find_data(tables);
     FindIdentifierBestTableVisitor(find_data).visit(expression);
@@ -2,6 +2,8 @@
 
 #include <Interpreters/DatabaseAndTableWithAlias.h>
 
+#include <map>
+
 namespace DB
 {
 
@@ -70,11 +72,11 @@ private:
     std::vector<ASTPtr> splitConjunctionPredicate(ASTPtr & predicate_expression);
 
     std::vector<IdentifierWithQualifier> getDependenciesAndQualifiers(ASTPtr & expression,
-        std::vector<DatabaseAndTableWithAlias> & tables_with_aliases);
+        std::vector<TableWithColumnNames> & tables_with_aliases);
 
     bool optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery);
 
-    bool optimizeImpl(ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind);
+    bool optimizeImpl(ASTPtr & outer_expression, const SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind);
 
     bool allowPushDown(const ASTSelectQuery * subquery);
 
@@ -8,6 +8,8 @@
 #include <Common/Arena.h>
 #include <Common/HashTable/HashSet.h>
 #include <Common/HashTable/ClearableHashSet.h>
+#include <Common/HashTable/FixedClearableHashSet.h>
+#include <Common/HashTable/FixedHashSet.h>
 #include <Common/UInt128.h>
 
 
@@ -182,9 +184,8 @@ struct SetMethodHashed
   */
 struct NonClearableSet
 {
-    /// TODO Use either bit- or byte-set for these two options.
-    std::unique_ptr<SetMethodOneNumber<UInt8, HashSet<UInt8, TrivialHash, HashTableFixedGrower<8>>>> key8;
-    std::unique_ptr<SetMethodOneNumber<UInt16, HashSet<UInt16, TrivialHash, HashTableFixedGrower<16>>>> key16;
+    std::unique_ptr<SetMethodOneNumber<UInt8, FixedHashSet<UInt8>>> key8;
+    std::unique_ptr<SetMethodOneNumber<UInt16, FixedHashSet<UInt16>>> key16;
 
     /** Also for the experiment was tested the ability to use SmallSet,
       * as long as the number of elements in the set is small (and, if necessary, converted to a full-fledged HashSet).
@@ -209,9 +210,8 @@ struct NonClearableSet
 
 struct ClearableSet
 {
-    /// TODO Use either bit- or byte-set for these two options.
-    std::unique_ptr<SetMethodOneNumber<UInt8, ClearableHashSet<UInt8, TrivialHash, HashTableFixedGrower<8>>>> key8;
-    std::unique_ptr<SetMethodOneNumber<UInt16, ClearableHashSet<UInt16, TrivialHash, HashTableFixedGrower<16>>>> key16;
+    std::unique_ptr<SetMethodOneNumber<UInt8, FixedClearableHashSet<UInt8>>> key8;
+    std::unique_ptr<SetMethodOneNumber<UInt16, FixedClearableHashSet<UInt16>>> key16;
 
     std::unique_ptr<SetMethodOneNumber<UInt32, ClearableHashSet<UInt32, HashCRC32<UInt32>>>> key32;
     std::unique_ptr<SetMethodOneNumber<UInt64, ClearableHashSet<UInt64, HashCRC32<UInt64>>>> key64;
@@ -233,7 +233,10 @@ struct Settings
     M(SettingSeconds, max_execution_time, 0, "") \
     M(SettingOverflowMode<false>, timeout_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.") \
     \
-    M(SettingUInt64, min_execution_speed, 0, "In rows per second.") \
+    M(SettingUInt64, min_execution_speed, 0, "Minimum number of execution rows per second.") \
+    M(SettingUInt64, max_execution_speed, 0, "Maximum number of execution rows per second.") \
+    M(SettingUInt64, min_execution_speed_bytes, 0, "Minimum number of execution bytes per second.") \
+    M(SettingUInt64, max_execution_speed_bytes, 0, "Maximum number of execution bytes per second.") \
     M(SettingSeconds, timeout_before_checking_execution_speed, 0, "Check that the speed is not too low after the specified time has elapsed.") \
     \
     M(SettingUInt64, max_columns_to_read, 0, "") \
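Four of these settings form a min/max band for query throughput, checked only once timeout_before_checking_execution_speed has elapsed. A hedged sketch of how such limits could be enforced (illustrative only; the real checks live in the execution-limits code, which is not part of this diff):

#include <chrono>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <thread>

struct SpeedLimits
{
    uint64_t min_rows_per_sec = 0;   /// min_execution_speed
    uint64_t max_rows_per_sec = 0;   /// max_execution_speed
    double grace_period_sec = 0;     /// timeout_before_checking_execution_speed

    void check(uint64_t rows_read, double elapsed_sec) const
    {
        if (elapsed_sec <= grace_period_sec)
            return;   /// too early to judge the speed

        const double speed = rows_read / elapsed_sec;
        if (min_rows_per_sec && speed < min_rows_per_sec)
            throw std::runtime_error("query is too slow: " + std::to_string(speed) + " rows/sec");

        if (max_rows_per_sec && speed > max_rows_per_sec)
        {
            /// Throttle: sleep until the average speed falls back under the cap.
            const double desired_elapsed = double(rows_read) / max_rows_per_sec;
            std::this_thread::sleep_for(std::chrono::duration<double>(desired_elapsed - elapsed_sec));
        }
    }
};

The *_bytes variants apply the same band to bytes per second instead of rows.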
@@ -13,23 +13,26 @@
 #include <Interpreters/ExternalDictionaries.h>
 #include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
 
-#include <Parsers/ASTSelectQuery.h>
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTOrderByElement.h>
-#include <Parsers/ASTTablesInSelectQuery.h>
 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTFunction.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTOrderByElement.h>
+#include <Parsers/ASTSelectQuery.h>
+#include <Parsers/ASTTablesInSelectQuery.h>
+#include <Parsers/ParserTablesInSelectQuery.h>
+#include <Parsers/parseQuery.h>
 #include <Parsers/queryToString.h>
 
 #include <DataTypes/NestedUtils.h>
 
-#include <Common/typeid_cast.h>
 #include <Core/NamesAndTypes.h>
-#include <Storages/IStorage.h>
 #include <IO/WriteHelpers.h>
+#include <Storages/IStorage.h>
+#include <Common/typeid_cast.h>
 
 #include <functional>
 
 
 namespace DB
 {
 
@@ -107,7 +110,6 @@ void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query
     visitor.visit(query);
 }
 
-
 bool hasArrayJoin(const ASTPtr & ast)
 {
     if (const ASTFunction * function = typeid_cast<const ASTFunction *>(&*ast))
@@ -591,8 +593,30 @@ Names qualifyOccupiedNames(NamesAndTypesList & columns, const NameSet & source_c
     return originals;
 }
 
+void replaceJoinedTable(const ASTTablesInSelectQueryElement * join)
+{
+    if (!join || !join->table_expression)
+        return;
+
+    auto & table_expr = static_cast<ASTTableExpression &>(*join->table_expression.get());
+    if (table_expr.database_and_table_name)
+    {
+        auto & table_id = typeid_cast<ASTIdentifier &>(*table_expr.database_and_table_name.get());
+        String expr = "(select * from " + table_id.name + ") as " + table_id.shortName();
+
+        // FIXME: since the expression "a as b" exposes both "a" and "b" names, which is not equivalent to "(select * from a) as b",
+        //        we can't replace aliased tables.
+        // FIXME: long table names include database name, which we can't save within alias.
+        if (table_id.alias.empty() && table_id.isShort())
+        {
+            ParserTableExpression parser;
+            table_expr = static_cast<ASTTableExpression &>(*parseQuery(parser, expr, 0));
+        }
+    }
+}
+
 } // namespace
 
 
 SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
     ASTPtr & query,
@@ -628,6 +652,8 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
     {
         if (const ASTTablesInSelectQueryElement * node = select_query->join())
         {
+            replaceJoinedTable(node);
+
             const auto & joined_expression = static_cast<const ASTTableExpression &>(*node->table_expression);
             DatabaseAndTableWithAlias table(joined_expression, context.getCurrentDatabase());
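replaceJoinedTable() rewrites a plain right-hand table of a JOIN into an equivalent subquery carrying the same alias. A small illustration of the rewrite as it affects the query text (simplified to strings; the real code rewrites the AST via ParserTableExpression):

#include <iostream>
#include <string>

std::string rewriteJoinedTable(const std::string & table_name)
{
    return "(select * from " + table_name + ") as " + table_name;
}

int main()
{
    /// SELECT ... FROM t1 JOIN t2 ON ...  is analyzed as if it were
    /// SELECT ... FROM t1 JOIN (select * from t2) as t2 ON ...
    std::cout << rewriteJoinedTable("t2") << '\n';
}

Per the FIXMEs in the function, aliased or database-qualified tables are left untouched, because for those the wrapped form would not be equivalent.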
@@ -14,6 +14,10 @@ add_executable (hash_map hash_map.cpp)
 target_include_directories (hash_map SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
 target_link_libraries (hash_map PRIVATE dbms clickhouse_compression)
 
+add_executable (hash_map_lookup hash_map_lookup.cpp)
+target_include_directories (hash_map_lookup SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
+target_link_libraries (hash_map_lookup PRIVATE dbms clickhouse_compression)
+
 add_executable (hash_map3 hash_map3.cpp)
 target_include_directories(hash_map3 SYSTEM BEFORE PRIVATE ${METROHASH_INCLUDE_DIR})
 target_link_libraries (hash_map3 PRIVATE dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES})
@@ -162,8 +162,8 @@ int main(int argc, char ** argv)
             map.emplace(data[i], it, inserted);
             if (inserted)
             {
-                new(&it->second) Value;
-                std::swap(it->second, value);
+                new(&it->getSecond()) Value;
+                std::swap(it->getSecond(), value);
                 INIT
             }
         }
@@ -193,8 +193,8 @@ int main(int argc, char ** argv)
             map.emplace(data[i], it, inserted);
             if (inserted)
             {
-                new(&it->second) Value;
-                std::swap(it->second, value);
+                new(&it->getSecond()) Value;
+                std::swap(it->getSecond(), value);
                 INIT
             }
         }
@@ -225,8 +225,8 @@ int main(int argc, char ** argv)
             map.emplace(data[i], it, inserted);
             if (inserted)
             {
-                new(&it->second) Value;
-                std::swap(it->second, value);
+                new(&it->getSecond()) Value;
+                std::swap(it->getSecond(), value);
                 INIT
             }
         }
@@ -38,7 +38,7 @@ public:
             if (this->buf[i].isZero(*this))
                 std::cerr << "[ ]";
             else
-                std::cerr << '[' << this->buf[i].getValue().first.data << ", " << this->buf[i].getValue().second << ']';
+                std::cerr << '[' << this->buf[i].getValue().getFirst().data << ", " << this->buf[i].getValue().getSecond() << ']';
         }
         std::cerr << std::endl;
     }
@@ -85,7 +85,7 @@ int main(int, char **)
     std::cerr << "Collisions: " << map.getCollisions() << std::endl;
 
     for (auto x : map)
-        std::cerr << x.first.toString() << " -> " << x.second << std::endl;
+        std::cerr << x.getFirst().toString() << " -> " << x.getSecond() << std::endl;
 
     return 0;
 }
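The mechanical it->second to it->getSecond() churn across these tests reflects a hash-table cell API change: iterators now hand out accessor methods instead of a raw std::pair, which frees each table to choose its own cell layout. A hedged sketch of the indirection (hypothetical cell types, not the real ClickHouse ones):

#include <utility>

/// A classic cell stores the pair and forwards to it...
template <typename Key, typename Mapped>
struct PairCell
{
    std::pair<Key, Mapped> value;
    const Key & getFirst() const { return value.first; }
    Mapped & getSecond() { return value.second; }
};

/// ...while a cell in a fixed (direct-addressed) table can store no key at all:
/// the key is implied by the cell's position, so only the mapped value remains.
template <typename Mapped>
struct KeylessCell
{
    Mapped mapped;
    Mapped & getSecond() { return mapped; }
};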
dbms/src/Interpreters/tests/hash_map_lookup.cpp (new file, 124 lines)
@@ -0,0 +1,124 @@
+#include <iomanip>
+#include <iostream>
+#include <vector>
+
+#include <Common/Stopwatch.h>
+
+#define DBMS_HASH_MAP_COUNT_COLLISIONS
+#define DBMS_HASH_MAP_DEBUG_RESIZES
+
+#include <Compression/CompressedReadBuffer.h>
+#include <Core/Types.h>
+#include <IO/ReadBufferFromFile.h>
+#include <IO/ReadHelpers.h>
+#include <Interpreters/AggregationCommon.h>
+#include <Common/HashTable/FixedHashMap.h>
+#include <Common/HashTable/HashMap.h>
+
+/** Do this:
+for file in ResolutionWidth ResolutionDepth; do
+    for size in 30000 100000 300000 1000000 5000000; do
+        echo
+        BEST_METHOD=0
+        BEST_RESULT=0
+        for method in {1..2}; do
+            echo -ne $file $size $method '';
+            TOTAL_ELEMS=0
+            for i in {0..1000}; do
+                TOTAL_ELEMS=$(( $TOTAL_ELEMS + $size ))
+                if [[ $TOTAL_ELEMS -gt 25000000 ]]; then break; fi
+                ./hash_map_lookup $size $method < ${file}.bin 2>&1 |
+                    grep HashMap | grep -oE '[0-9\.]+ elem';
+            done | awk -W interactive '{ if ($1 > x) { x = $1 }; printf(".") } END { print x }' | tee /tmp/hash_map_lookup_res;
+            CUR_RESULT=$(cat /tmp/hash_map_lookup_res | tr -d '.')
+            if [[ $CUR_RESULT -gt $BEST_RESULT ]]; then
+                BEST_METHOD=$method
+                BEST_RESULT=$CUR_RESULT
+            fi;
+        done;
+        echo Best: $BEST_METHOD - $BEST_RESULT
+    done;
+done
+*/
+
+template <typename Map>
+void NO_INLINE bench(const std::vector<UInt16> & data, const char * name)
+{
+    Map map;
+    typename Map::iterator it;
+    bool inserted;
+
+    Stopwatch watch;
+    for (size_t i = 0, size = data.size(); i < size; ++i)
+    {
+        map.emplace(data[i], it, inserted);
+        if (inserted)
+            it->getSecond() = 1;
+        else
+            ++it->getSecond();
+    }
+
+    for (size_t i = 0, size = data.size(); i < size; ++i)
+    {
+        it = map.find(data[i]);
+        ++it->getSecond();
+    }
+    watch.stop();
+    std::cerr << std::fixed << std::setprecision(2) << "HashMap (" << name << "). Size: " << map.size()
+        << ", elapsed: " << watch.elapsedSeconds() << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.)"
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+        << ", collisions: " << map.getCollisions()
+#endif
+        << std::endl;
+}
+
+template <typename Map>
+void insert(Map & map, StringRef & k)
+{
+    bool inserted;
+    typename Map::iterator it;
+    map.emplace(k, it, inserted, nullptr);
+    if (inserted)
+        *it = 1;
+    else
+        ++*it;
+    std::cout << *map.find(k) << std::endl;
+}
+
+int main(int argc, char ** argv)
+{
+    if (argc < 3)
+    {
+        std::cerr << "Usage: program n m\n";
+        return 1;
+    }
+
+    size_t n = atoi(argv[1]);
+    size_t m = atoi(argv[2]);
+
+    std::vector<UInt16> data(n);
+
+    {
+        Stopwatch watch;
+        DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
+        DB::CompressedReadBuffer in2(in1);
+        for (size_t i = 0; i < n && !in2.eof(); ++i)
+        {
+            DB::readBinary(data[i], in2);
+        }
+
+        watch.stop();
+        std::cerr << std::fixed << std::setprecision(2) << "Vector. Size: " << n << ", elapsed: " << watch.elapsedSeconds() << " ("
+            << n / watch.elapsedSeconds() << " elem/sec.)" << std::endl;
+    }
+
+    using OldLookup = HashMap<UInt16, UInt8, TrivialHash, HashTableFixedGrower<16>>;
+    using NewLookup = FixedHashMap<UInt16, UInt8>;
+
+    if (!m || m == 1)
+        bench<OldLookup>(data, "Old Lookup");
+    if (!m || m == 2)
+        bench<NewLookup>(data, "New Lookup");
+    return 0;
+}
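Usage follows from main(): the first argument is the number of UInt16 values to read from the compressed stdin stream, and the second selects the method, 1 for the old TrivialHash-based HashMap, 2 for the new FixedHashMap, 0 for both. The driver script in the file comment sweeps input sizes, repeats each run, and keeps the best elem/sec figure per configuration.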
@@ -337,8 +337,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -366,8 +366,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -396,8 +396,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -426,8 +426,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -595,8 +595,8 @@ void NO_INLINE bench(const std::vector<StringRef> & data, const char * name)
    {
        map.emplace(static_cast<const Key &>(data[i]), it, inserted);
        if (inserted)
-            it->second = 0;
-        ++it->second;
+            it->getSecond() = 0;
+        ++it->getSecond();
    }
 
    watch.stop();
@@ -442,8 +442,8 @@ void NO_INLINE bench(const std::vector<StringRef> & data, const char * name)
    {
        map.emplace(static_cast<const Key &>(data[i]), it, inserted);
        if (inserted)
-            it->second = 0;
-        ++it->second;
+            it->getSecond() = 0;
+        ++it->getSecond();
    }
 
    watch.stop();
@@ -144,8 +144,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -173,8 +173,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(SmallStringRef(data[i].data, data[i].size), it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -67,8 +67,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -82,7 +82,7 @@ int main(int argc, char ** argv)
        size_t elems = 0;
        for (const auto & kv : map)
        {
-            sum_counts += kv.second;
+            sum_counts += kv.getSecond();
            ++elems;
        }
 
@@ -103,8 +103,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(i, it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -118,11 +118,11 @@ int main(int argc, char ** argv)
        size_t elems = 0;
        for (const auto & kv : map)
        {
-            sum_counts += kv.second;
+            sum_counts += kv.getSecond();
            ++elems;
 
-            if (kv.first > n)
-                std::cerr << kv.first << std::endl;
+            if (kv.getFirst() > n)
+                std::cerr << kv.getFirst() << std::endl;
        }
 
        std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl;
@@ -14,13 +14,15 @@ public:
    enum ExplainKind
    {
        ParsedAST,
+        AnalyzedSyntax,
    };
 
-    ASTExplainQuery(ExplainKind kind_ = ParsedAST)
+    ASTExplainQuery(ExplainKind kind_)
        : kind(kind_)
    {}
 
    String getID(char delim) const override { return "Explain" + (delim + toString(kind)); }
+    ExplainKind getKind() const { return kind; }
    ASTPtr clone() const override { return std::make_shared<ASTExplainQuery>(*this); }
 
 protected:
@@ -37,7 +39,9 @@ private:
        switch (kind)
        {
            case ParsedAST: return "ParsedAST";
+            case AnalyzedSyntax: return "AnalyzedSyntax";
        }
 
        __builtin_unreachable();
    }
 };
Some files were not shown because too many files have changed in this diff.