diff --git a/src/Common/CurrentThread.cpp b/src/Common/CurrentThread.cpp
index b54cf3b9371..188e78fe69b 100644
--- a/src/Common/CurrentThread.cpp
+++ b/src/Common/CurrentThread.cpp
@@ -110,23 +110,4 @@ ThreadGroupStatusPtr CurrentThread::getGroup()
     return current_thread->getThreadGroup();
 }
 
-MemoryTracker * CurrentThread::getUserMemoryTracker()
-{
-    if (unlikely(!current_thread))
-        return nullptr;
-
-    auto * tracker = current_thread->memory_tracker.getParent();
-    while (tracker && tracker->level != VariableContext::User)
-        tracker = tracker->getParent();
-
-    return tracker;
-}
-
-void CurrentThread::flushUntrackedMemory()
-{
-    if (unlikely(!current_thread))
-        return;
-    current_thread->flushUntrackedMemory();
-}
-
 }
diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h
index ffc00c77504..f4975e800ca 100644
--- a/src/Common/CurrentThread.h
+++ b/src/Common/CurrentThread.h
@@ -40,12 +40,6 @@ public:
     /// Group to which belongs current thread
     static ThreadGroupStatusPtr getGroup();
 
-    /// MemoryTracker for user that owns current thread if any
-    static MemoryTracker * getUserMemoryTracker();
-
-    /// Adjust counters in MemoryTracker hierarchy if untracked_memory is not 0.
-    static void flushUntrackedMemory();
-
     /// A logs queue used by TCPHandler to pass logs to a client
     static void attachInternalTextLogsQueue(const std::shared_ptr<InternalTextLogsQueue> & logs_queue,
                                             LogsLevel client_logs_level);
diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp
index 78b173de6dc..590cbc9ba83 100644
--- a/src/Interpreters/AsynchronousInsertQueue.cpp
+++ b/src/Interpreters/AsynchronousInsertQueue.cpp
@@ -18,7 +18,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -104,10 +103,9 @@ bool AsynchronousInsertQueue::InsertQuery::operator==(const InsertQuery & other)
     return query_str == other.query_str && settings == other.settings;
 }
 
-AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_)
+AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_)
     : bytes(std::move(bytes_))
     , query_id(std::move(query_id_))
-    , user_memory_tracker(user_memory_tracker_)
     , create_time(std::chrono::system_clock::now())
 {
 }
@@ -236,7 +234,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
     if (auto quota = query_context->getQuota())
         quota->used(QuotaType::WRITTEN_BYTES, bytes.size());
 
-    auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId(), CurrentThread::getUserMemoryTracker());
+    auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId());
 
     InsertQuery key{query, settings};
     InsertDataPtr data_to_process;
diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h
index e6b7bff8d26..23a2860364d 100644
--- a/src/Interpreters/AsynchronousInsertQueue.h
+++ b/src/Interpreters/AsynchronousInsertQueue.h
@@ -1,7 +1,6 @@
 #pragma once
 
 #include
-#include
 #include
 #include
 #include
@@ -60,31 +59,6 @@ private:
         UInt128 calculateHash() const;
     };
 
-    struct UserMemoryTrackerSwitcher
-    {
-        explicit UserMemoryTrackerSwitcher(MemoryTracker * new_tracker)
-        {
-            auto * thread_tracker = CurrentThread::getMemoryTracker();
-            prev_untracked_memory = current_thread->untracked_memory;
-            prev_memory_tracker_parent = thread_tracker->getParent();
-
-            current_thread->untracked_memory = 0;
-            thread_tracker->setParent(new_tracker);
-        }
-
-        ~UserMemoryTrackerSwitcher()
-        {
-            CurrentThread::flushUntrackedMemory();
-            auto * thread_tracker = CurrentThread::getMemoryTracker();
-
-            current_thread->untracked_memory = prev_untracked_memory;
-            thread_tracker->setParent(prev_memory_tracker_parent);
-        }
-
-        MemoryTracker * prev_memory_tracker_parent;
-        Int64 prev_untracked_memory;
-    };
-
     struct InsertData
     {
         struct Entry
@@ -92,10 +66,9 @@ private:
         public:
             const String bytes;
            const String query_id;
-            MemoryTracker * const user_memory_tracker;
            const std::chrono::time_point<std::chrono::system_clock> create_time;
 
-            Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_);
+            Entry(String && bytes_, String && query_id_);
 
            void finish(std::exception_ptr exception_ = nullptr);
            std::future<void> getFuture() { return promise.get_future(); }
@@ -106,19 +79,6 @@ private:
            std::atomic_bool finished = false;
        };
 
-        ~InsertData()
-        {
-            auto it = entries.begin();
-            // Entries must be destroyed in context of user who runs async insert.
-            // Each entry in the list may correspond to a different user,
-            // so we need to switch current thread's MemoryTracker parent on each iteration.
-            while (it != entries.end())
-            {
-                UserMemoryTrackerSwitcher switcher((*it)->user_memory_tracker);
-                it = entries.erase(it);
-            }
-        }
-
        using EntryPtr = std::shared_ptr<Entry>;
 
        std::list<EntryPtr> entries;
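For context on what is being reverted here: UserMemoryTrackerSwitcher was a plain RAII save/restore guard around the current thread's tracker parent and untracked-memory counter. A self-contained sketch of the same pattern, using stand-in types rather than the real ClickHouse classes:

    #include <cstdint>

    /// Stand-in for DB::MemoryTracker: only the parent link matters for the pattern.
    struct MemoryTracker
    {
        MemoryTracker * parent = nullptr;
        MemoryTracker * getParent() const { return parent; }
        void setParent(MemoryTracker * parent_) { parent = parent_; }
    };

    /// RAII guard: repoint a thread's tracker at another user's tracker in the
    /// constructor, restore the previous parent and counter on scope exit.
    class UserMemoryTrackerSwitcher
    {
    public:
        UserMemoryTrackerSwitcher(MemoryTracker & thread_tracker_, int64_t & untracked_memory_, MemoryTracker * new_parent)
            : thread_tracker(thread_tracker_)
            , untracked_memory(untracked_memory_)
            , prev_parent(thread_tracker_.getParent())
            , prev_untracked_memory(untracked_memory_)
        {
            untracked_memory = 0;
            thread_tracker.setParent(new_parent);
        }

        ~UserMemoryTrackerSwitcher()
        {
            /// The removed code additionally flushed untracked memory into the
            /// tracker hierarchy at this point, before restoring.
            untracked_memory = prev_untracked_memory;
            thread_tracker.setParent(prev_parent);
        }

    private:
        MemoryTracker & thread_tracker;
        int64_t & untracked_memory;
        MemoryTracker * prev_parent;
        int64_t prev_untracked_memory;
    };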
diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp
index b2172a07e91..3c225522cc4 100644
--- a/src/Interpreters/InterpreterExplainQuery.cpp
+++ b/src/Interpreters/InterpreterExplainQuery.cpp
@@ -504,7 +504,10 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
                 auto pipe = QueryPipelineBuilder::getPipe(std::move(*pipeline), resources);
                 const auto & processors = pipe.getProcessors();
 
-                printPipeline(processors, buf);
+                if (settings.compact)
+                    printPipelineCompact(processors, buf, settings.query_pipeline_options.header);
+                else
+                    printPipeline(processors, buf);
             }
             else
             {
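For orientation: in EXPLAIN PIPELINE, the `graph` setting switches to DOT output, `compact` (on by default) selects the grouped rendering added by this change, and `header` feeds `query_pipeline_options.header`. A quick SQL illustration (queries arbitrary):

    EXPLAIN PIPELINE graph = 1 SELECT count() FROM numbers(10);               -- compact DOT (compact defaults to 1)
    EXPLAIN PIPELINE graph = 1, compact = 0 SELECT count() FROM numbers(10);  -- one node per processor
    EXPLAIN PIPELINE graph = 1, header = 1 SELECT count() FROM numbers(10);   -- annotate edges with block headers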
diff --git a/src/Processors/QueryPlan/ISourceStep.cpp b/src/Processors/QueryPlan/ISourceStep.cpp
index 0644d9b44eb..37f56bc7a43 100644
--- a/src/Processors/QueryPlan/ISourceStep.cpp
+++ b/src/Processors/QueryPlan/ISourceStep.cpp
@@ -12,10 +12,19 @@ ISourceStep::ISourceStep(DataStream output_stream_)
 QueryPipelineBuilderPtr ISourceStep::updatePipeline(QueryPipelineBuilders, const BuildQueryPipelineSettings & settings)
 {
     auto pipeline = std::make_unique<QueryPipelineBuilder>();
-    QueryPipelineProcessorsCollector collector(*pipeline, this);
+
+    /// A `Source` step does not add new processors to `pipeline->pipe` inside
+    /// `initializePipeline`; it assigns a freshly created Pipe instead, and the
+    /// step's processors are added together with that Pipe. So there is no need
+    /// for a `QueryPipelineProcessorsCollector` to collect them.
     initializePipeline(*pipeline, settings);
-    auto added_processors = collector.detachProcessors();
-    processors.insert(processors.end(), added_processors.begin(), added_processors.end());
+
+    /// But the QueryPlanStep still has to be set on the processors manually,
+    /// since `EXPLAIN PIPELINE` relies on it.
+    for (auto & processor : processors)
+    {
+        processor->setQueryPlanStep(this);
+    }
     return pipeline;
 }
 
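To make the comment concrete: a source step's initializePipeline replaces the builder's pipe wholesale rather than appending to it, so a collector attached to the builder would observe nothing. ClickHouse's own ReadNothingStep is about the simplest step of this shape; the sketch below is paraphrased from memory, not copied from the tree, so treat exact names and signatures as approximate:

    class ReadNothingStep : public ISourceStep
    {
    public:
        explicit ReadNothingStep(Block output_header)
            : ISourceStep(DataStream{.header = std::move(output_header)})
        {
        }

        String getName() const override { return "ReadNothing"; }

        void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override
        {
            /// A brand-new Pipe is assigned here; nothing is appended to an existing
            /// pipe, which is why updatePipeline() must tag `processors` with the step.
            pipeline.init(Pipe(std::make_shared<NullSource>(getOutputStream().header)));
        }
    };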
diff --git a/src/QueryPipeline/printPipeline.cpp b/src/QueryPipeline/printPipeline.cpp
new file mode 100644
index 00000000000..40c88502ed0
--- /dev/null
+++ b/src/QueryPipeline/printPipeline.cpp
@@ -0,0 +1,177 @@
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+void printPipelineCompact(const Processors & processors, WriteBuffer & out, bool with_header)
+{
+    struct Node;
+
+    /// Group by processor name, QueryPlanStep and group within the step.
+    struct Key
+    {
+        size_t group;
+        IQueryPlanStep * step;
+        std::string name;
+
+        auto getTuple() const { return std::forward_as_tuple(group, step, name); }
+
+        bool operator<(const Key & other) const
+        {
+            return getTuple() < other.getTuple();
+        }
+    };
+
+    /// Group ports by header.
+    struct EdgeData
+    {
+        Block header;
+        size_t count;
+    };
+
+    using Edge = std::vector<EdgeData>;
+
+    struct Node
+    {
+        size_t id = 0;
+        std::map<Node *, Edge> edges = {};
+        std::vector<const IProcessor *> agents = {};
+    };
+
+    std::map<Key, Node> graph;
+
+    auto get_key = [](const IProcessor & processor)
+    {
+        return Key{processor.getQueryPlanStepGroup(), processor.getQueryPlanStep(), processor.getName()};
+    };
+
+    /// Fill nodes.
+    for (const auto & processor : processors)
+    {
+        auto res = graph.emplace(get_key(*processor), Node());
+        auto & node = res.first->second;
+        node.agents.emplace_back(processor.get());
+
+        if (res.second)
+            node.id = graph.size();
+    }
+
+    Block empty_header;
+
+    /// Fill edges.
+    for (const auto & processor : processors)
+    {
+        auto & from = graph[get_key(*processor)];
+
+        for (auto & port : processor->getOutputs())
+        {
+            if (!port.isConnected())
+                continue;
+
+            auto & to = graph[get_key(port.getInputPort().getProcessor())];
+            auto & edge = from.edges[&to];
+
+            /// Use an empty header for every edge if with_header is false.
+            const auto & header = with_header ? port.getHeader()
+                                              : empty_header;
+
+            /// Group by header.
+            bool found = false;
+            for (auto & item : edge)
+            {
+                if (blocksHaveEqualStructure(header, item.header))
+                {
+                    found = true;
+                    ++item.count;
+                    break;
+                }
+            }
+
+            if (!found)
+                edge.emplace_back(EdgeData{header, 1});
+        }
+    }
+
+    /// Group processors by their QueryPlanStep.
+    std::map<IQueryPlanStep *, std::vector<const Node *>> steps_map;
+
+    for (const auto & item : graph)
+        steps_map[item.first.step].emplace_back(&item.second);
+
+    out << "digraph\n{\n";
+    out << "  rankdir=\"LR\";\n";
+    out << "  { node [shape = rect]\n";
+
+    /// Nodes // TODO quoting and escaping
+    size_t next_step = 0;
+    for (const auto & item : steps_map)
+    {
+        /// Use a separate cluster for each step.
+        if (item.first != nullptr)
+        {
+            out << "    subgraph cluster_" << next_step << " {\n";
+            out << "      label =\"" << item.first->getName() << "\";\n";
+            out << "      style=filled;\n";
+            out << "      color=lightgrey;\n";
+            out << "      node [style=filled,color=white];\n";
+            out << "      { rank = same;\n";
+
+            ++next_step;
+        }
+
+        for (const auto & node : item.second)
+        {
+            const auto & processor = node->agents.front();
+            out << "        n" << node->id << " [label=\"" << processor->getName();
+
+            if (node->agents.size() > 1)
+                out << " × " << node->agents.size();
+
+            const auto & description = processor->getDescription();
+            if (!description.empty())
+                out << ' ' << description;
+
+            out << "\"];\n";
+        }
+
+        if (item.first != nullptr)
+        {
+            out << "      }\n";
+            out << "    }\n";
+        }
+    }
+
+    out << "  }\n";
+
+    /// Edges
+    for (const auto & item : graph)
+    {
+        for (const auto & edge : item.second.edges)
+        {
+            for (const auto & data : edge.second)
+            {
+                out << "  n" << item.second.id << " -> " << "n" << edge.first->id << " [label=\"";
+
+                if (data.count > 1)
+                    out << "× " << data.count;
+
+                if (with_header)
+                {
+                    for (const auto & elem : data.header)
+                    {
+                        out << "\\n";
+                        elem.dumpStructure(out);
+                    }
+                }
+
+                out << "\"];\n";
+            }
+        }
+    }
+    out << "}\n";
+}
+
+}
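Traced through the out << statements above, a trivial two-processor pipeline comes out as DOT shaped roughly like this (processor and step names are illustrative, not captured from a real server):

    digraph
    {
      rankdir="LR";
      { node [shape = rect]
            n2 [label="LimitsCheckingTransform"];
        subgraph cluster_0 {
          label ="ReadFromStorage";
          style=filled;
          color=lightgrey;
          node [style=filled,color=white];
          { rank = same;
            n1 [label="SourceFromSingleChunk"];
          }
        }
      }
      n1 -> n2 [label=""];
    }

Note that the ungrouped node prints before the clusters: steps_map is an ordered map keyed by the step pointer, and nullptr sorts before any real step.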
diff --git a/src/QueryPipeline/printPipeline.h b/src/QueryPipeline/printPipeline.h
index ff3b53300ce..e91909cb50b 100644
--- a/src/QueryPipeline/printPipeline.h
+++ b/src/QueryPipeline/printPipeline.h
@@ -64,4 +64,9 @@ void printPipeline(const Processors & processors, WriteBuffer & out)
     printPipeline(processors, std::vector<IProcessor::Status>(), out);
 }
 
+/// Prints the pipeline in a compact representation.
+/// Groups processors by their name, QueryPlanStep and QueryPlanStepGroup.
+/// If the QueryPlanStep was not set for a processor, the representation may be incorrect.
+/// If with_header is set, also prints the block header for each edge.
+void printPipelineCompact(const Processors & processors, WriteBuffer & out, bool with_header);
 }
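A hedged call-site sketch (WriteBufferFromOwnString and its str() accessor are existing ClickHouse utilities; the wrapper function itself is hypothetical):

    #include <IO/WriteBufferFromString.h>
    #include <QueryPipeline/printPipeline.h>

    namespace DB
    {

    /// Hypothetical helper: render processors as a compact DOT graph, e.g. for
    /// offline rendering with `dot -Tpng pipeline.dot > pipeline.png`.
    String dumpPipelineCompact(const Processors & processors, bool with_header = false)
    {
        WriteBufferFromOwnString out;
        printPipelineCompact(processors, out, with_header);
        return out.str();
    }

    }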
diff --git a/tests/integration/test_async_insert_memory/test.py b/tests/integration/test_async_insert_memory/test.py
deleted file mode 100644
index 279542f087c..00000000000
--- a/tests/integration/test_async_insert_memory/test.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import pytest
-
-from helpers.cluster import ClickHouseCluster
-
-cluster = ClickHouseCluster(__file__)
-
-node = cluster.add_instance("node")
-
-
-@pytest.fixture(scope="module", autouse=True)
-def start_cluster():
-    try:
-        cluster.start()
-        yield cluster
-    finally:
-        cluster.shutdown()
-
-
-def test_memory_usage():
-    node.query(
-        "CREATE TABLE async_table(data Array(UInt64)) ENGINE=MergeTree() ORDER BY data"
-    )
-
-    node.get_query_request("SELECT count() FROM system.numbers")
-
-    INSERT_QUERY = "INSERT INTO async_table SETTINGS async_insert=1, wait_for_async_insert=1 VALUES ({})"
-    for iter in range(10):
-        values = list(range(iter * 5000000, (iter + 1) * 5000000))
-        node.query(INSERT_QUERY.format(values))
-
-    response = node.get_query_request(
-        "SELECT groupArray(number) FROM numbers(1000000) SETTINGS max_memory_usage_for_user={}".format(
-            30 * (2**23)
-        )
-    )
-
-    _, err = response.get_answer_and_error()
-    assert err == "", "Query failed with error {}".format(err)
-
-    node.query("DROP TABLE async_table")
diff --git a/tests/queries/0_stateless/25340_grace_hash_limit_race.reference b/tests/queries/0_stateless/02677_grace_hash_limit_race.reference
similarity index 100%
rename from tests/queries/0_stateless/25340_grace_hash_limit_race.reference
rename to tests/queries/0_stateless/02677_grace_hash_limit_race.reference
diff --git a/tests/queries/0_stateless/25340_grace_hash_limit_race.sql b/tests/queries/0_stateless/02677_grace_hash_limit_race.sql
similarity index 100%
rename from tests/queries/0_stateless/25340_grace_hash_limit_race.sql
rename to tests/queries/0_stateless/02677_grace_hash_limit_race.sql
diff --git a/tests/queries/0_stateless/02678_explain_pipeline_graph.reference b/tests/queries/0_stateless/02678_explain_pipeline_graph.reference
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tests/integration/test_async_insert_memory/__init__.py b/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.reference
similarity index 100%
rename from tests/integration/test_async_insert_memory/__init__.py
rename to tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.reference
diff --git a/tests/queries/0_stateless/02678_explain_pipeline_graph.sql b/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.sql
similarity index 50%
rename from tests/queries/0_stateless/02678_explain_pipeline_graph.sql
rename to tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.sql
index 48cfbf2b349..e8b7405d602 100644
--- a/tests/queries/0_stateless/02678_explain_pipeline_graph.sql
+++ b/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.sql
@@ -1,7 +1,12 @@
--- The server does not crash after these queries:
-
 DROP TABLE IF EXISTS t1;
 CREATE TABLE t1(ID UInt64, name String) engine=MergeTree order by ID;
+
 insert into t1(ID, name) values (1, 'abc'), (2, 'bbb');
+
+-- The returned node order is uncertain
 explain pipeline graph=1 select count(ID) from t1 FORMAT Null;
+explain pipeline graph=1 select sum(1) from t1 FORMAT Null;
+explain pipeline graph=1 select min(ID) from t1 FORMAT Null;
+explain pipeline graph=1 select max(ID) from t1 FORMAT Null;
+
 DROP TABLE t1;
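The renamed test pins down EXPLAIN PIPELINE graph=1 over aggregations that MergeTree can serve from its implicit minmax/count projection (count, min and max over the key). The same code path should also be reachable with an explicit projection; a hedged variant, with made-up table and projection names:

    DROP TABLE IF EXISTS t2;
    CREATE TABLE t2(ID UInt64, name String, PROJECTION p_sum (SELECT sum(ID)))
    ENGINE = MergeTree ORDER BY ID;
    INSERT INTO t2(ID, name) VALUES (1, 'abc'), (2, 'bbb');
    EXPLAIN PIPELINE graph = 1 SELECT sum(ID) FROM t2 FORMAT Null;
    DROP TABLE t2;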