diff --git a/src/Common/CurrentThread.cpp b/src/Common/CurrentThread.cpp
index b54cf3b9371..188e78fe69b 100644
--- a/src/Common/CurrentThread.cpp
+++ b/src/Common/CurrentThread.cpp
@@ -110,23 +110,4 @@ ThreadGroupStatusPtr CurrentThread::getGroup()
     return current_thread->getThreadGroup();
 }
 
-MemoryTracker * CurrentThread::getUserMemoryTracker()
-{
-    if (unlikely(!current_thread))
-        return nullptr;
-
-    auto * tracker = current_thread->memory_tracker.getParent();
-    while (tracker && tracker->level != VariableContext::User)
-        tracker = tracker->getParent();
-
-    return tracker;
-}
-
-void CurrentThread::flushUntrackedMemory()
-{
-    if (unlikely(!current_thread))
-        return;
-    current_thread->flushUntrackedMemory();
-}
-
 }
diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h
index ffc00c77504..f4975e800ca 100644
--- a/src/Common/CurrentThread.h
+++ b/src/Common/CurrentThread.h
@@ -40,12 +40,6 @@ public:
     /// Group to which belongs current thread
     static ThreadGroupStatusPtr getGroup();
 
-    /// MemoryTracker for user that owns current thread if any
-    static MemoryTracker * getUserMemoryTracker();
-
-    /// Adjust counters in MemoryTracker hierarchy if untracked_memory is not 0.
-    static void flushUntrackedMemory();
-
     /// A logs queue used by TCPHandler to pass logs to a client
     static void attachInternalTextLogsQueue(const std::shared_ptr<InternalTextLogsQueue> & logs_queue,
                                             LogsLevel client_logs_level);
diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp
index 78b173de6dc..590cbc9ba83 100644
--- a/src/Interpreters/AsynchronousInsertQueue.cpp
+++ b/src/Interpreters/AsynchronousInsertQueue.cpp
@@ -18,7 +18,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -104,10 +103,9 @@ bool AsynchronousInsertQueue::InsertQuery::operator==(const InsertQuery & other)
     return query_str == other.query_str && settings == other.settings;
 }
 
-AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_)
+AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_)
     : bytes(std::move(bytes_))
     , query_id(std::move(query_id_))
-    , user_memory_tracker(user_memory_tracker_)
     , create_time(std::chrono::system_clock::now())
 {
 }
@@ -236,7 +234,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
     if (auto quota = query_context->getQuota())
         quota->used(QuotaType::WRITTEN_BYTES, bytes.size());
 
-    auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId(), CurrentThread::getUserMemoryTracker());
+    auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId());
 
     InsertQuery key{query, settings};
     InsertDataPtr data_to_process;
diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h
index e6b7bff8d26..23a2860364d 100644
--- a/src/Interpreters/AsynchronousInsertQueue.h
+++ b/src/Interpreters/AsynchronousInsertQueue.h
@@ -1,7 +1,6 @@
 #pragma once
 
 #include
-#include
 #include
 #include
 #include
@@ -60,31 +59,6 @@ private:
         UInt128 calculateHash() const;
     };
 
-    struct UserMemoryTrackerSwitcher
-    {
-        explicit UserMemoryTrackerSwitcher(MemoryTracker * new_tracker)
-        {
-            auto * thread_tracker = CurrentThread::getMemoryTracker();
-            prev_untracked_memory = current_thread->untracked_memory;
-            prev_memory_tracker_parent = thread_tracker->getParent();
-
-            current_thread->untracked_memory = 0;
-            thread_tracker->setParent(new_tracker);
-        }
-
-        ~UserMemoryTrackerSwitcher()
-        {
-            CurrentThread::flushUntrackedMemory();
-            auto * thread_tracker = CurrentThread::getMemoryTracker();
-
-            current_thread->untracked_memory = prev_untracked_memory;
-            thread_tracker->setParent(prev_memory_tracker_parent);
-        }
-
-        MemoryTracker * prev_memory_tracker_parent;
-        Int64 prev_untracked_memory;
-    };
-
     struct InsertData
     {
         struct Entry
@@ -92,10 +66,9 @@ private:
         public:
             const String bytes;
            const String query_id;
-            MemoryTracker * const user_memory_tracker;
            const std::chrono::time_point<std::chrono::system_clock> create_time;
 
-            Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_);
+            Entry(String && bytes_, String && query_id_);
 
            void finish(std::exception_ptr exception_ = nullptr);
            std::future<void> getFuture() { return promise.get_future(); }
@@ -106,19 +79,6 @@ private:
            std::atomic_bool finished = false;
        };
 
-        ~InsertData()
-        {
-            auto it = entries.begin();
-            // Entries must be destroyed in context of user who runs async insert.
-            // Each entry in the list may correspond to a different user,
-            // so we need to switch current thread's MemoryTracker parent on each iteration.
-            while (it != entries.end())
-            {
-                UserMemoryTrackerSwitcher switcher((*it)->user_memory_tracker);
-                it = entries.erase(it);
-            }
-        }
-
        using EntryPtr = std::shared_ptr<Entry>;
 
        std::list<EntryPtr> entries;
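For context on what is being reverted here: UserMemoryTrackerSwitcher was a plain RAII save/restore guard around the current thread's tracker parent and untracked-memory counter. A self-contained sketch of the same pattern, using stand-in types rather than the real ClickHouse classes:

    #include <cstdint>

    /// Stand-in for DB::MemoryTracker: only the parent link matters for the pattern.
    struct MemoryTracker
    {
        MemoryTracker * parent = nullptr;
        MemoryTracker * getParent() const { return parent; }
        void setParent(MemoryTracker * parent_) { parent = parent_; }
    };

    /// RAII guard: repoint a thread's tracker at another user's tracker in the
    /// constructor, restore the previous parent and counter on scope exit.
    class UserMemoryTrackerSwitcher
    {
    public:
        UserMemoryTrackerSwitcher(MemoryTracker & thread_tracker_, int64_t & untracked_memory_, MemoryTracker * new_parent)
            : thread_tracker(thread_tracker_)
            , untracked_memory(untracked_memory_)
            , prev_parent(thread_tracker_.getParent())
            , prev_untracked_memory(untracked_memory_)
        {
            untracked_memory = 0;
            thread_tracker.setParent(new_parent);
        }

        ~UserMemoryTrackerSwitcher()
        {
            /// The removed code additionally flushed untracked memory into the
            /// tracker hierarchy at this point, before restoring.
            untracked_memory = prev_untracked_memory;
            thread_tracker.setParent(prev_parent);
        }

    private:
        MemoryTracker & thread_tracker;
        int64_t & untracked_memory;
        MemoryTracker * prev_parent;
        int64_t prev_untracked_memory;
    };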
diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp
index b2172a07e91..3c225522cc4 100644
--- a/src/Interpreters/InterpreterExplainQuery.cpp
+++ b/src/Interpreters/InterpreterExplainQuery.cpp
@@ -504,7 +504,10 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
                 auto pipe = QueryPipelineBuilder::getPipe(std::move(*pipeline), resources);
                 const auto & processors = pipe.getProcessors();
 
-                printPipeline(processors, buf);
+                if (settings.compact)
+                    printPipelineCompact(processors, buf, settings.query_pipeline_options.header);
+                else
+                    printPipeline(processors, buf);
             }
             else
             {
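For orientation: in EXPLAIN PIPELINE, the `graph` setting switches to DOT output, `compact` (on by default) selects the grouped rendering added by this change, and `header` feeds `query_pipeline_options.header`. A quick SQL illustration (queries arbitrary):

    EXPLAIN PIPELINE graph = 1 SELECT count() FROM numbers(10);               -- compact DOT (compact defaults to 1)
    EXPLAIN PIPELINE graph = 1, compact = 0 SELECT count() FROM numbers(10);  -- one node per processor
    EXPLAIN PIPELINE graph = 1, header = 1 SELECT count() FROM numbers(10);   -- annotate edges with block headers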
diff --git a/src/Processors/QueryPlan/ISourceStep.cpp b/src/Processors/QueryPlan/ISourceStep.cpp
index 0644d9b44eb..37f56bc7a43 100644
--- a/src/Processors/QueryPlan/ISourceStep.cpp
+++ b/src/Processors/QueryPlan/ISourceStep.cpp
@@ -12,10 +12,19 @@ ISourceStep::ISourceStep(DataStream output_stream_)
 QueryPipelineBuilderPtr ISourceStep::updatePipeline(QueryPipelineBuilders, const BuildQueryPipelineSettings & settings)
 {
     auto pipeline = std::make_unique<QueryPipelineBuilder>();
-    QueryPipelineProcessorsCollector collector(*pipeline, this);
+
+    /// A `Source` step does not add new processors to `pipeline->pipe` inside
+    /// `initializePipeline`; it assigns a freshly created Pipe instead, and the
+    /// step's processors are added together with that Pipe. So there is no need
+    /// for a `QueryPipelineProcessorsCollector` to collect them.
     initializePipeline(*pipeline, settings);
-    auto added_processors = collector.detachProcessors();
-    processors.insert(processors.end(), added_processors.begin(), added_processors.end());
+
+    /// But the QueryPlanStep still has to be set on the processors manually,
+    /// since `EXPLAIN PIPELINE` relies on it.
+    for (auto & processor : processors)
+    {
+        processor->setQueryPlanStep(this);
+    }
     return pipeline;
 }
 
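To make the comment concrete: a source step's initializePipeline replaces the builder's pipe wholesale rather than appending to it, so a collector attached to the builder would observe nothing. ClickHouse's own ReadNothingStep is about the simplest step of this shape; the sketch below is paraphrased from memory, not copied from the tree, so treat exact names and signatures as approximate:

    class ReadNothingStep : public ISourceStep
    {
    public:
        explicit ReadNothingStep(Block output_header)
            : ISourceStep(DataStream{.header = std::move(output_header)})
        {
        }

        String getName() const override { return "ReadNothing"; }

        void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override
        {
            /// A brand-new Pipe is assigned here; nothing is appended to an existing
            /// pipe, which is why updatePipeline() must tag `processors` with the step.
            pipeline.init(Pipe(std::make_shared<NullSource>(getOutputStream().header)));
        }
    };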
diff --git a/src/QueryPipeline/printPipeline.cpp b/src/QueryPipeline/printPipeline.cpp
new file mode 100644
index 00000000000..40c88502ed0
--- /dev/null
+++ b/src/QueryPipeline/printPipeline.cpp
@@ -0,0 +1,177 @@
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+void printPipelineCompact(const Processors & processors, WriteBuffer & out, bool with_header)
+{
+    struct Node;
+
+    /// Group by processor name, QueryPlanStep and group within the step.
+    struct Key
+    {
+        size_t group;
+        IQueryPlanStep * step;
+        std::string name;
+
+        auto getTuple() const { return std::forward_as_tuple(group, step, name); }
+
+        bool operator<(const Key & other) const
+        {
+            return getTuple() < other.getTuple();
+        }
+    };
+
+    /// Group ports by header.
+    struct EdgeData
+    {
+        Block header;
+        size_t count;
+    };
+
+    using Edge = std::vector<EdgeData>;
+
+    struct Node
+    {
+        size_t id = 0;
+        std::map<Node *, Edge> edges = {};
+        std::vector<const IProcessor *> agents = {};
+    };
+
+    std::map<Key, Node> graph;
+
+    auto get_key = [](const IProcessor & processor)
+    {
+        return Key{processor.getQueryPlanStepGroup(), processor.getQueryPlanStep(), processor.getName()};
+    };
+
+    /// Fill nodes.
+    for (const auto & processor : processors)
+    {
+        auto res = graph.emplace(get_key(*processor), Node());
+        auto & node = res.first->second;
+        node.agents.emplace_back(processor.get());
+
+        if (res.second)
+            node.id = graph.size();
+    }
+
+    Block empty_header;
+
+    /// Fill edges.
+    for (const auto & processor : processors)
+    {
+        auto & from = graph[get_key(*processor)];
+
+        for (auto & port : processor->getOutputs())
+        {
+            if (!port.isConnected())
+                continue;
+
+            auto & to = graph[get_key(port.getInputPort().getProcessor())];
+            auto & edge = from.edges[&to];
+
+            /// Use an empty header for every edge if with_header is false.
+            const auto & header = with_header ? port.getHeader()
+                                              : empty_header;
+
+            /// Group by header.
+            bool found = false;
+            for (auto & item : edge)
+            {
+                if (blocksHaveEqualStructure(header, item.header))
+                {
+                    found = true;
+                    ++item.count;
+                    break;
+                }
+            }
+
+            if (!found)
+                edge.emplace_back(EdgeData{header, 1});
+        }
+    }
+
+    /// Group processors by their QueryPlanStep.
+    std::map<IQueryPlanStep *, std::vector<const Node *>> steps_map;
+
+    for (const auto & item : graph)
+        steps_map[item.first.step].emplace_back(&item.second);
+
+    out << "digraph\n{\n";
+    out << "  rankdir=\"LR\";\n";
+    out << "  { node [shape = rect]\n";
+
+    /// Nodes // TODO quoting and escaping
+    size_t next_step = 0;
+    for (const auto & item : steps_map)
+    {
+        /// Use a separate cluster for each step.
+        if (item.first != nullptr)
+        {
+            out << "    subgraph cluster_" << next_step << " {\n";
+            out << "      label =\"" << item.first->getName() << "\";\n";
+            out << "      style=filled;\n";
+            out << "      color=lightgrey;\n";
+            out << "      node [style=filled,color=white];\n";
+            out << "      { rank = same;\n";
+
+            ++next_step;
+        }
+
+        for (const auto & node : item.second)
+        {
+            const auto & processor = node->agents.front();
+            out << "        n" << node->id << " [label=\"" << processor->getName();
+
+            if (node->agents.size() > 1)
+                out << " × " << node->agents.size();
+
+            const auto & description = processor->getDescription();
+            if (!description.empty())
+                out << ' ' << description;
+
+            out << "\"];\n";
+        }
+
+        if (item.first != nullptr)
+        {
+            out << "      }\n";
+            out << "    }\n";
+        }
+    }
+
+    out << "  }\n";
+
+    /// Edges
+    for (const auto & item : graph)
+    {
+        for (const auto & edge : item.second.edges)
+        {
+            for (const auto & data : edge.second)
+            {
+                out << "  n" << item.second.id << " -> " << "n" << edge.first->id << " [label=\"";
+
+                if (data.count > 1)
+                    out << "× " << data.count;
+
+                if (with_header)
+                {
+                    for (const auto & elem : data.header)
+                    {
+                        out << "\\n";
+                        elem.dumpStructure(out);
+                    }
+                }
+
+                out << "\"];\n";
+            }
+        }
+    }
+    out << "}\n";
+}
+
+}
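Traced through the out << statements above, a trivial two-processor pipeline comes out as DOT shaped roughly like this (processor and step names are illustrative, not captured from a real server):

    digraph
    {
      rankdir="LR";
      { node [shape = rect]
            n2 [label="LimitsCheckingTransform"];
        subgraph cluster_0 {
          label ="ReadFromStorage";
          style=filled;
          color=lightgrey;
          node [style=filled,color=white];
          { rank = same;
            n1 [label="SourceFromSingleChunk"];
          }
        }
      }
      n1 -> n2 [label=""];
    }

Note that the ungrouped node prints before the clusters: steps_map is an ordered map keyed by the step pointer, and nullptr sorts before any real step.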
diff --git a/src/QueryPipeline/printPipeline.h b/src/QueryPipeline/printPipeline.h
index ff3b53300ce..e91909cb50b 100644
--- a/src/QueryPipeline/printPipeline.h
+++ b/src/QueryPipeline/printPipeline.h
@@ -64,4 +64,9 @@ void printPipeline(const Processors & processors, WriteBuffer & out)
     printPipeline(processors, std::vector<IProcessor::Status>(), out);
 }
 
+/// Prints the pipeline in a compact representation.
+/// Groups processors by their name, QueryPlanStep and QueryPlanStepGroup.
+/// If the QueryPlanStep was not set for a processor, the representation may be incorrect.
+/// If with_header is set, also prints the block header for each edge.
+void printPipelineCompact(const Processors & processors, WriteBuffer & out, bool with_header);
 }
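A hedged call-site sketch (WriteBufferFromOwnString and its str() accessor are existing ClickHouse utilities; the wrapper function itself is hypothetical):

    #include <IO/WriteBufferFromString.h>
    #include <QueryPipeline/printPipeline.h>

    namespace DB
    {

    /// Hypothetical helper: render processors as a compact DOT graph, e.g. for
    /// offline rendering with `dot -Tpng pipeline.dot > pipeline.png`.
    String dumpPipelineCompact(const Processors & processors, bool with_header = false)
    {
        WriteBufferFromOwnString out;
        printPipelineCompact(processors, out, with_header);
        return out.str();
    }

    }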
diff --git a/tests/integration/test_async_insert_memory/test.py b/tests/integration/test_async_insert_memory/test.py
deleted file mode 100644
index 279542f087c..00000000000
--- a/tests/integration/test_async_insert_memory/test.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import pytest
-
-from helpers.cluster import ClickHouseCluster
-
-cluster = ClickHouseCluster(__file__)
-
-node = cluster.add_instance("node")
-
-
-@pytest.fixture(scope="module", autouse=True)
-def start_cluster():
-    try:
-        cluster.start()
-        yield cluster
-    finally:
-        cluster.shutdown()
-
-
-def test_memory_usage():
-    node.query(
-        "CREATE TABLE async_table(data Array(UInt64)) ENGINE=MergeTree() ORDER BY data"
-    )
-
-    node.get_query_request("SELECT count() FROM system.numbers")
-
-    INSERT_QUERY = "INSERT INTO async_table SETTINGS async_insert=1, wait_for_async_insert=1 VALUES ({})"
-    for iter in range(10):
-        values = list(range(iter * 5000000, (iter + 1) * 5000000))
-        node.query(INSERT_QUERY.format(values))
-
-    response = node.get_query_request(
-        "SELECT groupArray(number) FROM numbers(1000000) SETTINGS max_memory_usage_for_user={}".format(
-            30 * (2**23)
-        )
-    )
-
-    _, err = response.get_answer_and_error()
-    assert err == "", "Query failed with error {}".format(err)
-
-    node.query("DROP TABLE async_table")
diff --git a/tests/queries/0_stateless/25340_grace_hash_limit_race.reference b/tests/queries/0_stateless/02677_grace_hash_limit_race.reference
similarity index 100%
rename from tests/queries/0_stateless/25340_grace_hash_limit_race.reference
rename to tests/queries/0_stateless/02677_grace_hash_limit_race.reference
diff --git a/tests/queries/0_stateless/25340_grace_hash_limit_race.sql b/tests/queries/0_stateless/02677_grace_hash_limit_race.sql
similarity index 100%
rename from tests/queries/0_stateless/25340_grace_hash_limit_race.sql
rename to tests/queries/0_stateless/02677_grace_hash_limit_race.sql
diff --git a/tests/queries/0_stateless/02678_explain_pipeline_graph.reference b/tests/queries/0_stateless/02678_explain_pipeline_graph.reference
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tests/integration/test_async_insert_memory/__init__.py b/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.reference
similarity index 100%
rename from tests/integration/test_async_insert_memory/__init__.py
rename to tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.reference
diff --git a/tests/queries/0_stateless/02678_explain_pipeline_graph.sql b/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.sql
similarity index 50%
rename from tests/queries/0_stateless/02678_explain_pipeline_graph.sql
rename to tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.sql
index 48cfbf2b349..e8b7405d602 100644
--- a/tests/queries/0_stateless/02678_explain_pipeline_graph.sql
+++ b/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.sql
@@ -1,7 +1,12 @@
--- The server does not crash after these queries:
-
 DROP TABLE IF EXISTS t1;
 CREATE TABLE t1(ID UInt64, name String) engine=MergeTree order by ID;
+
 insert into t1(ID, name) values (1, 'abc'), (2, 'bbb');
+
+-- The returned node order is uncertain
 explain pipeline graph=1 select count(ID) from t1 FORMAT Null;
+explain pipeline graph=1 select sum(1) from t1 FORMAT Null;
+explain pipeline graph=1 select min(ID) from t1 FORMAT Null;
+explain pipeline graph=1 select max(ID) from t1 FORMAT Null;
+
 DROP TABLE t1;
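The renamed test pins down EXPLAIN PIPELINE graph=1 over aggregations that MergeTree can serve from its implicit minmax/count projection (count, min and max over the key). The same code path should also be reachable with an explicit projection; a hedged variant, with made-up table and projection names:

    DROP TABLE IF EXISTS t2;
    CREATE TABLE t2(ID UInt64, name String, PROJECTION p_sum (SELECT sum(ID)))
    ENGINE = MergeTree ORDER BY ID;
    INSERT INTO t2(ID, name) VALUES (1, 'abc'), (2, 'bbb');
    EXPLAIN PIPELINE graph = 1 SELECT sum(ID) FROM t2 FORMAT Null;
    DROP TABLE t2;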