From 0df4ae6b02c3a6ca6de0daceb02c3cd5c47642a8 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Tue, 15 Aug 2017 14:59:08 +0300 Subject: [PATCH] do not use unnecessary temporary objects to query a set of parts [#CLICKHOUSE-3000] --- .../Storages/MergeTree/ActiveDataPartSet.cpp | 53 ++++++++----------- .../Storages/MergeTree/ActiveDataPartSet.h | 18 ++----- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 14 +++-- dbms/src/Storages/MergeTree/MergeTreeData.h | 11 +++- 4 files changed, 42 insertions(+), 54 deletions(-) diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp index 2c8b7f225b7..8396181901c 100644 --- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -20,66 +20,59 @@ void ActiveDataPartSet::add(const String & name) void ActiveDataPartSet::addImpl(const String & name) { - if (!getContainingPartImpl(name).empty()) + auto part_info = MergeTreePartInfo::fromPartName(name); + + if (!getContainingPartImpl(part_info).empty()) return; - Part part; - part.name = name; - part.info = MergeTreePartInfo::fromPartName(name); - /// Parts contained in `part` are located contiguously inside `data_parts`, overlapping with the place where the part itself would be inserted. - Parts::iterator it = parts.lower_bound(part); + auto it = part_info_to_name.lower_bound(part_info); /// Let's go left. - while (it != parts.begin()) + while (it != part_info_to_name.begin()) { --it; - if (!part.contains(*it)) + if (!part_info.contains(it->first)) { ++it; break; } - parts.erase(it++); + part_info_to_name.erase(it++); } /// Let's go to the right. - while (it != parts.end() && part.contains(*it)) + while (it != part_info_to_name.end() && part_info.contains(it->first)) { - parts.erase(it++); + part_info_to_name.erase(it++); } - parts.insert(part); + part_info_to_name.emplace(part_info, name); } String ActiveDataPartSet::getContainingPart(const String & part_name) const { std::lock_guard lock(mutex); - return getContainingPartImpl(part_name); + return getContainingPartImpl(MergeTreePartInfo::fromPartName(part_name)); } -String ActiveDataPartSet::getContainingPartImpl(const String & part_name) const +String ActiveDataPartSet::getContainingPartImpl(const MergeTreePartInfo & part_info) const { - Part part; - part.info = MergeTreePartInfo::fromPartName(part_name); - /// A part can only be covered/overlapped by the previous or next one in `parts`. - Parts::iterator it = parts.lower_bound(part); + auto it = part_info_to_name.lower_bound(part_info); - if (it != parts.end()) + if (it != part_info_to_name.end()) { - if (it->name == part_name) - return it->name; - if (it->contains(part)) - return it->name; + if (it->first.contains(part_info)) + return it->second; } - if (it != parts.begin()) + if (it != part_info_to_name.begin()) { --it; - if (it->contains(part)) - return it->name; + if (it->first.contains(part_info)) + return it->second; } return String(); @@ -91,9 +84,9 @@ Strings ActiveDataPartSet::getParts() const std::lock_guard lock(mutex); Strings res; - res.reserve(parts.size()); - for (const Part & part : parts) - res.push_back(part.name); + res.reserve(part_info_to_name.size()); + for (const auto & kv : part_info_to_name) + res.push_back(kv.second); return res; } @@ -102,7 +95,7 @@ Strings ActiveDataPartSet::getParts() const size_t ActiveDataPartSet::size() const { std::lock_guard lock(mutex); - return parts.size(); + return part_info_to_name.size(); } diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h index 34a1ffd73b5..1e6067adb16 100644 --- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h +++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB @@ -21,16 +21,6 @@ public: ActiveDataPartSet() {} ActiveDataPartSet(const Strings & names); - struct Part - { - String name; - MergeTreePartInfo info; - - bool operator<(const Part & rhs) const { return info < rhs.info; } - - bool contains(const Part & rhs) const { return info.contains(rhs.info); } - }; - void add(const String & name); /// If not found, returns an empty string. @@ -41,14 +31,12 @@ public: size_t size() const; private: - using Parts = std::set; - mutable std::mutex mutex; - Parts parts; + std::map part_info_to_name; /// Do not block mutex. void addImpl(const String & name); - String getContainingPartImpl(const String & name) const; + String getContainingPartImpl(const MergeTreePartInfo & part_info) const; }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 86c2f7a607e..f675eaee193 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1483,26 +1483,25 @@ void MergeTreeData::delayInsertIfNeeded(Poco::Event * until) MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & part_name) { - MutableDataPartPtr tmp_part(new DataPart(*this)); - tmp_part->info = MergeTreePartInfo::fromPartName(part_name); + auto part_info = MergeTreePartInfo::fromPartName(part_name); std::lock_guard lock(data_parts_mutex); /// The part can be covered only by the previous or the next one in data_parts. - auto it = data_parts.lower_bound(tmp_part); + auto it = data_parts.lower_bound(part_info); if (it != data_parts.end()) { if ((*it)->name == part_name) return *it; - if ((*it)->contains(*tmp_part)) + if ((*it)->info.contains(part_info)) return *it; } if (it != data_parts.begin()) { --it; - if ((*it)->contains(*tmp_part)) + if ((*it)->info.contains(part_info)) return *it; } @@ -1511,11 +1510,10 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name) { - MutableDataPartPtr tmp_part(new DataPart(*this)); - tmp_part->info = MergeTreePartInfo::fromPartName(part_name); + auto part_info = MergeTreePartInfo::fromPartName(part_name); std::lock_guard lock(all_data_parts_mutex); - auto it = all_data_parts.lower_bound(tmp_part); + auto it = all_data_parts.lower_bound(part_info); if (it != all_data_parts.end() && (*it)->name == part_name) return *it; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index f2372b17d09..0229494e36a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -90,7 +90,16 @@ public: using MutableDataPartPtr = std::shared_ptr; /// After the DataPart is added to the working set, it cannot be changed. using DataPartPtr = std::shared_ptr; - struct DataPartPtrLess { bool operator() (const DataPartPtr & lhs, const DataPartPtr & rhs) const { return lhs->info < rhs->info; } }; + + struct DataPartPtrLess + { + using is_transparent = void; + + bool operator()(const DataPartPtr & lhs, const MergeTreePartInfo & rhs) const { return lhs->info < rhs; } + bool operator()(const MergeTreePartInfo & lhs, const DataPartPtr & rhs) const { return lhs < rhs->info; } + bool operator()(const DataPartPtr & lhs, const DataPartPtr & rhs) const { return lhs->info < rhs->info; } + }; + using DataParts = std::set; using DataPartsVector = std::vector;