diff --git a/src/Common/TLDListsHolder.cpp b/src/Common/TLDListsHolder.cpp index cbad8beaa7d..e29a89a03d6 100644 --- a/src/Common/TLDListsHolder.cpp +++ b/src/Common/TLDListsHolder.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -15,15 +16,19 @@ namespace ErrorCodes /// /// TLDList /// +TLDList::TLDList(size_t size) + : tld_container(size) + , pool(std::make_unique(10 << 20)) +{} bool TLDList::insert(const StringRef & host) { - StringRefHash hash; - return tld_container.insert(hash(host)).second; + bool inserted; + tld_container.emplace(DB::ArenaKeyHolder{host, *pool}, inserted); + return inserted; } bool TLDList::has(const StringRef & host) const { - StringRefHash hash; - return tld_container.has(hash(host)); + return tld_container.has(host); } /// @@ -54,7 +59,7 @@ void TLDListsHolder::parseConfig(const Poco::Util::AbstractConfiguration & confi size_t TLDListsHolder::parseAndAddTldList(const std::string & name, const std::string & path) { - TLDList tld_list; + std::unordered_set tld_list_tmp; ReadBufferFromFile in(path); while (!in.eof()) @@ -73,12 +78,19 @@ size_t TLDListsHolder::parseAndAddTldList(const std::string & name, const std::s /// Skip empty line if (line.empty()) continue; - tld_list.insert(StringRef{line.data(), line.size()}); + tld_list_tmp.emplace(line); + } + + TLDList tld_list(tld_list_tmp.size()); + for (const auto & host : tld_list_tmp) + { + StringRef host_ref{host.data(), host.size()}; + tld_list.insert(host_ref); } size_t tld_list_size = tld_list.size(); std::lock_guard lock(tld_lists_map_mutex); - tld_lists_map.emplace(name, std::move(tld_list)); + tld_lists_map.insert(std::make_pair(name, std::move(tld_list))); return tld_list_size; } diff --git a/src/Common/TLDListsHolder.h b/src/Common/TLDListsHolder.h index 9ce394267ec..209ccba217b 100644 --- a/src/Common/TLDListsHolder.h +++ b/src/Common/TLDListsHolder.h @@ -2,7 +2,8 @@ #include #include -#include +#include +#include #include #include #include @@ -12,20 +13,24 @@ namespace DB { /// Custom TLD List -/// Unlike tldLookup (which uses gperf) this one uses plain HashSet. +/// +/// Unlike tldLookup (which uses gperf) this one uses plain StringHashSet. class TLDList { public: - /// Uses StringRefHash - using Container = HashSet; + using Container = StringHashSet<>; + + TLDList(size_t size); /// Return true if the tld_container does not contains such element. bool insert(const StringRef & host); + /// Check is there such TLD bool has(const StringRef & host) const; size_t size() const { return tld_container.size(); } private: Container tld_container; + std::unique_ptr pool; }; class TLDListsHolder