mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 17:12:03 +00:00
Switch TLDList to StringHashSet (to avoid errors on collisions)
This commit is contained in:
parent
89cb2185a8
commit
c987be632f
@ -3,6 +3,7 @@
|
||||
#include <common/logger_useful.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <string_view>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -15,15 +16,19 @@ namespace ErrorCodes
|
||||
///
|
||||
/// TLDList
|
||||
///
|
||||
TLDList::TLDList(size_t size)
|
||||
: tld_container(size)
|
||||
, pool(std::make_unique<Arena>(10 << 20))
|
||||
{}
|
||||
bool TLDList::insert(const StringRef & host)
|
||||
{
|
||||
StringRefHash hash;
|
||||
return tld_container.insert(hash(host)).second;
|
||||
bool inserted;
|
||||
tld_container.emplace(DB::ArenaKeyHolder{host, *pool}, inserted);
|
||||
return inserted;
|
||||
}
|
||||
bool TLDList::has(const StringRef & host) const
|
||||
{
|
||||
StringRefHash hash;
|
||||
return tld_container.has(hash(host));
|
||||
return tld_container.has(host);
|
||||
}
|
||||
|
||||
///
|
||||
@ -54,7 +59,7 @@ void TLDListsHolder::parseConfig(const Poco::Util::AbstractConfiguration & confi
|
||||
|
||||
size_t TLDListsHolder::parseAndAddTldList(const std::string & name, const std::string & path)
|
||||
{
|
||||
TLDList tld_list;
|
||||
std::unordered_set<std::string> tld_list_tmp;
|
||||
|
||||
ReadBufferFromFile in(path);
|
||||
while (!in.eof())
|
||||
@ -73,12 +78,19 @@ size_t TLDListsHolder::parseAndAddTldList(const std::string & name, const std::s
|
||||
/// Skip empty line
|
||||
if (line.empty())
|
||||
continue;
|
||||
tld_list.insert(StringRef{line.data(), line.size()});
|
||||
tld_list_tmp.emplace(line);
|
||||
}
|
||||
|
||||
TLDList tld_list(tld_list_tmp.size());
|
||||
for (const auto & host : tld_list_tmp)
|
||||
{
|
||||
StringRef host_ref{host.data(), host.size()};
|
||||
tld_list.insert(host_ref);
|
||||
}
|
||||
|
||||
size_t tld_list_size = tld_list.size();
|
||||
std::lock_guard<std::mutex> lock(tld_lists_map_mutex);
|
||||
tld_lists_map.emplace(name, std::move(tld_list));
|
||||
tld_lists_map.insert(std::make_pair(name, std::move(tld_list)));
|
||||
return tld_list_size;
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,8 @@
|
||||
|
||||
#include <common/defines.h>
|
||||
#include <common/StringRef.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HashTable/StringHashSet.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
@ -12,20 +13,24 @@ namespace DB
|
||||
{
|
||||
|
||||
/// Custom TLD List
|
||||
/// Unlike tldLookup (which uses gperf) this one uses plain HashSet.
|
||||
///
|
||||
/// Unlike tldLookup (which uses gperf) this one uses plain StringHashSet.
|
||||
class TLDList
|
||||
{
|
||||
public:
|
||||
/// Uses StringRefHash
|
||||
using Container = HashSet<UInt32>;
|
||||
using Container = StringHashSet<>;
|
||||
|
||||
TLDList(size_t size);
|
||||
|
||||
/// Returns true if the tld_container did not already contain such an element (i.e. it was newly inserted).
|
||||
bool insert(const StringRef & host);
|
||||
/// Checks whether such a TLD is present in the list.
|
||||
bool has(const StringRef & host) const;
|
||||
size_t size() const { return tld_container.size(); }
|
||||
|
||||
private:
|
||||
Container tld_container;
|
||||
std::unique_ptr<Arena> pool;
|
||||
};
|
||||
|
||||
class TLDListsHolder
|
||||
|
Loading…
Reference in New Issue
Block a user