Merge pull request #26948 from azat/fix-custom-tld-read

Fix reading of custom TLD lists (processing stopped early with a smaller read buffer or a bigger file)
This commit is contained in:
alexey-milovidov 2021-07-29 20:57:25 +03:00 committed by GitHub
commit 2bd5067e54
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -2,6 +2,7 @@
#include <Common/StringUtils/StringUtils.h>
#include <common/logger_useful.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <string_view>
#include <unordered_set>
@@ -11,11 +12,10 @@ namespace DB
/// Error codes referenced by this file. They are only declared here (`extern`);
/// the definitions live outside this block.
namespace ErrorCodes
{
/// NOTE(review): not thrown in the visible part of this file — presumably raised
/// when a requested TLD list is missing; confirm against the rest of the file.
extern const int TLD_LIST_NOT_FOUND;
/// Thrown by parseAndAddTldList when the list file was not read to the end.
extern const int LOGICAL_ERROR;
}
///
/// TLDList
///
TLDList::TLDList(size_t size)
: tld_container(size)
, pool(std::make_unique<Arena>(10 << 20))
@@ -31,9 +31,7 @@ bool TLDList::has(const StringRef & host) const
return tld_container.has(host);
}
///
/// TLDListsHolder
///
TLDListsHolder & TLDListsHolder::getInstance()
{
static TLDListsHolder instance;
@@ -62,24 +60,22 @@ size_t TLDListsHolder::parseAndAddTldList(const std::string & name, const std::s
std::unordered_set<std::string> tld_list_tmp;
ReadBufferFromFile in(path);
String line;
while (!in.eof())
{
char * newline = find_first_symbols<'\n'>(in.position(), in.buffer().end());
if (newline >= in.buffer().end())
break;
std::string_view line(in.position(), newline - in.position());
in.position() = newline + 1;
readEscapedStringUntilEOL(line, in);
++in.position();
/// Skip comments
if (line.size() > 2 && line[0] == '/' && line[1] == '/')
continue;
trim(line);
line = trim(line, [](char c) { return std::isspace(c); });
/// Skip empty line
if (line.empty())
continue;
tld_list_tmp.emplace(line);
}
if (!in.eof())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not all list had been read", name);
TLDList tld_list(tld_list_tmp.size());
for (const auto & host : tld_list_tmp)