Merge pull request #26948 from azat/fix-custom-tld-read

Fix reading of custom TLD lists (previously, parsing stopped early with a smaller read buffer or a larger file)
This commit is contained in:
alexey-milovidov 2021-07-29 20:57:25 +03:00 committed by GitHub
commit 2bd5067e54
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -2,6 +2,7 @@
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <string_view> #include <string_view>
#include <unordered_set> #include <unordered_set>
@ -11,11 +12,10 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int TLD_LIST_NOT_FOUND; extern const int TLD_LIST_NOT_FOUND;
extern const int LOGICAL_ERROR;
} }
///
/// TLDList /// TLDList
///
TLDList::TLDList(size_t size) TLDList::TLDList(size_t size)
: tld_container(size) : tld_container(size)
, pool(std::make_unique<Arena>(10 << 20)) , pool(std::make_unique<Arena>(10 << 20))
@ -31,9 +31,7 @@ bool TLDList::has(const StringRef & host) const
return tld_container.has(host); return tld_container.has(host);
} }
///
/// TLDListsHolder /// TLDListsHolder
///
TLDListsHolder & TLDListsHolder::getInstance() TLDListsHolder & TLDListsHolder::getInstance()
{ {
static TLDListsHolder instance; static TLDListsHolder instance;
@ -62,24 +60,22 @@ size_t TLDListsHolder::parseAndAddTldList(const std::string & name, const std::s
std::unordered_set<std::string> tld_list_tmp; std::unordered_set<std::string> tld_list_tmp;
ReadBufferFromFile in(path); ReadBufferFromFile in(path);
String line;
while (!in.eof()) while (!in.eof())
{ {
char * newline = find_first_symbols<'\n'>(in.position(), in.buffer().end()); readEscapedStringUntilEOL(line, in);
if (newline >= in.buffer().end()) ++in.position();
break;
std::string_view line(in.position(), newline - in.position());
in.position() = newline + 1;
/// Skip comments /// Skip comments
if (line.size() > 2 && line[0] == '/' && line[1] == '/') if (line.size() > 2 && line[0] == '/' && line[1] == '/')
continue; continue;
trim(line); line = trim(line, [](char c) { return std::isspace(c); });
/// Skip empty line /// Skip empty line
if (line.empty()) if (line.empty())
continue; continue;
tld_list_tmp.emplace(line); tld_list_tmp.emplace(line);
} }
if (!in.eof())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not all list had been read", name);
TLDList tld_list(tld_list_tmp.size()); TLDList tld_list(tld_list_tmp.size());
for (const auto & host : tld_list_tmp) for (const auto & host : tld_list_tmp)