Merge pull request #43627 from ClickHouse/faster_backup_config_loading

Speedup backup config loading
This commit is contained in:
Vitaly Baranov 2022-11-30 16:33:54 +01:00 committed by GitHub
commit f966bf61e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 145 additions and 34 deletions

View File

@ -8,6 +8,7 @@
#include <Common/StringUtils/StringUtils.h>
#include <Common/hex.h>
#include <Common/quoteString.h>
#include <Common/XMLUtils.h>
#include <Interpreters/Context.h>
#include <IO/Archives/IArchiveReader.h>
#include <IO/Archives/IArchiveWriter.h>
@ -22,6 +23,7 @@
#include <IO/WriteHelpers.h>
#include <IO/copyData.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/DOM/DOMParser.h>
namespace DB
@ -352,8 +354,11 @@ void BackupImpl::writeBackupMetadata()
increaseUncompressedSize(str.size());
}
void BackupImpl::readBackupMetadata()
{
using namespace XMLUtils;
std::unique_ptr<ReadBuffer> in;
if (use_archives)
{
@ -372,40 +377,39 @@ void BackupImpl::readBackupMetadata()
String str;
readStringUntilEOF(str, *in);
increaseUncompressedSize(str.size());
std::istringstream stream(str); // STYLE_CHECK_ALLOW_STD_STRING_STREAM
Poco::AutoPtr<Poco::Util::XMLConfiguration> config{new Poco::Util::XMLConfiguration()};
config->load(stream);
Poco::XML::DOMParser dom_parser;
Poco::AutoPtr<Poco::XML::Document> config = dom_parser.parseMemory(str.data(), str.size());
const Poco::XML::Node * config_root = getRootNode(config);
version = config->getInt("version");
version = getInt(config_root, "version");
if ((version < INITIAL_BACKUP_VERSION) || (version > CURRENT_BACKUP_VERSION))
throw Exception(
ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", backup_name_for_logging, version);
timestamp = parse<LocalDateTime>(config->getString("timestamp")).to_time_t();
uuid = parse<UUID>(config->getString("uuid"));
timestamp = parse<::LocalDateTime>(getString(config_root, "timestamp")).to_time_t();
uuid = parse<UUID>(getString(config_root, "uuid"));
if (config->has("base_backup") && !base_backup_info)
base_backup_info = BackupInfo::fromString(config->getString("base_backup"));
if (config_root->getNodeByPath("base_backup") && !base_backup_info)
base_backup_info = BackupInfo::fromString(getString(config_root, "base_backup"));
if (config->has("base_backup_uuid"))
base_backup_uuid = parse<UUID>(config->getString("base_backup_uuid"));
if (config_root->getNodeByPath("base_backup_uuid"))
base_backup_uuid = parse<UUID>(getString(config_root, "base_backup_uuid"));
Poco::Util::AbstractConfiguration::Keys keys;
config->keys("contents", keys);
for (const auto & key : keys)
const auto * contents = config_root->getNodeByPath("contents");
for (const Poco::XML::Node * child = contents->firstChild(); child; child = child->nextSibling())
{
if ((key == "file") || key.starts_with("file["))
if (child->nodeName() == "file")
{
String prefix = "contents." + key + ".";
const Poco::XML::Node * file_config = child;
FileInfo info;
info.file_name = config->getString(prefix + "name");
info.size = config->getUInt64(prefix + "size");
info.file_name = getString(file_config, "name");
info.size = getUInt64(file_config, "size");
if (info.size)
{
info.checksum = unhexChecksum(config->getString(prefix + "checksum"));
info.checksum = unhexChecksum(getString(file_config, "checksum"));
bool use_base = config->getBool(prefix + "use_base", false);
info.base_size = config->getUInt64(prefix + "base_size", use_base ? info.size : 0);
bool use_base = getBool(file_config, "use_base", false);
info.base_size = getUInt64(file_config, "base_size", use_base ? info.size : 0);
if (info.base_size)
use_base = true;
@ -423,14 +427,14 @@ void BackupImpl::readBackupMetadata()
if (info.base_size == info.size)
info.base_checksum = info.checksum;
else
info.base_checksum = unhexChecksum(config->getString(prefix + "base_checksum"));
info.base_checksum = unhexChecksum(getString(file_config, "base_checksum"));
}
if (info.size > info.base_size)
{
info.data_file_name = config->getString(prefix + "data_file", info.file_name);
info.archive_suffix = config->getString(prefix + "archive_suffix", "");
info.pos_in_archive = config->getUInt64(prefix + "pos_in_archive", static_cast<UInt64>(-1));
info.data_file_name = getString(file_config, "data_file", info.file_name);
info.archive_suffix = getString(file_config, "archive_suffix", "");
info.pos_in_archive = getUInt64(file_config, "pos_in_archive", static_cast<UInt64>(-1));
}
}

View File

@ -20,6 +20,7 @@
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <Common/getResource.h>
#include <Common/XMLUtils.h>
#include <base/errnoToString.h>
#include <base/sort.h>
#include <IO/WriteBufferFromString.h>
@ -121,16 +122,7 @@ static ElementIdentifier getElementIdentifier(Node * element)
/// Returns the root element of `document`.
/// Delegates to XMLUtils::getRootNode, which skips top-level non-element nodes
/// (e.g. comments) and throws Poco::Exception("No root node in document")
/// when the document has no element node at the top level.
static Node * getRootNode(Document * document)
{
    return XMLUtils::getRootNode(document);
}
static bool allWhitespace(const std::string & s)

92
src/Common/XMLUtils.cpp Normal file
View File

@ -0,0 +1,92 @@
#include <string>
#include <Common/XMLUtils.h>
#include <Poco/DOM/Document.h>
#include <Poco/Util/AbstractConfiguration.h>
namespace DB::XMLUtils
{
using namespace Poco;
using namespace Poco::XML;
using XMLDocumentPtr = Poco::AutoPtr<Document>;
/// Finds the root element of a parsed XML document.
/// Besides the root element there can be other nodes (e.g. comments) on the top level,
/// so the first top-level child whose type is ELEMENT_NODE is returned.
/// Throws Poco::Exception if no such child exists.
Node * getRootNode(Document * document)
{
    Node * candidate = document->firstChild();

    /// Step over everything that is not an element.
    while (candidate && candidate->nodeType() != Node::ELEMENT_NODE)
        candidate = candidate->nextSibling();

    if (!candidate)
        throw Poco::Exception("No root node in document");

    return candidate;
}
/// Gives access to the protected static parseXXX methods of Poco::Util::AbstractConfiguration.
/// The class is never instantiated (its constructor is deleted); it is only a scope
/// for static functions.
class ParseHelper : private Util::AbstractConfiguration
{
public:
    ParseHelper() = delete;

    using Util::AbstractConfiguration::parseBool;
    using Util::AbstractConfiguration::parseInt;
    using Util::AbstractConfiguration::parseInt64;
    using Util::AbstractConfiguration::parseUInt;
    using Util::AbstractConfiguration::parseUInt64;

    /// Identity conversion, so strings can go through getValue() like every other type.
    static std::string parseString(const std::string & text) { return text; }

    /// Looks up the node at `path` (as understood by Node::getNodeByPath) starting from `node`
    /// and converts its inner text with `parse_function`.
    /// If no node is found, returns `*default_value` when a default is supplied,
    /// otherwise throws Poco::NotFoundException carrying `path`.
    template <typename ValueType, typename ParseFunction>
    static ValueType getValue(const Node * node, const std::string & path,
        const std::optional<ValueType> & default_value, const ParseFunction & parse_function)
    {
        if (const auto * found = node->getNodeByPath(path))
            return parse_function(found->innerText());

        if (!default_value)
            throw Poco::NotFoundException(path);

        return *default_value;
    }
};
/// Returns the inner text of the node found at `path` below `node`, unchanged.
/// Falls back to `default_value` when the node is absent; throws Poco::NotFoundException
/// when it is absent and no default was given.
std::string getString(const Node * node, const std::string & path, const std::optional<std::string> & default_value)
{
    const auto & converter = ParseHelper::parseString;
    return ParseHelper::getValue<std::string>(node, path, default_value, converter);
}
/// Returns the inner text of the node found at `path` below `node`, converted with
/// AbstractConfiguration::parseInt64.
/// Falls back to `default_value` when the node is absent; throws Poco::NotFoundException
/// when it is absent and no default was given.
Int64 getInt64(const Node * node, const std::string & path, const std::optional<Int64> & default_value)
{
    const auto & converter = ParseHelper::parseInt64;
    return ParseHelper::getValue<Int64>(node, path, default_value, converter);
}
/// Returns the inner text of the node found at `path` below `node`, converted with
/// AbstractConfiguration::parseUInt64.
/// Falls back to `default_value` when the node is absent; throws Poco::NotFoundException
/// when it is absent and no default was given.
UInt64 getUInt64(const Node * node, const std::string & path, const std::optional<UInt64> & default_value)
{
    const auto & converter = ParseHelper::parseUInt64;
    return ParseHelper::getValue<UInt64>(node, path, default_value, converter);
}
/// Returns the inner text of the node found at `path` below `node`, converted with
/// AbstractConfiguration::parseInt.
/// Falls back to `default_value` when the node is absent; throws Poco::NotFoundException
/// when it is absent and no default was given.
int getInt(const Node * node, const std::string & path, const std::optional<int> & default_value)
{
    const auto & converter = ParseHelper::parseInt;
    return ParseHelper::getValue<int>(node, path, default_value, converter);
}
/// Returns the inner text of the node found at `path` below `node`, converted with
/// AbstractConfiguration::parseUInt.
/// Falls back to `default_value` when the node is absent; throws Poco::NotFoundException
/// when it is absent and no default was given.
unsigned getUInt(const Node * node, const std::string & path, const std::optional<unsigned> & default_value)
{
    const auto & converter = ParseHelper::parseUInt;
    return ParseHelper::getValue<unsigned>(node, path, default_value, converter);
}
/// Returns the inner text of the node found at `path` below `node`, converted with
/// AbstractConfiguration::parseBool.
/// Falls back to `default_value` when the node is absent; throws Poco::NotFoundException
/// when it is absent and no default was given.
bool getBool(const Node * node, const std::string & path, const std::optional<bool> & default_value)
{
    const auto & converter = ParseHelper::parseBool;
    return ParseHelper::getValue<bool>(node, path, default_value, converter);
}
}

23
src/Common/XMLUtils.h Normal file
View File

@ -0,0 +1,23 @@
#pragma once
#include <Poco/DOM/DOMParser.h>
#include <Poco/DOM/Node.h>
#include <Poco/AutoPtr.h>
#include <base/types.h>
/// Helpers for reading typed values directly from a Poco DOM tree.
namespace DB:: XMLUtils
{
/// Returns the first top-level child of `document` that is an element node
/// (other top-level nodes such as comments are skipped).
/// Throws Poco::Exception if the document has no element node at the top level.
Poco::XML::Node * getRootNode(Poco::XML::Document * document);
/// The getters below share one contract:
///  - `path` is resolved relative to `node` with Poco::XML::Node::getNodeByPath;
///  - if a node is found, its inner text is converted to the requested type
///    (strings are returned as is, other types go through the corresponding
///    Poco::Util::AbstractConfiguration::parseXXX function);
///  - if no node is found, `default_value` is returned when provided,
///    otherwise Poco::NotFoundException is thrown.
std::string getString(const Poco::XML::Node * node, const std::string & path, const std::optional<std::string> & default_value = std::nullopt);
Int64 getInt64(const Poco::XML::Node * node, const std::string & path, const std::optional<Int64> & default_value = std::nullopt);
UInt64 getUInt64(const Poco::XML::Node * node, const std::string & path, const std::optional<UInt64> & default_value = std::nullopt);
int getInt(const Poco::XML::Node * node, const std::string & path, const std::optional<int> & default_value = std::nullopt);
unsigned getUInt(const Poco::XML::Node * node, const std::string & path, const std::optional<unsigned> & default_value = std::nullopt);
bool getBool(const Poco::XML::Node * node, const std::string & path, const std::optional<bool> & default_value = std::nullopt);
}