From 6064f83aca7354202d29fbfdcb750bcd7631d9d7 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Thu, 24 Nov 2022 14:10:59 +0100 Subject: [PATCH 1/2] Use XMLDocument instead of XMLConfiguration for faster loading --- src/Backups/BackupImpl.cpp | 221 +++++++++++++++++++++++++++++++++---- 1 file changed, 197 insertions(+), 24 deletions(-) diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 7ae3d80b118..aa4a0e433c3 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -22,6 +22,7 @@ #include #include #include +#include namespace DB @@ -352,8 +353,181 @@ void BackupImpl::writeBackupMetadata() increaseUncompressedSize(str.size()); } +/// TODO: move this to some common place instead of copy-pasting from ConfigProcessor? +namespace XMLHelpers +{ + +using namespace Poco; +using namespace Poco::XML; + +using XMLDocumentPtr = AutoPtr; + +Node * getRootNode(Document * document) +{ + for (Node * child = document->firstChild(); child; child = child->nextSibling()) + { + /// Besides the root element there can be comment nodes on the top level. + /// Skip them. + if (child->nodeType() == Node::ELEMENT_NODE) + return child; + } + + throw Poco::Exception("No root node in document"); +} + +int parseInt(const std::string& value) +{ + if ((value.compare(0, 2, "0x") == 0) || (value.compare(0, 2, "0X") == 0)) + return static_cast(NumberParser::parseHex(value)); + else + return NumberParser::parse(value); +} + +/* +unsigned parseUInt(const std::string& value) +{ + if ((value.compare(0, 2, "0x") == 0) || (value.compare(0, 2, "0X") == 0)) + return NumberParser::parseHex(value); + else + return NumberParser::parseUnsigned(value); +} + + +Int64 parseInt64(const std::string& value) +{ + if ((value.compare(0, 2, "0x") == 0) || (value.compare(0, 2, "0X") == 0)) + return static_cast(NumberParser::parseHex64(value)); + else + return NumberParser::parse64(value); +} +*/ + +UInt64 parseUInt64(const std::string& value) +{ + if ((value.compare(0, 2, "0x") == 0) || (value.compare(0, 2, "0X") == 0)) + return NumberParser::parseHex64(value); + else + return NumberParser::parseUnsigned64(value); +} + + +bool parseBool(const std::string& value) +{ + int n; + if (NumberParser::tryParse(value, n)) + return n != 0; + else if (icompare(value, "true") == 0) + return true; + else if (icompare(value, "yes") == 0) + return true; + else if (icompare(value, "on") == 0) + return true; + else if (icompare(value, "false") == 0) + return false; + else if (icompare(value, "no") == 0) + return false; + else if (icompare(value, "off") == 0) + return false; + else + throw SyntaxException("Cannot convert to boolean", value); +} + +/* +const Node * getValueNode(const Node * node, const std::string & path) +{ + const auto * path_node = node->getNodeByPath(path); + if (!path_node) + return nullptr; + for (const auto * child = path_node->firstChild(); child; child = child->nextSibling()) + { + if (child->) + } +} +*/ +std::string getString(const Node * node, const std::string & path, std::optional default_value = std::nullopt) +{ + const auto * value_node = node->getNodeByPath(path); + if (!value_node) + { + if (default_value) + return *default_value; + else + throw Poco::NotFoundException(path); + } + return value_node->innerText(); +} +/* +Int64 getInt64(const Node * node, const std::string & path, std::optional default_value = std::nullopt) +{ + const auto * value_node = node->getNodeByPath(path); + if (!value_node) + { + if (default_value) + return *default_value; + else + throw Poco::NotFoundException(path); + } + return parseInt64(value_node->nodeValue()); +} +*/ +UInt64 getUInt64(const Node * node, const std::string & path, std::optional default_value = std::nullopt) +{ + const auto * value_node = node->getNodeByPath(path); + if (!value_node) + { + if (default_value) + return *default_value; + else + throw Poco::NotFoundException(path); + } + return parseUInt64(value_node->innerText()); +} + +int getInt(const Node * node, const std::string & path, std::optional default_value = std::nullopt) +{ + const auto * value_node = node->getNodeByPath(path); + if (!value_node) + { + if (default_value) + return *default_value; + else + throw Poco::NotFoundException(path); + } + return parseInt(value_node->innerText()); +} + +/* +unsigned getUInt(const Node * node, const std::string & path, std::optional default_value = std::nullopt) +{ + const auto * value_node = node->getNodeByPath(path); + if (!value_node) + { + if (default_value) + return *default_value; + else + throw Poco::NotFoundException(path); + } + return parseUInt(value_node->nodeValue()); +} +*/ +bool getBool(const Node * node, const std::string & path, std::optional default_value = std::nullopt) +{ + const auto * value_node = node->getNodeByPath(path); + if (!value_node) + { + if (default_value) + return *default_value; + else + throw Poco::NotFoundException(path); + } + return parseBool(value_node->innerText()); +} +} + void BackupImpl::readBackupMetadata() { + using namespace XMLHelpers; + std::unique_ptr in; if (use_archives) { @@ -372,40 +546,39 @@ void BackupImpl::readBackupMetadata() String str; readStringUntilEOF(str, *in); increaseUncompressedSize(str.size()); - std::istringstream stream(str); // STYLE_CHECK_ALLOW_STD_STRING_STREAM - Poco::AutoPtr config{new Poco::Util::XMLConfiguration()}; - config->load(stream); + Poco::XML::DOMParser dom_parser; + XMLDocumentPtr config = dom_parser.parseMemory(str.data(), str.size()); + const Poco::XML::Node * config_root = getRootNode(config); - version = config->getInt("version"); + version = getInt(config_root, "version"); if ((version < INITIAL_BACKUP_VERSION) || (version > CURRENT_BACKUP_VERSION)) throw Exception( ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", backup_name_for_logging, version); - timestamp = parse(config->getString("timestamp")).to_time_t(); - uuid = parse(config->getString("uuid")); + timestamp = parse<::LocalDateTime>(getString(config_root, "timestamp")).to_time_t(); + uuid = parse(getString(config_root, "uuid")); - if (config->has("base_backup") && !base_backup_info) - base_backup_info = BackupInfo::fromString(config->getString("base_backup")); + if (config_root->getNodeByPath("base_backup") && !base_backup_info) + base_backup_info = BackupInfo::fromString(getString(config_root, "base_backup")); - if (config->has("base_backup_uuid")) - base_backup_uuid = parse(config->getString("base_backup_uuid")); + if (config_root->getNodeByPath("base_backup_uuid")) + base_backup_uuid = parse(getString(config_root, "base_backup_uuid")); - Poco::Util::AbstractConfiguration::Keys keys; - config->keys("contents", keys); - for (const auto & key : keys) + const auto * contents = config_root->getNodeByPath("contents"); + for (const Poco::XML::Node * child = contents->firstChild(); child; child = child->nextSibling()) { - if ((key == "file") || key.starts_with("file[")) + if (child->nodeName() == "file") { - String prefix = "contents." + key + "."; + const Poco::XML::Node * file_config = child; FileInfo info; - info.file_name = config->getString(prefix + "name"); - info.size = config->getUInt64(prefix + "size"); + info.file_name = getString(file_config, "name"); + info.size = getUInt64(file_config, "size"); if (info.size) { - info.checksum = unhexChecksum(config->getString(prefix + "checksum")); + info.checksum = unhexChecksum(getString(file_config, "checksum")); - bool use_base = config->getBool(prefix + "use_base", false); - info.base_size = config->getUInt64(prefix + "base_size", use_base ? info.size : 0); + bool use_base = getBool(file_config, "use_base", false); + info.base_size = getUInt64(file_config, "base_size", use_base ? info.size : 0); if (info.base_size) use_base = true; @@ -423,14 +596,14 @@ void BackupImpl::readBackupMetadata() if (info.base_size == info.size) info.base_checksum = info.checksum; else - info.base_checksum = unhexChecksum(config->getString(prefix + "base_checksum")); + info.base_checksum = unhexChecksum(getString(file_config, "base_checksum")); } if (info.size > info.base_size) { - info.data_file_name = config->getString(prefix + "data_file", info.file_name); - info.archive_suffix = config->getString(prefix + "archive_suffix", ""); - info.pos_in_archive = config->getUInt64(prefix + "pos_in_archive", static_cast(-1)); + info.data_file_name = getString(file_config, "data_file", info.file_name); + info.archive_suffix = getString(file_config, "archive_suffix", ""); + info.pos_in_archive = getUInt64(file_config, "pos_in_archive", static_cast(-1)); } } From 8f49c1ea1680e6c5db7e464da3d4e626acb840c4 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 28 Nov 2022 18:14:01 +0100 Subject: [PATCH 2/2] Moved helpers to Common/XMLUtils.* --- src/Backups/BackupImpl.cpp | 175 +------------------------- src/Common/Config/ConfigProcessor.cpp | 13 +- src/Common/XMLUtils.cpp | 92 ++++++++++++++ src/Common/XMLUtils.h | 23 ++++ 4 files changed, 120 insertions(+), 183 deletions(-) create mode 100644 src/Common/XMLUtils.cpp create mode 100644 src/Common/XMLUtils.h diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index aa4a0e433c3..ddc3f431c6f 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -353,180 +354,10 @@ void BackupImpl::writeBackupMetadata() increaseUncompressedSize(str.size()); } -/// TODO: move this to some common place instead of copy-pasting from ConfigProcessor? -namespace XMLHelpers -{ - -using namespace Poco; -using namespace Poco::XML; - -using XMLDocumentPtr = AutoPtr; - -Node * getRootNode(Document * document) -{ - for (Node * child = document->firstChild(); child; child = child->nextSibling()) - { - /// Besides the root element there can be comment nodes on the top level. - /// Skip them. - if (child->nodeType() == Node::ELEMENT_NODE) - return child; - } - - throw Poco::Exception("No root node in document"); -} - -int parseInt(const std::string& value) -{ - if ((value.compare(0, 2, "0x") == 0) || (value.compare(0, 2, "0X") == 0)) - return static_cast(NumberParser::parseHex(value)); - else - return NumberParser::parse(value); -} - -/* -unsigned parseUInt(const std::string& value) -{ - if ((value.compare(0, 2, "0x") == 0) || (value.compare(0, 2, "0X") == 0)) - return NumberParser::parseHex(value); - else - return NumberParser::parseUnsigned(value); -} - - -Int64 parseInt64(const std::string& value) -{ - if ((value.compare(0, 2, "0x") == 0) || (value.compare(0, 2, "0X") == 0)) - return static_cast(NumberParser::parseHex64(value)); - else - return NumberParser::parse64(value); -} -*/ - -UInt64 parseUInt64(const std::string& value) -{ - if ((value.compare(0, 2, "0x") == 0) || (value.compare(0, 2, "0X") == 0)) - return NumberParser::parseHex64(value); - else - return NumberParser::parseUnsigned64(value); -} - - -bool parseBool(const std::string& value) -{ - int n; - if (NumberParser::tryParse(value, n)) - return n != 0; - else if (icompare(value, "true") == 0) - return true; - else if (icompare(value, "yes") == 0) - return true; - else if (icompare(value, "on") == 0) - return true; - else if (icompare(value, "false") == 0) - return false; - else if (icompare(value, "no") == 0) - return false; - else if (icompare(value, "off") == 0) - return false; - else - throw SyntaxException("Cannot convert to boolean", value); -} - -/* -const Node * getValueNode(const Node * node, const std::string & path) -{ - const auto * path_node = node->getNodeByPath(path); - if (!path_node) - return nullptr; - for (const auto * child = path_node->firstChild(); child; child = child->nextSibling()) - { - if (child->) - } -} -*/ -std::string getString(const Node * node, const std::string & path, std::optional default_value = std::nullopt) -{ - const auto * value_node = node->getNodeByPath(path); - if (!value_node) - { - if (default_value) - return *default_value; - else - throw Poco::NotFoundException(path); - } - return value_node->innerText(); -} -/* -Int64 getInt64(const Node * node, const std::string & path, std::optional default_value = std::nullopt) -{ - const auto * value_node = node->getNodeByPath(path); - if (!value_node) - { - if (default_value) - return *default_value; - else - throw Poco::NotFoundException(path); - } - return parseInt64(value_node->nodeValue()); -} -*/ -UInt64 getUInt64(const Node * node, const std::string & path, std::optional default_value = std::nullopt) -{ - const auto * value_node = node->getNodeByPath(path); - if (!value_node) - { - if (default_value) - return *default_value; - else - throw Poco::NotFoundException(path); - } - return parseUInt64(value_node->innerText()); -} - -int getInt(const Node * node, const std::string & path, std::optional default_value = std::nullopt) -{ - const auto * value_node = node->getNodeByPath(path); - if (!value_node) - { - if (default_value) - return *default_value; - else - throw Poco::NotFoundException(path); - } - return parseInt(value_node->innerText()); -} - -/* -unsigned getUInt(const Node * node, const std::string & path, std::optional default_value = std::nullopt) -{ - const auto * value_node = node->getNodeByPath(path); - if (!value_node) - { - if (default_value) - return *default_value; - else - throw Poco::NotFoundException(path); - } - return parseUInt(value_node->nodeValue()); -} -*/ -bool getBool(const Node * node, const std::string & path, std::optional default_value = std::nullopt) -{ - const auto * value_node = node->getNodeByPath(path); - if (!value_node) - { - if (default_value) - return *default_value; - else - throw Poco::NotFoundException(path); - } - return parseBool(value_node->innerText()); -} -} void BackupImpl::readBackupMetadata() { - using namespace XMLHelpers; + using namespace XMLUtils; std::unique_ptr in; if (use_archives) @@ -547,7 +378,7 @@ void BackupImpl::readBackupMetadata() readStringUntilEOF(str, *in); increaseUncompressedSize(str.size()); Poco::XML::DOMParser dom_parser; - XMLDocumentPtr config = dom_parser.parseMemory(str.data(), str.size()); + Poco::AutoPtr config = dom_parser.parseMemory(str.data(), str.size()); const Poco::XML::Node * config_root = getRootNode(config); version = getInt(config_root, "version"); diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 41535fad8f7..88c7f984de0 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -122,17 +123,7 @@ static ElementIdentifier getElementIdentifier(Node * element) static Node * getRootNode(Document * document) { - const NodeListPtr children = document->childNodes(); - for (size_t i = 0, size = children->length(); i < size; ++i) - { - Node * child = children->item(i); - /// Besides the root element there can be comment nodes on the top level. - /// Skip them. - if (child->nodeType() == Node::ELEMENT_NODE) - return child; - } - - throw Poco::Exception("No root node in document"); + return XMLUtils::getRootNode(document); } static bool allWhitespace(const std::string & s) diff --git a/src/Common/XMLUtils.cpp b/src/Common/XMLUtils.cpp new file mode 100644 index 00000000000..3d15400461e --- /dev/null +++ b/src/Common/XMLUtils.cpp @@ -0,0 +1,92 @@ +#include +#include +#include +#include + +namespace DB::XMLUtils +{ + +using namespace Poco; +using namespace Poco::XML; + +using XMLDocumentPtr = Poco::AutoPtr; + +Node * getRootNode(Document * document) +{ + for (Node * child = document->firstChild(); child; child = child->nextSibling()) + { + /// Besides the root element there can be comment nodes on the top level. + /// Skip them. + if (child->nodeType() == Node::ELEMENT_NODE) + return child; + } + + throw Poco::Exception("No root node in document"); +} + + +/// This class is used to access protected parseXXX static methods from AbstractConfiguration +class ParseHelper : private Util::AbstractConfiguration +{ +public: + ParseHelper() = delete; + + using Util::AbstractConfiguration::parseInt; + using Util::AbstractConfiguration::parseUInt; + using Util::AbstractConfiguration::parseInt64; + using Util::AbstractConfiguration::parseUInt64; + using Util::AbstractConfiguration::parseBool; + + static std::string parseString(const std::string & s) + { + return s; + } + + template + static ValueType getValue(const Node * node, const std::string & path, + const std::optional & default_value, const ParseFunction & parse_function) + { + const auto * value_node = node->getNodeByPath(path); + if (!value_node) + { + if (default_value) + return *default_value; + else + throw Poco::NotFoundException(path); + } + return parse_function(value_node->innerText()); + } +}; + + +std::string getString(const Node * node, const std::string & path, const std::optional & default_value) +{ + return ParseHelper::getValue(node, path, default_value, ParseHelper::parseString); +} + +Int64 getInt64(const Node * node, const std::string & path, const std::optional & default_value) +{ + return ParseHelper::getValue(node, path, default_value, ParseHelper::parseInt64); +} + +UInt64 getUInt64(const Node * node, const std::string & path, const std::optional & default_value) +{ + return ParseHelper::getValue(node, path, default_value, ParseHelper::parseUInt64); +} + +int getInt(const Node * node, const std::string & path, const std::optional & default_value) +{ + return ParseHelper::getValue(node, path, default_value, ParseHelper::parseInt); +} + +unsigned getUInt(const Node * node, const std::string & path, const std::optional & default_value) +{ + return ParseHelper::getValue(node, path, default_value, ParseHelper::parseUInt); +} + +bool getBool(const Node * node, const std::string & path, const std::optional & default_value) +{ + return ParseHelper::getValue(node, path, default_value, ParseHelper::parseBool); +} + +} diff --git a/src/Common/XMLUtils.h b/src/Common/XMLUtils.h new file mode 100644 index 00000000000..24efc691704 --- /dev/null +++ b/src/Common/XMLUtils.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB:: XMLUtils +{ +Poco::XML::Node * getRootNode(Poco::XML::Document * document); + +std::string getString(const Poco::XML::Node * node, const std::string & path, const std::optional & default_value = std::nullopt); + +Int64 getInt64(const Poco::XML::Node * node, const std::string & path, const std::optional & default_value = std::nullopt); + +UInt64 getUInt64(const Poco::XML::Node * node, const std::string & path, const std::optional & default_value = std::nullopt); + +int getInt(const Poco::XML::Node * node, const std::string & path, const std::optional & default_value = std::nullopt); + +unsigned getUInt(const Poco::XML::Node * node, const std::string & path, const std::optional & default_value = std::nullopt); + +bool getBool(const Poco::XML::Node * node, const std::string & path, const std::optional & default_value = std::nullopt); +}