Merge pull request #42146 from azat/backups/metadata-overflow-fix

Fix reusing of files > 4GB from base backup
This commit is contained in:
Vitaly Baranov 2022-10-08 00:22:28 +02:00 committed by GitHub
commit 69ebf12dab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 46 additions and 10 deletions

View File

@ -43,8 +43,8 @@ namespace ErrorCodes
namespace
{
const UInt64 INITIAL_BACKUP_VERSION = 1;
const UInt64 CURRENT_BACKUP_VERSION = 1;
const int INITIAL_BACKUP_VERSION = 1;
const int CURRENT_BACKUP_VERSION = 1;
using SizeAndChecksum = IBackup::SizeAndChecksum;
using FileInfo = IBackupCoordination::FileInfo;
@ -275,7 +275,7 @@ void BackupImpl::writeBackupMetadata()
assert(!is_internal_backup);
Poco::AutoPtr<Poco::Util::XMLConfiguration> config{new Poco::Util::XMLConfiguration()};
config->setUInt("version", CURRENT_BACKUP_VERSION);
config->setInt("version", CURRENT_BACKUP_VERSION);
config->setString("timestamp", toString(LocalDateTime{timestamp}));
config->setString("uuid", toString(*uuid));
@ -302,7 +302,7 @@ void BackupImpl::writeBackupMetadata()
{
String prefix = index ? "contents.file[" + std::to_string(index) + "]." : "contents.file.";
config->setString(prefix + "name", info.file_name);
config->setUInt(prefix + "size", info.size);
config->setUInt64(prefix + "size", info.size);
if (info.size)
{
config->setString(prefix + "checksum", hexChecksum(info.checksum));
@ -311,7 +311,7 @@ void BackupImpl::writeBackupMetadata()
config->setBool(prefix + "use_base", true);
if (info.base_size != info.size)
{
config->setUInt(prefix + "base_size", info.base_size);
config->setUInt64(prefix + "base_size", info.base_size);
config->setString(prefix + "base_checksum", hexChecksum(info.base_checksum));
}
}
@ -367,7 +367,7 @@ void BackupImpl::readBackupMetadata()
Poco::AutoPtr<Poco::Util::XMLConfiguration> config{new Poco::Util::XMLConfiguration()};
config->load(stream);
version = config->getUInt("version");
version = config->getInt("version");
if ((version < INITIAL_BACKUP_VERSION) || (version > CURRENT_BACKUP_VERSION))
throw Exception(ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", backup_name, version);
@ -389,13 +389,13 @@ void BackupImpl::readBackupMetadata()
String prefix = "contents." + key + ".";
FileInfo info;
info.file_name = config->getString(prefix + "name");
info.size = config->getUInt(prefix + "size");
info.size = config->getUInt64(prefix + "size");
if (info.size)
{
info.checksum = unhexChecksum(config->getString(prefix + "checksum"));
bool use_base = config->getBool(prefix + "use_base", false);
info.base_size = config->getUInt(prefix + "base_size", use_base ? info.size : 0);
info.base_size = config->getUInt64(prefix + "base_size", use_base ? info.size : 0);
if (info.base_size)
use_base = true;
@ -691,7 +691,7 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
std::string from_file_name = "memory buffer";
if (auto fname = entry->getFilePath(); !fname.empty())
from_file_name = "file " + fname;
LOG_TRACE(log, "Writing backup for file {} from file {}", file_name, from_file_name);
LOG_TRACE(log, "Writing backup for file {} from {}", file_name, from_file_name);
auto adjusted_path = removeLeadingSlash(file_name);
if (coordination->getFileInfo(adjusted_path))

View File

@ -122,7 +122,7 @@ private:
size_t num_files = 0;
UInt64 uncompressed_size = 0;
UInt64 compressed_size = 0;
UInt64 version;
int version;
std::optional<BackupInfo> base_backup_info;
std::shared_ptr<const IBackup> base_backup;
std::optional<UUID> base_backup_uuid;

View File

@ -191,6 +191,42 @@ def test_incremental_backup():
assert instance.query("SELECT count(), sum(x) FROM test.table2") == "102\t5081\n"
def test_incremental_backup_overflow():
# Regression test for the metadata-size-overflow fix: file sizes larger than
# 4 GiB must round-trip through the backup metadata (stored via setUInt64 /
# getUInt64 instead of 32-bit setUInt/getUInt), so an incremental backup can
# recognize the file in the base backup and reuse it instead of re-storing it.
backup_name = new_backup_name()
incremental_backup_name = new_backup_name()
instance.query("CREATE DATABASE test")
instance.query(
"CREATE TABLE test.table(y String CODEC(NONE)) ENGINE=MergeTree ORDER BY tuple()"
)
# Create a column of 4GB+10K; CODEC(NONE) presumably keeps the on-disk size
# uncompressed so it actually exceeds 4 GiB — confirm against the size check below.
instance.query(
"INSERT INTO test.table SELECT toString(repeat('A', 1024)) FROM numbers((4*1024*1024)+10)"
)
# Force one part so the whole column lives in a single file pair
instance.query("OPTIMIZE TABLE test.table FINAL")
# ensure that the column's size on disk is indeed greater than 4GB,
# i.e. it would not fit in a 32-bit size field
assert (
int(
instance.query(
"SELECT bytes_on_disk FROM system.parts_columns WHERE active AND database = 'test' AND table = 'table' AND column = 'y'"
)
)
> 4 * 1024 * 1024 * 1024
)
instance.query(f"BACKUP TABLE test.table TO {backup_name}")
instance.query(
f"BACKUP TABLE test.table TO {incremental_backup_name} SETTINGS base_backup = {backup_name}"
)
# And now check that the incremental backup contains no data files — only the
# .backup metadata file — because every file was reused from the base backup
assert os.listdir(os.path.join(get_path_to_backup(incremental_backup_name))) == [
".backup"
]
def test_incremental_backup_after_renaming_table():
backup_name = new_backup_name()
incremental_backup_name = new_backup_name()