add support for .tar.zst and .tar.xz

This commit is contained in:
Joshua Hildred 2024-02-21 15:35:30 -08:00
parent e6134bbb7b
commit ca9a6f809f
11 changed files with 153 additions and 96 deletions

View File

@ -157,7 +157,7 @@ if (TARGET ch_contrib::zlib)
endif()
if (TARGET ch_contrib::zstd)
target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1)
target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1 HAVE_LIBZSTD_COMPRESSOR=1)
target_link_libraries(_libarchive PRIVATE ch_contrib::zstd)
endif()

View File

@ -187,7 +187,7 @@ To change the compression method, the correct file suffix should be appended to
BACKUP TABLE test.table TO Disk('backups', '1.tar.gz')
```
The supported compression file suffixes are `.gz` `.bz2` and `.lzma`.
The supported compression file suffixes are `.tar.gz`, `.tgz`, `.tar.bz2`, `.tar.lzma`, `.tar.zst`, `.tzst` and `.tar.xz`.
### Check the status of backups

View File

@ -25,7 +25,8 @@ void checkResultCodeImpl(int code, const String & filename)
{
if (code == ARCHIVE_OK)
return;
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't pack archive: LibArchive Code = {}, filename={}", code, quoteString(filename));
throw Exception(
ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't pack archive: LibArchive Code = {}, filename={}", code, quoteString(filename));
}
}
@ -168,11 +169,14 @@ void LibArchiveWriter::createArchive()
{
std::lock_guard lock{mutex};
archive = archive_write_new();
setFormatAndSettings(archive);
//this allows use to write directly to a writer buffer rather than an intermediate buffer in LibArchive
//archive_write_set_bytes_per_block(a, 0);
setFormatAndSettings();
if (stream_info)
{
//This allows us to write directly to a write buffer rather than an intermediate buffer in libarchive.
//This has to be set, otherwise zstd breaks due to extra bytes being written at the end of the archive.
archive_write_set_bytes_per_block(archive, 0);
archive_write_open2(archive, stream_info.get(), nullptr, &StreamInfo::memory_write, nullptr, nullptr);
}
else
archive_write_open_filename(archive, path_to_archive.c_str());
}

View File

@ -53,9 +53,9 @@ protected:
using Archive = struct archive *;
using Entry = struct archive_entry *;
/// derived classes must call createArcive. createArchive calls initArchive
/// Derived classes must call createArchive. createArchive calls setFormatAndSettings.
void createArchive();
virtual void setFormatAndSettings(Archive) = 0;
virtual void setFormatAndSettings() = 0;
Archive archive = nullptr;
String path_to_archive;

View File

@ -6,6 +6,7 @@ namespace DB
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_PACK_ARCHIVE;
}
void TarArchiveWriter::setCompression(const String & compression_method_, int compression_level_)
{
@ -16,21 +17,26 @@ void TarArchiveWriter::setCompression(const String & compression_method_, int co
ErrorCodes::NOT_IMPLEMENTED, "Using compression_method and compression_level options are not supported for tar archives");
}
void TarArchiveWriter::setFormatAndSettings(Archive archive_)
void TarArchiveWriter::setFormatAndSettings()
{
archive_write_set_format_pax_restricted(archive_);
archive_write_set_format_pax_restricted(archive);
inferCompressionFromPath();
}
void TarArchiveWriter::inferCompressionFromPath()
{
if (path_to_archive.ends_with(".gz"))
if (path_to_archive.ends_with(".tar.gz") || path_to_archive.ends_with(".tgz"))
archive_write_add_filter_gzip(archive);
else if (path_to_archive.ends_with(".bz2"))
else if (path_to_archive.ends_with(".tar.bz2"))
archive_write_add_filter_bzip2(archive);
else if (path_to_archive.ends_with(".lzma"))
else if (path_to_archive.ends_with(".tar.lzma"))
archive_write_add_filter_lzma(archive);
//else path ends in .tar and we dont do any compression
else if (path_to_archive.ends_with(".tar.zst") || path_to_archive.ends_with(".tzst"))
archive_write_add_filter_zstd(archive);
else if (path_to_archive.ends_with(".tar.xz"))
archive_write_add_filter_xz(archive);
else if (!path_to_archive.ends_with(".tar"))
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression format");
}
}
#endif

View File

@ -19,7 +19,7 @@ public:
}
void setCompression(const String & compression_method_, int compression_level_) override;
void setFormatAndSettings(Archive archive_) override;
void setFormatAndSettings() override;
void inferCompressionFromPath();
};
}

View File

@ -25,7 +25,8 @@ std::shared_ptr<IArchiveWriter>
createArchiveWriter(const String & path_to_archive, [[maybe_unused]] std::unique_ptr<WriteBuffer> archive_write_buffer)
{
using namespace std::literals;
static constexpr std::array tar_extensions{".tar"sv, ".tar.gz"sv, ".tar.bz2"sv, ".tar.lzma"sv};
static constexpr std::array tar_extensions{
".tar"sv, ".tar.gz"sv, ".tgz"sv, ".tar.bz2"sv, ".tar.lzma"sv, ".tar.zst"sv, ".tzst"sv, ".tar.xz"sv};
if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx"))
{
#if USE_MINIZIP

View File

@ -7,7 +7,8 @@ namespace DB
bool hasRegisteredArchiveFileExtension(const String & path)
{
return path.ends_with(".zip") || path.ends_with(".zipx") || path.ends_with(".tar") || path.ends_with(".tar.gz")
|| path.ends_with(".tar.bz2") || path.ends_with(".tar.lzma");
|| path.ends_with(".tar.bz2") || path.ends_with(".tar.lzma") || path.ends_with(".tar.zst") || path.ends_with(".tzst")
|| path.ends_with(".tgz") || path.ends_with(".tar.xz");
}
}

View File

@ -1,30 +1,29 @@
#include <gtest/gtest.h>
#include "config.h"
#include <filesystem>
#include <format>
#include <IO/Archives/ArchiveUtils.h>
#include <IO/Archives/IArchiveReader.h>
#include <IO/Archives/IArchiveWriter.h>
#include <IO/Archives/createArchiveReader.h>
#include <IO/Archives/createArchiveWriter.h>
#include <IO/ReadBufferFromFileBase.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromFileBase.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <Common/Exception.h>
#include <Poco/TemporaryFile.h>
#include <filesystem>
#include <format>
#include <Common/Exception.h>
namespace DB::ErrorCodes
{
extern const int CANNOT_UNPACK_ARCHIVE;
extern const int LOGICAL_ERROR;
extern const int CANNOT_UNPACK_ARCHIVE;
extern const int LOGICAL_ERROR;
}
namespace fs = std::filesystem;
@ -53,7 +52,8 @@ bool createArchiveWithFiles(const std::string & archivename, const std::map<std:
archive_write_open_filename(a, archivename.c_str());
for (const auto & [filename, content] : files) {
for (const auto & [filename, content] : files)
{
entry = archive_entry_new();
archive_entry_set_pathname(entry, filename.c_str());
archive_entry_set_size(entry, content.size());
@ -63,12 +63,11 @@ bool createArchiveWithFiles(const std::string & archivename, const std::map<std:
archive_write_data(a, content.c_str(), content.size());
archive_entry_free(entry);
}
archive_write_close(a);
archive_write_free(a);
return true;
}
class ArchiveReaderAndWriterTest : public ::testing::TestWithParam<const char *>
@ -118,11 +117,13 @@ TEST_P(ArchiveReaderAndWriterTest, EmptyArchive)
EXPECT_FALSE(reader->fileExists("nofile.txt"));
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' was not found in archive",
[&]{ reader->getFileInfo("nofile.txt"); });
expectException(
ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' was not found in archive", [&] { reader->getFileInfo("nofile.txt"); });
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' was not found in archive",
[&]{ reader->readFile("nofile.txt", /*throw_on_not_found=*/true); });
expectException(
ErrorCodes::CANNOT_UNPACK_ARCHIVE,
"File 'nofile.txt' was not found in archive",
[&] { reader->readFile("nofile.txt", /*throw_on_not_found=*/true); });
EXPECT_EQ(reader->firstFile(), nullptr);
}
@ -186,11 +187,9 @@ TEST_P(ArchiveReaderAndWriterTest, SingleFileInArchive)
auto enumerator = reader->firstFile();
ASSERT_NE(enumerator, nullptr);
EXPECT_FALSE(enumerator->nextFile());
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file",
[&]{ enumerator->getFileName(); });
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file", [&] { enumerator->getFileName(); });
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file",
[&] { reader->readFile(std::move(enumerator)); });
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file", [&] { reader->readFile(std::move(enumerator)); });
}
}
@ -280,7 +279,7 @@ TEST_P(ArchiveReaderAndWriterTest, TwoFilesInArchive)
enumerator = reader->nextFile(std::move(in));
EXPECT_EQ(enumerator, nullptr);
}
// Get all files one last time
files = reader->getAllFiles();
EXPECT_EQ(files.size(), 2);
@ -313,7 +312,8 @@ TEST_P(ArchiveReaderAndWriterTest, InMemory)
ASSERT_FALSE(fs::exists(getPathToArchive()));
/// Read the archive.
auto read_archive_func = [&]() -> std::unique_ptr<SeekableReadBuffer> { return std::make_unique<ReadBufferFromString>(archive_in_memory); };
auto read_archive_func
= [&]() -> std::unique_ptr<SeekableReadBuffer> { return std::make_unique<ReadBufferFromString>(archive_in_memory); };
auto reader = createArchiveReader(getPathToArchive(), read_archive_func, archive_in_memory.size());
ASSERT_TRUE(reader->fileExists("a.txt"));
@ -355,15 +355,13 @@ TEST_P(ArchiveReaderAndWriterTest, ManyFilesInMemory)
{
auto writer = createArchiveWriter(getPathToArchive(), std::make_unique<WriteBufferFromString>(archive_in_memory));
{
for(int i = 0; i < files; i++)
for (int i = 0; i < files; i++)
{
auto filename = std::format("{}.txt", i);
auto contents = std::format("The contents of {}.txt", i);
auto out = writer->writeFile(filename, times * contents.size());
for(int j = 0; j < times; j++)
{
for (int j = 0; j < times; j++)
writeString(contents, *out);
}
out->finalize();
}
}
@ -374,10 +372,11 @@ TEST_P(ArchiveReaderAndWriterTest, ManyFilesInMemory)
ASSERT_FALSE(fs::exists(getPathToArchive()));
/// Read the archive.
auto read_archive_func = [&]() -> std::unique_ptr<SeekableReadBuffer> { return std::make_unique<ReadBufferFromString>(archive_in_memory); };
auto read_archive_func
= [&]() -> std::unique_ptr<SeekableReadBuffer> { return std::make_unique<ReadBufferFromString>(archive_in_memory); };
auto reader = createArchiveReader(getPathToArchive(), read_archive_func, archive_in_memory.size());
for(int i = 0; i < files; i++)
for (int i = 0; i < files; i++)
{
auto filename = std::format("{}.txt", i);
auto contents = std::format("The contents of {}.txt", i);
@ -386,28 +385,29 @@ TEST_P(ArchiveReaderAndWriterTest, ManyFilesInMemory)
{
auto in = reader->readFile(filename, /*throw_on_not_found=*/true);
for(int j = 0; j < times; j++)
{
for (int j = 0; j < times; j++)
ASSERT_TRUE(checkString(String(contents), *in));
}
}
}
}
TEST_P(ArchiveReaderAndWriterTest, Password)
{
{
auto writer = createArchiveWriter(getPathToArchive());
//don't support passwords for tar archives
if(getPathToArchive().ends_with(".tar") || getPathToArchive().ends_with(".tar.gz") || getPathToArchive().ends_with(".tar.bz2") || getPathToArchive().ends_with(".tar.lzma"))
if (getPathToArchive().ends_with(".tar") || getPathToArchive().ends_with(".tar.gz") || getPathToArchive().ends_with(".tar.bz2")
|| getPathToArchive().ends_with(".tar.lzma") || getPathToArchive().ends_with(".tar.zst") || getPathToArchive().ends_with(".tar.xz"))
{
expectException(ErrorCodes::NOT_IMPLEMENTED, "Setting a password is not currently supported for libarchive",
[&]{ writer->setPassword("a.txt"); });
expectException(
ErrorCodes::NOT_IMPLEMENTED,
"Setting a password is not currently supported for libarchive",
[&] { writer->setPassword("a.txt"); });
writer->finalize();
}
else
{
/// Make an archive.
std::string_view contents = "The contents of a.txt";
/// Make an archive.
std::string_view contents = "The contents of a.txt";
{
writer->setPassword("Qwe123");
{
@ -422,14 +422,14 @@ TEST_P(ArchiveReaderAndWriterTest, Password)
auto reader = createArchiveReader(getPathToArchive());
/// Try to read without a password.
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Password is required",
[&]{ reader->readFile("a.txt", /*throw_on_not_found=*/true); });
expectException(
ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Password is required", [&] { reader->readFile("a.txt", /*throw_on_not_found=*/true); });
{
/// Try to read with a wrong password.
reader->setPassword("123Qwe");
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Wrong password",
[&]{ reader->readFile("a.txt", /*throw_on_not_found=*/true); });
expectException(
ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Wrong password", [&] { reader->readFile("a.txt", /*throw_on_not_found=*/true); });
}
{
@ -446,8 +446,7 @@ TEST_P(ArchiveReaderAndWriterTest, Password)
TEST_P(ArchiveReaderAndWriterTest, ArchiveNotExist)
{
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open",
[&]{ createArchiveReader(getPathToArchive()); });
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open", [&] { createArchiveReader(getPathToArchive()); });
}
@ -459,15 +458,13 @@ TEST_P(ArchiveReaderAndWriterTest, ManyFilesOnDisk)
{
auto writer = createArchiveWriter(getPathToArchive());
{
for(int i = 0; i < files; i++)
for (int i = 0; i < files; i++)
{
auto filename = std::format("{}.txt", i);
auto contents = std::format("The contents of {}.txt", i);
auto out = writer->writeFile(filename, times * contents.size());
for(int j = 0; j < times; j++)
{
for (int j = 0; j < times; j++)
writeString(contents, *out);
}
out->finalize();
}
}
@ -480,7 +477,7 @@ TEST_P(ArchiveReaderAndWriterTest, ManyFilesOnDisk)
/// Read the archive.
auto reader = createArchiveReader(getPathToArchive());
for(int i = 0; i < files; i++)
for (int i = 0; i < files; i++)
{
auto filename = std::format("{}.txt", i);
auto contents = std::format("The contents of {}.txt", i);
@ -489,10 +486,8 @@ TEST_P(ArchiveReaderAndWriterTest, ManyFilesOnDisk)
{
auto in = reader->readFile(filename, /*throw_on_not_found=*/true);
for(int j = 0; j < times; j++)
{
for (int j = 0; j < times; j++)
ASSERT_TRUE(checkString(String(contents), *in));
}
}
}
}
@ -506,10 +501,8 @@ TEST_P(ArchiveReaderAndWriterTest, LargeFile)
auto writer = createArchiveWriter(getPathToArchive());
{
auto out = writer->writeFile("a.txt", times * contents.size());
for(int i = 0; i < times; i++)
{
for (int i = 0; i < times; i++)
writeString(contents, *out);
}
out->finalize();
}
writer->finalize();
@ -526,10 +519,8 @@ TEST_P(ArchiveReaderAndWriterTest, LargeFile)
{
auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
for(int i = 0; i < times; i++)
{
for (int i = 0; i < times; i++)
ASSERT_TRUE(checkString(String(contents), *in));
}
}
{
@ -543,7 +534,8 @@ TEST_P(ArchiveReaderAndWriterTest, LargeFile)
}
}
TEST(TarArchiveReaderTest, FileExists) {
TEST(TarArchiveReaderTest, FileExists)
{
String archive_path = "archive.tar";
String filename = "file.txt";
String contents = "test";
@ -554,7 +546,8 @@ TEST(TarArchiveReaderTest, FileExists) {
fs::remove(archive_path);
}
TEST(TarArchiveReaderTest, ReadFile) {
TEST(TarArchiveReaderTest, ReadFile)
{
String archive_path = "archive.tar";
String filename = "file.txt";
String contents = "test";
@ -568,7 +561,8 @@ TEST(TarArchiveReaderTest, ReadFile) {
fs::remove(archive_path);
}
TEST(TarArchiveReaderTest, ReadTwoFiles) {
TEST(TarArchiveReaderTest, ReadTwoFiles)
{
String archive_path = "archive.tar";
String file1 = "file1.txt";
String contents1 = "test1";
@ -584,14 +578,15 @@ TEST(TarArchiveReaderTest, ReadTwoFiles) {
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents1);
in = reader->readFile(file2, /*throw_on_not_found=*/true);
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents2);
fs::remove(archive_path);
}
TEST(TarArchiveReaderTest, CheckFileInfo) {
TEST(TarArchiveReaderTest, CheckFileInfo)
{
String archive_path = "archive.tar";
String filename = "file.txt";
String contents = "test";
@ -604,7 +599,8 @@ TEST(TarArchiveReaderTest, CheckFileInfo) {
fs::remove(archive_path);
}
TEST(SevenZipArchiveReaderTest, FileExists) {
TEST(SevenZipArchiveReaderTest, FileExists)
{
String archive_path = "archive.7z";
String filename = "file.txt";
String contents = "test";
@ -615,7 +611,8 @@ TEST(SevenZipArchiveReaderTest, FileExists) {
fs::remove(archive_path);
}
TEST(SevenZipArchiveReaderTest, ReadFile) {
TEST(SevenZipArchiveReaderTest, ReadFile)
{
String archive_path = "archive.7z";
String filename = "file.txt";
String contents = "test";
@ -629,7 +626,8 @@ TEST(SevenZipArchiveReaderTest, ReadFile) {
fs::remove(archive_path);
}
TEST(SevenZipArchiveReaderTest, CheckFileInfo) {
TEST(SevenZipArchiveReaderTest, CheckFileInfo)
{
String archive_path = "archive.7z";
String filename = "file.txt";
String contents = "test";
@ -642,7 +640,8 @@ TEST(SevenZipArchiveReaderTest, CheckFileInfo) {
fs::remove(archive_path);
}
TEST(SevenZipArchiveReaderTest, ReadTwoFiles) {
TEST(SevenZipArchiveReaderTest, ReadTwoFiles)
{
String archive_path = "archive.7z";
String file1 = "file1.txt";
String contents1 = "test1";
@ -658,28 +657,28 @@ TEST(SevenZipArchiveReaderTest, ReadTwoFiles) {
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents1);
in = reader->readFile(file2, /*throw_on_not_found=*/true);
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents2);
fs::remove(archive_path);
}
namespace
{
const char * supported_archive_file_exts[] =
{
#if USE_MINIZIP
".zip",
#endif
#if USE_LIBARCHIVE
".tar",
".tar.gz",
".tar.bz2",
".tar.lzma",
#endif
};
const char * supported_archive_file_exts[] = {
#if USE_MINIZIP
".zip",
#endif
#if USE_LIBARCHIVE
".tar",
".tar.gz",
".tar.bz2",
".tar.lzma",
".tar.zst",
".tar.xz",
#endif
};
}
INSTANTIATE_TEST_SUITE_P(All, ArchiveReaderAndWriterTest, ::testing::ValuesIn(supported_archive_file_exts));

View File

@ -655,6 +655,38 @@ def test_tar_lzma_archive():
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
def test_tar_zst_archive():
backup_name = f"Disk('backups', 'archive.tar.zst')"
create_and_fill_table()
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
instance.query(f"BACKUP TABLE test.table TO {backup_name}")
assert os.path.isfile(get_path_to_backup(backup_name))
instance.query("DROP TABLE test.table")
assert instance.query("EXISTS test.table") == "0\n"
instance.query(f"RESTORE TABLE test.table FROM {backup_name}")
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
def test_tar_xz_archive():
backup_name = f"Disk('backups', 'archive.tar.xz')"
create_and_fill_table()
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
instance.query(f"BACKUP TABLE test.table TO {backup_name}")
assert os.path.isfile(get_path_to_backup(backup_name))
instance.query("DROP TABLE test.table")
assert instance.query("EXISTS test.table") == "0\n"
instance.query(f"RESTORE TABLE test.table FROM {backup_name}")
assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n"
def test_tar_archive_with_password():
backup_name = f"Disk('backups', 'archive_with_password.tar')"
create_and_fill_table()

View File

@ -482,6 +482,20 @@ def test_backup_to_tar_lzma():
check_backup_and_restore(storage_policy, backup_destination)
def test_backup_to_tar_zst():
storage_policy = "default"
backup_name = new_backup_name()
backup_destination = f"S3('http://minio1:9001/root/data/backups/{backup_name}.tar.zst', 'minio', 'minio123')"
check_backup_and_restore(storage_policy, backup_destination)
def test_backup_to_tar_xz():
storage_policy = "default"
backup_name = new_backup_name()
backup_destination = f"S3('http://minio1:9001/root/data/backups/{backup_name}.tar.xz', 'minio', 'minio123')"
check_backup_and_restore(storage_policy, backup_destination)
def test_user_specific_auth(start_cluster):
def create_user(user):
node.query(f"CREATE USER {user}")