Improve zip archive utilities.

This commit is contained in:
Vitaly Baranov 2022-02-22 07:59:41 +03:00 committed by Vitaly Baranov
parent e72a343994
commit a1b364626a
8 changed files with 105 additions and 58 deletions

View File

@ -23,7 +23,6 @@ public:
{
UInt64 uncompressed_size;
UInt64 compressed_size;
int compression_method;
bool is_encrypted;
};

View File

@ -29,7 +29,7 @@ public:
/// Sets compression method and level.
/// Changing them will affect next file in the archive.
virtual void setCompression(int /* compression_method */, int /* compression_level */ = kDefaultCompressionLevel) {} /// NOLINT
virtual void setCompression(const String & /* compression_method */, int /* compression_level */ = kDefaultCompressionLevel) {}
/// Sets password. If the password is not empty it will enable encryption in the archive.
virtual void setPassword(const String & /* password */) {}

View File

@ -1,6 +1,7 @@
#include <IO/Archives/ZipArchiveReader.h>
#if USE_MINIZIP
#include <IO/Archives/ZipArchiveWriter.h>
#include <IO/ReadBufferFromFileBase.h>
#include <Common/quoteString.h>
#include <unzip.h>
@ -18,6 +19,20 @@ namespace ErrorCodes
using RawHandle = unzFile;
/// File-local helpers forwarding to the static checks implemented in ZipArchiveWriter,
/// so the reader code in this file can call them without qualification.
namespace
{
/// Forwards to ZipArchiveWriter::checkCompressionMethodIsEnabled() with the numeric compression method.
void checkCompressionMethodIsEnabled(int compression_method_)
{
ZipArchiveWriter::checkCompressionMethodIsEnabled(compression_method_);
}
/// Forwards to ZipArchiveWriter::checkEncryptionIsEnabled().
void checkEncryptionIsEnabled()
{
ZipArchiveWriter::checkEncryptionIsEnabled();
}
}
/// Holds a raw handle, calls acquireRawHandle() in the constructor and releaseRawHandle() in the destructor.
class ZipArchiveReader::HandleHolder
{
@ -108,7 +123,7 @@ public:
return *file_name;
}
const FileInfo & getFileInfo() const
const FileInfoImpl & getFileInfo() const
{
if (!file_info)
retrieveFileInfo();
@ -161,7 +176,7 @@ private:
std::shared_ptr<ZipArchiveReader> reader;
RawHandle raw_handle = nullptr;
mutable std::optional<String> file_name;
mutable std::optional<FileInfo> file_info;
mutable std::optional<FileInfoImpl> file_info;
};
@ -174,7 +189,7 @@ public:
, handle(std::move(handle_))
{
const auto & file_info = handle.getFileInfo();
checkCompressionMethodIsEnabled(static_cast<CompressionMethod>(file_info.compression_method));
checkCompressionMethodIsEnabled(file_info.compression_method);
const char * password_cstr = nullptr;
if (file_info.is_encrypted)
@ -227,7 +242,7 @@ public:
if (new_pos > static_cast<off_t>(file_info.uncompressed_size))
throw Exception("Seek position is out of bound", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
if (file_info.compression_method == static_cast<int>(CompressionMethod::kStore))
if (file_info.compression_method == MZ_COMPRESS_METHOD_STORE)
{
/// unzSeek64() works only for non-compressed files.
checkResult(unzSeek64(raw_handle, off, whence));

View File

@ -4,7 +4,6 @@
#if USE_MINIZIP
#include <IO/Archives/IArchiveReader.h>
#include <IO/Archives/ZipArchiveWriter.h>
#include <base/shared_ptr_helper.h>
#include <mutex>
#include <vector>
@ -20,8 +19,6 @@ class SeekableReadBuffer;
class ZipArchiveReader : public shared_ptr_helper<ZipArchiveReader>, public IArchiveReader
{
public:
using CompressionMethod = ZipArchiveWriter::CompressionMethod;
~ZipArchiveReader() override;
/// Returns true if there is a specified file in the archive.
@ -45,11 +42,6 @@ public:
/// Sets password used to decrypt the contents of the files in the archive.
void setPassword(const String & password_) override;
/// Utility functions.
static CompressionMethod parseCompressionMethod(const String & str) { return ZipArchiveWriter::parseCompressionMethod(str); }
static void checkCompressionMethodIsEnabled(CompressionMethod method) { ZipArchiveWriter::checkCompressionMethodIsEnabled(method); }
static void checkEncryptionIsEnabled() { ZipArchiveWriter::checkEncryptionIsEnabled(); }
private:
/// Constructs an archive's reader that will read from a file in the local filesystem.
explicit ZipArchiveReader(const String & path_to_archive_);
@ -66,6 +58,11 @@ private:
void init();
/// FileInfo extended with the numeric compression method of the archive entry.
struct FileInfoImpl : public FileInfo
{
/// Numeric compression method; the reader passes it to checkCompressionMethodIsEnabled()
/// and compares it against MZ_COMPRESS_METHOD_STORE when seeking.
int compression_method;
};
HandleHolder acquireHandle();
RawHandle acquireRawHandle();
void releaseRawHandle(RawHandle handle_);

View File

@ -80,7 +80,7 @@ public:
{
auto compress_method = handle.getWriter()->compression_method;
auto compress_level = handle.getWriter()->compression_level;
checkCompressionMethodIsEnabled(static_cast<CompressionMethod>(compress_method));
checkCompressionMethodIsEnabled(compress_method);
const char * password_cstr = nullptr;
const String & password_str = handle.getWriter()->password;
@ -238,7 +238,7 @@ ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_)
}
ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_, std::unique_ptr<WriteBuffer> archive_write_buffer_)
: path_to_archive(path_to_archive_)
: path_to_archive(path_to_archive_), compression_method(MZ_COMPRESS_METHOD_DEFLATE)
{
if (archive_write_buffer_)
handle = StreamFromWriteBuffer::open(std::move(archive_write_buffer_));
@ -246,6 +246,7 @@ ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_, std::unique_
handle = zipOpen64(path_to_archive.c_str(), /* append= */ false);
if (!handle)
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't create zip archive {}", quoteString(path_to_archive));
}
ZipArchiveWriter::~ZipArchiveWriter()
@ -274,10 +275,10 @@ bool ZipArchiveWriter::isWritingFile() const
return !handle;
}
void ZipArchiveWriter::setCompression(int compression_method_, int compression_level_)
void ZipArchiveWriter::setCompression(const String & compression_method_, int compression_level_)
{
std::lock_guard lock{mutex};
compression_method = compression_method_;
compression_method = compressionMethodToInt(compression_method_);
compression_level = compression_level_;
}
@ -287,48 +288,62 @@ void ZipArchiveWriter::setPassword(const String & password_)
password = password_;
}
ZipArchiveWriter::CompressionMethod ZipArchiveWriter::parseCompressionMethod(const String & str)
int ZipArchiveWriter::compressionMethodToInt(const String & compression_method_)
{
if (str.empty())
return CompressionMethod::kDeflate; /// Default compression method is DEFLATE.
else if (boost::iequals(str, "store"))
return CompressionMethod::kStore;
else if (boost::iequals(str, "deflate"))
return CompressionMethod::kDeflate;
else if (boost::iequals(str, "bzip2"))
return CompressionMethod::kBzip2;
else if (boost::iequals(str, "lzma"))
return CompressionMethod::kLzma;
else if (boost::iequals(str, "zstd"))
return CompressionMethod::kZstd;
else if (boost::iequals(str, "xz"))
return CompressionMethod::kXz;
if (compression_method_.empty())
return MZ_COMPRESS_METHOD_DEFLATE; /// By default the compression method is "deflate".
else if (compression_method_ == kStore)
return MZ_COMPRESS_METHOD_STORE;
else if (compression_method_ == kDeflate)
return MZ_COMPRESS_METHOD_DEFLATE;
else if (compression_method_ == kBzip2)
return MZ_COMPRESS_METHOD_BZIP2;
else if (compression_method_ == kLzma)
return MZ_COMPRESS_METHOD_LZMA;
else if (compression_method_ == kZstd)
return MZ_COMPRESS_METHOD_ZSTD;
else if (compression_method_ == kXz)
return MZ_COMPRESS_METHOD_XZ;
else
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", str);
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", compression_method_);
}
/// Converts a numeric compression method (one of the MZ_COMPRESS_METHOD_* values)
/// to its textual name (kStore, kDeflate, kBzip2, kLzma, kZstd or kXz).
/// Throws CANNOT_PACK_ARCHIVE if the number doesn't match any of those methods.
String ZipArchiveWriter::intToCompressionMethod(int compression_method_)
{
    if (compression_method_ == MZ_COMPRESS_METHOD_STORE)
        return kStore;
    if (compression_method_ == MZ_COMPRESS_METHOD_DEFLATE)
        return kDeflate;
    if (compression_method_ == MZ_COMPRESS_METHOD_BZIP2)
        return kBzip2;
    if (compression_method_ == MZ_COMPRESS_METHOD_LZMA)
        return kLzma;
    if (compression_method_ == MZ_COMPRESS_METHOD_ZSTD)
        return kZstd;
    if (compression_method_ == MZ_COMPRESS_METHOD_XZ)
        return kXz;

    /// None of the known methods matched.
    throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", compression_method_);
}
/// Checks that a passed compression method can be used.
void ZipArchiveWriter::checkCompressionMethodIsEnabled(CompressionMethod method)
void ZipArchiveWriter::checkCompressionMethodIsEnabled(int compression_method_)
{
switch (method)
switch (compression_method_)
{
case CompressionMethod::kStore: [[fallthrough]];
case CompressionMethod::kDeflate:
case CompressionMethod::kLzma:
case CompressionMethod::kXz:
case CompressionMethod::kZstd:
case MZ_COMPRESS_METHOD_STORE: [[fallthrough]];
case MZ_COMPRESS_METHOD_DEFLATE:
case MZ_COMPRESS_METHOD_LZMA:
case MZ_COMPRESS_METHOD_ZSTD:
case MZ_COMPRESS_METHOD_XZ:
return;
case CompressionMethod::kBzip2:
case MZ_COMPRESS_METHOD_BZIP2:
{
#if USE_BZIP2
return;
#else
throw Exception("BZIP2 compression method is disabled", ErrorCodes::SUPPORT_IS_DISABLED);
throw Exception("bzip2 compression method is disabled", ErrorCodes::SUPPORT_IS_DISABLED);
#endif
}
}
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", static_cast<int>(method));
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", compression_method_);
}
/// Checks that encryption is enabled.

View File

@ -31,16 +31,12 @@ public:
bool isWritingFile() const override;
/// Supported compression methods.
enum class CompressionMethod
{
/// See mz.h
kStore = 0,
kDeflate = 8,
kBzip2 = 12,
kLzma = 14,
kZstd = 93,
kXz = 95,
};
static constexpr const char kStore[] = "store";
static constexpr const char kDeflate[] = "deflate";
static constexpr const char kBzip2[] = "bzip2";
static constexpr const char kLzma[] = "lzma";
static constexpr const char kZstd[] = "zstd";
static constexpr const char kXz[] = "xz";
/// Some compression levels.
enum class CompressionLevels
@ -53,7 +49,7 @@ public:
/// Sets compression method and level.
/// Changing them will affect next file in the archive.
void setCompression(int compression_method_, int compression_level_) override;
void setCompression(const String & compression_method_, int compression_level_) override;
/// Sets password. Only contents of the files are encrypted,
/// names of files are not encrypted.
@ -61,8 +57,9 @@ public:
void setPassword(const String & password_) override;
/// Utility functions.
static CompressionMethod parseCompressionMethod(const String & str);
static void checkCompressionMethodIsEnabled(CompressionMethod method);
static int compressionMethodToInt(const String & compression_method_);
static String intToCompressionMethod(int compression_method_);
static void checkCompressionMethodIsEnabled(int compression_method_);
static void checkEncryptionIsEnabled();
private:
@ -85,7 +82,7 @@ private:
[[noreturn]] void showError(const String & message) const;
const String path_to_archive;
int compression_method = static_cast<int>(CompressionMethod::kDeflate);
int compression_method; /// By default the compression method is "deflate".
int compression_level = kDefaultCompressionLevel;
String password;
RawHandle handle = nullptr;

View File

@ -0,0 +1,12 @@
#include <IO/Archives/hasRegisteredArchiveFileExtension.h>
namespace DB
{
/// Checks whether `path` ends with one of the archive extensions known here: ".zip" or ".zipx".
bool hasRegisteredArchiveFileExtension(const String & path)
{
    if (path.ends_with(".zip"))
        return true;
    return path.ends_with(".zipx");
}
}

View File

@ -0,0 +1,12 @@
#pragma once
#include <base/types.h>
namespace DB
{
/// Returns true if a specified path has one of the registered file extensions for an archive.
bool hasRegisteredArchiveFileExtension(const String & path);
}