Improve zip archive utilities.

This commit is contained in:
Vitaly Baranov 2022-02-22 07:59:41 +03:00 committed by Vitaly Baranov
parent e72a343994
commit a1b364626a
8 changed files with 105 additions and 58 deletions

View File

@ -23,7 +23,6 @@ public:
{ {
UInt64 uncompressed_size; UInt64 uncompressed_size;
UInt64 compressed_size; UInt64 compressed_size;
int compression_method;
bool is_encrypted; bool is_encrypted;
}; };

View File

@ -29,7 +29,7 @@ public:
/// Sets compression method and level. /// Sets compression method and level.
/// Changing them will affect next file in the archive. /// Changing them will affect next file in the archive.
virtual void setCompression(int /* compression_method */, int /* compression_level */ = kDefaultCompressionLevel) {} /// NOLINT virtual void setCompression(const String & /* compression_method */, int /* compression_level */ = kDefaultCompressionLevel) {}
/// Sets password. If the password is not empty it will enable encryption in the archive. /// Sets password. If the password is not empty it will enable encryption in the archive.
virtual void setPassword(const String & /* password */) {} virtual void setPassword(const String & /* password */) {}

View File

@ -1,6 +1,7 @@
#include <IO/Archives/ZipArchiveReader.h> #include <IO/Archives/ZipArchiveReader.h>
#if USE_MINIZIP #if USE_MINIZIP
#include <IO/Archives/ZipArchiveWriter.h>
#include <IO/ReadBufferFromFileBase.h> #include <IO/ReadBufferFromFileBase.h>
#include <Common/quoteString.h> #include <Common/quoteString.h>
#include <unzip.h> #include <unzip.h>
@ -18,6 +19,20 @@ namespace ErrorCodes
using RawHandle = unzFile; using RawHandle = unzFile;
/// File-local helpers that let the reader code in this file call the checks unqualified;
/// both simply forward to the corresponding static functions of ZipArchiveWriter.
namespace
{
/// Forwards the integer compression method id to ZipArchiveWriter::checkCompressionMethodIsEnabled().
void checkCompressionMethodIsEnabled(int compression_method_)
{
ZipArchiveWriter::checkCompressionMethodIsEnabled(compression_method_);
}
/// Forwards to ZipArchiveWriter::checkEncryptionIsEnabled().
void checkEncryptionIsEnabled()
{
ZipArchiveWriter::checkEncryptionIsEnabled();
}
}
/// Holds a raw handle, calls acquireRawHandle() in the constructor and releaseRawHandle() in the destructor. /// Holds a raw handle, calls acquireRawHandle() in the constructor and releaseRawHandle() in the destructor.
class ZipArchiveReader::HandleHolder class ZipArchiveReader::HandleHolder
{ {
@ -108,7 +123,7 @@ public:
return *file_name; return *file_name;
} }
const FileInfo & getFileInfo() const const FileInfoImpl & getFileInfo() const
{ {
if (!file_info) if (!file_info)
retrieveFileInfo(); retrieveFileInfo();
@ -161,7 +176,7 @@ private:
std::shared_ptr<ZipArchiveReader> reader; std::shared_ptr<ZipArchiveReader> reader;
RawHandle raw_handle = nullptr; RawHandle raw_handle = nullptr;
mutable std::optional<String> file_name; mutable std::optional<String> file_name;
mutable std::optional<FileInfo> file_info; mutable std::optional<FileInfoImpl> file_info;
}; };
@ -174,7 +189,7 @@ public:
, handle(std::move(handle_)) , handle(std::move(handle_))
{ {
const auto & file_info = handle.getFileInfo(); const auto & file_info = handle.getFileInfo();
checkCompressionMethodIsEnabled(static_cast<CompressionMethod>(file_info.compression_method)); checkCompressionMethodIsEnabled(file_info.compression_method);
const char * password_cstr = nullptr; const char * password_cstr = nullptr;
if (file_info.is_encrypted) if (file_info.is_encrypted)
@ -227,7 +242,7 @@ public:
if (new_pos > static_cast<off_t>(file_info.uncompressed_size)) if (new_pos > static_cast<off_t>(file_info.uncompressed_size))
throw Exception("Seek position is out of bound", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); throw Exception("Seek position is out of bound", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
if (file_info.compression_method == static_cast<int>(CompressionMethod::kStore)) if (file_info.compression_method == MZ_COMPRESS_METHOD_STORE)
{ {
/// unzSeek64() works only for non-compressed files. /// unzSeek64() works only for non-compressed files.
checkResult(unzSeek64(raw_handle, off, whence)); checkResult(unzSeek64(raw_handle, off, whence));

View File

@ -4,7 +4,6 @@
#if USE_MINIZIP #if USE_MINIZIP
#include <IO/Archives/IArchiveReader.h> #include <IO/Archives/IArchiveReader.h>
#include <IO/Archives/ZipArchiveWriter.h>
#include <base/shared_ptr_helper.h> #include <base/shared_ptr_helper.h>
#include <mutex> #include <mutex>
#include <vector> #include <vector>
@ -20,8 +19,6 @@ class SeekableReadBuffer;
class ZipArchiveReader : public shared_ptr_helper<ZipArchiveReader>, public IArchiveReader class ZipArchiveReader : public shared_ptr_helper<ZipArchiveReader>, public IArchiveReader
{ {
public: public:
using CompressionMethod = ZipArchiveWriter::CompressionMethod;
~ZipArchiveReader() override; ~ZipArchiveReader() override;
/// Returns true if there is a specified file in the archive. /// Returns true if there is a specified file in the archive.
@ -45,11 +42,6 @@ public:
/// Sets password used to decrypt the contents of the files in the archive. /// Sets password used to decrypt the contents of the files in the archive.
void setPassword(const String & password_) override; void setPassword(const String & password_) override;
/// Utility functions.
static CompressionMethod parseCompressionMethod(const String & str) { return ZipArchiveWriter::parseCompressionMethod(str); }
static void checkCompressionMethodIsEnabled(CompressionMethod method) { ZipArchiveWriter::checkCompressionMethodIsEnabled(method); }
static void checkEncryptionIsEnabled() { ZipArchiveWriter::checkEncryptionIsEnabled(); }
private: private:
/// Constructs an archive's reader that will read from a file in the local filesystem. /// Constructs an archive's reader that will read from a file in the local filesystem.
explicit ZipArchiveReader(const String & path_to_archive_); explicit ZipArchiveReader(const String & path_to_archive_);
@ -66,6 +58,11 @@ private:
void init(); void init();
/// FileInfo extended with the entry's compression method, kept as an integer code
/// (the reader compares it against MZ_COMPRESS_METHOD_* values).
struct FileInfoImpl : FileInfo
{
    int compression_method;
};
HandleHolder acquireHandle(); HandleHolder acquireHandle();
RawHandle acquireRawHandle(); RawHandle acquireRawHandle();
void releaseRawHandle(RawHandle handle_); void releaseRawHandle(RawHandle handle_);

View File

@ -80,7 +80,7 @@ public:
{ {
auto compress_method = handle.getWriter()->compression_method; auto compress_method = handle.getWriter()->compression_method;
auto compress_level = handle.getWriter()->compression_level; auto compress_level = handle.getWriter()->compression_level;
checkCompressionMethodIsEnabled(static_cast<CompressionMethod>(compress_method)); checkCompressionMethodIsEnabled(compress_method);
const char * password_cstr = nullptr; const char * password_cstr = nullptr;
const String & password_str = handle.getWriter()->password; const String & password_str = handle.getWriter()->password;
@ -238,7 +238,7 @@ ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_)
} }
ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_, std::unique_ptr<WriteBuffer> archive_write_buffer_) ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_, std::unique_ptr<WriteBuffer> archive_write_buffer_)
: path_to_archive(path_to_archive_) : path_to_archive(path_to_archive_), compression_method(MZ_COMPRESS_METHOD_DEFLATE)
{ {
if (archive_write_buffer_) if (archive_write_buffer_)
handle = StreamFromWriteBuffer::open(std::move(archive_write_buffer_)); handle = StreamFromWriteBuffer::open(std::move(archive_write_buffer_));
@ -246,6 +246,7 @@ ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_, std::unique_
handle = zipOpen64(path_to_archive.c_str(), /* append= */ false); handle = zipOpen64(path_to_archive.c_str(), /* append= */ false);
if (!handle) if (!handle)
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't create zip archive {}", quoteString(path_to_archive)); throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't create zip archive {}", quoteString(path_to_archive));
} }
ZipArchiveWriter::~ZipArchiveWriter() ZipArchiveWriter::~ZipArchiveWriter()
@ -274,10 +275,10 @@ bool ZipArchiveWriter::isWritingFile() const
return !handle; return !handle;
} }
void ZipArchiveWriter::setCompression(int compression_method_, int compression_level_) void ZipArchiveWriter::setCompression(const String & compression_method_, int compression_level_)
{ {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
compression_method = compression_method_; compression_method = compressionMethodToInt(compression_method_);
compression_level = compression_level_; compression_level = compression_level_;
} }
@ -287,48 +288,62 @@ void ZipArchiveWriter::setPassword(const String & password_)
password = password_; password = password_;
} }
ZipArchiveWriter::CompressionMethod ZipArchiveWriter::parseCompressionMethod(const String & str) int ZipArchiveWriter::compressionMethodToInt(const String & compression_method_)
{ {
if (str.empty()) if (compression_method_.empty())
return CompressionMethod::kDeflate; /// Default compression method is DEFLATE. return MZ_COMPRESS_METHOD_DEFLATE; /// By default the compression method is "deflate".
else if (boost::iequals(str, "store")) else if (compression_method_ == kStore)
return CompressionMethod::kStore; return MZ_COMPRESS_METHOD_STORE;
else if (boost::iequals(str, "deflate")) else if (compression_method_ == kDeflate)
return CompressionMethod::kDeflate; return MZ_COMPRESS_METHOD_DEFLATE;
else if (boost::iequals(str, "bzip2")) else if (compression_method_ == kBzip2)
return CompressionMethod::kBzip2; return MZ_COMPRESS_METHOD_BZIP2;
else if (boost::iequals(str, "lzma")) else if (compression_method_ == kLzma)
return CompressionMethod::kLzma; return MZ_COMPRESS_METHOD_LZMA;
else if (boost::iequals(str, "zstd")) else if (compression_method_ == kZstd)
return CompressionMethod::kZstd; return MZ_COMPRESS_METHOD_ZSTD;
else if (boost::iequals(str, "xz")) else if (compression_method_ == kXz)
return CompressionMethod::kXz; return MZ_COMPRESS_METHOD_XZ;
else else
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", str); throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", compression_method_);
}
/// Converts an integer compression method id (one of the MZ_COMPRESS_METHOD_* values)
/// to its textual name (kStore, kDeflate, kBzip2, kLzma, kZstd or kXz).
/// Throws an Exception with ErrorCodes::CANNOT_PACK_ARCHIVE if the id matches none of the cases.
String ZipArchiveWriter::intToCompressionMethod(int compression_method_)
{
switch (compression_method_)
{
case MZ_COMPRESS_METHOD_STORE: return kStore;
case MZ_COMPRESS_METHOD_DEFLATE: return kDeflate;
case MZ_COMPRESS_METHOD_BZIP2: return kBzip2;
case MZ_COMPRESS_METHOD_LZMA: return kLzma;
case MZ_COMPRESS_METHOD_ZSTD: return kZstd;
case MZ_COMPRESS_METHOD_XZ: return kXz;
}
/// Reached only when no case above returned.
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", compression_method_);
} }
/// Checks that a passed compression method can be used. /// Checks that a passed compression method can be used.
void ZipArchiveWriter::checkCompressionMethodIsEnabled(CompressionMethod method) void ZipArchiveWriter::checkCompressionMethodIsEnabled(int compression_method_)
{ {
switch (method) switch (compression_method_)
{ {
case CompressionMethod::kStore: [[fallthrough]]; case MZ_COMPRESS_METHOD_STORE: [[fallthrough]];
case CompressionMethod::kDeflate: case MZ_COMPRESS_METHOD_DEFLATE:
case CompressionMethod::kLzma: case MZ_COMPRESS_METHOD_LZMA:
case CompressionMethod::kXz: case MZ_COMPRESS_METHOD_ZSTD:
case CompressionMethod::kZstd: case MZ_COMPRESS_METHOD_XZ:
return; return;
case CompressionMethod::kBzip2: case MZ_COMPRESS_METHOD_BZIP2:
{ {
#if USE_BZIP2 #if USE_BZIP2
return; return;
#else #else
throw Exception("BZIP2 compression method is disabled", ErrorCodes::SUPPORT_IS_DISABLED); throw Exception("bzip2 compression method is disabled", ErrorCodes::SUPPORT_IS_DISABLED);
#endif #endif
} }
} }
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", static_cast<int>(method)); throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", compression_method_);
} }
/// Checks that encryption is enabled. /// Checks that encryption is enabled.

View File

@ -31,16 +31,12 @@ public:
bool isWritingFile() const override; bool isWritingFile() const override;
/// Supported compression methods. /// Supported compression methods.
enum class CompressionMethod static constexpr const char kStore[] = "store";
{ static constexpr const char kDeflate[] = "deflate";
/// See mz.h static constexpr const char kBzip2[] = "bzip2";
kStore = 0, static constexpr const char kLzma[] = "lzma";
kDeflate = 8, static constexpr const char kZstd[] = "zstd";
kBzip2 = 12, static constexpr const char kXz[] = "xz";
kLzma = 14,
kZstd = 93,
kXz = 95,
};
/// Some compression levels. /// Some compression levels.
enum class CompressionLevels enum class CompressionLevels
@ -53,7 +49,7 @@ public:
/// Sets compression method and level. /// Sets compression method and level.
/// Changing them will affect next file in the archive. /// Changing them will affect next file in the archive.
void setCompression(int compression_method_, int compression_level_) override; void setCompression(const String & compression_method_, int compression_level_) override;
/// Sets password. Only contents of the files are encrypted, /// Sets password. Only contents of the files are encrypted,
/// names of files are not encrypted. /// names of files are not encrypted.
@ -61,8 +57,9 @@ public:
void setPassword(const String & password_) override; void setPassword(const String & password_) override;
/// Utility functions. /// Utility functions.
static CompressionMethod parseCompressionMethod(const String & str); static int compressionMethodToInt(const String & compression_method_);
static void checkCompressionMethodIsEnabled(CompressionMethod method); static String intToCompressionMethod(int compression_method_);
static void checkCompressionMethodIsEnabled(int compression_method_);
static void checkEncryptionIsEnabled(); static void checkEncryptionIsEnabled();
private: private:
@ -85,7 +82,7 @@ private:
[[noreturn]] void showError(const String & message) const; [[noreturn]] void showError(const String & message) const;
const String path_to_archive; const String path_to_archive;
int compression_method = static_cast<int>(CompressionMethod::kDeflate); int compression_method; /// By default the compression method is "deflate".
int compression_level = kDefaultCompressionLevel; int compression_level = kDefaultCompressionLevel;
String password; String password;
RawHandle handle = nullptr; RawHandle handle = nullptr;

View File

@ -0,0 +1,12 @@
#include <IO/Archives/hasRegisteredArchiveFileExtension.h>
namespace DB
{
/// Tells whether `path` ends with one of the file extensions recognised as an archive
/// (currently ".zip" and ".zipx"). The comparison is an exact, case-sensitive suffix match.
bool hasRegisteredArchiveFileExtension(const String & path)
{
    static constexpr const char * archive_extensions[] = {".zip", ".zipx"};

    for (const char * extension : archive_extensions)
    {
        if (path.ends_with(extension))
            return true;
    }
    return false;
}
}

View File

@ -0,0 +1,12 @@
#pragma once
#include <base/types.h>
namespace DB
{
/// Returns true if a specified path has one of the registered file extensions for an archive.
bool hasRegisteredArchiveFileExtension(const String & path);
}