mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Improve zip archive utilities.
This commit is contained in:
parent
e72a343994
commit
a1b364626a
@ -23,7 +23,6 @@ public:
|
||||
{
|
||||
UInt64 uncompressed_size;
|
||||
UInt64 compressed_size;
|
||||
int compression_method;
|
||||
bool is_encrypted;
|
||||
};
|
||||
|
||||
|
@ -29,7 +29,7 @@ public:
|
||||
|
||||
/// Sets compression method and level.
|
||||
/// Changing them will affect next file in the archive.
|
||||
virtual void setCompression(int /* compression_method */, int /* compression_level */ = kDefaultCompressionLevel) {} /// NOLINT
|
||||
virtual void setCompression(const String & /* compression_method */, int /* compression_level */ = kDefaultCompressionLevel) {}
|
||||
|
||||
/// Sets password. If the password is not empty it will enable encryption in the archive.
|
||||
virtual void setPassword(const String & /* password */) {}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <IO/Archives/ZipArchiveReader.h>
|
||||
|
||||
#if USE_MINIZIP
|
||||
#include <IO/Archives/ZipArchiveWriter.h>
|
||||
#include <IO/ReadBufferFromFileBase.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <unzip.h>
|
||||
@ -18,6 +19,20 @@ namespace ErrorCodes
|
||||
using RawHandle = unzFile;
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
/// File-local shorthand (anonymous namespace): passes the integer compression method
/// unchanged to ZipArchiveWriter::checkCompressionMethodIsEnabled().
void checkCompressionMethodIsEnabled(int compression_method_)
|
||||
{
|
||||
ZipArchiveWriter::checkCompressionMethodIsEnabled(compression_method_);
|
||||
}
|
||||
|
||||
/// File-local shorthand (anonymous namespace): calls ZipArchiveWriter::checkEncryptionIsEnabled().
void checkEncryptionIsEnabled()
|
||||
{
|
||||
ZipArchiveWriter::checkEncryptionIsEnabled();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Holds a raw handle, calls acquireRawHandle() in the constructor and releaseRawHandle() in the destructor.
|
||||
class ZipArchiveReader::HandleHolder
|
||||
{
|
||||
@ -108,7 +123,7 @@ public:
|
||||
return *file_name;
|
||||
}
|
||||
|
||||
const FileInfo & getFileInfo() const
|
||||
const FileInfoImpl & getFileInfo() const
|
||||
{
|
||||
if (!file_info)
|
||||
retrieveFileInfo();
|
||||
@ -161,7 +176,7 @@ private:
|
||||
std::shared_ptr<ZipArchiveReader> reader;
|
||||
RawHandle raw_handle = nullptr;
|
||||
mutable std::optional<String> file_name;
|
||||
mutable std::optional<FileInfo> file_info;
|
||||
mutable std::optional<FileInfoImpl> file_info;
|
||||
};
|
||||
|
||||
|
||||
@ -174,7 +189,7 @@ public:
|
||||
, handle(std::move(handle_))
|
||||
{
|
||||
const auto & file_info = handle.getFileInfo();
|
||||
checkCompressionMethodIsEnabled(static_cast<CompressionMethod>(file_info.compression_method));
|
||||
checkCompressionMethodIsEnabled(file_info.compression_method);
|
||||
|
||||
const char * password_cstr = nullptr;
|
||||
if (file_info.is_encrypted)
|
||||
@ -227,7 +242,7 @@ public:
|
||||
if (new_pos > static_cast<off_t>(file_info.uncompressed_size))
|
||||
throw Exception("Seek position is out of bound", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
|
||||
|
||||
if (file_info.compression_method == static_cast<int>(CompressionMethod::kStore))
|
||||
if (file_info.compression_method == MZ_COMPRESS_METHOD_STORE)
|
||||
{
|
||||
/// unzSeek64() works only for non-compressed files.
|
||||
checkResult(unzSeek64(raw_handle, off, whence));
|
||||
|
@ -4,7 +4,6 @@
|
||||
|
||||
#if USE_MINIZIP
|
||||
#include <IO/Archives/IArchiveReader.h>
|
||||
#include <IO/Archives/ZipArchiveWriter.h>
|
||||
#include <base/shared_ptr_helper.h>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
@ -20,8 +19,6 @@ class SeekableReadBuffer;
|
||||
class ZipArchiveReader : public shared_ptr_helper<ZipArchiveReader>, public IArchiveReader
|
||||
{
|
||||
public:
|
||||
using CompressionMethod = ZipArchiveWriter::CompressionMethod;
|
||||
|
||||
~ZipArchiveReader() override;
|
||||
|
||||
/// Returns true if there is a specified file in the archive.
|
||||
@ -45,11 +42,6 @@ public:
|
||||
/// Sets password used to decrypt the contents of the files in the archive.
|
||||
void setPassword(const String & password_) override;
|
||||
|
||||
/// Utility functions.
|
||||
static CompressionMethod parseCompressionMethod(const String & str) { return ZipArchiveWriter::parseCompressionMethod(str); }
|
||||
static void checkCompressionMethodIsEnabled(CompressionMethod method) { ZipArchiveWriter::checkCompressionMethodIsEnabled(method); }
|
||||
static void checkEncryptionIsEnabled() { ZipArchiveWriter::checkEncryptionIsEnabled(); }
|
||||
|
||||
private:
|
||||
/// Constructs an archive's reader that will read from a file in the local filesystem.
|
||||
explicit ZipArchiveReader(const String & path_to_archive_);
|
||||
@ -66,6 +58,11 @@ private:
|
||||
|
||||
void init();
|
||||
|
||||
/// FileInfo extended with the compression method stored as a raw integer.
/// The reader passes this value to checkCompressionMethodIsEnabled() and compares it
/// with MZ_COMPRESS_METHOD_STORE when seeking.
struct FileInfoImpl : public FileInfo
|
||||
{
|
||||
int compression_method;
|
||||
};
|
||||
|
||||
HandleHolder acquireHandle();
|
||||
RawHandle acquireRawHandle();
|
||||
void releaseRawHandle(RawHandle handle_);
|
||||
|
@ -80,7 +80,7 @@ public:
|
||||
{
|
||||
auto compress_method = handle.getWriter()->compression_method;
|
||||
auto compress_level = handle.getWriter()->compression_level;
|
||||
checkCompressionMethodIsEnabled(static_cast<CompressionMethod>(compress_method));
|
||||
checkCompressionMethodIsEnabled(compress_method);
|
||||
|
||||
const char * password_cstr = nullptr;
|
||||
const String & password_str = handle.getWriter()->password;
|
||||
@ -238,7 +238,7 @@ ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_)
|
||||
}
|
||||
|
||||
ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_, std::unique_ptr<WriteBuffer> archive_write_buffer_)
|
||||
: path_to_archive(path_to_archive_)
|
||||
: path_to_archive(path_to_archive_), compression_method(MZ_COMPRESS_METHOD_DEFLATE)
|
||||
{
|
||||
if (archive_write_buffer_)
|
||||
handle = StreamFromWriteBuffer::open(std::move(archive_write_buffer_));
|
||||
@ -246,6 +246,7 @@ ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_, std::unique_
|
||||
handle = zipOpen64(path_to_archive.c_str(), /* append= */ false);
|
||||
if (!handle)
|
||||
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't create zip archive {}", quoteString(path_to_archive));
|
||||
|
||||
}
|
||||
|
||||
ZipArchiveWriter::~ZipArchiveWriter()
|
||||
@ -274,10 +275,10 @@ bool ZipArchiveWriter::isWritingFile() const
|
||||
return !handle;
|
||||
}
|
||||
|
||||
void ZipArchiveWriter::setCompression(int compression_method_, int compression_level_)
|
||||
void ZipArchiveWriter::setCompression(const String & compression_method_, int compression_level_)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
compression_method = compression_method_;
|
||||
compression_method = compressionMethodToInt(compression_method_);
|
||||
compression_level = compression_level_;
|
||||
}
|
||||
|
||||
@ -287,48 +288,62 @@ void ZipArchiveWriter::setPassword(const String & password_)
|
||||
password = password_;
|
||||
}
|
||||
|
||||
ZipArchiveWriter::CompressionMethod ZipArchiveWriter::parseCompressionMethod(const String & str)
|
||||
int ZipArchiveWriter::compressionMethodToInt(const String & compression_method_)
|
||||
{
|
||||
if (str.empty())
|
||||
return CompressionMethod::kDeflate; /// Default compression method is DEFLATE.
|
||||
else if (boost::iequals(str, "store"))
|
||||
return CompressionMethod::kStore;
|
||||
else if (boost::iequals(str, "deflate"))
|
||||
return CompressionMethod::kDeflate;
|
||||
else if (boost::iequals(str, "bzip2"))
|
||||
return CompressionMethod::kBzip2;
|
||||
else if (boost::iequals(str, "lzma"))
|
||||
return CompressionMethod::kLzma;
|
||||
else if (boost::iequals(str, "zstd"))
|
||||
return CompressionMethod::kZstd;
|
||||
else if (boost::iequals(str, "xz"))
|
||||
return CompressionMethod::kXz;
|
||||
if (compression_method_.empty())
|
||||
return MZ_COMPRESS_METHOD_DEFLATE; /// By default the compression method is "deflate".
|
||||
else if (compression_method_ == kStore)
|
||||
return MZ_COMPRESS_METHOD_STORE;
|
||||
else if (compression_method_ == kDeflate)
|
||||
return MZ_COMPRESS_METHOD_DEFLATE;
|
||||
else if (compression_method_ == kBzip2)
|
||||
return MZ_COMPRESS_METHOD_BZIP2;
|
||||
else if (compression_method_ == kLzma)
|
||||
return MZ_COMPRESS_METHOD_LZMA;
|
||||
else if (compression_method_ == kZstd)
|
||||
return MZ_COMPRESS_METHOD_ZSTD;
|
||||
else if (compression_method_ == kXz)
|
||||
return MZ_COMPRESS_METHOD_XZ;
|
||||
else
|
||||
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", str);
|
||||
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", compression_method_);
|
||||
}
|
||||
|
||||
/// Maps a numeric compression method (one of the MZ_COMPRESS_METHOD_* constants) to its
/// string name: kStore, kDeflate, kBzip2, kLzma, kZstd or kXz.
/// Throws an Exception with ErrorCodes::CANNOT_PACK_ARCHIVE for any other value.
String ZipArchiveWriter::intToCompressionMethod(int compression_method_)
|
||||
{
|
||||
switch (compression_method_)
|
||||
{
|
||||
case MZ_COMPRESS_METHOD_STORE: return kStore;
|
||||
case MZ_COMPRESS_METHOD_DEFLATE: return kDeflate;
|
||||
case MZ_COMPRESS_METHOD_BZIP2: return kBzip2;
|
||||
case MZ_COMPRESS_METHOD_LZMA: return kLzma;
|
||||
case MZ_COMPRESS_METHOD_ZSTD: return kZstd;
|
||||
case MZ_COMPRESS_METHOD_XZ: return kXz;
|
||||
}
|
||||
/// Reached only when none of the cases above matched.
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", compression_method_);
|
||||
}
|
||||
|
||||
/// Checks that a passed compression method can be used.
|
||||
void ZipArchiveWriter::checkCompressionMethodIsEnabled(CompressionMethod method)
|
||||
void ZipArchiveWriter::checkCompressionMethodIsEnabled(int compression_method_)
|
||||
{
|
||||
switch (method)
|
||||
switch (compression_method_)
|
||||
{
|
||||
case CompressionMethod::kStore: [[fallthrough]];
|
||||
case CompressionMethod::kDeflate:
|
||||
case CompressionMethod::kLzma:
|
||||
case CompressionMethod::kXz:
|
||||
case CompressionMethod::kZstd:
|
||||
case MZ_COMPRESS_METHOD_STORE: [[fallthrough]];
|
||||
case MZ_COMPRESS_METHOD_DEFLATE:
|
||||
case MZ_COMPRESS_METHOD_LZMA:
|
||||
case MZ_COMPRESS_METHOD_ZSTD:
|
||||
case MZ_COMPRESS_METHOD_XZ:
|
||||
return;
|
||||
|
||||
case CompressionMethod::kBzip2:
|
||||
case MZ_COMPRESS_METHOD_BZIP2:
|
||||
{
|
||||
#if USE_BZIP2
|
||||
return;
|
||||
#else
|
||||
throw Exception("BZIP2 compression method is disabled", ErrorCodes::SUPPORT_IS_DISABLED);
|
||||
throw Exception("bzip2 compression method is disabled", ErrorCodes::SUPPORT_IS_DISABLED);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", static_cast<int>(method));
|
||||
throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", compression_method_);
|
||||
}
|
||||
|
||||
/// Checks that encryption is enabled.
|
||||
|
@ -31,16 +31,12 @@ public:
|
||||
bool isWritingFile() const override;
|
||||
|
||||
/// Supported compression methods.
|
||||
enum class CompressionMethod
|
||||
{
|
||||
/// See mz.h
|
||||
kStore = 0,
|
||||
kDeflate = 8,
|
||||
kBzip2 = 12,
|
||||
kLzma = 14,
|
||||
kZstd = 93,
|
||||
kXz = 95,
|
||||
};
|
||||
static constexpr const char kStore[] = "store";
|
||||
static constexpr const char kDeflate[] = "deflate";
|
||||
static constexpr const char kBzip2[] = "bzip2";
|
||||
static constexpr const char kLzma[] = "lzma";
|
||||
static constexpr const char kZstd[] = "zstd";
|
||||
static constexpr const char kXz[] = "xz";
|
||||
|
||||
/// Some compression levels.
|
||||
enum class CompressionLevels
|
||||
@ -53,7 +49,7 @@ public:
|
||||
|
||||
/// Sets compression method and level.
|
||||
/// Changing them will affect next file in the archive.
|
||||
void setCompression(int compression_method_, int compression_level_) override;
|
||||
void setCompression(const String & compression_method_, int compression_level_) override;
|
||||
|
||||
/// Sets password. Only contents of the files are encrypted,
|
||||
/// names of files are not encrypted.
|
||||
@ -61,8 +57,9 @@ public:
|
||||
void setPassword(const String & password_) override;
|
||||
|
||||
/// Utility functions.
|
||||
static CompressionMethod parseCompressionMethod(const String & str);
|
||||
static void checkCompressionMethodIsEnabled(CompressionMethod method);
|
||||
static int compressionMethodToInt(const String & compression_method_);
|
||||
static String intToCompressionMethod(int compression_method_);
|
||||
static void checkCompressionMethodIsEnabled(int compression_method_);
|
||||
static void checkEncryptionIsEnabled();
|
||||
|
||||
private:
|
||||
@ -85,7 +82,7 @@ private:
|
||||
[[noreturn]] void showError(const String & message) const;
|
||||
|
||||
const String path_to_archive;
|
||||
int compression_method = static_cast<int>(CompressionMethod::kDeflate);
|
||||
int compression_method; /// By default the compression method is "deflate".
|
||||
int compression_level = kDefaultCompressionLevel;
|
||||
String password;
|
||||
RawHandle handle = nullptr;
|
||||
|
12
src/IO/Archives/hasRegisteredArchiveFileExtension.cpp
Normal file
12
src/IO/Archives/hasRegisteredArchiveFileExtension.cpp
Normal file
@ -0,0 +1,12 @@
|
||||
#include <IO/Archives/hasRegisteredArchiveFileExtension.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Returns true if `path` ends with ".zip" or ".zipx".
/// NOTE(review): assuming String is std::string, ends_with() is an exact suffix match,
/// so upper-case extensions such as ".ZIP" would not be recognized — confirm this is intended.
bool hasRegisteredArchiveFileExtension(const String & path)
|
||||
{
|
||||
return path.ends_with(".zip") || path.ends_with(".zipx");
|
||||
}
|
||||
|
||||
}
|
12
src/IO/Archives/hasRegisteredArchiveFileExtension.h
Normal file
12
src/IO/Archives/hasRegisteredArchiveFileExtension.h
Normal file
@ -0,0 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/types.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Returns true if a specified path has one of the registered file extensions for an archive.
|
||||
bool hasRegisteredArchiveFileExtension(const String & path);
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user