ClickHouse/src/IO/Archives/LibArchiveReader.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

211 lines
6.5 KiB
C++
Raw Normal View History

2023-07-31 08:50:09 +00:00
#include <IO/Archives/LibArchiveReader.h>
#include <IO/ReadBufferFromFileBase.h>
#include <Common/quoteString.h>
2023-07-28 11:55:23 +00:00
#include <IO/Archives/ArchiveUtils.h>
2023-07-31 08:50:09 +00:00
2023-05-29 20:08:18 +00:00
namespace DB
{
2023-07-28 13:00:35 +00:00
#if USE_LIBARCHIVE
2023-05-29 20:08:18 +00:00
/// Error codes referenced in this file. They are only declared here (`extern`);
/// the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int CANNOT_UNPACK_ARCHIVE;
    /// Thrown by ReadBufferFromLibArchive::nextImpl when archive_read_data fails.
    extern const int CANNOT_READ_ALL_DATA;
    extern const int LOGICAL_ERROR;
    extern const int SEEK_POSITION_OUT_OF_BOUND;
    extern const int NOT_IMPLEMENTED;
}
2023-07-31 08:50:09 +00:00
template <typename ArchiveInfo>
class LibArchiveReader<ArchiveInfo>::Handle
2023-05-29 20:08:18 +00:00
{
public:
2023-07-28 11:55:23 +00:00
explicit Handle(const String & path_to_archive_) : path_to_archive(path_to_archive_)
2023-05-29 20:08:18 +00:00
{
archive = archive_read_new();
archive_read_support_filter_all(archive);
archive_read_support_format_all(archive);
2023-05-29 20:08:18 +00:00
if (archive_read_open_filename(archive, path_to_archive.c_str(), 10240) != ARCHIVE_OK)
{
2023-07-31 08:50:09 +00:00
throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open {} archive: {}", ArchiveInfo::name, quoteString(path_to_archive));
}
entry = archive_entry_new();
}
2023-07-31 08:50:09 +00:00
2023-05-29 20:08:18 +00:00
~Handle()
{
archive_read_close(archive);
archive_read_free(archive);
}
2023-05-29 20:08:18 +00:00
bool locateFile(const String & filename)
{
while (archive_read_next_header(archive, &entry) == ARCHIVE_OK)
{
if (archive_entry_pathname(entry) == filename)
return true;
}
return false;
}
2023-05-29 20:08:18 +00:00
struct archive * archive;
struct archive_entry * entry;
private:
const String path_to_archive;
};
2023-07-31 08:50:09 +00:00
/// Read buffer over the data of a single file stored inside an archive.
/// It opens its own Handle on the archive, positions it on `filename_` and then
/// pulls the file's data through archive_read_data.
template <typename ArchiveInfo>
class LibArchiveReader<ArchiveInfo>::ReadBufferFromLibArchive : public ReadBufferFromFileBase
{
public:
    /// Opens `path_to_archive_` and locates `filename_` inside it.
    /// Throws CANNOT_UNPACK_ARCHIVE if the archive cannot be opened or the file is not in it.
    explicit ReadBufferFromLibArchive(const String & path_to_archive_, const String & filename_)
        : ReadBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0)
        , handle(path_to_archive_)
        , path_to_archive(path_to_archive_)
        , filename(filename_)
    {
        /// Without this check a missing file would be silently read as whatever state
        /// the handle is left in after scanning all headers.
        if (!handle.locateFile(filename_))
            throw Exception(
                ErrorCodes::CANNOT_UNPACK_ARCHIVE,
                "Couldn't unpack {} archive {}: file {} not found",
                ArchiveInfo::name,
                quoteString(path_to_archive),
                quoteString(filename));
    }

    /// Moves the read position to `off` (SEEK_SET) or `off` bytes from the current position (SEEK_CUR)
    /// and returns the new position.
    /// Seeking within the current working buffer is free; seeking forward skips data.
    /// Throws SEEK_POSITION_OUT_OF_BOUND for unsupported `whence`, negative positions and positions
    /// past the entry size, and NOT_IMPLEMENTED for backward seeks that leave the working buffer.
    off_t seek(off_t off, int whence) override
    {
        const off_t current_pos = getPosition();

        off_t new_pos;
        if (whence == SEEK_SET)
            new_pos = off;
        else if (whence == SEEK_CUR)
            new_pos = off + current_pos;
        else
            throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Only SEEK_SET and SEEK_CUR seek modes allowed.");

        if (new_pos == current_pos)
            return current_pos; /// The position is the same.

        if (new_pos < 0)
            throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bound");

        const off_t working_buffer_start_pos = current_pos - static_cast<off_t>(offset());
        const off_t working_buffer_end_pos = current_pos + static_cast<off_t>(available());

        if ((working_buffer_start_pos <= new_pos) && (new_pos <= working_buffer_end_pos))
        {
            /// The new position is still inside the buffer.
            position() += new_pos - current_pos;
            return new_pos;
        }

        /// Data is only consumed forward here, so a position before the working buffer
        /// cannot be reached. Previously the negative distance was passed to ignore(),
        /// where it turned into a huge unsigned byte count.
        if (new_pos < current_pos)
            throw Exception(
                ErrorCodes::NOT_IMPLEMENTED,
                "Seeking backwards outside of the current buffer is not implemented for {} archives",
                ArchiveInfo::name);

        /// Check that the new position is not beyond the end of the file.
        if (new_pos > archive_entry_size(handle.entry))
            throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bound");

        ignore(static_cast<size_t>(new_pos - current_pos));
        return new_pos;
    }

    /// Current offset inside the uncompressed file: everything fetched from the archive so far
    /// minus the part of the working buffer that has not been consumed yet.
    off_t getPosition() override { return total_bytes_read - static_cast<off_t>(available()); }

    String getFileName() const override { return filename; }

private:
    /// Refills the internal buffer with the next chunk of the file.
    /// Returns false at end of data; throws CANNOT_READ_ALL_DATA if libarchive reports an error.
    bool nextImpl() override
    {
        /// archive_read_data takes the buffer size as size_t, so no narrowing cast is needed.
        const auto bytes_read = archive_read_data(handle.archive, internal_buffer.begin(), internal_buffer.size());

        if (bytes_read < 0)
        {
            /// The error string may be unset; never pass a null pointer to the formatter.
            const char * reason = archive_error_string(handle.archive);
            throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Failed to read file {} from {}: {}", filename, path_to_archive, reason ? reason : "unknown error");
        }

        if (!bytes_read)
            return false;

        total_bytes_read += static_cast<off_t>(bytes_read);

        working_buffer = internal_buffer;
        working_buffer.resize(bytes_read);
        return true;
    }

    Handle handle;
    const String path_to_archive;
    const String filename;

    /// Number of bytes of the file fetched from the archive by nextImpl so far.
    off_t total_bytes_read = 0;
};
2023-07-31 08:50:09 +00:00
/// Creates a reader for the archive at `path_to_archive_`.
/// Only the path is stored; nothing else happens in this constructor.
template <typename ArchiveInfo>
LibArchiveReader<ArchiveInfo>::LibArchiveReader(const String & path_to_archive_)
    : path_to_archive(path_to_archive_)
{
}
2023-07-31 08:50:09 +00:00
/// Creates a reader for the archive at `path_to_archive_` and stores `archive_read_function_`
/// alongside the path. Nothing else happens in this constructor.
template <typename ArchiveInfo>
LibArchiveReader<ArchiveInfo>::LibArchiveReader(
    const String & path_to_archive_,
    const ReadArchiveFunction & archive_read_function_)
    : path_to_archive(path_to_archive_)
    , archive_read_function(archive_read_function_)
{
}
2023-07-31 08:50:09 +00:00
/// Defaulted destructor, defined out of line in this implementation file.
template <typename ArchiveInfo>
LibArchiveReader<ArchiveInfo>::~LibArchiveReader() = default;
2023-07-28 11:55:23 +00:00
2023-07-31 08:50:09 +00:00
/// Returns true if the archive contains an entry whose path equals `filename`.
template <typename ArchiveInfo>
bool LibArchiveReader<ArchiveInfo>::fileExists(const String & filename)
{
    /// A separate handle is opened for every call and closed again when it goes out of scope.
    Handle archive_handle(path_to_archive);
    const bool found = archive_handle.locateFile(filename);
    return found;
}
2023-07-31 08:50:09 +00:00
template <typename ArchiveInfo>
LibArchiveReader<ArchiveInfo>::FileInfo LibArchiveReader<ArchiveInfo>::getFileInfo(const String & filename)
2023-05-29 20:08:18 +00:00
{
Handle handle(path_to_archive);
2023-05-29 20:08:18 +00:00
handle.locateFile(filename);
FileInfo info;
info.uncompressed_size = archive_entry_size(handle.entry);
info.compressed_size = archive_entry_size(handle.entry);
info.is_encrypted = false;
2023-05-29 20:08:18 +00:00
return info;
}
2023-07-31 08:50:09 +00:00
/// File enumeration is not supported by this reader: always throws NOT_IMPLEMENTED.
template <typename ArchiveInfo>
std::unique_ptr<typename LibArchiveReader<ArchiveInfo>::FileEnumerator>
LibArchiveReader<ArchiveInfo>::firstFile()
{
    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        "Iterating files not implemented for {} archives",
        ArchiveInfo::name);
}
2023-07-31 08:50:09 +00:00
/// Returns a read buffer over the contents of `filename` inside the archive.
/// Throws CANNOT_UNPACK_ARCHIVE if the archive cannot be opened or does not contain the file.
template <typename ArchiveInfo>
std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader<ArchiveInfo>::readFile(const String & filename)
{
    /// Probe the archive with a temporary handle and actually use the result: previously the
    /// outcome of locateFile was discarded, so a missing file was never reported from here.
    /// The temporary handle is closed at the end of the condition, before the buffer opens its own.
    if (!Handle(path_to_archive).locateFile(filename))
        throw Exception(
            ErrorCodes::CANNOT_UNPACK_ARCHIVE,
            "Couldn't unpack {} archive {}: file {} not found",
            ArchiveInfo::name,
            quoteString(path_to_archive),
            quoteString(filename));

    return std::make_unique<ReadBufferFromLibArchive>(path_to_archive, filename);
}
2023-07-31 08:50:09 +00:00
/// Reading through an enumerator is not supported by this reader: always throws NOT_IMPLEMENTED.
/// The enumerator argument is ignored.
template <typename ArchiveInfo>
std::unique_ptr<ReadBufferFromFileBase>
LibArchiveReader<ArchiveInfo>::readFile(std::unique_ptr<FileEnumerator> /*enumerator*/)
{
    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        "Iterating files not implemented for {} archives",
        ArchiveInfo::name);
}
2023-07-31 08:50:09 +00:00
/// Advancing to the next file is not supported by this reader: always throws NOT_IMPLEMENTED.
/// The read buffer argument is ignored.
template <typename ArchiveInfo>
std::unique_ptr<typename LibArchiveReader<ArchiveInfo>::FileEnumerator>
LibArchiveReader<ArchiveInfo>::nextFile(std::unique_ptr<ReadBuffer> /*read_buffer*/)
{
    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        "Iterating files not implemented for {} archives",
        ArchiveInfo::name);
}
2023-07-31 08:50:09 +00:00
/// Passwords are not supported by this reader: always throws LOGICAL_ERROR.
/// The password argument is ignored.
template <typename ArchiveInfo>
void LibArchiveReader<ArchiveInfo>::setPassword(const String & /*password_*/)
{
    throw Exception(
        ErrorCodes::LOGICAL_ERROR,
        "Can not set password to {} archive",
        ArchiveInfo::name);
}
2023-07-31 08:50:09 +00:00
/// Explicit instantiations: the member templates are defined in this file,
/// so the archive kinds in use are instantiated here.
template class LibArchiveReader<TarArchiveInfo>;
template class LibArchiveReader<SevenZipArchiveInfo>;
2023-07-28 13:00:35 +00:00
#endif
}