ClickHouse/src/IO/Archives/TarArchiveReader.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

182 lines
5.3 KiB
C++
Raw Normal View History

2023-07-28 11:55:23 +00:00
#include <IO/Archives/TarArchiveReader.h>
#include <Common/quoteString.h>
2023-07-28 11:55:23 +00:00
#include <IO/ReadBufferFromFileBase.h>
#include <IO/Archives/ArchiveUtils.h>
2023-05-29 20:08:18 +00:00
namespace DB
{
/// Error codes thrown by the code in this file.
/// They are only declared here (`extern`); the definitions live elsewhere.
namespace ErrorCodes
{
extern const int CANNOT_UNPACK_ARCHIVE;
extern const int LOGICAL_ERROR;
extern const int SEEK_POSITION_OUT_OF_BOUND;
}
/// Owns a libarchive read handle opened on an archive stored on disk and
/// provides a forward-only search for an entry by its path name.
///
/// The handle is released in the destructor, therefore the class is non-copyable:
/// a copy would close and free the same `archive` twice.
class TarArchiveReader::Handle
{
public:
    /// Opens `path_to_archive_` for reading with all filters and formats known to libarchive enabled.
    /// Throws CANNOT_UNPACK_ARCHIVE if the archive cannot be opened.
    explicit Handle(const String & path_to_archive_) : path_to_archive(path_to_archive_)
    {
        archive = archive_read_new();
        if (!archive)
            throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open tar archive {}", quoteString(path_to_archive));

        archive_read_support_filter_all(archive);
        archive_read_support_format_all(archive);

        if (archive_read_open_filename(archive, path_to_archive.c_str(), READ_BLOCK_SIZE) != ARCHIVE_OK)
        {
            /// The destructor is not executed when the constructor throws,
            /// so the handle has to be released here to avoid leaking it.
            archive_read_free(archive);
            archive = nullptr;
            throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open tar archive {}", quoteString(path_to_archive));
        }

        /// Until a header is read `entry` points to an empty entry owned by this object,
        /// so callers may query it (e.g. its size) even if nothing was located.
        placeholder_entry = archive_entry_new();
        entry = placeholder_entry;
    }

    Handle(const Handle &) = delete;
    Handle & operator=(const Handle &) = delete;

    ~Handle()
    {
        archive_read_close(archive);
        archive_read_free(archive);

        /// `archive_read_next_header` replaces `entry` with a pointer to an entry owned by the archive,
        /// so only the placeholder allocated in the constructor must be freed by us.
        if (placeholder_entry)
            archive_entry_free(placeholder_entry);
    }

    /// Reads headers one by one starting from the current position of the archive
    /// until an entry whose path name equals `filename` is found.
    /// Returns true if such an entry was found (then `entry` describes it), false otherwise.
    /// The scan stops at the first header that is not read with ARCHIVE_OK.
    bool locateFile(const String & filename)
    {
        while (archive_read_next_header(archive, &entry) == ARCHIVE_OK)
        {
            /// The path name may be unavailable (null); comparing a null pointer with a string is undefined behaviour.
            const char * entry_name = archive_entry_pathname(entry);
            if (entry_name && filename == entry_name)
                return true;
        }
        return false;
    }

    struct archive * archive = nullptr;
    struct archive_entry * entry = nullptr;

private:
    /// Block size passed to `archive_read_open_filename`.
    static constexpr size_t READ_BLOCK_SIZE = 10240;

    /// Entry allocated by the constructor; kept separately because `entry` gets overwritten by `locateFile`.
    struct archive_entry * placeholder_entry = nullptr;

    const String path_to_archive;
};
/// Read buffer that yields the uncompressed contents of a single file stored inside an archive.
/// The data is read strictly forward through libarchive, so seeking is limited to
/// positions inside the current buffer and to skipping forward.
class TarArchiveReader::ReadBufferFromTarArchive : public ReadBufferFromFileBase
{
public:
    /// Opens the archive `path_to_archive_` and positions it at the entry named `filename_`.
    /// Throws CANNOT_UNPACK_ARCHIVE if the archive cannot be opened or the entry does not exist.
    explicit ReadBufferFromTarArchive(const String & path_to_archive_, const String & filename_)
        : ReadBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0)
        , handle(path_to_archive_)
        , path_to_archive(path_to_archive_)
        , filename(filename_)
    {
        /// Without this check a missing file would silently be read as the data of whatever entry the scan stopped at.
        if (!handle.locateFile(filename_))
            throw Exception(
                ErrorCodes::CANNOT_UNPACK_ARCHIVE,
                "Couldn't unpack archive {}: file {} not found",
                quoteString(path_to_archive),
                quoteString(filename));
    }

    /// Moves the read position to `off` (SEEK_SET) or by `off` relative to the current position (SEEK_CUR).
    /// Returns the new position.
    /// Throws SEEK_POSITION_OUT_OF_BOUND for other `whence` values, for negative positions,
    /// for positions past the end of the file and for backward seeks outside the current buffer.
    off_t seek(off_t off, int whence) override
    {
        const off_t current_pos = getPosition();

        off_t new_pos;
        if (whence == SEEK_SET)
            new_pos = off;
        else if (whence == SEEK_CUR)
            new_pos = off + current_pos;
        else
            throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Only SEEK_SET and SEEK_CUR seek modes allowed.");

        if (new_pos == current_pos)
            return current_pos; /// The position is the same.

        if (new_pos < 0)
            throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bound");

        const off_t working_buffer_start_pos = current_pos - offset();
        const off_t working_buffer_end_pos = current_pos + available();

        if ((working_buffer_start_pos <= new_pos) && (new_pos <= working_buffer_end_pos))
        {
            /// The new position is still inside the buffer.
            position() += new_pos - current_pos;
            return new_pos;
        }

        /// The data can only be consumed forward: a negative distance must never reach `ignore`,
        /// where it would be converted to a huge unsigned number of bytes to skip.
        if (new_pos < current_pos)
            throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bound");

        /// Check that the new position is not beyond the end of the file.
        if (new_pos > archive_entry_size(handle.entry))
            throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bound");

        ignore(static_cast<size_t>(new_pos - current_pos));
        return new_pos;
    }

    /// Offset of the next byte to be read, counted from the beginning of the file inside the archive.
    off_t getPosition() override { return file_offset_of_buffer_end - available(); }

    String getFileName() const override { return filename; }

private:
    /// Refills the buffer with the next portion of the file.
    /// Returns false at the end of the file; throws CANNOT_UNPACK_ARCHIVE if libarchive reports an error.
    bool nextImpl() override
    {
        const auto bytes_read = archive_read_data(handle.archive, internal_buffer.begin(), internal_buffer.size());

        /// Negative values are libarchive error codes, not sizes.
        if (bytes_read < 0)
        {
            const char * reason = archive_error_string(handle.archive);
            throw Exception(
                ErrorCodes::CANNOT_UNPACK_ARCHIVE,
                "Failed to read file {} from tar archive {}: {}",
                quoteString(filename),
                quoteString(path_to_archive),
                String(reason ? reason : "unknown error"));
        }

        if (bytes_read == 0)
            return false;

        working_buffer = internal_buffer;
        working_buffer.resize(bytes_read);
        file_offset_of_buffer_end += bytes_read;
        return true;
    }

    Handle handle;
    const String path_to_archive;
    const String filename;

    /// Number of bytes of the file read from the archive so far, i.e. the file offset of the end of `working_buffer`.
    off_t file_offset_of_buffer_end = 0;
};
2023-05-29 20:08:18 +00:00
/// Creates a reader for the archive located at `path_to_archive_`.
/// Only the path is remembered here; the archive itself is opened by the individual operations.
TarArchiveReader::TarArchiveReader(const String & path_to_archive_)
    : path_to_archive(path_to_archive_)
{
}
2023-05-29 20:08:18 +00:00
/// Creates a reader that additionally remembers a custom read function and the archive size.
/// The constructor only stores its arguments.
TarArchiveReader::TarArchiveReader(
    const String & path_to_archive_,
    const ReadArchiveFunction & archive_read_function_,
    UInt64 archive_size_)
    : path_to_archive(path_to_archive_)
    , archive_read_function(archive_read_function_)
    , archive_size(archive_size_)
{
}
2023-07-28 11:55:23 +00:00
/// Defaulted destructor, defined out of line in this translation unit.
TarArchiveReader::~TarArchiveReader() = default;
2023-05-29 20:08:18 +00:00
bool TarArchiveReader::fileExists(const String & filename)
{
Handle handle(path_to_archive);
return handle.locateFile(filename);
}
2023-05-29 20:08:18 +00:00
/// Returns the size information of the entry named `filename`.
/// Both the compressed and the uncompressed size are set to the entry size reported by libarchive,
/// and the entry is reported as not encrypted.
/// Throws CANNOT_UNPACK_ARCHIVE if the archive cannot be opened or contains no such entry.
TarArchiveReader::FileInfo TarArchiveReader::getFileInfo(const String & filename)
{
    Handle handle(path_to_archive);

    /// Without this check the sizes below would describe an unrelated entry.
    if (!handle.locateFile(filename))
        throw Exception(
            ErrorCodes::CANNOT_UNPACK_ARCHIVE,
            "Couldn't unpack archive {}: file {} not found",
            quoteString(path_to_archive),
            quoteString(filename));

    const auto entry_size = archive_entry_size(handle.entry);

    FileInfo info;
    info.uncompressed_size = entry_size;
    info.compressed_size = entry_size;
    info.is_encrypted = false;
    return info;
}
2023-05-29 20:08:18 +00:00
/// Always returns a null enumerator.
/// NOTE(review): looks like file enumeration is not implemented for this reader yet - confirm.
std::unique_ptr<TarArchiveReader::FileEnumerator> TarArchiveReader::firstFile()
{
    return {};
}
2023-05-29 20:08:18 +00:00
std::unique_ptr<ReadBufferFromFileBase> TarArchiveReader::readFile(const String & filename)
{
Handle handle(path_to_archive);
handle.locateFile(filename);
2023-05-29 20:08:18 +00:00
return std::make_unique<ReadBufferFromTarArchive>(path_to_archive, filename);
}
2023-05-29 20:08:18 +00:00
/// Always returns a null buffer; the enumerator argument is not used.
/// NOTE(review): presumably a placeholder until enumeration is supported - confirm.
std::unique_ptr<ReadBufferFromFileBase> TarArchiveReader::readFile([[maybe_unused]] std::unique_ptr<FileEnumerator> enumerator)
{
    return {};
}
2023-05-29 20:08:18 +00:00
/// Always returns a null enumerator; the read buffer argument is not used.
/// NOTE(review): presumably a placeholder until enumeration is supported - confirm.
std::unique_ptr<TarArchiveReader::FileEnumerator> TarArchiveReader::nextFile([[maybe_unused]] std::unique_ptr<ReadBuffer> read_buffer)
{
    return {};
}
/// Passwords are not supported by this reader: every call throws LOGICAL_ERROR.
/// The argument is ignored.
void TarArchiveReader::setPassword([[maybe_unused]] const String & password_)
{
    throw Exception(
        ErrorCodes::LOGICAL_ERROR,
        "Can not set password to .tar archive");
}
}