2023-07-28 11:55:23 +00:00
|
|
|
#include <IO/Archives/TarArchiveReader.h>
|
2023-05-29 17:10:03 +00:00
|
|
|
#include <Common/quoteString.h>
|
|
|
|
|
2023-07-28 11:55:23 +00:00
|
|
|
#include <IO/ReadBufferFromFileBase.h>
|
|
|
|
#include <IO/Archives/ArchiveUtils.h>
|
2023-05-29 17:10:03 +00:00
|
|
|
|
2023-05-29 20:08:18 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
2023-07-28 13:00:35 +00:00
|
|
|
|
|
|
|
#if USE_LIBARCHIVE
|
|
|
|
|
2023-05-29 20:08:18 +00:00
|
|
|
/// Error codes referenced from this translation unit; the definitions live elsewhere.
/// NOT_IMPLEMENTED is declared here because the enumeration-related methods below throw it.
namespace ErrorCodes
{
    extern const int CANNOT_UNPACK_ARCHIVE;
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
    extern const int SEEK_POSITION_OUT_OF_BOUND;
}
|
|
|
|
class TarArchiveReader::Handle
|
|
|
|
{
|
2023-05-29 17:10:03 +00:00
|
|
|
public:
|
2023-07-28 11:55:23 +00:00
|
|
|
explicit Handle(const String & path_to_archive_) : path_to_archive(path_to_archive_)
|
2023-05-29 20:08:18 +00:00
|
|
|
{
|
2023-05-29 17:10:03 +00:00
|
|
|
archive = archive_read_new();
|
|
|
|
archive_read_support_filter_all(archive);
|
|
|
|
archive_read_support_format_all(archive);
|
2023-05-29 20:08:18 +00:00
|
|
|
if (archive_read_open_filename(archive, path_to_archive.c_str(), 10240) != ARCHIVE_OK)
|
|
|
|
{
|
2023-05-29 17:10:03 +00:00
|
|
|
throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open tar archive {}", quoteString(path_to_archive));
|
|
|
|
}
|
|
|
|
entry = archive_entry_new();
|
|
|
|
}
|
2023-05-29 20:08:18 +00:00
|
|
|
~Handle()
|
|
|
|
{
|
2023-05-29 17:10:03 +00:00
|
|
|
archive_read_close(archive);
|
|
|
|
archive_read_free(archive);
|
|
|
|
}
|
|
|
|
|
2023-05-29 20:08:18 +00:00
|
|
|
bool locateFile(const String & filename)
|
|
|
|
{
|
2023-05-29 17:10:03 +00:00
|
|
|
while (archive_read_next_header(archive, &entry) == ARCHIVE_OK)
|
|
|
|
{
|
|
|
|
if (archive_entry_pathname(entry) == filename)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2023-05-29 20:08:18 +00:00
|
|
|
struct archive * archive;
|
|
|
|
struct archive_entry * entry;
|
2023-05-29 17:10:03 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
const String path_to_archive;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Read buffer that streams the data of one file stored inside an archive.
/// It opens its own Handle and positions it on the requested entry.
class TarArchiveReader::ReadBufferFromTarArchive : public ReadBufferFromFileBase
{
public:
    /// Opens `path_to_archive_` and positions the handle on the entry named `filename_`.
    /// Throws CANNOT_UNPACK_ARCHIVE if the archive cannot be opened or the entry is missing.
    explicit ReadBufferFromTarArchive(const String & path_to_archive_, const String & filename_)
        : ReadBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0)
        , handle(path_to_archive_)
        , path_to_archive(path_to_archive_)
        , filename(filename_)
    {
        /// Without this check a missing file would silently be read as whatever entry came last.
        if (!handle.locateFile(filename))
            throw Exception(
                ErrorCodes::CANNOT_UNPACK_ARCHIVE,
                "File {} not found in tar archive {}",
                quoteString(filename),
                quoteString(path_to_archive));
    }

    /// Moves the read position to `off` (SEEK_SET) or by `off` relative to the current position (SEEK_CUR).
    /// Seeking inside the working buffer only moves the cursor; seeking forward skips data.
    /// Throws SEEK_POSITION_OUT_OF_BOUND for other modes, for a negative or past-the-end target,
    /// and for a backward seek that leaves the working buffer.
    off_t seek(off_t off, int whence) override
    {
        off_t current_pos = getPosition();
        off_t new_pos;
        if (whence == SEEK_SET)
            new_pos = off;
        else if (whence == SEEK_CUR)
            new_pos = off + current_pos;
        else
            throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Only SEEK_SET and SEEK_CUR seek modes allowed.");

        if (new_pos == current_pos)
            return current_pos; /// The position is the same.

        if (new_pos < 0)
            throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bound");

        off_t working_buffer_start_pos = current_pos - offset();
        off_t working_buffer_end_pos = current_pos + available();

        if ((working_buffer_start_pos <= new_pos) && (new_pos <= working_buffer_end_pos))
        {
            /// The new position is still inside the buffer.
            position() += new_pos - current_pos;
            return new_pos;
        }

        /// Data is only ever consumed forward here, so a target before the working buffer cannot be reached.
        /// (Passing the negative difference to ignore() would wrap around to a huge unsigned count.)
        if (new_pos < current_pos)
            throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bound");

        /// Check that the new position is not beyond the end of the file.
        if (new_pos > archive_entry_size(handle.entry))
            throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bound");

        ignore(static_cast<size_t>(new_pos - current_pos));
        return new_pos;
    }

    /// Offset of the read cursor inside the file: bytes fetched from the archive so far
    /// minus the bytes still unread in the working buffer.
    off_t getPosition() override { return total_bytes_read - static_cast<off_t>(available()); }

    String getFileName() const override { return filename; }

private:
    /// Fetches the next chunk of the entry into the internal buffer.
    /// Returns false at the end of the entry; throws CANNOT_UNPACK_ARCHIVE on a read error.
    bool nextImpl() override
    {
        auto bytes_read = archive_read_data(handle.archive, internal_buffer.begin(), internal_buffer.size());

        /// Negative values are libarchive error codes, not byte counts.
        if (bytes_read < 0)
            throw Exception(
                ErrorCodes::CANNOT_UNPACK_ARCHIVE,
                "Failed to read file {} from tar archive {}",
                quoteString(filename),
                quoteString(path_to_archive));

        if (bytes_read == 0)
            return false;

        total_bytes_read += bytes_read;
        working_buffer = internal_buffer;
        working_buffer.resize(bytes_read);
        return true;
    }

    Handle handle;
    const String path_to_archive;
    const String filename;

    /// Number of bytes of the entry fetched from the archive by nextImpl() so far.
    off_t total_bytes_read = 0;
};
|
|
|
|
|
2023-05-29 20:08:18 +00:00
|
|
|
TarArchiveReader::TarArchiveReader(const String & path_to_archive_) : path_to_archive(path_to_archive_)
|
|
|
|
{
|
|
|
|
}
|
2023-05-29 17:10:03 +00:00
|
|
|
|
2023-07-28 13:00:35 +00:00
|
|
|
TarArchiveReader::TarArchiveReader(const String & path_to_archive_, const ReadArchiveFunction & archive_read_function_)
|
|
|
|
: path_to_archive(path_to_archive_), archive_read_function(archive_read_function_)
|
2023-05-29 20:08:18 +00:00
|
|
|
{
|
|
|
|
}
|
2023-05-29 17:10:03 +00:00
|
|
|
|
2023-07-28 11:55:23 +00:00
|
|
|
/// Defaulted destructor, defined out of line in this file.
TarArchiveReader::~TarArchiveReader() = default;
|
|
|
|
|
2023-05-29 20:08:18 +00:00
|
|
|
bool TarArchiveReader::fileExists(const String & filename)
|
|
|
|
{
|
|
|
|
Handle handle(path_to_archive);
|
|
|
|
return handle.locateFile(filename);
|
|
|
|
}
|
2023-05-29 17:10:03 +00:00
|
|
|
|
2023-05-29 20:08:18 +00:00
|
|
|
/// Returns size information for the entry named `filename`.
/// Both the compressed and the uncompressed size are reported as the entry size,
/// and the entry is always reported as not encrypted.
/// Throws CANNOT_UNPACK_ARCHIVE if the archive cannot be opened or has no such entry.
TarArchiveReader::FileInfo TarArchiveReader::getFileInfo(const String & filename)
{
    Handle handle(path_to_archive);

    /// Do not report the size of some unrelated entry when the requested file is missing.
    if (!handle.locateFile(filename))
        throw Exception(
            ErrorCodes::CANNOT_UNPACK_ARCHIVE,
            "File {} not found in tar archive {}",
            quoteString(filename),
            quoteString(path_to_archive));

    const auto entry_size = archive_entry_size(handle.entry);

    FileInfo info;
    info.uncompressed_size = entry_size;
    info.compressed_size = entry_size;
    info.is_encrypted = false;
    return info;
}
|
2023-05-29 17:10:03 +00:00
|
|
|
|
2023-05-29 20:08:18 +00:00
|
|
|
/// Enumerating the files of a tar archive is not supported: always throws NOT_IMPLEMENTED.
std::unique_ptr<TarArchiveReader::FileEnumerator> TarArchiveReader::firstFile()
{
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Iterating files not implemented for tar archives");
}
|
2023-05-29 17:10:03 +00:00
|
|
|
|
2023-05-29 20:08:18 +00:00
|
|
|
std::unique_ptr<ReadBufferFromFileBase> TarArchiveReader::readFile(const String & filename)
|
|
|
|
{
|
|
|
|
Handle handle(path_to_archive);
|
|
|
|
handle.locateFile(filename);
|
2023-05-29 17:10:03 +00:00
|
|
|
|
2023-05-29 20:08:18 +00:00
|
|
|
return std::make_unique<ReadBufferFromTarArchive>(path_to_archive, filename);
|
|
|
|
}
|
2023-05-29 17:10:03 +00:00
|
|
|
|
2023-07-28 13:00:35 +00:00
|
|
|
/// Reading through an enumerator is not supported for tar archives: always throws NOT_IMPLEMENTED.
std::unique_ptr<ReadBufferFromFileBase> TarArchiveReader::readFile(std::unique_ptr<FileEnumerator> /*enumerator*/)
{
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Iterating files not implemented for tar archives");
}
|
2023-05-29 17:10:03 +00:00
|
|
|
|
2023-07-28 13:00:35 +00:00
|
|
|
/// Advancing to the next file is not supported for tar archives: always throws NOT_IMPLEMENTED.
std::unique_ptr<TarArchiveReader::FileEnumerator> TarArchiveReader::nextFile(std::unique_ptr<ReadBuffer> /*read_buffer*/)
{
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Iterating files not implemented for tar archives");
}
|
|
|
|
|
|
|
|
|
2023-07-28 13:00:35 +00:00
|
|
|
void TarArchiveReader::setPassword(const String & /*password_*/)
|
2023-05-29 20:08:18 +00:00
|
|
|
{
|
2023-07-28 13:00:35 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not set password to tar archive");
|
2023-05-29 20:08:18 +00:00
|
|
|
}
|
2023-05-29 17:10:03 +00:00
|
|
|
|
2023-07-28 13:00:35 +00:00
|
|
|
#endif
|
|
|
|
|
2023-05-29 17:10:03 +00:00
|
|
|
}
|