From 8e7eb7f3fa739ef4d02594ca25fccb815a706a73 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 4 Aug 2023 08:59:55 +0000 Subject: [PATCH] Fix tests --- src/IO/Archives/LibArchiveReader.cpp | 169 ++++++++++-------- src/IO/Archives/LibArchiveReader.h | 25 ++- src/TableFunctions/ITableFunctionFileLike.cpp | 3 +- src/TableFunctions/TableFunctionFile.cpp | 1 + 4 files changed, 110 insertions(+), 88 deletions(-) diff --git a/src/IO/Archives/LibArchiveReader.cpp b/src/IO/Archives/LibArchiveReader.cpp index d819547c9bd..1686b12f37a 100644 --- a/src/IO/Archives/LibArchiveReader.cpp +++ b/src/IO/Archives/LibArchiveReader.cpp @@ -1,9 +1,11 @@ #include #include #include +#include #include +#include namespace DB { @@ -18,12 +20,11 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } - -template -class LibArchiveReader::Handle +class LibArchiveReader::Handle { public: - explicit Handle(const String & path_to_archive_) : path_to_archive(path_to_archive_) + explicit Handle(std::string path_to_archive_, bool lock_on_reading_) + : path_to_archive(path_to_archive_), lock_on_reading(lock_on_reading_) { current_archive = open(path_to_archive); current_entry = archive_entry_new(); @@ -40,11 +41,7 @@ public: ~Handle() { - if (current_archive) - { - archive_read_close(current_archive); - archive_read_free(current_archive); - } + close(current_archive); } bool locateFile(const std::string & filename) @@ -58,7 +55,7 @@ public: int err = ARCHIVE_OK; while (true) { - err = archive_read_next_header(current_archive, ¤t_entry); + err = readNextHeader(current_archive, ¤t_entry); if (err == ARCHIVE_RETRY) continue; @@ -80,59 +77,37 @@ public: int err = ARCHIVE_OK; do { - err = archive_read_next_header(current_archive, ¤t_entry); + err = readNextHeader(current_archive, ¤t_entry); } while (err == ARCHIVE_RETRY); checkError(err); return err == ARCHIVE_OK; } - static struct archive * open(const String & path_to_archive) - { - auto * archive = archive_read_new(); - archive_read_support_filter_all(archive); - archive_read_support_format_all(archive); - if (archive_read_open_filename(archive, path_to_archive.c_str(), 10240) != ARCHIVE_OK) - throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open {} archive: {}", ArchiveInfo::name, quoteString(path_to_archive)); - - return archive; - } - std::vector getAllFiles(NameFilter filter) { auto * archive = open(path_to_archive); + SCOPE_EXIT( + close(archive); + ); + auto * entry = archive_entry_new(); std::vector files; - int error = archive_read_next_header(archive, &entry); + int error = readNextHeader(archive, &entry); while (error == ARCHIVE_OK || error == ARCHIVE_RETRY) { std::string name = archive_entry_pathname(entry); if (!filter || filter(name)) files.push_back(std::move(name)); - error = archive_read_next_header(archive, &entry); + error = readNextHeader(archive, &entry); } - archive_read_close(archive); - archive_read_free(archive); - checkError(error); return files; } - void checkError(int error) - { - if (error == ARCHIVE_FATAL) - throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Failed to read archive while fetching all files: {}", archive_error_string(current_archive)); - } - - void resetFileInfo() - { - file_name.reset(); - file_info.reset(); - } - const String & getFileName() const { if (!file_name) @@ -157,13 +132,67 @@ public: struct archive * current_archive; struct archive_entry * current_entry; private: + void checkError(int error) const + { + if (error == ARCHIVE_FATAL) + throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Failed to read archive while fetching all files: {}", archive_error_string(current_archive)); + } + + void resetFileInfo() + { + file_name.reset(); + file_info.reset(); + } + + static struct archive * open(const String & path_to_archive) + { + auto * archive = archive_read_new(); + try + { + archive_read_support_filter_all(archive); + archive_read_support_format_all(archive); + if (archive_read_open_filename(archive, path_to_archive.c_str(), 10240) != ARCHIVE_OK) + throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open archive: {}", quoteString(path_to_archive)); + } + catch (...) + { + close(archive); + throw; + } + + return archive; + } + + static void close(struct archive * archive) + { + if (archive) + { + archive_read_close(archive); + archive_read_free(archive); + } + } + + int readNextHeader(struct archive * archive, struct archive_entry ** entry) const + { + std::unique_lock lock(Handle::read_lock, std::defer_lock); + if (lock_on_reading) + lock.lock(); + + return archive_read_next_header(archive, entry); + } + const String path_to_archive; + + /// for some archive types when we are reading headers static variables are used + /// which are not thread-safe + const bool lock_on_reading = false; + static inline std::mutex read_lock; + mutable std::optional file_name; mutable std::optional file_info; }; -template -class LibArchiveReader::FileEnumeratorImpl : public FileEnumerator +class LibArchiveReader::FileEnumeratorImpl : public FileEnumerator { public: explicit FileEnumeratorImpl(Handle handle_) : handle(std::move(handle_)) {} @@ -178,8 +207,7 @@ private: Handle handle; }; -template -class LibArchiveReader::ReadBufferFromLibArchive : public ReadBufferFromFileBase +class LibArchiveReader::ReadBufferFromLibArchive : public ReadBufferFromFileBase { public: explicit ReadBufferFromLibArchive(Handle handle_, std::string path_to_archive_) @@ -228,63 +256,55 @@ private: size_t total_bytes_read = 0; }; -template -LibArchiveReader::LibArchiveReader(const String & path_to_archive_) : path_to_archive(path_to_archive_) +LibArchiveReader::LibArchiveReader(std::string archive_name_, bool lock_on_reading_, std::string path_to_archive_) + : archive_name(std::move(archive_name_)), lock_on_reading(lock_on_reading_), path_to_archive(std::move(path_to_archive_)) {} -template -LibArchiveReader::~LibArchiveReader() = default; +LibArchiveReader::~LibArchiveReader() = default; -template -const std::string & LibArchiveReader::getPath() const +const std::string & LibArchiveReader::getPath() const { return path_to_archive; } -template -bool LibArchiveReader::fileExists(const String & filename) +bool LibArchiveReader::fileExists(const String & filename) { - Handle handle(path_to_archive); + Handle handle(path_to_archive, lock_on_reading); return handle.locateFile(filename); } -template -LibArchiveReader::FileInfo LibArchiveReader::getFileInfo(const String & filename) +LibArchiveReader::FileInfo LibArchiveReader::getFileInfo(const String & filename) { - Handle handle(path_to_archive); + Handle handle(path_to_archive, lock_on_reading); if (!handle.locateFile(filename)) throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: file not found", path_to_archive); return handle.getFileInfo(); } -template -std::unique_ptr::FileEnumerator> LibArchiveReader::firstFile() +std::unique_ptr LibArchiveReader::firstFile() { - Handle handle(path_to_archive); + Handle handle(path_to_archive, lock_on_reading); if (!handle.nextFile()) return nullptr; return std::make_unique(std::move(handle)); } -template -std::unique_ptr LibArchiveReader::readFile(const String & filename) +std::unique_ptr LibArchiveReader::readFile(const String & filename) { return readFile([&](const std::string & file) { return file == filename; }); } -template -std::unique_ptr LibArchiveReader::readFile(NameFilter filter) +std::unique_ptr LibArchiveReader::readFile(NameFilter filter) { - Handle handle(path_to_archive); + Handle handle(path_to_archive, lock_on_reading); if (!handle.locateFile(filter)) throw Exception( ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: no file found satisfying the filter", path_to_archive); return std::make_unique(std::move(handle), path_to_archive); } -template -std::unique_ptr LibArchiveReader::readFile(std::unique_ptr enumerator) +std::unique_ptr LibArchiveReader::readFile(std::unique_ptr enumerator) { if (!dynamic_cast(enumerator.get())) throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong enumerator passed to readFile()"); @@ -293,8 +313,7 @@ std::unique_ptr LibArchiveReader::readFile( return std::make_unique(std::move(handle), path_to_archive); } -template std::unique_ptr::FileEnumerator> -LibArchiveReader::nextFile(std::unique_ptr read_buffer) +std::unique_ptr LibArchiveReader::nextFile(std::unique_ptr read_buffer) { if (!dynamic_cast(read_buffer.get())) throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong ReadBuffer passed to nextFile()"); @@ -305,28 +324,22 @@ LibArchiveReader::nextFile(std::unique_ptr read_buffer) return std::make_unique(std::move(handle)); } -template -std::vector LibArchiveReader::getAllFiles() +std::vector LibArchiveReader::getAllFiles() { return getAllFiles({}); } -template -std::vector LibArchiveReader::getAllFiles(NameFilter filter) +std::vector LibArchiveReader::getAllFiles(NameFilter filter) { - Handle handle(path_to_archive); + Handle handle(path_to_archive, lock_on_reading); return handle.getAllFiles(filter); } -template -void LibArchiveReader::setPassword(const String & /*password_*/) +void LibArchiveReader::setPassword(const String & /*password_*/) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not set password to {} archive", ArchiveInfo::name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not set password to {} archive", archive_name); } -template class LibArchiveReader; -template class LibArchiveReader; - #endif } diff --git a/src/IO/Archives/LibArchiveReader.h b/src/IO/Archives/LibArchiveReader.h index 86127fa6953..700e8f70d04 100644 --- a/src/IO/Archives/LibArchiveReader.h +++ b/src/IO/Archives/LibArchiveReader.h @@ -16,13 +16,9 @@ class ReadBufferFromFileBase; class SeekableReadBuffer; /// Implementation of IArchiveReader for reading archives using libarchive. -template class LibArchiveReader : public IArchiveReader { public: - /// Constructs an archive's reader that will read from a file in the local filesystem. - explicit LibArchiveReader(const String & path_to_archive_); - ~LibArchiveReader() override; const std::string & getPath() const override; @@ -52,18 +48,31 @@ public: /// Sets password used to decrypt the contents of the files in the archive. void setPassword(const String & password_) override; +protected: + /// Constructs an archive's reader that will read from a file in the local filesystem. + LibArchiveReader(std::string archive_name_, bool lock_on_reading_, std::string path_to_archive_); + private: class ReadBufferFromLibArchive; class Handle; class FileEnumeratorImpl; + const std::string archive_name; + const bool lock_on_reading; const String path_to_archive; }; -struct TarArchiveInfo { static constexpr std::string_view name = "tar"; }; -using TarArchiveReader = LibArchiveReader; -struct SevenZipArchiveInfo { static constexpr std::string_view name = "7z"; }; -using SevenZipArchiveReader = LibArchiveReader; +class TarArchiveReader : public LibArchiveReader +{ +public: + explicit TarArchiveReader(std::string path_to_archive) : LibArchiveReader("tar", /*lock_on_reading_=*/ true, std::move(path_to_archive)) { } +}; + +class SevenZipArchiveReader : public LibArchiveReader +{ +public: + explicit SevenZipArchiveReader(std::string path_to_archive) : LibArchiveReader("7z", /*lock_on_reading_=*/ false, std::move(path_to_archive)) { } +}; #endif diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index d99d0856da6..487826dc363 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -24,8 +24,7 @@ namespace ErrorCodes void ITableFunctionFileLike::parseFirstArguments(const ASTPtr & arg, const ContextPtr &) { - String path = checkAndGetLiteralArgument(arg, "source"); - StorageFile::parseFileSource(std::move(path), filename, path_to_archive); + filename = checkAndGetLiteralArgument(arg, "source"); } String ITableFunctionFileLike::getFormatFromFirstArgument() diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 4b0e71ba60c..56a6839ddbb 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -25,6 +25,7 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr if (context->getApplicationType() != Context::ApplicationType::LOCAL) { ITableFunctionFileLike::parseFirstArguments(arg, context); + StorageFile::parseFileSource(std::move(filename), filename, path_to_archive); return; }