More formats supported, read single archive from 1 thread

This commit is contained in:
Antonio Andelic 2023-08-09 10:04:29 +00:00
parent 3ffffb0b5e
commit 0e17d26b88
18 changed files with 582 additions and 137 deletions

View File

@ -147,7 +147,7 @@ target_compile_definitions(_libarchive PUBLIC
target_compile_options(_libarchive PRIVATE "-Wno-reserved-macro-identifier")
if (TARGET ch_contrib::xz)
target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1)
target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1 HAVE_LIBLZMA=1)
target_link_libraries(_libarchive PRIVATE ch_contrib::xz)
endif()
@ -156,6 +156,16 @@ if (TARGET ch_contrib::zlib)
target_link_libraries(_libarchive PRIVATE ch_contrib::zlib)
endif()
if (TARGET ch_contrib::zstd)
target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1)
target_link_libraries(_libarchive PRIVATE ch_contrib::zstd)
endif()
if (TARGET ch_contrib::bzip2)
target_compile_definitions(_libarchive PUBLIC HAVE_BZLIB_H=1)
target_link_libraries(_libarchive PRIVATE ch_contrib::bzip2)
endif()
if (OS_LINUX)
target_compile_definitions(
_libarchive PUBLIC

View File

@ -375,7 +375,7 @@ void BackupImpl::readBackupMetadata()
if (!archive_reader->fileExists(".backup"))
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Archive {} is not a backup", backup_name_for_logging);
setCompressedSize();
in = archive_reader->readFile(".backup");
in = archive_reader->readFile(".backup", /*throw_on_not_found=*/true);
}
else
{
@ -685,7 +685,7 @@ std::unique_ptr<SeekableReadBuffer> BackupImpl::readFileImpl(const SizeAndChecks
{
/// Make `read_buffer` if there is data for this backup entry in this backup.
if (use_archive)
read_buffer = archive_reader->readFile(info.data_file_name);
read_buffer = archive_reader->readFile(info.data_file_name, /*throw_on_not_found=*/true);
else
read_buffer = reader->readFile(info.data_file_name);
}

View File

@ -50,8 +50,8 @@ public:
/// Starts reading a file from the archive. The function returns a read buffer,
/// you can read that buffer to extract uncompressed data from the archive.
/// Several read buffers can be used at the same time in parallel.
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename) = 0;
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter) = 0;
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename, bool throw_on_not_found) = 0;
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter, bool throw_on_not_found) = 0;
/// It's possible to convert a file enumerator to a read buffer and vice versa.
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(std::unique_ptr<FileEnumerator> enumerator) = 0;

View File

@ -155,7 +155,7 @@ private:
archive_read_support_filter_all(archive);
archive_read_support_format_all(archive);
if (archive_read_open_filename(archive, path_to_archive.c_str(), 10240) != ARCHIVE_OK)
throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open archive: {}", quoteString(path_to_archive));
throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open archive {}: {}", quoteString(path_to_archive), archive_error_string(archive));
}
catch (...)
{
@ -293,17 +293,21 @@ std::unique_ptr<LibArchiveReader::FileEnumerator> LibArchiveReader::firstFile()
return std::make_unique<FileEnumeratorImpl>(std::move(handle));
}
std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(const String & filename)
std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(const String & filename, bool throw_on_not_found)
{
return readFile([&](const std::string & file) { return file == filename; });
return readFile([&](const std::string & file) { return file == filename; }, throw_on_not_found);
}
std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(NameFilter filter)
std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(NameFilter filter, bool throw_on_not_found)
{
Handle handle(path_to_archive, lock_on_reading);
if (!handle.locateFile(filter))
throw Exception(
ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: no file found satisfying the filter", path_to_archive);
{
if (throw_on_not_found)
throw Exception(
ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: no file found satisfying the filter", path_to_archive);
return nullptr;
}
return std::make_unique<ReadBufferFromLibArchive>(std::move(handle), path_to_archive);
}

View File

@ -35,8 +35,8 @@ public:
/// Starts reading a file from the archive. The function returns a read buffer,
/// you can read that buffer to extract uncompressed data from the archive.
/// Several read buffers can be used at the same time in parallel.
std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename, bool throw_on_not_found) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter, bool throw_on_not_found) override;
/// It's possible to convert a file enumerator to a read buffer and vice versa.
std::unique_ptr<ReadBufferFromFileBase> readFile(std::unique_ptr<FileEnumerator> enumerator) override;

View File

@ -75,21 +75,22 @@ public:
RawHandle getRawHandle() const { return raw_handle; }
std::shared_ptr<ZipArchiveReader> getReader() const { return reader; }
void locateFile(const String & file_name_)
bool locateFile(const String & file_name_)
{
resetFileInfo();
bool case_sensitive = true;
int err = unzLocateFile(raw_handle, file_name_.c_str(), reinterpret_cast<unzFileNameComparer>(static_cast<size_t>(case_sensitive)));
if (err == UNZ_END_OF_LIST_OF_FILE)
showError("File " + quoteString(file_name_) + " not found");
return false;
file_name = file_name_;
return true;
}
void locateFile(NameFilter filter)
bool locateFile(NameFilter filter)
{
int err = unzGoToFirstFile(raw_handle);
if (err == UNZ_END_OF_LIST_OF_FILE)
showError("No file was found satisfying the filter");
return false;
do
{
@ -97,12 +98,12 @@ public:
resetFileInfo();
retrieveFileInfo();
if (filter(getFileName()))
return;
return true;
err = unzGoToNextFile(raw_handle);
} while (err != UNZ_END_OF_LIST_OF_FILE);
showError("No file was found satisfying the filter");
return false;
}
bool tryLocateFile(const String & file_name_)
@ -513,7 +514,9 @@ bool ZipArchiveReader::fileExists(const String & filename)
ZipArchiveReader::FileInfo ZipArchiveReader::getFileInfo(const String & filename)
{
auto handle = acquireHandle();
handle.locateFile(filename);
if (!handle.locateFile(filename))
showError(fmt::format("File {} was not found in archive", quoteString(filename)));
return handle.getFileInfo();
}
@ -525,17 +528,31 @@ std::unique_ptr<ZipArchiveReader::FileEnumerator> ZipArchiveReader::firstFile()
return std::make_unique<FileEnumeratorImpl>(std::move(handle));
}
std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(const String & filename)
std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(const String & filename, bool throw_on_not_found)
{
auto handle = acquireHandle();
handle.locateFile(filename);
if (!handle.locateFile(filename))
{
if (throw_on_not_found)
showError(fmt::format("File {} was not found in archive", quoteString(filename)));
return nullptr;
}
return std::make_unique<ReadBufferFromZipArchive>(std::move(handle));
}
std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(NameFilter filter)
std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(NameFilter filter, bool throw_on_not_found)
{
auto handle = acquireHandle();
handle.locateFile(filter);
if (!handle.locateFile(filter))
{
if (throw_on_not_found)
showError(fmt::format("No file satisfying filter in archive"));
return nullptr;
}
return std::make_unique<ReadBufferFromZipArchive>(std::move(handle));
}

View File

@ -41,8 +41,8 @@ public:
/// Starts reading a file from the archive. The function returns a read buffer,
/// you can read that buffer to extract uncompressed data from the archive.
/// Several read buffers can be used at the same time in parallel.
std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename, bool throw_on_not_found) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter, bool throw_on_not_found) override;
/// It's possible to convert a file enumerator to a read buffer and vice versa.
std::unique_ptr<ReadBufferFromFileBase> readFile(std::unique_ptr<FileEnumerator> enumerator) override;

View File

@ -24,6 +24,18 @@ std::shared_ptr<IArchiveReader> createArchiveReader(
[[maybe_unused]] const std::function<std::unique_ptr<SeekableReadBuffer>()> & archive_read_function,
[[maybe_unused]] size_t archive_size)
{
using namespace std::literals;
static constexpr std::array tar_extensions
{
".tar"sv,
".tar.gz"sv,
".tgz"sv,
".tar.zst"sv,
".tzst"sv,
".tar.xz"sv,
".tar.bz2"sv
};
if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx"))
{
#if USE_MINIZIP
@ -32,7 +44,8 @@ std::shared_ptr<IArchiveReader> createArchiveReader(
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "minizip library is disabled");
#endif
}
else if (path_to_archive.ends_with(".tar") || path_to_archive.ends_with("tar.gz"))
else if (std::any_of(
tar_extensions.begin(), tar_extensions.end(), [&](const auto extension) { return path_to_archive.ends_with(extension); }))
{
#if USE_LIBARCHIVE
return std::make_shared<TarArchiveReader>(path_to_archive);

View File

@ -113,11 +113,11 @@ TEST_P(ArchiveReaderAndWriterTest, EmptyArchive)
EXPECT_FALSE(reader->fileExists("nofile.txt"));
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' not found",
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' was not found in archive",
[&]{ reader->getFileInfo("nofile.txt"); });
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' not found",
[&]{ reader->readFile("nofile.txt"); });
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' was not found in archive",
[&]{ reader->readFile("nofile.txt", /*throw_on_not_found=*/true); });
EXPECT_EQ(reader->firstFile(), nullptr);
}
@ -145,7 +145,7 @@ TEST_P(ArchiveReaderAndWriterTest, SingleFileInArchive)
EXPECT_GT(file_info.compressed_size, 0);
{
auto in = reader->readFile("a.txt");
auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents);
@ -215,14 +215,14 @@ TEST_P(ArchiveReaderAndWriterTest, TwoFilesInArchive)
EXPECT_EQ(reader->getFileInfo("b/c.txt").uncompressed_size, c_contents.size());
{
auto in = reader->readFile("a.txt");
auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, a_contents);
}
{
auto in = reader->readFile("b/c.txt");
auto in = reader->readFile("b/c.txt", /*throw_on_not_found=*/true);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, c_contents);
@ -230,7 +230,7 @@ TEST_P(ArchiveReaderAndWriterTest, TwoFilesInArchive)
{
/// Read a.txt again.
auto in = reader->readFile("a.txt");
auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, a_contents);
@ -302,14 +302,14 @@ TEST_P(ArchiveReaderAndWriterTest, InMemory)
EXPECT_EQ(reader->getFileInfo("b.txt").uncompressed_size, b_contents.size());
{
auto in = reader->readFile("a.txt");
auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, a_contents);
}
{
auto in = reader->readFile("b.txt");
auto in = reader->readFile("b.txt", /*throw_on_not_found=*/true);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, b_contents);
@ -317,7 +317,7 @@ TEST_P(ArchiveReaderAndWriterTest, InMemory)
{
/// Read a.txt again.
auto in = reader->readFile("a.txt");
auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, a_contents);
@ -343,19 +343,19 @@ TEST_P(ArchiveReaderAndWriterTest, Password)
/// Try to read without a password.
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Password is required",
[&]{ reader->readFile("a.txt"); });
[&]{ reader->readFile("a.txt", /*throw_on_not_found=*/true); });
{
/// Try to read with a wrong password.
reader->setPassword("123Qwe");
expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Wrong password",
[&]{ reader->readFile("a.txt"); });
[&]{ reader->readFile("a.txt", /*throw_on_not_found=*/true); });
}
{
/// Reading with the right password is successful.
reader->setPassword("Qwe123");
auto in = reader->readFile("a.txt");
auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents);
@ -387,7 +387,7 @@ TEST(TarArchiveReaderTest, ReadFile) {
bool created = createArchiveWithFiles<ArchiveType::Tar>(archive_path, {{filename, contents}});
EXPECT_EQ(created, true);
auto reader = createArchiveReader(archive_path);
auto in = reader->readFile(filename);
auto in = reader->readFile(filename, /*throw_on_not_found=*/true);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents);
@ -405,11 +405,11 @@ TEST(TarArchiveReaderTest, ReadTwoFiles) {
auto reader = createArchiveReader(archive_path);
EXPECT_EQ(reader->fileExists(file1), true);
EXPECT_EQ(reader->fileExists(file2), true);
auto in = reader->readFile(file1);
auto in = reader->readFile(file1, /*throw_on_not_found=*/true);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents1);
in = reader->readFile(file2);
in = reader->readFile(file2, /*throw_on_not_found=*/true);
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents2);
@ -448,7 +448,7 @@ TEST(SevenZipArchiveReaderTest, ReadFile) {
bool created = createArchiveWithFiles<ArchiveType::SevenZip>(archive_path, {{filename, contents}});
EXPECT_EQ(created, true);
auto reader = createArchiveReader(archive_path);
auto in = reader->readFile(filename);
auto in = reader->readFile(filename, /*throw_on_not_found=*/true);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents);
@ -479,11 +479,11 @@ TEST(SevenZipArchiveReaderTest, ReadTwoFiles) {
auto reader = createArchiveReader(archive_path);
EXPECT_EQ(reader->fileExists(file1), true);
EXPECT_EQ(reader->fileExists(file2), true);
auto in = reader->readFile(file1);
auto in = reader->readFile(file1, /*throw_on_not_found=*/true);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents1);
in = reader->readFile(file2);
in = reader->readFile(file2, /*throw_on_not_found=*/true);
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents2);

View File

@ -399,11 +399,11 @@ std::unique_ptr<ReadBuffer> createReadBuffer(
return reader->readFile([my_matcher = std::move(matcher)](const std::string & path)
{
return re2::RE2::FullMatch(path, *my_matcher);
});
}, /*throw_on_not_found=*/true);
}
else
{
return reader->readFile(current_path);
return reader->readFile(current_path, /*throw_on_not_found=*/true);
}
}
@ -721,28 +721,20 @@ public:
{
public:
explicit FilesIterator(
const Strings & files_, std::vector<std::string> archives_, std::vector<std::pair<uint64_t, std::string>> files_in_archive_)
: files(files_), archives(std::move(archives_)), files_in_archive(std::move(files_in_archive_))
const Strings & files_, std::vector<std::string> archives_, const IArchiveReader::NameFilter & name_filter_)
: files(files_), archives(std::move(archives_)), name_filter(name_filter_)
{
}
String next()
{
const auto & fs = fromArchive() ? archives : files;
auto current_index = index.fetch_add(1, std::memory_order_relaxed);
if (current_index >= files.size())
if (current_index >= fs.size())
return "";
return files[current_index];
}
std::pair<String, String> nextFileFromArchive()
{
auto current_index = index.fetch_add(1, std::memory_order_relaxed);
if (current_index >= files_in_archive.size())
return {"", ""};
const auto & [archive_index, filename] = files_in_archive[current_index];
return {archives[archive_index], filename};
return fs[current_index];
}
bool fromArchive() const
@ -750,10 +742,29 @@ public:
return !archives.empty();
}
bool readSingleFileFromArchive() const
{
return !name_filter;
}
/// Returns a copy of the name filter this iterator was constructed with.
/// NOTE(review): the filter may be unset; readSingleFileFromArchive() tests for that.
IArchiveReader::NameFilter getNameFilter() const
{
return name_filter;
}
/// Returns the only entry of `files`, used as the in-archive file name when
/// a single file is read from each archive.
/// Throws LOGICAL_ERROR if `files` does not contain exactly one entry.
/// Declared const: it only reads `files`, like the other accessors of this class.
const String & getFileName() const
{
    if (files.size() != 1)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected only 1 filename but got {}", files.size());

    return files.front();
}
private:
std::vector<std::string> files;
std::vector<std::string> archives;
std::vector<std::pair<uint64_t, std::string>> files_in_archive;
IArchiveReader::NameFilter name_filter;
std::atomic<size_t> index = 0;
};
@ -863,25 +874,64 @@ public:
{
if (files_iterator->fromArchive())
{
auto [archive, filename] = files_iterator->nextFileFromArchive();
if (archive.empty())
return {};
current_path = std::move(filename);
if (!archive_reader || archive_reader->getPath() != archive)
if (files_iterator->readSingleFileFromArchive())
{
auto archive = files_iterator->next();
if (archive.empty())
return {};
struct stat file_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName());
if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0)
continue;
archive_reader = createArchiveReader(archive);
file_enumerator = archive_reader->firstFile();
current_path = files_iterator->getFileName();
read_buf = archive_reader->readFile(current_path, /*throw_on_not_found=*/false);
if (!read_buf)
continue;
}
if (file_enumerator == nullptr)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to find a file in archive {}", archive);
while (file_enumerator->getFileName() != current_path)
else
{
if (!file_enumerator->nextFile())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected file {} is missing from archive {}", current_path, archive);
auto name_filter = files_iterator->getNameFilter();
chassert(name_filter);
while (true)
{
if (file_enumerator == nullptr)
{
auto archive = files_iterator->next();
if (archive.empty())
return {};
struct stat file_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName());
if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0)
continue;
archive_reader = createArchiveReader(archive);
file_enumerator = archive_reader->firstFile();
continue;
}
bool file_found = true;
while (!name_filter(file_enumerator->getFileName()))
{
if (!file_enumerator->nextFile())
{
file_found = false;
break;
}
}
if (file_found)
{
current_path = file_enumerator->getFileName();
break;
}
file_enumerator = nullptr;
}
chassert(file_enumerator);
read_buf = archive_reader->readFile(std::move(file_enumerator));
}
}
else
@ -903,23 +953,12 @@ public:
if (!read_buf)
{
struct stat file_stat;
if (archive_reader == nullptr)
{
file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName());
file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName());
if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0)
continue;
}
if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0)
continue;
if (archive_reader == nullptr)
{
read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context);
}
else
{
chassert(file_enumerator);
read_buf = archive_reader->readFile(std::move(file_enumerator));
}
read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context);
}
const Settings & settings = context->getSettingsRef();
@ -987,10 +1026,10 @@ public:
pipeline.reset();
input_format.reset();
if (archive_reader != nullptr)
if (files_iterator->fromArchive() && !files_iterator->readSingleFileFromArchive())
file_enumerator = archive_reader->nextFile(std::move(read_buf));
else
read_buf.reset();
read_buf.reset();
}
return {};
@ -1050,9 +1089,7 @@ Pipe StorageFile::read(
}
}
std::vector<std::pair<uint64_t, std::string>> files_in_archive;
size_t files_in_archive_num = 0;
IArchiveReader::NameFilter filter;
if (!paths_to_archive.empty())
{
if (paths.size() != 1)
@ -1060,7 +1097,6 @@ Pipe StorageFile::read(
const auto & path = paths[0];
IArchiveReader::NameFilter filter;
if (path.find_first_of("*?{") != std::string::npos)
{
auto matcher = std::make_shared<re2::RE2>(makeRegexpPatternFromGlobs(path));
@ -1073,32 +1109,14 @@ Pipe StorageFile::read(
return re2::RE2::FullMatch(p, *matcher);
};
}
for (size_t i = 0; i < paths_to_archive.size(); ++i)
{
if (filter)
{
const auto & path_to_archive = paths_to_archive[i];
auto archive_reader = createArchiveReader(path_to_archive);
auto files = archive_reader->getAllFiles(filter);
for (auto & file : files)
files_in_archive.push_back({i, std::move(file)});
}
else
{
files_in_archive.push_back({i, path});
}
}
files_in_archive_num = files_in_archive.size();
}
auto files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, paths_to_archive, std::move(files_in_archive));
auto files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, paths_to_archive, std::move(filter));
auto this_ptr = std::static_pointer_cast<StorageFile>(shared_from_this());
size_t num_streams = max_num_streams;
auto files_to_read = std::max(files_in_archive_num, paths.size());
auto files_to_read = std::max(paths_to_archive.size(), paths.size());
if (max_num_streams > files_to_read)
num_streams = files_to_read;

View File

@ -919,7 +919,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
if (getCurrentMutationVersion(left, lock) != getCurrentMutationVersion(right, lock))
{
disable_reason = "Some parts have differ mmutatuon version";
disable_reason = "Some parts have different mutation version";
return false;
}

View File

@ -16,33 +16,35 @@ function read_archive_file() {
function run_archive_test() {
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS 02661_archive_table"
FILE_PREFIX="${CLICKHOUSE_TEST_UNIQUE_NAME}_$1_"
user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
echo -e "1,2\n3,4" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv
echo -e "5,6\n7,8" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.csv
echo -e "9,10\n11,12" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.csv
echo -e "1,2\n3,4" > ${FILE_PREFIX}_data1.csv
echo -e "5,6\n7,8" > ${FILE_PREFIX}_data2.csv
echo -e "9,10\n11,12" > ${FILE_PREFIX}_data3.csv
eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv ${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.csv > /dev/null"
eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv ${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.csv > /dev/null"
eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive3.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.csv ${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.csv > /dev/null"
eval "$2 ${user_files_path}/${FILE_PREFIX}_archive1.$1 ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data2.csv > /dev/null"
eval "$2 ${user_files_path}/${FILE_PREFIX}_archive2.$1 ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data3.csv > /dev/null"
eval "$2 ${user_files_path}/${FILE_PREFIX}_archive3.$1 ${FILE_PREFIX}_data2.csv ${FILE_PREFIX}_data3.csv > /dev/null"
echo "archive1 data1.csv"
read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv"
read_archive_file "${FILE_PREFIX}_archive1.$1 :: ${FILE_PREFIX}_data1.csv"
echo "archive{1..2} data1.csv"
read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1..2}.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv"
read_archive_file "${FILE_PREFIX}_archive{1..2}.$1 :: ${FILE_PREFIX}_data1.csv"
echo "archive{1,2} data{1,3}.csv"
read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1,2}.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data{1,3}.csv"
read_archive_file "${FILE_PREFIX}_archive{1,2}.$1 :: ${FILE_PREFIX}_data{1,3}.csv"
echo "archive3 data*.csv"
read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive3.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.csv"
read_archive_file "${FILE_PREFIX}_archive3.$1 :: ${FILE_PREFIX}_data*.csv"
echo "archive* *.csv"
read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive*.$1 :: *.csv"
read_archive_file "${FILE_PREFIX}_archive*.$1 :: *.csv"
echo "archive* {2..3}.csv"
read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive*.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data{2..3}.csv"
read_archive_file "${FILE_PREFIX}_archive*.$1 :: ${FILE_PREFIX}_data{2..3}.csv"
$CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL"
$CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL"
$CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL"
$CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL"
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1..3}.$1
rm ${user_files_path}/${FILE_PREFIX}_archive{1..3}.$1
rm ${CLICKHOUSE_TEST_UNIQUE_NAME}_data{1..3}.csv
rm ${FILE_PREFIX}_data{1..3}.csv
}

View File

@ -0,0 +1,116 @@
archive1 data1.csv
1 2
3 4
1 2
3 4
1 2
3 4
archive{1..2} data1.csv
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
OK

View File

@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long
# Runs the shared archive-reading scenario against tar archives compressed with bzip2.
# Resolve this script's directory so the sourced files are found regardless of the caller's cwd.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# run_archive_test is expected to come from the library sourced below.
# shellcheck source=./02661_read_from_archive.lib
. "$CUR_DIR"/02661_read_from_archive.lib
# First argument: archive file extension; second: command used to create the archives (tar -j = bzip2).
run_archive_test "tar.bz2" "tar -cjf"

View File

@ -0,0 +1,116 @@
archive1 data1.csv
1 2
3 4
1 2
3 4
1 2
3 4
archive{1..2} data1.csv
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
OK

View File

@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long
# Runs the shared archive-reading scenario against tar archives compressed with xz.
# Resolve this script's directory so the sourced files are found regardless of the caller's cwd.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# run_archive_test is expected to come from the library sourced below.
# shellcheck source=./02661_read_from_archive.lib
. "$CUR_DIR"/02661_read_from_archive.lib
# First argument: archive file extension; second: command used to create the archives (tar -J = xz).
run_archive_test "tar.xz" "tar -cJf"

View File

@ -0,0 +1,116 @@
archive1 data1.csv
1 2
3 4
1 2
3 4
1 2
3 4
archive{1..2} data1.csv
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
OK

View File

@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long
# Runs the shared archive-reading scenario against zstd-compressed tar archives using the short .tzst suffix.
# Resolve this script's directory so the sourced files are found regardless of the caller's cwd.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# run_archive_test is expected to come from the library sourced below.
# shellcheck source=./02661_read_from_archive.lib
. "$CUR_DIR"/02661_read_from_archive.lib
# First argument: archive file extension; second: command used to create the archives.
# tar -a picks the compressor from the archive suffix (here .tzst) instead of a dedicated flag.
run_archive_test "tzst" "tar -caf"