Merge pull request #40426 from ClickHouse/util-compressor-exec

Self-extracting: add --exec param, allow compressed to be renamed
This commit is contained in:
Yakov Olkhovskiy 2022-08-23 11:20:28 -04:00 committed by GitHub
commit d11d494587
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 103 additions and 29 deletions

View File

@ -7,6 +7,7 @@
#include <cstdio>
#include <cstring>
#include <cerrno>
#include <iomanip>
#include <memory>
#include <iostream>
@ -76,9 +77,10 @@ int doCompress(char * input, char * output, off_t & in_offset, off_t & out_offse
}
/// compress data from opened file into output file
int compress(int in_fd, int out_fd, int level, off_t & pointer, const struct stat & info_in)
int compress(int in_fd, int out_fd, int level, off_t & pointer, const struct stat & info_in, uint64_t & compressed_size)
{
off_t in_offset = 0;
compressed_size = 0;
/// mmap files
char * input = static_cast<char*>(mmap(nullptr, info_in.st_size, PROT_READ, MAP_PRIVATE, in_fd, 0));
@ -141,6 +143,8 @@ int compress(int in_fd, int out_fd, int level, off_t & pointer, const struct sta
return 1;
}
uint64_t total_size = 0;
/// Compress data
while (in_offset < info_in.st_size)
{
@ -171,8 +175,15 @@ int compress(int in_fd, int out_fd, int level, off_t & pointer, const struct sta
}
pointer += current_block_size;
printf("...block compression rate: %.2f%%\n", static_cast<float>(current_block_size) / size * 100);
total_size += size;
compressed_size += current_block_size;
current_block_size = 0;
}
std::cout <<
"Compressed size: " << compressed_size <<
", compression rate: " << std::fixed << std::setprecision(2) <<
static_cast<float>(compressed_size) / total_size * 100 << "%"
<< std::endl;
if (0 != munmap(input, info_in.st_size) ||
0 != munmap(output, 2 * max_block_size))
@ -187,7 +198,7 @@ int compress(int in_fd, int out_fd, int level, off_t & pointer, const struct sta
}
/// Save Metadata at the end of file
int saveMetaData(char* filenames[], int count, int output_fd, const MetaData& metadata,
int saveMetaData(const char* filenames[], int count, int output_fd, const MetaData& metadata,
FileData* files_data, size_t pointer, size_t sum_file_size)
{
/// Allocate memory for metadata
@ -228,21 +239,36 @@ int saveMetaData(char* filenames[], int count, int output_fd, const MetaData& me
}
/// Fills metadata and calls compression function for each file
int compressFiles(char* filenames[], int count, int output_fd, int level, const struct stat& info_out)
int compressFiles(const char* out_name, const char* exec, char* filenames[], int count, int output_fd, int level, const struct stat& info_out)
{
MetaData metadata;
size_t sum_file_size = 0;
metadata.number_of_files = htole64(count);
int is_exec = exec && *exec ? 1 : 0;
metadata.number_of_files = htole64(count + is_exec);
off_t pointer = info_out.st_size;
/// Store information about each file and compress it
FileData* files_data = new FileData[count];
char * names[count];
for (int i = 0; i < count; ++i)
{
printf("Compressing: %s\n", filenames[i]);
uint64_t total_size = 0;
uint64_t total_compressed_size = 0;
int input_fd = open(filenames[i], O_RDONLY);
/// Store information about each file and compress it
FileData* files_data = new FileData[count + is_exec];
const char * names[count + is_exec];
for (int i = 0; i <= count; ++i)
{
const char* filename = nullptr;
if (i == count)
{
if (!is_exec)
continue;
filename = exec;
files_data[i].exec = true;
}
else
filename = filenames[i];
printf("Compressing: %s\n", filename);
int input_fd = open(filename, O_RDONLY);
if (input_fd == -1)
{
perror(nullptr);
@ -253,14 +279,17 @@ int compressFiles(char* filenames[], int count, int output_fd, int level, const
/// Remember information about file name
/// This should be made after the file is opened
/// because filename should be extracted from path
names[i] = strrchr(filenames[i], '/');
names[i] = strrchr(filename, '/');
if (names[i])
++names[i];
else
names[i] = filenames[i];
names[i] = filename;
size_t nlen = strlen(names[i]) + 1;
files_data[i].name_length = htole64(nlen);
sum_file_size += nlen;
/// If --exec was not given at all (neither with a path nor empty), the input file whose name matches the output name becomes the executable
if (!is_exec && !exec && strcmp(names[i], out_name) == 0)
files_data[i].exec = true;
/// read data about input file
struct stat info_in;
@ -278,6 +307,7 @@ int compressFiles(char* filenames[], int count, int output_fd, int level, const
}
std::cout << "Size: " << info_in.st_size << std::endl;
total_size += info_in.st_size;
/// Save umask
files_data[i].umask = htole64(info_in.st_mode);
@ -287,15 +317,19 @@ int compressFiles(char* filenames[], int count, int output_fd, int level, const
files_data[i].uncompressed_size = htole64(info_in.st_size);
files_data[i].start = htole64(pointer);
uint64_t compressed_size = 0;
/// Compressed data will be added to the end of file
/// It will allow to create self extracting executable from file
if (0 != compress(input_fd, output_fd, level, pointer, info_in))
if (0 != compress(input_fd, output_fd, level, pointer, info_in, compressed_size))
{
perror(nullptr);
delete [] files_data;
return 1;
}
total_compressed_size += compressed_size;
/// This error is less important than the others.
/// If file cannot be closed, in some cases it will lead to
/// error in other function that will stop compression process
@ -308,12 +342,16 @@ int compressFiles(char* filenames[], int count, int output_fd, int level, const
/// save location of files information
metadata.start_of_files_data = htole64(pointer);
if (0 != saveMetaData(names, count, output_fd, metadata, files_data, pointer, sum_file_size))
if (0 != saveMetaData(names, count + is_exec, output_fd, metadata, files_data, pointer, sum_file_size))
{
delete [] files_data;
return 1;
}
std::cout << "Compression rate: " << std::fixed << std::setprecision(2) <<
static_cast<float>(total_compressed_size) / total_size * 100 << "%"
<< std::endl;
delete [] files_data;
return 0;
}
@ -424,10 +462,14 @@ int copy_decompressor_file(const char *path, int output_fd)
inline void usage(FILE * out, const char * name)
{
(void)fprintf(out,
"%s [--level=<level>] [--decompressor=<path>] <output_file> <input_file> [... <input_file>]\n"
"%s [--level=<level>] [--decompressor=<path>] [--exec=<path>] <output_file> [<input_file> [... <input_file>]]\n"
"\t--level - compression level, max is %d, negative - prefer speed over compression\n"
"\t default is 5\n"
"\t--decompressor - path to decompressor\n",
"\t--decompressor - path to decompressor\n"
"\t--exec - path to an input file to execute after decompression, if omitted then\n"
"\t an <input_file> having the same name as <output_file> becomes such executable.\n"
"\t This executable upon decompression will substitute started compressed preserving compressed name.\n"
"\t If no <path> is specified - nothing will be run - only decompression will be performed.\n",
name, ZSTD_maxCLevel());
}
@ -497,7 +539,12 @@ int main(int argc, char* argv[])
++start_of_files;
}
if (argc < start_of_files + 1)
/// Specified executable
const char * exec = get_param(argc, argv, "exec");
if (exec != nullptr)
++start_of_files;
if (argc < start_of_files + (exec == nullptr || *exec == 0 ? 1 : 0))
{
usage(stderr, argv[0]);
return 1;
@ -516,6 +563,12 @@ int main(int argc, char* argv[])
perror(nullptr);
return 1;
}
const char* out_name = strrchr(argv[start_of_files], '/');
if (out_name)
++out_name;
else
out_name = argv[start_of_files];
++start_of_files;
if (decompressor != nullptr)
@ -536,7 +589,7 @@ int main(int argc, char* argv[])
}
std::cout << "Compression with level: " << level << std::endl;
if (0 != compressFiles(&argv[start_of_files], argc - start_of_files, output_fd, level, info_out))
if (0 != compressFiles(out_name, exec, &argv[start_of_files], argc - start_of_files, output_fd, level, info_out))
{
printf("Compression failed.\n");
close(output_fd);

View File

@ -168,7 +168,7 @@ int decompress(char * input, char * output, off_t start, off_t end, size_t max_n
/// Read data about files and decompress them.
int decompressFiles(int input_fd, char * path, char * name, bool & have_compressed_analoge, char * decompressed_suffix, uint64_t * decompressed_umask)
int decompressFiles(int input_fd, char * path, char * name, bool & have_compressed_analoge, bool & has_exec, char * decompressed_suffix, uint64_t * decompressed_umask)
{
/// Read data about output file.
/// Compressed data will replace data in file
@ -226,8 +226,8 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress
file_info = *reinterpret_cast<FileData*>(input + files_pointer);
files_pointer += sizeof(FileData);
size_t file_name_len =
(strcmp(input + files_pointer, name) ? le64toh(file_info.name_length) : le64toh(file_info.name_length) + 13 + 7);
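/// The executable entry is written under the compressed file's own name, so reserve 13 + 7 extra characters for the ".decompressed.XXXXXX" suffix plus one for the terminating NUL.
/// NOTE(review): a non-exec entry whose stored name equals the compressed name also gets this suffix appended below, but only name_length is reserved for it - confirm the buffer is large enough.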
size_t file_name_len = file_info.exec ? strlen(name) + 13 + 7 + 1 : le64toh(file_info.name_length);
size_t file_path_len = path ? strlen(path) + 1 + file_name_len : file_name_len;
@ -238,9 +238,22 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress
strcat(file_name, path);
strcat(file_name, "/");
}
strcat(file_name, input + files_pointer);
bool same_name = false;
if (file_info.exec)
{
has_exec = true;
strcat(file_name, name);
}
else
{
if (strcmp(name, input + files_pointer) == 0)
same_name = true;
strcat(file_name, input + files_pointer);
}
files_pointer += le64toh(file_info.name_length);
if (file_name_len != le64toh(file_info.name_length))
if (file_info.exec || same_name)
{
strcat(file_name, ".decompressed.XXXXXX");
int fd = mkstemp(file_name);
@ -377,11 +390,12 @@ int main(int/* argc*/, char* argv[])
}
bool have_compressed_analoge = false;
bool has_exec = false;
char decompressed_suffix[7] = {0};
uint64_t decompressed_umask = 0;
/// Decompress all files
if (0 != decompressFiles(input_fd, path, name, have_compressed_analoge, decompressed_suffix, &decompressed_umask))
if (0 != decompressFiles(input_fd, path, name, have_compressed_analoge, has_exec, decompressed_suffix, &decompressed_umask))
{
printf("Error happened during decompression.\n");
if (0 != close(input_fd))
@ -427,10 +441,15 @@ int main(int/* argc*/, char* argv[])
return 1;
}
execv(self, argv);
if (has_exec)
{
execv(self, argv);
/// This part of code will be reached only if error happened
perror("execv");
return 1;
/// This part of code will be reached only if error happened
perror("execv");
return 1;
}
printf("No target executable - decompression only was performed.\n");
}
}

View File

@ -1,5 +1,6 @@
#pragma once
#include <cstdint>
#include <sys/types.h>
#include <sys/stat.h>
@ -43,5 +44,6 @@ struct FileData
uint64_t name_length = 0;
uint64_t uncompressed_size = 0;
uint64_t umask = 0;
bool exec = false;
};