mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #40426 from ClickHouse/util-compressor-exec
Self-extracting: add --exec param, allow compressed to be renamed
This commit is contained in:
commit
d11d494587
@ -7,6 +7,7 @@
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <cerrno>
|
||||
#include <iomanip>
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
|
||||
@ -76,9 +77,10 @@ int doCompress(char * input, char * output, off_t & in_offset, off_t & out_offse
|
||||
}
|
||||
|
||||
/// compress data from opened file into output file
|
||||
int compress(int in_fd, int out_fd, int level, off_t & pointer, const struct stat & info_in)
|
||||
int compress(int in_fd, int out_fd, int level, off_t & pointer, const struct stat & info_in, uint64_t & compressed_size)
|
||||
{
|
||||
off_t in_offset = 0;
|
||||
compressed_size = 0;
|
||||
|
||||
/// mmap files
|
||||
char * input = static_cast<char*>(mmap(nullptr, info_in.st_size, PROT_READ, MAP_PRIVATE, in_fd, 0));
|
||||
@ -141,6 +143,8 @@ int compress(int in_fd, int out_fd, int level, off_t & pointer, const struct sta
|
||||
return 1;
|
||||
}
|
||||
|
||||
uint64_t total_size = 0;
|
||||
|
||||
/// Compress data
|
||||
while (in_offset < info_in.st_size)
|
||||
{
|
||||
@ -171,8 +175,15 @@ int compress(int in_fd, int out_fd, int level, off_t & pointer, const struct sta
|
||||
}
|
||||
pointer += current_block_size;
|
||||
printf("...block compression rate: %.2f%%\n", static_cast<float>(current_block_size) / size * 100);
|
||||
total_size += size;
|
||||
compressed_size += current_block_size;
|
||||
current_block_size = 0;
|
||||
}
|
||||
std::cout <<
|
||||
"Compressed size: " << compressed_size <<
|
||||
", compression rate: " << std::fixed << std::setprecision(2) <<
|
||||
static_cast<float>(compressed_size) / total_size * 100 << "%"
|
||||
<< std::endl;
|
||||
|
||||
if (0 != munmap(input, info_in.st_size) ||
|
||||
0 != munmap(output, 2 * max_block_size))
|
||||
@ -187,7 +198,7 @@ int compress(int in_fd, int out_fd, int level, off_t & pointer, const struct sta
|
||||
}
|
||||
|
||||
/// Save Metadata at the end of file
|
||||
int saveMetaData(char* filenames[], int count, int output_fd, const MetaData& metadata,
|
||||
int saveMetaData(const char* filenames[], int count, int output_fd, const MetaData& metadata,
|
||||
FileData* files_data, size_t pointer, size_t sum_file_size)
|
||||
{
|
||||
/// Allocate memory for metadata
|
||||
@ -228,21 +239,36 @@ int saveMetaData(char* filenames[], int count, int output_fd, const MetaData& me
|
||||
}
|
||||
|
||||
/// Fills metadata and calls compression function for each file
|
||||
int compressFiles(char* filenames[], int count, int output_fd, int level, const struct stat& info_out)
|
||||
int compressFiles(const char* out_name, const char* exec, char* filenames[], int count, int output_fd, int level, const struct stat& info_out)
|
||||
{
|
||||
MetaData metadata;
|
||||
size_t sum_file_size = 0;
|
||||
metadata.number_of_files = htole64(count);
|
||||
int is_exec = exec && *exec ? 1 : 0;
|
||||
metadata.number_of_files = htole64(count + is_exec);
|
||||
off_t pointer = info_out.st_size;
|
||||
|
||||
/// Store information about each file and compress it
|
||||
FileData* files_data = new FileData[count];
|
||||
char * names[count];
|
||||
for (int i = 0; i < count; ++i)
|
||||
{
|
||||
printf("Compressing: %s\n", filenames[i]);
|
||||
uint64_t total_size = 0;
|
||||
uint64_t total_compressed_size = 0;
|
||||
|
||||
int input_fd = open(filenames[i], O_RDONLY);
|
||||
/// Store information about each file and compress it
|
||||
FileData* files_data = new FileData[count + is_exec];
|
||||
const char * names[count + is_exec];
|
||||
for (int i = 0; i <= count; ++i)
|
||||
{
|
||||
const char* filename = nullptr;
|
||||
if (i == count)
|
||||
{
|
||||
if (!is_exec)
|
||||
continue;
|
||||
filename = exec;
|
||||
files_data[i].exec = true;
|
||||
}
|
||||
else
|
||||
filename = filenames[i];
|
||||
|
||||
printf("Compressing: %s\n", filename);
|
||||
|
||||
int input_fd = open(filename, O_RDONLY);
|
||||
if (input_fd == -1)
|
||||
{
|
||||
perror(nullptr);
|
||||
@ -253,14 +279,17 @@ int compressFiles(char* filenames[], int count, int output_fd, int level, const
|
||||
/// Remember information about file name
|
||||
/// This should be made after the file is opened
|
||||
/// because filename should be extracted from path
|
||||
names[i] = strrchr(filenames[i], '/');
|
||||
names[i] = strrchr(filename, '/');
|
||||
if (names[i])
|
||||
++names[i];
|
||||
else
|
||||
names[i] = filenames[i];
|
||||
names[i] = filename;
|
||||
size_t nlen = strlen(names[i]) + 1;
|
||||
files_data[i].name_length = htole64(nlen);
|
||||
sum_file_size += nlen;
|
||||
/// if no --exec is specified nor it's empty - file which is matching output name is executable
|
||||
if (!is_exec && !exec && strcmp(names[i], out_name) == 0)
|
||||
files_data[i].exec = true;
|
||||
|
||||
/// read data about input file
|
||||
struct stat info_in;
|
||||
@ -278,6 +307,7 @@ int compressFiles(char* filenames[], int count, int output_fd, int level, const
|
||||
}
|
||||
|
||||
std::cout << "Size: " << info_in.st_size << std::endl;
|
||||
total_size += info_in.st_size;
|
||||
|
||||
/// Save umask
|
||||
files_data[i].umask = htole64(info_in.st_mode);
|
||||
@ -287,15 +317,19 @@ int compressFiles(char* filenames[], int count, int output_fd, int level, const
|
||||
files_data[i].uncompressed_size = htole64(info_in.st_size);
|
||||
files_data[i].start = htole64(pointer);
|
||||
|
||||
uint64_t compressed_size = 0;
|
||||
|
||||
/// Compressed data will be added to the end of file
|
||||
/// It will allow to create self extracting executable from file
|
||||
if (0 != compress(input_fd, output_fd, level, pointer, info_in))
|
||||
if (0 != compress(input_fd, output_fd, level, pointer, info_in, compressed_size))
|
||||
{
|
||||
perror(nullptr);
|
||||
delete [] files_data;
|
||||
return 1;
|
||||
}
|
||||
|
||||
total_compressed_size += compressed_size;
|
||||
|
||||
/// This error is less important, than others.
|
||||
/// If file cannot be closed, in some cases it will lead to
|
||||
/// error in other function that will stop compression process
|
||||
@ -308,12 +342,16 @@ int compressFiles(char* filenames[], int count, int output_fd, int level, const
|
||||
/// save location of files information
|
||||
metadata.start_of_files_data = htole64(pointer);
|
||||
|
||||
if (0 != saveMetaData(names, count, output_fd, metadata, files_data, pointer, sum_file_size))
|
||||
if (0 != saveMetaData(names, count + is_exec, output_fd, metadata, files_data, pointer, sum_file_size))
|
||||
{
|
||||
delete [] files_data;
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::cout << "Compression rate: " << std::fixed << std::setprecision(2) <<
|
||||
static_cast<float>(total_compressed_size) / total_size * 100 << "%"
|
||||
<< std::endl;
|
||||
|
||||
delete [] files_data;
|
||||
return 0;
|
||||
}
|
||||
@ -424,10 +462,14 @@ int copy_decompressor_file(const char *path, int output_fd)
|
||||
inline void usage(FILE * out, const char * name)
|
||||
{
|
||||
(void)fprintf(out,
|
||||
"%s [--level=<level>] [--decompressor=<path>] <output_file> <input_file> [... <input_file>]\n"
|
||||
"%s [--level=<level>] [--decompressor=<path>] [--exec=<path>] <output_file> [<input_file> [... <input_file>]]\n"
|
||||
"\t--level - compression level, max is %d, negative - prefer speed over compression\n"
|
||||
"\t default is 5\n"
|
||||
"\t--decompressor - path to decompressor\n",
|
||||
"\t--decompressor - path to decompressor\n"
|
||||
"\t--exec - path to an input file to execute after decompression, if omitted then\n"
|
||||
"\t an <input_file> having the same name as <output_file> becomes such executable.\n"
|
||||
"\t This executable upon decompression will substitute started compressed preserving compressed name.\n"
|
||||
"\t If no <path> is specified - nothing will be run - only decompression will be performed.\n",
|
||||
name, ZSTD_maxCLevel());
|
||||
}
|
||||
|
||||
@ -497,7 +539,12 @@ int main(int argc, char* argv[])
|
||||
++start_of_files;
|
||||
}
|
||||
|
||||
if (argc < start_of_files + 1)
|
||||
/// Specified executable
|
||||
const char * exec = get_param(argc, argv, "exec");
|
||||
if (exec != nullptr)
|
||||
++start_of_files;
|
||||
|
||||
if (argc < start_of_files + (exec == nullptr || *exec == 0 ? 1 : 0))
|
||||
{
|
||||
usage(stderr, argv[0]);
|
||||
return 1;
|
||||
@ -516,6 +563,12 @@ int main(int argc, char* argv[])
|
||||
perror(nullptr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char* out_name = strrchr(argv[start_of_files], '/');
|
||||
if (out_name)
|
||||
++out_name;
|
||||
else
|
||||
out_name = argv[start_of_files];
|
||||
++start_of_files;
|
||||
|
||||
if (decompressor != nullptr)
|
||||
@ -536,7 +589,7 @@ int main(int argc, char* argv[])
|
||||
}
|
||||
|
||||
std::cout << "Compression with level: " << level << std::endl;
|
||||
if (0 != compressFiles(&argv[start_of_files], argc - start_of_files, output_fd, level, info_out))
|
||||
if (0 != compressFiles(out_name, exec, &argv[start_of_files], argc - start_of_files, output_fd, level, info_out))
|
||||
{
|
||||
printf("Compression failed.\n");
|
||||
close(output_fd);
|
||||
|
@ -168,7 +168,7 @@ int decompress(char * input, char * output, off_t start, off_t end, size_t max_n
|
||||
|
||||
|
||||
/// Read data about files and decomrpess them.
|
||||
int decompressFiles(int input_fd, char * path, char * name, bool & have_compressed_analoge, char * decompressed_suffix, uint64_t * decompressed_umask)
|
||||
int decompressFiles(int input_fd, char * path, char * name, bool & have_compressed_analoge, bool & has_exec, char * decompressed_suffix, uint64_t * decompressed_umask)
|
||||
{
|
||||
/// Read data about output file.
|
||||
/// Compressed data will replace data in file
|
||||
@ -226,8 +226,8 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress
|
||||
file_info = *reinterpret_cast<FileData*>(input + files_pointer);
|
||||
files_pointer += sizeof(FileData);
|
||||
|
||||
size_t file_name_len =
|
||||
(strcmp(input + files_pointer, name) ? le64toh(file_info.name_length) : le64toh(file_info.name_length) + 13 + 7);
|
||||
/// for output filename matching compressed allow additional 13 + 7 symbols for ".decompressed.XXXXXX" suffix
|
||||
size_t file_name_len = file_info.exec ? strlen(name) + 13 + 7 + 1 : le64toh(file_info.name_length);
|
||||
|
||||
size_t file_path_len = path ? strlen(path) + 1 + file_name_len : file_name_len;
|
||||
|
||||
@ -238,9 +238,22 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress
|
||||
strcat(file_name, path);
|
||||
strcat(file_name, "/");
|
||||
}
|
||||
strcat(file_name, input + files_pointer);
|
||||
|
||||
bool same_name = false;
|
||||
if (file_info.exec)
|
||||
{
|
||||
has_exec = true;
|
||||
strcat(file_name, name);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (strcmp(name, input + files_pointer) == 0)
|
||||
same_name = true;
|
||||
strcat(file_name, input + files_pointer);
|
||||
}
|
||||
|
||||
files_pointer += le64toh(file_info.name_length);
|
||||
if (file_name_len != le64toh(file_info.name_length))
|
||||
if (file_info.exec || same_name)
|
||||
{
|
||||
strcat(file_name, ".decompressed.XXXXXX");
|
||||
int fd = mkstemp(file_name);
|
||||
@ -377,11 +390,12 @@ int main(int/* argc*/, char* argv[])
|
||||
}
|
||||
|
||||
bool have_compressed_analoge = false;
|
||||
bool has_exec = false;
|
||||
char decompressed_suffix[7] = {0};
|
||||
uint64_t decompressed_umask = 0;
|
||||
|
||||
/// Decompress all files
|
||||
if (0 != decompressFiles(input_fd, path, name, have_compressed_analoge, decompressed_suffix, &decompressed_umask))
|
||||
if (0 != decompressFiles(input_fd, path, name, have_compressed_analoge, has_exec, decompressed_suffix, &decompressed_umask))
|
||||
{
|
||||
printf("Error happened during decompression.\n");
|
||||
if (0 != close(input_fd))
|
||||
@ -427,10 +441,15 @@ int main(int/* argc*/, char* argv[])
|
||||
return 1;
|
||||
}
|
||||
|
||||
execv(self, argv);
|
||||
if (has_exec)
|
||||
{
|
||||
execv(self, argv);
|
||||
|
||||
/// This part of code will be reached only if error happened
|
||||
perror("execv");
|
||||
return 1;
|
||||
/// This part of code will be reached only if error happened
|
||||
perror("execv");
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("No target executable - decompression only was performed.\n");
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
@ -43,5 +44,6 @@ struct FileData
|
||||
uint64_t name_length = 0;
|
||||
uint64_t uncompressed_size = 0;
|
||||
uint64_t umask = 0;
|
||||
bool exec = false;
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user