ClickHouse/utils/self-extr-exec/decompressor.cpp

375 lines
11 KiB
C++
Raw Normal View History

//#include <cstddef>
//#include <cstdio>
//#include <cstring>
2022-03-29 19:30:09 +00:00
#include <zstd.h>
#include <sys/mman.h>
2022-04-07 12:21:14 +00:00
#include <sys/statfs.h>
2022-03-29 19:30:09 +00:00
#include <fcntl.h>
#include <sys/wait.h>
2022-03-29 19:30:09 +00:00
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
2022-03-29 19:30:09 +00:00
#include "types.h"
2022-04-07 12:21:14 +00:00
2022-03-29 19:30:09 +00:00
/// decompress part
int doDecompress(char * input, char * output, off_t & in_offset, off_t & out_offset,
off_t input_size, off_t output_size, ZSTD_DCtx* dctx)
{
size_t decompressed_size = ZSTD_decompressDCtx(dctx, output + out_offset, output_size, input + in_offset, input_size);
if (ZSTD_isError(decompressed_size))
{
fprintf(stderr, "Error (ZSTD): %zu %s\n", decompressed_size, ZSTD_getErrorName(decompressed_size));
2022-03-29 19:30:09 +00:00
return 1;
}
return 0;
}
/// decompress data from in_fd into out_fd
int decompress(char * input, char * output, off_t start, off_t end, size_t max_number_of_forks=10)
2022-04-07 12:21:14 +00:00
{
off_t in_pointer = start, out_pointer = 0;
off_t size = 0;
off_t max_block_size = 1ull<<27;
off_t decompressed_size = 0;
size_t number_of_forks = 0;
2022-04-07 12:21:14 +00:00
/// Create context
ZSTD_DCtx * dctx = ZSTD_createDCtx();
if (dctx == nullptr)
{
fprintf(stderr, "Error (ZSTD): failed to create decompression context\n");
2022-04-07 12:21:14 +00:00
return 1;
}
pid_t pid;
bool error_happened = false;
2022-04-07 12:21:14 +00:00
/// Compress data
while (in_pointer < end && !error_happened)
2022-04-07 12:21:14 +00:00
{
size = ZSTD_findFrameCompressedSize(input + in_pointer, max_block_size);
if (ZSTD_isError(size))
{
fprintf(stderr, "Error (ZSTD): %zu %s\n", size, ZSTD_getErrorName(size));
error_happened = true;
break;
}
2022-04-07 12:21:14 +00:00
decompressed_size = ZSTD_getFrameContentSize(input + in_pointer, max_block_size);
if (ZSTD_isError(decompressed_size))
{
fprintf(stderr, "Error (ZSTD): %zu %s\n", decompressed_size, ZSTD_getErrorName(decompressed_size));
error_happened = true;
break;
}
pid = fork();
if (-1 == pid)
{
perror(nullptr);
/// If fork failed just decompress data in main process.
if (0 != doDecompress(input, output, in_pointer, out_pointer, size, max_block_size, dctx))
break;
}
else if (pid == 0)
{
/// Decompress data in child process.
if (0 != doDecompress(input, output, in_pointer, out_pointer, size, max_block_size, dctx))
exit(1);
exit(0);
}
else
{
++number_of_forks;
while (number_of_forks >= max_number_of_forks)
{
/// Wait any fork
int status;
waitpid(0, &status, 0);
/// If error happened, stop processing
if (WEXITSTATUS(status) != 0)
{
error_happened = true;
break;
}
--number_of_forks;
}
in_pointer += size;
out_pointer += decompressed_size;
}
}
/// wait for all working decompressions
while (number_of_forks > 0)
{
/// Wait any fork
int status;
waitpid(0, &status, 0);
if (WEXITSTATUS(status) != 0)
error_happened = true;
--number_of_forks;
2022-04-07 12:21:14 +00:00
}
/// If error happen end of processed part will not reach end
if (in_pointer < end || error_happened)
return 1;
2022-04-07 12:21:14 +00:00
return 0;
}
/// Read data about files and decomrpess them.
int decompressFiles(int input_fd, char * path, char * name, bool & have_compressed_analoge)
2022-03-29 19:30:09 +00:00
{
/// Read data about output file.
/// Compressed data will replace data in file
struct stat info_in;
2022-04-07 12:21:14 +00:00
if (0 != fstat(input_fd, &info_in))
{
perror(nullptr);
return 1;
}
2022-03-29 19:30:09 +00:00
2022-04-07 12:21:14 +00:00
/// mmap input file
2022-04-07 12:31:41 +00:00
char * input = static_cast<char*>(mmap(nullptr, info_in.st_size, PROT_READ, MAP_PRIVATE, input_fd, 0));
2022-04-07 12:21:14 +00:00
if (input == MAP_FAILED)
2022-03-29 19:30:09 +00:00
{
perror(nullptr);
return 1;
}
2022-04-07 12:21:14 +00:00
/// Read metadata from end of file
MetaData metadata = *reinterpret_cast<MetaData*>(input + info_in.st_size - sizeof(MetaData));
2022-03-29 19:30:09 +00:00
2022-04-07 12:21:14 +00:00
/// Prepare to read information about files and decompress them
off_t files_pointer = metadata.start_of_files_data;
size_t decompressed_full_size = 0;
/// Read files metadata and check if decompression is possible
off_t check_pointer = metadata.start_of_files_data;
for (size_t i = 0; i < metadata.number_of_files; ++i)
{
FileData data = *reinterpret_cast<FileData*>(input + check_pointer);
decompressed_full_size += data.uncompressed_size;
check_pointer += sizeof(FileData) + data.name_length;
}
/// Check free space
struct statfs fs_info;
if (0 != fstatfs(input_fd, &fs_info))
2022-03-29 19:30:09 +00:00
{
perror(nullptr);
2022-04-07 12:21:14 +00:00
if (0 != munmap(input, info_in.st_size))
perror(nullptr);
2022-03-29 19:30:09 +00:00
return 1;
2022-04-07 12:21:14 +00:00
}
if (fs_info.f_blocks * info_in.st_blksize < decompressed_full_size)
{
fprintf(stderr, "Not enough space for decompression. Have %lu, need %zu.",
2022-04-07 12:21:14 +00:00
fs_info.f_blocks * info_in.st_blksize, decompressed_full_size);
return 1;
}
2022-03-29 19:30:09 +00:00
2022-04-07 12:21:14 +00:00
FileData file_info;
/// Decompress files with appropriate file names
for (size_t i = 0; i < metadata.number_of_files; ++i)
2022-03-29 19:30:09 +00:00
{
2022-04-07 12:21:14 +00:00
/// Read information about file
file_info = *reinterpret_cast<FileData*>(input + files_pointer);
files_pointer += sizeof(FileData);
2022-03-29 19:30:09 +00:00
size_t file_name_len =
(strcmp(input + files_pointer, name) ? file_info.name_length : file_info.name_length + 13);
size_t file_path_len = path ? strlen(path) + 1 + file_name_len : file_name_len;
char file_name[file_path_len];
memset(file_name, '\0', file_path_len);
if (path)
2022-04-07 12:21:14 +00:00
{
strcat(file_name, path);
strcat(file_name, "/");
2022-04-07 12:21:14 +00:00
}
strcat(file_name, input + files_pointer);
files_pointer += file_info.name_length;
if (file_name_len != file_info.name_length)
2022-04-07 12:21:14 +00:00
{
strcat(file_name, ".decompressed");
have_compressed_analoge = true;
2022-04-07 12:21:14 +00:00
}
int output_fd = open(file_name, O_RDWR | O_CREAT, file_info.umask);
2022-04-07 12:21:14 +00:00
if (output_fd == -1)
2022-03-29 19:30:09 +00:00
{
2022-04-07 12:21:14 +00:00
perror(nullptr);
if (0 != munmap(input, info_in.st_size))
perror(nullptr);
return 1;
2022-04-07 12:21:14 +00:00
}
/// Prepare output file
if (0 != ftruncate(output_fd, file_info.uncompressed_size))
{
perror(nullptr);
if (0 != munmap(input, info_in.st_size))
perror(nullptr);
return 1;
}
char * output = static_cast<char*>(
mmap(nullptr,
file_info.uncompressed_size,
PROT_READ | PROT_WRITE, MAP_SHARED,
output_fd,
2022-04-07 12:21:14 +00:00
0)
);
if (output == MAP_FAILED)
{
perror(nullptr);
if (0 != munmap(input, info_in.st_size))
perror(nullptr);
2022-03-29 19:30:09 +00:00
return 1;
}
2022-04-07 12:21:14 +00:00
/// Decompress data into file
if (0 != decompress(input, output, file_info.start, file_info.end))
{
if (0 != munmap(input, info_in.st_size))
perror(nullptr);
if (0 != munmap(output, file_info.uncompressed_size))
perror(nullptr);
return 1;
}
if (0 != fsync(output_fd))
perror(nullptr);
if (0 != close(output_fd))
perror(nullptr);
}
2022-04-07 12:21:14 +00:00
if (0 != munmap(input, info_in.st_size))
perror(nullptr);
2022-03-29 19:30:09 +00:00
return 0;
}
2022-03-24 19:45:54 +00:00
/// Append `length` bytes of `source` to `dest` at offset `shift`
/// and move `shift` past the copied bytes.
void fill(char * dest, char * source, size_t length, size_t& shift)
{
    char * const write_position = dest + shift;
    shift += length;
    memcpy(write_position, source, length);
}
/// Set command to `mv filename.decompressed filename && filename args...`
/// where filename is argv[0] and args are argv[1] ... argv[argc - 1] separated by spaces.
///   command - output buffer; it must hold at least
///             3 + strlen(argv[0]) + 14 + strlen(argv[0]) + 4 + strlen(argv[0]) + length + argc
///             bytes. That many bytes are zeroed first, so the result is NUL-terminated;
///   length  - sum of strlen() of all forwarded arguments (argv[1] and further).
/// NOTE: arguments are copied verbatim, nothing is quoted or escaped for the shell.
void fillCommand(char command[], int argc, char * argv[], size_t length)
{
    const char * const self = argv[0];
    const size_t self_length = strlen(self);

    memset(command, '\0', 3 + self_length + 14 + self_length + 4 + self_length + length + argc);

    /// position in command
    char * cursor = command;
    auto append = [&cursor](const char * piece, size_t piece_length)
    {
        memcpy(cursor, piece, piece_length);
        cursor += piece_length;
    };

    append("mv ", 3);
    append(self, self_length);
    append(".decompressed ", 14);
    append(self, self_length);
    append(" && ", 4);
    append(self, self_length);

    /// forward all arguments
    /// The separator is written only in front of an argument: a trailing space after the
    /// file name would take the last byte of the buffer when there are no arguments
    /// and leave the command without the terminating NUL.
    for (int i = 1; i < argc; ++i)
    {
        append(" ", 1);
        append(argv[i], strlen(argv[i]));
    }
}
int main(int argc, char* argv[])
2022-03-24 19:45:54 +00:00
{
char file_path[strlen(argv[0]) + 1];
memset(file_path, 0, sizeof(file_path));
strcpy(file_path, argv[0]);
char * path = nullptr;
char * name = strrchr(file_path, '/');
if (name)
{
path = file_path;
*name = 0;
++name;
}
else
name = file_path;
2022-03-30 22:01:50 +00:00
int input_fd = open(argv[0], O_RDONLY);
2022-03-29 19:30:09 +00:00
if (input_fd == -1)
{
perror(nullptr);
return 1;
2022-03-29 19:30:09 +00:00
}
bool have_compressed_analoge = false;
2022-04-07 12:21:14 +00:00
/// Decompress all files
if (0 != decompressFiles(input_fd, path, name, have_compressed_analoge))
2022-03-29 19:30:09 +00:00
{
printf("Error happened during decompression.\n");
2022-04-07 12:21:14 +00:00
if (0 != close(input_fd))
perror(nullptr);
2022-03-29 19:30:09 +00:00
return 1;
}
2022-04-07 12:21:14 +00:00
if (0 != close(input_fd))
perror(nullptr);
/// According to documentation `mv` will rename file if it
2022-04-19 17:04:04 +00:00
/// doesn't move to other directory.
/// Sometimes `rename` doesn't exist by default and
/// `rename.ul` is set instead. It will lead to errors
/// that can be easily avoided with help of `mv`
2022-03-30 22:01:50 +00:00
if (!have_compressed_analoge)
{
printf("No target executable - decompression only was performed.\n");
/// remove file
execlp("rm", "rm", argv[0], NULL);
perror(nullptr);
return 1;
}
else
{
/// move decompressed file instead of this binary and apply command
char bash[] = "sh";
char executable[] = "-c";
/// length of forwarded args
size_t length = 0;
for (int i = 1; i < argc; ++i)
length += strlen(argv[i]);
/// mv filename.decompressed filename && filename agrs...
char command[3 + strlen(argv[0]) + 14 + strlen(argv[0]) + 4 + strlen(argv[0]) + length + argc];
fillCommand(command, argc, argv, length);
/// replace file and call executable
char * newargv[] = { bash, executable, command, nullptr };
execvp(bash, newargv);
/// This part of code will be reached only if error happened
perror(nullptr);
return 1;
}
2022-03-24 19:45:54 +00:00
}