add compressor

This commit is contained in:
FArthur-cmd 2022-03-24 22:45:54 +03:00
parent 065305ab65
commit 5a40d868ea
4 changed files with 154 additions and 0 deletions

View File

@ -32,6 +32,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS)
add_subdirectory (check-mysql-binlog)
add_subdirectory (keeper-bench)
add_subdirectory (graphite-rollup)
add_subdirectory (self-extr-exec)
if (TARGET ch_contrib::nuraft)
add_subdirectory (keeper-data-dumper)

View File

@ -0,0 +1,5 @@
# Tool that appends ZSTD-compressed data to the end of a file.
add_executable (compressor compressor.cpp)
target_link_libraries(compressor PUBLIC ch_contrib::zstd)

# Counterpart of the compressor. It has to be linked with zstd on its own:
# linking `compressor` a second time leaves `decompressor` without the library.
add_executable (decompressor decompressor.cpp)
target_link_libraries(decompressor PUBLIC ch_contrib::zstd)

View File

@ -0,0 +1,141 @@
#include <fcntl.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <cstdio>
#include <iostream>
#include <string>
#include <zstd.h>
/// Main compression part: compress one block of input with ZSTD.
///
/// input, output - base addresses of the input and output buffers
/// in_offset     - position in `input` to read from; advanced by `input_size` on success
/// out_offset    - position in `output` to write to; advanced by the number of bytes
///                 the compressor actually produced on success
/// input_size    - number of bytes to compress
/// output_size   - capacity available starting at `output + out_offset`
/// cctx          - ZSTD compression context to use
///
/// Returns 0 on success. On failure prints the ZSTD error, leaves both offsets
/// untouched and returns 1.
int doCompress(char * input, char * output, off_t & in_offset, off_t & out_offset,
               off_t input_size, off_t output_size, ZSTD_CCtx * cctx)
{
    size_t compressed_size = ZSTD_compress2(cctx, output + out_offset, output_size, input + in_offset, input_size);
    if (ZSTD_isError(compressed_size))
    {
        std::cout << "Cannot compress block with ZSTD: " + std::string(ZSTD_getErrorName(compressed_size)) << std::endl;
        return 1;
    }
    in_offset += input_size;
    /// Advance by what was really written. `output_size` is only the capacity of the
    /// destination; using it here would leave garbage between blocks and a wrong final size.
    out_offset += static_cast<off_t>(compressed_size);
    return 0;
}
/// compress data from opened file into output file
int compress(int in_fd, int out_fd, int level=3)
{
/// read data about input file
struct stat info_in;
fstat(in_fd, &info_in);
if (info_in.st_size == 0) {
std::cout << "Empty input file" << std::endl;
return 1;
}
/// Read data about output file.
/// Compressed data will be added to the end of file
/// It will allow to create self extracting executable from file
struct stat info_out;
fstat(out_fd, &info_out);
/// As experiments showed, size of compressed file is 4 times less than clickhouse executable
/// Get a little bit more memory to prevent errors with size.
/// For compression this difference will not be huge
ftruncate(out_fd, info_out.st_size + info_in.st_size / 3);
off_t in_offset = 0, out_offset = info_out.st_size;
/// mmap files
char * input = static_cast<char*>(mmap(nullptr, info_in.st_size, PROT_READ | PROT_EXEC, MAP_SHARED , in_fd, 0));
char * output = static_cast<char*>(mmap(nullptr, info_out.st_size + info_in.st_size / 3, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED, out_fd, 0));
if (input == reinterpret_cast<char*>(-1) || output == reinterpret_cast<char*>(-1))
{
std::cout << (input == reinterpret_cast<char*>(-1)) << " " << (output == reinterpret_cast<char*>(-1)) << std::endl;
perror(nullptr);
return 1;
}
/// Create context
ZSTD_CCtx * cctx = ZSTD_createCCtx();
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
// ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, 9);
/// Remember size of file. It will help to avoid using additional memory
/// during decompression
// char * file_size = reinterpret_cast<char *>(&info_in.st_size);
// for (size_t i = 0; i < sizeof(info_in.st_size)/sizeof(char); ++i)
// output[out_offset++] = *file_size;
/// limits for size of block to prevent high memory usage or bad compression
// off_t max_block_size = 100000000ull;
// off_t min_block_size = 10000000ull;
off_t size = 0;
/// Compress data
while (in_offset < info_in.st_size)
{
/// take blocks of maximum size
/// optimize last block (it can be bigger, if it is not too huge)
// if (info_in.st_size - in_offset < max_block_size || info_in.st_size - in_offset < max_block_size + min_block_size)
// size = info_in.st_size - in_offset;
// else
// size = max_block_size;
size = info_in.st_size - in_offset;
/// Compress data or exit if error happens
if (0 != doCompress(input, output, in_offset, out_offset, size, ZSTD_compressBound(size), cctx))
{
ftruncate(out_fd, info_out.st_size);
munmap(input, info_in.st_size);
munmap(output, info_out.st_size + info_in.st_size / 3);
return 1;
}
std::cout << in_offset << " " << out_offset << std::endl;
}
/// Shrink file size and unmap
ftruncate(out_fd, out_offset);
munmap(input, info_in.st_size);
munmap(output, info_out.st_size + info_in.st_size / 3);
return 0;
}
/// Entry point of the compressor tool.
/// Usage: compressor <file to compress> <output file> [compression level]
/// The compressed data is appended to the output file (it is created if missing).
/// Returns 0 on success and 1 on wrong usage or any error.
int main(int argc, char* argv[])
{
    if (argc < 3)
    {
        std::cout << "Not enough arguments.\ncompressor [file that should be compressed] [file name for compressed file] [OPTIONAL level of compression]" << std::endl;
        return 1;
    }

    /// The input is only read, so write permission on it is not required
    int input_fd = open(argv[1], O_RDONLY);
    if (input_fd == -1)
    {
        perror(nullptr);
        return 1;
    }

    int output_fd = open(argv[2], O_RDWR | O_CREAT, 0777);
    if (output_fd == -1)
    {
        perror(nullptr);
        /// Do not leak the already opened input
        close(input_fd);
        return 1;
    }

    int result;
    if (argc == 4)
        result = compress(input_fd, output_fd, static_cast<int>(strtol(argv[3], nullptr, 10)));
    else
        result = compress(input_fd, output_fd);

    if (result == 0)
        std::cout << "Successfully compressed" << std::endl;
    else
        std::cout << "An error has occurred" << std::endl;

    close(input_fd);
    close(output_fd);

    /// Let callers (scripts, build systems) see the failure
    return result == 0 ? 0 : 1;
}

View File

@ -0,0 +1,7 @@
#include <iostream>
/// Placeholder entry point of the decompressor: prints a test line and exits successfully.
int main()
{
    std::cout << "Test" << '\n' << std::flush;
    return 0;
}