#include #include #include #include #include #include #include #include #include #include #include /** This program checks correctness of .mrk (marks) file for corresponding compressed .bin file. */ namespace DB { namespace ErrorCodes { extern const int TOO_LARGE_SIZE_COMPRESSED; } } /// Read and check header of compressed block. Print size of decompressed and compressed data. std::pair stat(DB::ReadBuffer & in, DB::WriteBuffer & out) { if (in.eof()) return {}; in.ignore(16); /// checksum char header[COMPRESSED_BLOCK_HEADER_SIZE]; in.readStrict(header, COMPRESSED_BLOCK_HEADER_SIZE); UInt32 size_compressed = unalignedLoad(&header[1]); if (size_compressed > DBMS_MAX_COMPRESSED_SIZE) throw DB::Exception("Too large size_compressed. Most likely corrupted data.", DB::ErrorCodes::TOO_LARGE_SIZE_COMPRESSED); UInt32 size_decompressed = unalignedLoad(&header[5]); return {size_compressed, size_decompressed}; } void checkCompressedHeaders(const std::string & mrk_path, const std::string & bin_path) { DB::ReadBufferFromFile mrk_in(mrk_path); DB::ReadBufferFromFile bin_in(bin_path, 4096); /// Small buffer size just to check header of compressed block. DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO); for (size_t mark_num = 0; !mrk_in.eof(); ++mark_num) { UInt64 offset_in_compressed_file = 0; UInt64 offset_in_decompressed_block = 0; DB::readBinary(offset_in_compressed_file, mrk_in); DB::readBinary(offset_in_decompressed_block, mrk_in); out << "Mark " << mark_num << ", points to " << offset_in_compressed_file << ", " << offset_in_decompressed_block << ". "; bin_in.seek(offset_in_compressed_file); auto sizes = stat(bin_in, out); out << "Block sizes: " << sizes.first << ", " << sizes.second << '\n' << DB::flush; } } void checkByCompressedReadBuffer(const std::string & mrk_path, const std::string & bin_path) { DB::ReadBufferFromFile mrk_in(mrk_path); DB::CompressedReadBufferFromFile bin_in(bin_path, 0, 0); DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO); std::cerr << "MRKPATH:"<