From ed4c9476eeebbc649a04dc007d7f20a37503f7ae Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2018 02:54:23 +0300 Subject: [PATCH 001/151] Added experimental data scrambler; experiments in performance of LZ4_decompress_fast function [#CLICKHOUSE-2] --- utils/compressor/CMakeLists.txt | 6 + utils/compressor/decompress_perf.cpp | 279 ++++++++++++++++++ utils/compressor/mutator.cpp | 405 +++++++++++++++++++++++++++ 3 files changed, 690 insertions(+) create mode 100644 utils/compressor/decompress_perf.cpp create mode 100644 utils/compressor/mutator.cpp diff --git a/utils/compressor/CMakeLists.txt b/utils/compressor/CMakeLists.txt index aff97c839db..95b6b05cdad 100644 --- a/utils/compressor/CMakeLists.txt +++ b/utils/compressor/CMakeLists.txt @@ -8,3 +8,9 @@ set_target_properties(util-clickhouse-compressor PROPERTIES OUTPUT_NAME "clickho add_executable (zstd_test zstd_test.cpp) target_link_libraries (zstd_test ${ZSTD_LIBRARY} Threads::Threads) + +add_executable (mutator mutator.cpp) +target_link_libraries (mutator clickhouse_common_io) + +add_executable (decompress_perf decompress_perf.cpp) +target_link_libraries (decompress_perf clickhouse_common_io) diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp new file mode 100644 index 00000000000..93b1e676bd9 --- /dev/null +++ b/utils/compressor/decompress_perf.cpp @@ -0,0 +1,279 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/** for i in *.bin; do ./decompress_perf < $i > /dev/null; done + */ + + +static void LZ4_wildCopy(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) +{ + do + { + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), + _mm_loadu_si128(reinterpret_cast(src))); + + dst += 16; + src += 16; + } while (dst < dst_end); +} + + +void LZ4_decompress_faster( + const char * const source, + char * const dest, + size_t dest_size) +{ + const UInt8 * ip = (UInt8 *)source; + UInt8 * op = (UInt8 *)dest; + UInt8 * const output_end = op + dest_size; + + while (1) + { + size_t length; + + auto continue_read_length = [&] + { + unsigned s; + do + { + s = *ip++; + length += s; + } while (unlikely(s == 255)); + }; + + /// Get literal length. + + auto token = *ip++; + length = token >> 4; + if (length == 0x0F) + continue_read_length(); + + /// Copy literals. + + UInt8 * copy_end = op + length; + LZ4_wildCopy(op, ip, copy_end); + ip += length; + op = copy_end; + + if (copy_end > output_end) + return; + + /// Get match offset. + + size_t offset = unalignedLoad(ip); + ip += 2; + const UInt8 * match = op - offset; + + /// Get match length. + + length = token & 0x0F; + if (length == 0x0F) + continue_read_length(); + length += 4; + + /// Copy match within block, that produce overlapping pattern. 
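+        /// For example, with offset = 3 and length = 20 the output must continue
+        /// as the repeating pattern "abcabcabc...". A 16-byte copy would read bytes
+        /// that have not been written yet, so when offset < 16 the first 16 bytes
+        /// are copied one by one, and 'match' is then advanced by shift[offset]
+        /// (16 % offset). That makes the distance between 'op' and 'match' a
+        /// multiple of the pattern period, not less than 16, so the trailing
+        /// wildCopy below is safe.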
+ + copy_end = op + length; + + if (unlikely(offset < 16)) + { + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + op[4] = match[4]; + op[5] = match[5]; + op[6] = match[6]; + op[7] = match[7]; + op[8] = match[8]; + op[9] = match[9]; + op[10] = match[10]; + op[11] = match[11]; + op[12] = match[12]; + op[13] = match[13]; + op[14] = match[14]; + op[15] = match[15]; + + op += 16; + + /// 16 % N + const unsigned shift[] = { 0, 0, 0, 1, 0, 1, 4, 2, 0, 7, 6, 5, 4, 3, 2, 1 }; + match += shift[offset]; + } + + LZ4_wildCopy(op, match, copy_end); + op = copy_end; + } +} + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_COMPRESSION_METHOD; + extern const int TOO_LARGE_SIZE_COMPRESSED; + extern const int CANNOT_DECOMPRESS; +} + +class FasterCompressedReadBufferBase +{ +protected: + ReadBuffer * compressed_in; + + /// If 'compressed_in' buffer has whole compressed block - then use it. Otherwise copy parts of data to 'own_compressed_buffer'. + PODArray own_compressed_buffer; + /// Points to memory, holding compressed block. + char * compressed_buffer = nullptr; + + size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum) + { + if (compressed_in->eof()) + return 0; + + CityHash_v1_0_2::uint128 checksum; + compressed_in->readStrict(reinterpret_cast(&checksum), sizeof(checksum)); + + own_compressed_buffer.resize(COMPRESSED_BLOCK_HEADER_SIZE); + compressed_in->readStrict(&own_compressed_buffer[0], COMPRESSED_BLOCK_HEADER_SIZE); + + UInt8 method = own_compressed_buffer[0]; /// See CompressedWriteBuffer.h + + size_t & size_compressed = size_compressed_without_checksum; + + if (method == static_cast(CompressionMethodByte::LZ4) || + method == static_cast(CompressionMethodByte::ZSTD) || + method == static_cast(CompressionMethodByte::NONE)) + { + size_compressed = unalignedLoad(&own_compressed_buffer[1]); + size_decompressed = unalignedLoad(&own_compressed_buffer[5]); + } + else + throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD); + + if (size_compressed > DBMS_MAX_COMPRESSED_SIZE) + throw Exception("Too large size_compressed. Most likely corrupted data.", ErrorCodes::TOO_LARGE_SIZE_COMPRESSED); + + /// Is whole compressed block located in 'compressed_in' buffer? + if (compressed_in->offset() >= COMPRESSED_BLOCK_HEADER_SIZE && + compressed_in->position() + size_compressed - COMPRESSED_BLOCK_HEADER_SIZE <= compressed_in->buffer().end()) + { + compressed_in->position() -= COMPRESSED_BLOCK_HEADER_SIZE; + compressed_buffer = compressed_in->position(); + compressed_in->position() += size_compressed; + } + else + { + own_compressed_buffer.resize(size_compressed); + compressed_buffer = &own_compressed_buffer[0]; + compressed_in->readStrict(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, size_compressed - COMPRESSED_BLOCK_HEADER_SIZE); + } + + return size_compressed + sizeof(checksum); + } + + void decompress(char * to, size_t size_decompressed, size_t size_compressed_without_checksum) + { + UInt8 method = compressed_buffer[0]; /// See CompressedWriteBuffer.h + + if (method == static_cast(CompressionMethodByte::LZ4)) + { + LZ4_decompress_faster(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed); + } + else + throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD); + } + +public: + /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. 
FasterCompressedReadBufferBase(ReadBuffer * in = nullptr)
+        : compressed_in(in), own_compressed_buffer(COMPRESSED_BLOCK_HEADER_SIZE)
+    {
+    }
+};
+
+
+class FasterCompressedReadBuffer : public FasterCompressedReadBufferBase, public BufferWithOwnMemory<ReadBuffer>
+{
+private:
+    size_t size_compressed = 0;
+
+    bool nextImpl() override
+    {
+        size_t size_decompressed;
+        size_t size_compressed_without_checksum;
+        size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum);
+        if (!size_compressed)
+            return false;
+
+        memory.resize(size_decompressed + 15);
+        working_buffer = Buffer(&memory[0], &memory[size_decompressed]);
+
+        decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum);
+
+        return true;
+    }
+
+public:
+    FasterCompressedReadBuffer(ReadBuffer & in_)
+        : FasterCompressedReadBufferBase(&in_), BufferWithOwnMemory<ReadBuffer>(0)
+    {
+    }
+};
+
+}
+
+
+int main(int, char **)
+try
+{
+    using namespace DB;
+
+    ReadBufferFromFileDescriptor in(STDIN_FILENO);
+    FasterCompressedReadBuffer decompressing_in(in);
+    WriteBufferFromFileDescriptor out(STDOUT_FILENO);
+    HashingWriteBuffer hashing_out(out);
+
+    Stopwatch watch;
+    copyData(decompressing_in, hashing_out);
+    watch.stop();
+
+    auto hash = hashing_out.getHash();
+
+    double seconds = watch.elapsedSeconds();
+    std::cerr << std::fixed << std::setprecision(3)
+        << "Elapsed: " << seconds
+        << ", " << formatReadableSizeWithBinarySuffix(in.count()) << " compressed"
+        << ", " << formatReadableSizeWithBinarySuffix(decompressing_in.count()) << " decompressed"
+        << ", ratio: " << static_cast<double>(decompressing_in.count()) / in.count()
+        << ", " << formatReadableSizeWithBinarySuffix(in.count() / seconds) << "/sec. compressed"
+        << ", " << formatReadableSizeWithBinarySuffix(decompressing_in.count() / seconds) << "/sec. decompressed"
+        << ", checksum: " << hash.first << "_" << hash.second
+        << "\n";
+
+    return 0;
+}
+catch (...)
+{
+    std::cerr << DB::getCurrentExceptionMessage(true);
+    return DB::getCurrentExceptionCode();
+}
diff --git a/utils/compressor/mutator.cpp b/utils/compressor/mutator.cpp
new file mode 100644
index 00000000000..94c99807b9b
--- /dev/null
+++ b/utils/compressor/mutator.cpp
@@ -0,0 +1,405 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/** Quick and dirty implementation of a data scrambler.
+ *
+ * The task is to replace the data with pseudorandom values,
+ * while keeping some probability distributions
+ * and maintaining the same compression ratio.
+ *
+ * The solution is to operate directly on the compressed LZ4 stream.
+ * The stream consists of independent compressed blocks.
+ * Each block is a stream of "literals" and "matches".
+ * A literal is an instruction to literally put some following bytes,
+ * and a match is an instruction to copy some bytes that were already seen before.
+ *
+ * We take the literals and apply a scramble operation to them,
+ * but we keep literal lengths and matches without changes.
+ *
+ * That's how we get pseudorandom data while keeping
+ * all repetitive patterns and maintaining the same compression ratio.
+ *
+ * Actually, if you decompress scrambled data and compress it again, the compression ratio
+ * becomes slightly worse, because LZ4 uses a simple match finder based on a hash function,
+ * and it can find different matches due to collisions in that hash function.
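+ *
+ * To illustrate the mechanism: the string "abcabcabcabc" can be encoded as
+ * the literal "abc" followed by a match with offset 3 and length 9. Only the
+ * three literal bytes get scrambled; the match instruction is kept intact
+ * and reproduces the repetition in the scrambled output.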
+ *
+ * The scramble operation replaces literals with pseudorandom bytes,
+ * but uses some heuristics to keep some sort of data structure.
+ *
+ * It is an open question whether this scrambles the data well enough,
+ * and whether it is safe to publish the scrambled data.
+ * In general, you should assume that it is not safe.
+ */
+
+
+#define ML_BITS 4
+#define ML_MASK ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+#define MINMATCH 4
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+
+static UInt16 LZ4_read16(const void * mem)
+{
+    UInt16 val;
+    memcpy(&val, mem, sizeof(val));
+    return val;
+}
+
+static void LZ4_write32(void * mem, UInt32 val)
+{
+    memcpy(mem, &val, sizeof(val));
+}
+
+static UInt8 rand(pcg64 & generator, UInt8 min, UInt8 max)
+{
+    return min + generator() % (max + 1 - min);
+}
+
+static void mutate(pcg64 & generator, void * src, size_t length)
+{
+    UInt8 * pos = static_cast<UInt8 *>(src);
+    UInt8 * end = pos + length;
+
+    while (pos < end)
+    {
+        if (pos + strlen("https") <= end && 0 == memcmp(pos, "https", strlen("https")))
+        {
+            pos += strlen("https");
+            continue;
+        }
+
+        if (pos + strlen("http") <= end && 0 == memcmp(pos, "http", strlen("http")))
+        {
+            pos += strlen("http");
+            continue;
+        }
+
+        if (pos + strlen("www") <= end && 0 == memcmp(pos, "www", strlen("www")))
+        {
+            pos += strlen("www");
+            continue;
+        }
+
+        if (*pos >= '1' && *pos <= '9')
+            *pos = rand(generator, '1', '9');
+        else if (*pos >= 'a' && *pos <= 'z')
+            *pos = rand(generator, 'a', 'z');
+        else if (*pos >= 'A' && *pos <= 'Z')
+            *pos = rand(generator, 'A', 'Z');
+        else if (*pos >= 0x80 && *pos <= 0xBF)
+            *pos = rand(generator, *pos & 0xF0U, *pos | 0x0FU);
+        else if (*pos == '\\')
+            ++pos;
+
+        ++pos;
+    }
+
+    pos = static_cast<UInt8 *>(src);
+    while (pos < end)
+    {
+        if (pos + 3 <= end
+            && isAlphaASCII(pos[0])
+            && !isAlphaASCII(pos[1]) && pos[1] != '\\' && pos[1] >= 0x20
+            && isAlphaASCII(pos[2]))
+        {
+            auto res = rand(generator, 0, 3);
+            if (res == 2)
+                std::swap(pos[0], pos[1]);
+            if (res == 3)
+                std::swap(pos[1], pos[2]);
+
+            pos += 3;
+        }
+        else if (pos + 5 <= end
+            && pos[0] >= 0xC0 && pos[0] <= 0xDF && pos[1] >= 0x80 && pos[1] <= 0xBF
+            && pos[2] >= 0x20 && pos[2] < 0x80 && !isAlphaASCII(pos[2])
+            && pos[3] >= 0xC0 && pos[3] <= 0xDF && pos[4] >= 0x80 && pos[4] <= 0xBF)
+        {
+            auto res = rand(generator, 0, 3);
+            if (res == 2)
+            {
+                std::swap(pos[1], pos[2]);
+                std::swap(pos[0], pos[1]);
+            }
+            if (res == 3)
+            {
+                std::swap(pos[3], pos[2]);
+                std::swap(pos[4], pos[3]);
+            }
+
+            pos += 5;
+        }
+        else
+            ++pos;
+    }
+}
+
+
+static void LZ4_copy8(void* dst, const void* src)
+{
+    memcpy(dst,src,8);
+}
+
+/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
+static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    UInt8* d = (UInt8*)dstPtr;
+    const UInt8* s = (const UInt8*)srcPtr;
+    UInt8* const e = (UInt8*)dstEnd;
+
+    do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
+}
+
+
+static int LZ4_decompress_mutate(
+    char * const source,
+    char * const dest,
+    size_t dest_size)
+{
+    pcg64 generator(randomSeed());
+
+    UInt8 * ip = (UInt8 *)source;
+    const UInt8 * match;
+
+    UInt8 * op = (UInt8 *)dest;
+    UInt8 * const oend = op + dest_size;
+    UInt8 * cpy;
+
+    const unsigned dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
+    const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+
+    /* Main decoding loop */
+    while (1)
+    {
+        size_t length;
+        size_t offset;
+
+        /* get literal length */
+        const unsigned token = *ip++;
+        if ((length=(token>>ML_BITS)) == RUN_MASK) {
+            unsigned s;
+            do {
+                s = *ip++;
+                length += s;
+            } while (s==255);
+        }
+
+        /* copy literals */
+        cpy = op+length;
+        if (cpy>oend-WILDCOPYLENGTH)
+        {
+            if (cpy != oend) goto _output_error;     /* Error : block decoding must stop exactly there */
+            mutate(generator, ip, length);
+            memcpy(op, ip, length);
+            ip += length;
+            op += length;
+            break;     /* Necessarily EOF, due to parsing restrictions */
+        }
+        mutate(generator, ip, cpy - op);
+        LZ4_wildCopy(op, ip, cpy);
+        ip += length; op = cpy;
+
+        /* get offset */
+        offset = LZ4_read16(ip); ip+=2;
+        match = op - offset;
+        LZ4_write32(op, (UInt32)offset);   /* costs ~1%; silence an msan warning when offset==0 */
+
+        /* get matchlength */
+        length = token & ML_MASK;
+        if (length == ML_MASK) {
+            unsigned s;
+            do {
+                s = *ip++;
+                length += s;
+            } while (s==255);
+        }
+        length += MINMATCH;
+
+        /* copy match within block */
+        cpy = op + length;
+        if (unlikely(offset<8)) {
+            const int dec64 = dec64table[offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[offset];
+            memcpy(op+4, match, 4);
+            match -= dec64;
+        } else { LZ4_copy8(op, match); match+=8; }
+        op += 8;
+
+        if (unlikely(cpy>oend-12)) {
+            UInt8* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
+            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+            if (op < oCopyLimit) {
+                LZ4_wildCopy(op, match, oCopyLimit);
+                match += oCopyLimit - op;
+                op = oCopyLimit;
+            }
+            while (op < cpy) *op++ = *match++;
+        } else {
+            LZ4_copy8(op, match);
+            if (length>16) LZ4_wildCopy(op+8, match+8, cpy);
+        }
+        op=cpy;   /* correction */
+    }
+
+    return (int) (((const char*)ip)-source);   /* Nb of input bytes read */
+
+    /* Overflow error detected */
+_output_error:
+    return (int) (-(((const char*)ip)-source))-1;
+}
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int UNKNOWN_COMPRESSION_METHOD;
+    extern const int TOO_LARGE_SIZE_COMPRESSED;
+    extern const int CANNOT_DECOMPRESS;
+}
+
+class MutatingCompressedReadBufferBase
+{
+protected:
+    ReadBuffer * compressed_in;
+
+    /// If 'compressed_in' buffer has whole compressed block - then use it. Otherwise copy parts of data to 'own_compressed_buffer'.
+    PODArray<char> own_compressed_buffer;
+    /// Points to memory, holding compressed block.
+    char * compressed_buffer = nullptr;
+
+    size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum)
+    {
+        if (compressed_in->eof())
+            return 0;
+
+        CityHash_v1_0_2::uint128 checksum;
+        compressed_in->readStrict(reinterpret_cast<char *>(&checksum), sizeof(checksum));
+
+        own_compressed_buffer.resize(COMPRESSED_BLOCK_HEADER_SIZE);
+        compressed_in->readStrict(&own_compressed_buffer[0], COMPRESSED_BLOCK_HEADER_SIZE);
+
+        UInt8 method = own_compressed_buffer[0];    /// See CompressedWriteBuffer.h
+
+        size_t & size_compressed = size_compressed_without_checksum;
+
+        if (method == static_cast<UInt8>(CompressionMethodByte::LZ4) ||
+            method == static_cast<UInt8>(CompressionMethodByte::ZSTD) ||
+            method == static_cast<UInt8>(CompressionMethodByte::NONE))
+        {
+            size_compressed = unalignedLoad<UInt32>(&own_compressed_buffer[1]);
+            size_decompressed = unalignedLoad<UInt32>(&own_compressed_buffer[5]);
+        }
+        else
+            throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
+
+        if (size_compressed > DBMS_MAX_COMPRESSED_SIZE)
+            throw Exception("Too large size_compressed. Most likely corrupted data.", ErrorCodes::TOO_LARGE_SIZE_COMPRESSED);
+
+        /// Is whole compressed block located in 'compressed_in' buffer?
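+        /// (If so, decompress directly from it and avoid copying the block
+        /// into 'own_compressed_buffer'.)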
+ if (compressed_in->offset() >= COMPRESSED_BLOCK_HEADER_SIZE && + compressed_in->position() + size_compressed - COMPRESSED_BLOCK_HEADER_SIZE <= compressed_in->buffer().end()) + { + compressed_in->position() -= COMPRESSED_BLOCK_HEADER_SIZE; + compressed_buffer = compressed_in->position(); + compressed_in->position() += size_compressed; + } + else + { + own_compressed_buffer.resize(size_compressed); + compressed_buffer = &own_compressed_buffer[0]; + compressed_in->readStrict(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, size_compressed - COMPRESSED_BLOCK_HEADER_SIZE); + } + + return size_compressed + sizeof(checksum); + } + + void decompress(char * to, size_t size_decompressed, size_t size_compressed_without_checksum) + { + UInt8 method = compressed_buffer[0]; /// See CompressedWriteBuffer.h + + if (method == static_cast(CompressionMethodByte::LZ4)) + { + if (LZ4_decompress_mutate(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed) < 0) + throw Exception("Cannot LZ4_decompress_fast", ErrorCodes::CANNOT_DECOMPRESS); + } + else + throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD); + } + +public: + /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. + MutatingCompressedReadBufferBase(ReadBuffer * in = nullptr) + : compressed_in(in), own_compressed_buffer(COMPRESSED_BLOCK_HEADER_SIZE) + { + } +}; + + +class MutatingCompressedReadBuffer : public MutatingCompressedReadBufferBase, public BufferWithOwnMemory +{ +private: + size_t size_compressed = 0; + + bool nextImpl() override + { + size_t size_decompressed; + size_t size_compressed_without_checksum; + size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum); + if (!size_compressed) + return false; + + memory.resize(size_decompressed); + working_buffer = Buffer(&memory[0], &memory[size_decompressed]); + + decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum); + + return true; + } + +public: + MutatingCompressedReadBuffer(ReadBuffer & in_) + : MutatingCompressedReadBufferBase(&in_), BufferWithOwnMemory(0) + { + } +}; + +} + + +int main(int, char **) +try +{ + DB::ReadBufferFromFileDescriptor in(STDIN_FILENO); + DB::MutatingCompressedReadBuffer mutating_in(in); + DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO); + + DB::copyData(mutating_in, out); + + return 0; +} +catch (...) 
+{ + std::cerr << DB::getCurrentExceptionMessage(true); + return DB::getCurrentExceptionCode(); +} From 53ab52f95ca3c1b18c6d173886cfdb7a6644297e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2018 08:54:28 +0300 Subject: [PATCH 002/151] Experiments with LZ4 [#CLICKHOUSE-2] --- dbms/src/Common/LZ4_decompress_faster.cpp | 297 +++++++++++++++++++ dbms/src/Common/LZ4_decompress_faster.h | 37 +++ dbms/src/IO/CachedCompressedReadBuffer.cpp | 3 +- dbms/src/IO/CompressedReadBuffer.cpp | 7 +- dbms/src/IO/CompressedReadBufferBase.cpp | 5 +- dbms/src/IO/CompressedReadBufferFromFile.cpp | 7 +- dbms/src/IO/CompressedStream.h | 3 + dbms/src/Server/LocalServer.cpp | 2 +- utils/compressor/decompress_perf.cpp | 152 +++++++--- 9 files changed, 466 insertions(+), 47 deletions(-) create mode 100644 dbms/src/Common/LZ4_decompress_faster.cpp create mode 100644 dbms/src/Common/LZ4_decompress_faster.h diff --git a/dbms/src/Common/LZ4_decompress_faster.cpp b/dbms/src/Common/LZ4_decompress_faster.cpp new file mode 100644 index 00000000000..b3b0cbaaaef --- /dev/null +++ b/dbms/src/Common/LZ4_decompress_faster.cpp @@ -0,0 +1,297 @@ +#include +#include + +#include +#include + +#include +#include +#include + +#if __SSE2__ +#include +#endif + + +/** for i in *.bin; do ./decompress_perf < $i > /dev/null; done + */ + +namespace LZ4 +{ + +namespace +{ + +template void copy(UInt8 * dst, const UInt8 * src); +template void wildCopy(UInt8 * dst, const UInt8 * src, UInt8 * dst_end); +template void copyOverlap(UInt8 * op, const UInt8 *& match, const size_t offset); + + +inline void copy8(UInt8 * dst, const UInt8 * src) +{ + memcpy(dst, src, 8); +} + +inline void wildCopy8(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) +{ + do + { + copy8(dst, src); + dst += 8; + src += 8; + } while (dst < dst_end); +} + +inline void copyOverlap8(UInt8 * op, const UInt8 *& match, const size_t offset) +{ + /// 4 % n. + static constexpr int shift1[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; + + /// 8 % n - 4 % n + static constexpr int shift2[] = { 0, 0, 0, 1, 0, -1, -2, -3 }; + + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + + match += shift1[offset]; + memcpy(op + 4, match, 4); + match += shift2[offset]; +} + +template <> void inline copy<8>(UInt8 * dst, const UInt8 * src) { copy8(dst, src); }; +template <> void inline wildCopy<8>(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { wildCopy8(dst, src, dst_end); }; +template <> void inline copyOverlap<8>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap8(op, match, offset); }; + + +#if __SSE2__ + +inline void copy16(UInt8 * dst, const UInt8 * src) +{ + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), + _mm_loadu_si128(reinterpret_cast(src))); +} + +inline void wildCopy16(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) +{ + do + { + copy16(dst, src); + dst += 16; + src += 16; + } while (dst < dst_end); +} + +inline void copyOverlap16(UInt8 * op, const UInt8 *& match, const size_t offset) +{ + /// 4 % n. 
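+    /// (More precisely, values congruent to 4 modulo n: the three tables below
+    /// advance 'match' by a total congruent to 16 modulo n, so after the
+    /// 4 + 4 + 8 bytes written here the distance between 'op' and 'match'
+    /// is again a multiple of n and the pattern continues correctly.)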
+ static constexpr int shift1[] + = { 0, 1, 2, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; + + /// 8 % n - 4 % n + static constexpr int shift2[] + = { 0, 0, 0, 1, 0, -1, -2, -3, -4, 4, 4, 4, 4, 4, 4, 4 }; + + /// 16 % n - 8 % n + static constexpr int shift3[] + = { 0, 0, 0, -1, 0, -2, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7 }; + + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + + match += shift1[offset]; + memcpy(op + 4, match, 4); + match += shift2[offset]; + memcpy(op + 8, match, 8); + match += shift3[offset]; +} + +template <> void inline copy<16>(UInt8 * dst, const UInt8 * src) { copy16(dst, src); }; +template <> void inline wildCopy<16>(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { wildCopy16(dst, src, dst_end); }; +template <> void inline copyOverlap<16>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap16(op, match, offset); }; + +#endif + + +template +void NO_INLINE decompressImpl( + const char * const source, + char * const dest, + size_t dest_size) +{ + const UInt8 * ip = (UInt8 *)source; + UInt8 * op = (UInt8 *)dest; + UInt8 * const output_end = op + dest_size; + + while (1) + { + size_t length; + + auto continue_read_length = [&] + { + unsigned s; + do + { + s = *ip++; + length += s; + } while (unlikely(s == 255)); + }; + + /// Get literal length. + + const unsigned token = *ip++; + length = token >> 4; + if (length == 0x0F) + continue_read_length(); + + /// Copy literals. + + UInt8 * copy_end = op + length; + + wildCopy(op, ip, copy_end); + + ip += length; + op = copy_end; + + if (copy_end > output_end) + return; + + /// Get match offset. + + size_t offset = unalignedLoad(ip); + ip += 2; + const UInt8 * match = op - offset; + + /// Get match length. + + length = token & 0x0F; + if (length == 0x0F) + continue_read_length(); + length += 4; + + /// Copy match within block, that produce overlapping pattern. Match may replicate itself. 
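+        /// (E.g. offset = 1, length = 8: the single byte at 'match' must be
+        /// repeated eight times; the match reads data that this same
+        /// instruction is writing.)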
+ + copy_end = op + length; + + if (unlikely(offset < copy_amount)) + { + copyOverlap(op, match, offset); + } + else + { + copy(op, match); + match += copy_amount; + } + + op += copy_amount; + + copy(op, match); + if (length > copy_amount * 2) + wildCopy(op + copy_amount, match + copy_amount, copy_end); + + op = copy_end; + } +} + +} + + +void decompress( + const char * const source, + char * const dest, + size_t source_size, + size_t dest_size) +{ +#if __SSE2__ + if (dest_size / source_size >= 16) + decompressImpl<16>(source, dest, dest_size); + else +#endif + decompressImpl<8>(source, dest, dest_size); +} + + +void Stat::literal(size_t length) +{ + ++num_tokens; + sum_literal_lengths += length; +} + +void Stat::match(size_t length, size_t offset) +{ + ++num_tokens; + sum_match_lengths += length; + sum_match_offsets += offset; + count_match_offset_less_8 += offset < 8; + count_match_offset_less_16 += offset < 16; + count_match_replicate_itself += offset < length; +} + +void Stat::print() const +{ + std::cerr + << "Num tokens: " << num_tokens + << ", Avg literal length: " << double(sum_literal_lengths) / num_tokens + << ", Avg match length: " << double(sum_match_lengths) / num_tokens + << ", Avg match offset: " << double(sum_match_offsets) / num_tokens + << ", Offset < 8 ratio: " << double(count_match_offset_less_8) / num_tokens + << ", Offset < 16 ratio: " << double(count_match_offset_less_16) / num_tokens + << ", Match replicate itself: " << double(count_match_replicate_itself) / num_tokens + << "\n"; +} + +Stat statistics( + const char * const source, + char * const dest, + size_t dest_size, + Stat & stat) +{ + const UInt8 * ip = (UInt8 *)source; + UInt8 * op = (UInt8 *)dest; + UInt8 * const output_end = op + dest_size; + + while (1) + { + size_t length; + + auto continue_read_length = [&] + { + unsigned s; + do + { + s = *ip++; + length += s; + } while (unlikely(s == 255)); + }; + + auto token = *ip++; + length = token >> 4; + if (length == 0x0F) + continue_read_length(); + + stat.literal(length); + + ip += length; + op += length; + + if (op > output_end) + return stat; + + size_t offset = unalignedLoad(ip); + ip += 2; + + length = token & 0x0F; + if (length == 0x0F) + continue_read_length(); + length += 4; + + stat.match(length, offset); + + op += length; + } +} + +} diff --git a/dbms/src/Common/LZ4_decompress_faster.h b/dbms/src/Common/LZ4_decompress_faster.h new file mode 100644 index 00000000000..dfba8cb73f0 --- /dev/null +++ b/dbms/src/Common/LZ4_decompress_faster.h @@ -0,0 +1,37 @@ +namespace LZ4 +{ + +/** This method dispatch to one of different implementations depending on compression ratio. + * 'dest' buffer must have at least 15 excessive bytes, that is allowed to overwrite with garbage. + */ +void decompress( + const char * const source, + char * const dest, + size_t source_size, + size_t dest_size); + +/** Obtain statistics about LZ4 block useful for development. 
+ */ +struct Stat +{ + size_t num_tokens = 0; + size_t sum_literal_lengths = 0; + size_t sum_match_lengths = 0; + size_t sum_match_offsets = 0; + size_t count_match_offset_less_8 = 0; + size_t count_match_offset_less_16 = 0; + size_t count_match_replicate_itself = 0; + + void literal(size_t length); + void match(size_t length, size_t offset); + + void print() const; +}; + +Stat statistics( + const char * const source, + char * const dest, + size_t dest_size, + Stat & stat); + +} diff --git a/dbms/src/IO/CachedCompressedReadBuffer.cpp b/dbms/src/IO/CachedCompressedReadBuffer.cpp index f65780fa7f6..963f755361c 100644 --- a/dbms/src/IO/CachedCompressedReadBuffer.cpp +++ b/dbms/src/IO/CachedCompressedReadBuffer.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB @@ -45,7 +46,7 @@ bool CachedCompressedReadBuffer::nextImpl() if (owned_cell->compressed_size) { - owned_cell->data.resize(size_decompressed); + owned_cell->data.resize(size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); decompress(owned_cell->data.m_data, size_decompressed, size_compressed_without_checksum); /// Put data into cache. diff --git a/dbms/src/IO/CompressedReadBuffer.cpp b/dbms/src/IO/CompressedReadBuffer.cpp index e14cb568e7a..cd1da8e9935 100644 --- a/dbms/src/IO/CompressedReadBuffer.cpp +++ b/dbms/src/IO/CompressedReadBuffer.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB @@ -12,7 +13,7 @@ bool CompressedReadBuffer::nextImpl() if (!size_compressed) return false; - memory.resize(size_decompressed); + memory.resize(size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); working_buffer = Buffer(&memory[0], &memory[size_decompressed]); decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum); @@ -38,7 +39,7 @@ size_t CompressedReadBuffer::readBig(char * to, size_t n) return bytes_read; /// If the decompressed block is placed entirely where it needs to be copied. 
- if (size_decompressed <= n - bytes_read) + if (size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER <= n - bytes_read) { decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum); bytes_read += size_decompressed; @@ -47,7 +48,7 @@ size_t CompressedReadBuffer::readBig(char * to, size_t n) else { bytes += offset(); - memory.resize(size_decompressed); + memory.resize(size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); working_buffer = Buffer(&memory[0], &memory[size_decompressed]); pos = working_buffer.begin(); diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp index feb753f01a8..b20e49ca4d2 100644 --- a/dbms/src/IO/CompressedReadBufferBase.cpp +++ b/dbms/src/IO/CompressedReadBufferBase.cpp @@ -4,12 +4,12 @@ #include #include -#include #include #include #include #include +#include #include #include #include @@ -99,8 +99,7 @@ void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, s if (method == static_cast(CompressionMethodByte::LZ4)) { - if (LZ4_decompress_fast(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed) < 0) - throw Exception("Cannot LZ4_decompress_fast", ErrorCodes::CANNOT_DECOMPRESS); + LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed); } else if (method == static_cast(CompressionMethodByte::ZSTD)) { diff --git a/dbms/src/IO/CompressedReadBufferFromFile.cpp b/dbms/src/IO/CompressedReadBufferFromFile.cpp index fb5639afcc3..57ac6ad0c63 100644 --- a/dbms/src/IO/CompressedReadBufferFromFile.cpp +++ b/dbms/src/IO/CompressedReadBufferFromFile.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB @@ -20,7 +21,7 @@ bool CompressedReadBufferFromFile::nextImpl() if (!size_compressed) return false; - memory.resize(size_decompressed); + memory.resize(size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); working_buffer = Buffer(&memory[0], &memory[size_decompressed]); decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum); @@ -88,7 +89,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) return bytes_read; /// If the decompressed block fits entirely where it needs to be copied. - if (size_decompressed <= n - bytes_read) + if (size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER <= n - bytes_read) { decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum); bytes_read += size_decompressed; @@ -98,7 +99,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) { size_compressed = new_size_compressed; bytes += offset(); - memory.resize(size_decompressed); + memory.resize(size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); working_buffer = Buffer(&memory[0], &memory[size_decompressed]); pos = working_buffer.begin(); diff --git a/dbms/src/IO/CompressedStream.h b/dbms/src/IO/CompressedStream.h index 5a00db0201d..d884023c6d2 100644 --- a/dbms/src/IO/CompressedStream.h +++ b/dbms/src/IO/CompressedStream.h @@ -49,4 +49,7 @@ enum class CompressionMethodByte : uint8_t ZSTD = 0x90, }; +/// This is required for faster LZ4 decompression method. 
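+/// The decompressor copies in chunks of up to 16 bytes, so it may write
+/// at most 15 bytes of garbage past the end of the decompressed data.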
+constexpr size_t ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER = 15; + } diff --git a/dbms/src/Server/LocalServer.cpp b/dbms/src/Server/LocalServer.cpp index e61fed50003..1217b71b61f 100644 --- a/dbms/src/Server/LocalServer.cpp +++ b/dbms/src/Server/LocalServer.cpp @@ -111,7 +111,7 @@ void LocalServer::defineOptions(Poco::Util::OptionSet& _options) /// Alias for previous one, required for clickhouse-client compability _options.addOption( - Poco::Util::Option("format", "", "Default ouput format") + Poco::Util::Option("format", "", "Default output format") .required(false) .repeatable(false) .argument("[TSV]", true) diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp index 93b1e676bd9..e0b5a57fe9b 100644 --- a/utils/compressor/decompress_perf.cpp +++ b/utils/compressor/decompress_perf.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -22,24 +21,105 @@ /** for i in *.bin; do ./decompress_perf < $i > /dev/null; done */ +namespace LZ4 +{ -static void LZ4_wildCopy(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) +static void copy16(UInt8 * dst, const UInt8 * src) +{ + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), + _mm_loadu_si128(reinterpret_cast(src))); +} + +static void wildCopy16(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { do { - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), - _mm_loadu_si128(reinterpret_cast(src))); - + copy16(dst, src); dst += 16; src += 16; } while (dst < dst_end); } +static void copy8(UInt8 * dst, const UInt8 * src) +{ + memcpy(dst, src, 8); +} -void LZ4_decompress_faster( - const char * const source, - char * const dest, - size_t dest_size) +static void wildCopy8(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) +{ + do + { + copy8(dst, src); + dst += 8; + src += 8; + } while (dst < dst_end); +} + + +static void copyOverlap16(UInt8 * op, const UInt8 *& match, const size_t offset) +{ + /// 4 % n. + static constexpr int shift1[] + = { 0, 1, 2, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; + + /// 8 % n - 4 % n + static constexpr int shift2[] + = { 0, 0, 0, 1, 0, -1, -2, -3, -4, 4, 4, 4, 4, 4, 4, 4 }; + + /// 16 % n - 8 % n + static constexpr int shift3[] + = { 0, 0, 0, -1, 0, -2, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7 }; + + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + + match += shift1[offset]; + memcpy(op + 4, match, 4); + match += shift2[offset]; + memcpy(op + 8, match, 8); + match += shift3[offset]; +} + + +static void copyOverlap8(UInt8 * op, const UInt8 *& match, const size_t offset) +{ + /// 4 % n. 
+ static constexpr int shift1[] = {0, 1, 2, 1, 4, 4, 4, 4}; + + /// 8 % n - 4 % n + static constexpr int shift2[] = {0, 0, 0, 1, 0, -1, -2, -3}; + + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + + match += shift1[offset]; + memcpy(op + 4, match, 4); + match += shift2[offset]; +} + + +template void copy(UInt8 * dst, const UInt8 * src); +template <> void copy<8>(UInt8 * dst, const UInt8 * src) { copy8(dst, src); }; +template <> void copy<16>(UInt8 * dst, const UInt8 * src) { copy16(dst, src); }; + +template void wildCopy(UInt8 * dst, const UInt8 * src, UInt8 * dst_end); +template <> void wildCopy<8>(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { wildCopy8(dst, src, dst_end); }; +template <> void wildCopy<16>(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { wildCopy16(dst, src, dst_end); }; + +template void copyOverlap(UInt8 * op, const UInt8 *& match, const size_t offset); +template <> void copyOverlap<8>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap8(op, match, offset); }; +template <> void copyOverlap<16>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap16(op, match, offset); }; + + +template +void decompress( + const char * const __restrict source, + char * const __restrict dest, + size_t dest_size) { const UInt8 * ip = (UInt8 *)source; UInt8 * op = (UInt8 *)dest; @@ -61,7 +141,7 @@ void LZ4_decompress_faster( /// Get literal length. - auto token = *ip++; + const unsigned token = *ip++; length = token >> 4; if (length == 0x0F) continue_read_length(); @@ -69,7 +149,9 @@ void LZ4_decompress_faster( /// Copy literals. UInt8 * copy_end = op + length; - LZ4_wildCopy(op, ip, copy_end); + + wildCopy(op, ip, copy_end); + ip += length; op = copy_end; @@ -93,37 +175,28 @@ void LZ4_decompress_faster( copy_end = op + length; - if (unlikely(offset < 16)) + if (unlikely(offset < copy_amount)) { - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - op[4] = match[4]; - op[5] = match[5]; - op[6] = match[6]; - op[7] = match[7]; - op[8] = match[8]; - op[9] = match[9]; - op[10] = match[10]; - op[11] = match[11]; - op[12] = match[12]; - op[13] = match[13]; - op[14] = match[14]; - op[15] = match[15]; - - op += 16; - - /// 16 % N - const unsigned shift[] = { 0, 0, 0, 1, 0, 1, 4, 2, 0, 7, 6, 5, 4, 3, 2, 1 }; - match += shift[offset]; + copyOverlap(op, match, offset); + } + else + { + copy(op, match); + match += copy_amount; } - LZ4_wildCopy(op, match, copy_end); + op += copy_amount; + + copy(op, match); + if (length > copy_amount * 2) + wildCopy(op + copy_amount, match + copy_amount, copy_end); + op = copy_end; } } +} + namespace DB { @@ -145,6 +218,8 @@ protected: /// Points to memory, holding compressed block. 
char * compressed_buffer = nullptr; + LZ4Stat stat; + size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum) { if (compressed_in->eof()) @@ -197,7 +272,8 @@ protected: if (method == static_cast(CompressionMethodByte::LZ4)) { - LZ4_decompress_faster(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed); + //LZ4::statistics(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed, stat); + LZ4::decompress<8>(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed); } else throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD); @@ -209,6 +285,8 @@ public: : compressed_in(in), own_compressed_buffer(COMPRESSED_BLOCK_HEADER_SIZE) { } + + LZ4Stat getStatistics() const { return stat; } }; @@ -270,6 +348,8 @@ try << ", checksum: " << hash.first << "_" << hash.second << "\n"; + decompressing_in.getStatistics().print(); + return 0; } catch (...) From 844c6d7786de4f697c2eaaa603ea62d9233899cd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jan 2018 08:56:50 +0300 Subject: [PATCH 003/151] Experiments with LZ4 [#CLICKHOUSE-2] --- utils/compressor/decompress_perf.cpp | 184 +-------------------------- 1 file changed, 4 insertions(+), 180 deletions(-) diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp index e0b5a57fe9b..73aba287db1 100644 --- a/utils/compressor/decompress_perf.cpp +++ b/utils/compressor/decompress_perf.cpp @@ -15,189 +15,13 @@ #include #include #include +#include #include /** for i in *.bin; do ./decompress_perf < $i > /dev/null; done */ -namespace LZ4 -{ - -static void copy16(UInt8 * dst, const UInt8 * src) -{ - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), - _mm_loadu_si128(reinterpret_cast(src))); -} - -static void wildCopy16(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) -{ - do - { - copy16(dst, src); - dst += 16; - src += 16; - } while (dst < dst_end); -} - -static void copy8(UInt8 * dst, const UInt8 * src) -{ - memcpy(dst, src, 8); -} - -static void wildCopy8(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) -{ - do - { - copy8(dst, src); - dst += 8; - src += 8; - } while (dst < dst_end); -} - - -static void copyOverlap16(UInt8 * op, const UInt8 *& match, const size_t offset) -{ - /// 4 % n. - static constexpr int shift1[] - = { 0, 1, 2, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; - - /// 8 % n - 4 % n - static constexpr int shift2[] - = { 0, 0, 0, 1, 0, -1, -2, -3, -4, 4, 4, 4, 4, 4, 4, 4 }; - - /// 16 % n - 8 % n - static constexpr int shift3[] - = { 0, 0, 0, -1, 0, -2, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7 }; - - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - - match += shift1[offset]; - memcpy(op + 4, match, 4); - match += shift2[offset]; - memcpy(op + 8, match, 8); - match += shift3[offset]; -} - - -static void copyOverlap8(UInt8 * op, const UInt8 *& match, const size_t offset) -{ - /// 4 % n. 
- static constexpr int shift1[] = {0, 1, 2, 1, 4, 4, 4, 4}; - - /// 8 % n - 4 % n - static constexpr int shift2[] = {0, 0, 0, 1, 0, -1, -2, -3}; - - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - - match += shift1[offset]; - memcpy(op + 4, match, 4); - match += shift2[offset]; -} - - -template void copy(UInt8 * dst, const UInt8 * src); -template <> void copy<8>(UInt8 * dst, const UInt8 * src) { copy8(dst, src); }; -template <> void copy<16>(UInt8 * dst, const UInt8 * src) { copy16(dst, src); }; - -template void wildCopy(UInt8 * dst, const UInt8 * src, UInt8 * dst_end); -template <> void wildCopy<8>(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { wildCopy8(dst, src, dst_end); }; -template <> void wildCopy<16>(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { wildCopy16(dst, src, dst_end); }; - -template void copyOverlap(UInt8 * op, const UInt8 *& match, const size_t offset); -template <> void copyOverlap<8>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap8(op, match, offset); }; -template <> void copyOverlap<16>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap16(op, match, offset); }; - - -template -void decompress( - const char * const __restrict source, - char * const __restrict dest, - size_t dest_size) -{ - const UInt8 * ip = (UInt8 *)source; - UInt8 * op = (UInt8 *)dest; - UInt8 * const output_end = op + dest_size; - - while (1) - { - size_t length; - - auto continue_read_length = [&] - { - unsigned s; - do - { - s = *ip++; - length += s; - } while (unlikely(s == 255)); - }; - - /// Get literal length. - - const unsigned token = *ip++; - length = token >> 4; - if (length == 0x0F) - continue_read_length(); - - /// Copy literals. - - UInt8 * copy_end = op + length; - - wildCopy(op, ip, copy_end); - - ip += length; - op = copy_end; - - if (copy_end > output_end) - return; - - /// Get match offset. - - size_t offset = unalignedLoad(ip); - ip += 2; - const UInt8 * match = op - offset; - - /// Get match length. - - length = token & 0x0F; - if (length == 0x0F) - continue_read_length(); - length += 4; - - /// Copy match within block, that produce overlapping pattern. - - copy_end = op + length; - - if (unlikely(offset < copy_amount)) - { - copyOverlap(op, match, offset); - } - else - { - copy(op, match); - match += copy_amount; - } - - op += copy_amount; - - copy(op, match); - if (length > copy_amount * 2) - wildCopy(op + copy_amount, match + copy_amount, copy_end); - - op = copy_end; - } -} - -} - - namespace DB { @@ -218,7 +42,7 @@ protected: /// Points to memory, holding compressed block. 
char * compressed_buffer = nullptr; - LZ4Stat stat; + LZ4::Stat stat; size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum) { @@ -273,7 +97,7 @@ protected: if (method == static_cast(CompressionMethodByte::LZ4)) { //LZ4::statistics(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed, stat); - LZ4::decompress<8>(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed); + LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed); } else throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD); @@ -286,7 +110,7 @@ public: { } - LZ4Stat getStatistics() const { return stat; } + LZ4::Stat getStatistics() const { return stat; } }; From 9e1e079a3434eb49ff0abaf59e2efe733854a0a0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jan 2018 00:00:26 +0300 Subject: [PATCH 004/151] Experiments continued [#CLICKHOUSE-2] --- dbms/src/Common/LZ4_decompress_faster.cpp | 96 ++++++++++++++++++++--- utils/compressor/decompress_perf.cpp | 2 +- 2 files changed, 84 insertions(+), 14 deletions(-) diff --git a/dbms/src/Common/LZ4_decompress_faster.cpp b/dbms/src/Common/LZ4_decompress_faster.cpp index b3b0cbaaaef..860ed197fd1 100644 --- a/dbms/src/Common/LZ4_decompress_faster.cpp +++ b/dbms/src/Common/LZ4_decompress_faster.cpp @@ -12,6 +12,10 @@ #include #endif +#if __SSSE3__ +#include +#endif + /** for i in *.bin; do ./decompress_perf < $i > /dev/null; done */ @@ -22,9 +26,9 @@ namespace LZ4 namespace { -template void copy(UInt8 * dst, const UInt8 * src); -template void wildCopy(UInt8 * dst, const UInt8 * src, UInt8 * dst_end); -template void copyOverlap(UInt8 * op, const UInt8 *& match, const size_t offset); +template [[maybe_unused]] void copy(UInt8 * dst, const UInt8 * src); +template [[maybe_unused]] void wildCopy(UInt8 * dst, const UInt8 * src, UInt8 * dst_end); +template [[maybe_unused]] void copyOverlap(UInt8 * op, const UInt8 *& match, const size_t offset); inline void copy8(UInt8 * dst, const UInt8 * src) @@ -60,9 +64,37 @@ inline void copyOverlap8(UInt8 * op, const UInt8 *& match, const size_t offset) match += shift2[offset]; } +inline void copyOverlap8Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset) +{ +#ifdef __SSSE3__ + + static constexpr UInt8 __attribute__((__aligned__(8))) masks[] = + { + 0, 1, 2, 2, 4, 3, 2, 1, /* offset = 0, not used as mask, but for shift amount instead */ + 0, 0, 0, 0, 0, 0, 0, 0, /* offset = 1 */ + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 2, 0, 1, 2, 0, 1, + 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 4, 0, 1, 2, + 0, 1, 2, 3, 4, 5, 0, 1, + 0, 1, 2, 3, 4, 5, 6, 0, + }; + + unalignedStore(op, _mm_shuffle_pi8( + unalignedLoad<__m64>(match), + unalignedLoad<__m64>(masks + 8 * offset))); + + match += masks[offset]; + +#else + copyOverlap8(op, match, offset); +#endif +} + template <> void inline copy<8>(UInt8 * dst, const UInt8 * src) { copy8(dst, src); }; template <> void inline wildCopy<8>(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { wildCopy8(dst, src, dst_end); }; -template <> void inline copyOverlap<8>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap8(op, match, offset); }; +template <> void inline copyOverlap<8, false>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap8(op, match, offset); }; +template <> void inline copyOverlap<8, true>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap8Shuffle(op, match, offset); }; 
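+
+/// In the shuffle variants, row 'offset' of 'masks' holds the pattern i % offset,
+/// so a single shuffle instruction materializes the repeating bytes in one step.
+/// Row 0 is repurposed to store how far 'match' must be advanced afterwards
+/// (a value congruent to the copy size modulo offset).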
#if __SSE2__ @@ -95,7 +127,7 @@ inline void copyOverlap16(UInt8 * op, const UInt8 *& match, const size_t offset) /// 16 % n - 8 % n static constexpr int shift3[] - = { 0, 0, 0, -1, 0, -2, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7 }; + = { 0, 0, 0, -1, 0, -2, 2, 1, 8, -1, -2, -3, -4, -5, -6, -7 }; op[0] = match[0]; op[1] = match[1]; @@ -109,14 +141,51 @@ inline void copyOverlap16(UInt8 * op, const UInt8 *& match, const size_t offset) match += shift3[offset]; } +inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset) +{ +#ifdef __SSSE3__ + + static constexpr UInt8 __attribute__((__aligned__(16))) masks[] = + { + 0, 1, 2, 1, 4, 1, 4, 2, 8, 7, 6, 5, 4, 3, 2, 1, /* offset = 0, not used as mask, but for shift amount instead */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* offset = 1 */ + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, + 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, + 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 2, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + }; + + _mm_storeu_si128(reinterpret_cast<__m128i *>(op), + _mm_shuffle_epi8( + _mm_loadu_si128(reinterpret_cast(match)), + _mm_load_si128(reinterpret_cast(masks) + offset))); + + match += masks[offset]; + +#else + copyOverlap16(op, match, offset); +#endif +} + template <> void inline copy<16>(UInt8 * dst, const UInt8 * src) { copy16(dst, src); }; template <> void inline wildCopy<16>(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { wildCopy16(dst, src, dst_end); }; -template <> void inline copyOverlap<16>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap16(op, match, offset); }; +template <> void inline copyOverlap<16, false>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap16(op, match, offset); }; +template <> void inline copyOverlap<16, true>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap16Shuffle(op, match, offset); }; #endif -template +template void NO_INLINE decompressImpl( const char * const source, char * const dest, @@ -178,7 +247,7 @@ void NO_INLINE decompressImpl( if (unlikely(offset < copy_amount)) { - copyOverlap(op, match, offset); + copyOverlap(op, match, offset); } else { @@ -205,12 +274,13 @@ void decompress( size_t source_size, size_t dest_size) { -#if __SSE2__ - if (dest_size / source_size >= 16) - decompressImpl<16>(source, dest, dest_size); + decompressImpl<16, true>(source, dest, dest_size); + (void) source_size; + +/* if (dest_size / source_size >= 16) + decompressImpl<16, true>(source, dest, dest_size); else -#endif - decompressImpl<8>(source, dest, dest_size); + decompressImpl<8, true>(source, dest, dest_size);*/ } diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp index 73aba287db1..2958a0fe9b5 100644 --- a/utils/compressor/decompress_perf.cpp +++ b/utils/compressor/decompress_perf.cpp @@ -172,7 +172,7 @@ try << ", checksum: " << hash.first << "_" << hash.second << "\n"; - decompressing_in.getStatistics().print(); +// 
decompressing_in.getStatistics().print(); return 0; } From 6d1e68baf91348775d56226c62385fe740239265 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jan 2018 00:52:24 +0300 Subject: [PATCH 005/151] Experiments continued [#CLICKHOUSE-2] --- dbms/src/Common/LZ4_decompress_faster.cpp | 12 +++++++++--- utils/compressor/decompress_perf.cpp | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/dbms/src/Common/LZ4_decompress_faster.cpp b/dbms/src/Common/LZ4_decompress_faster.cpp index 860ed197fd1..233af72a739 100644 --- a/dbms/src/Common/LZ4_decompress_faster.cpp +++ b/dbms/src/Common/LZ4_decompress_faster.cpp @@ -184,6 +184,8 @@ template <> void inline copyOverlap<16, true>(UInt8 * op, const UInt8 *& match, #endif +/// See also https://stackoverflow.com/a/30669632 + template void NO_INLINE decompressImpl( @@ -220,12 +222,12 @@ void NO_INLINE decompressImpl( UInt8 * copy_end = op + length; - wildCopy(op, ip, copy_end); + wildCopy(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer. ip += length; op = copy_end; - if (copy_end > output_end) + if (copy_end >= output_end) return; /// Get match offset. @@ -245,6 +247,10 @@ void NO_INLINE decompressImpl( copy_end = op + length; + /** Here we can write up to copy_amount - 1 - 4 * 2 bytes after buffer. + * The worst case when offset = 1 and length = 4 + */ + if (unlikely(offset < copy_amount)) { copyOverlap(op, match, offset); @@ -257,7 +263,7 @@ void NO_INLINE decompressImpl( op += copy_amount; - copy(op, match); + copy(op, match); /// copy_amount + copy_amount - 1 - 4 * 2 bytes after buffer. if (length > copy_amount * 2) wildCopy(op + copy_amount, match + copy_amount, copy_end); diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp index 2958a0fe9b5..5752f3d10fc 100644 --- a/utils/compressor/decompress_perf.cpp +++ b/utils/compressor/decompress_perf.cpp @@ -82,7 +82,7 @@ protected: } else { - own_compressed_buffer.resize(size_compressed); + own_compressed_buffer.resize(size_compressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); compressed_buffer = &own_compressed_buffer[0]; compressed_in->readStrict(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, size_compressed - COMPRESSED_BLOCK_HEADER_SIZE); } @@ -127,7 +127,7 @@ private: if (!size_compressed) return false; - memory.resize(size_decompressed + 15); + memory.resize(size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); working_buffer = Buffer(&memory[0], &memory[size_decompressed]); decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum); From 2fa8992e2c04f401c0e99f278461f46248fe0d3d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jan 2018 04:59:51 +0300 Subject: [PATCH 006/151] Continued experiments [#CLICKHOUSE-2] --- dbms/src/Common/LZ4_decompress_faster.h | 37 ----- dbms/src/IO/CachedCompressedReadBuffer.cpp | 3 +- dbms/src/IO/CompressedReadBuffer.cpp | 7 +- dbms/src/IO/CompressedReadBufferBase.cpp | 7 +- dbms/src/IO/CompressedReadBufferBase.h | 3 + dbms/src/IO/CompressedReadBufferFromFile.cpp | 7 +- dbms/src/IO/CompressedStream.h | 3 - .../{Common => IO}/LZ4_decompress_faster.cpp | 54 +++++-- dbms/src/IO/LZ4_decompress_faster.h | 147 ++++++++++++++++++ utils/compressor/decompress_perf.cpp | 27 +++- 10 files changed, 228 insertions(+), 67 deletions(-) delete mode 100644 dbms/src/Common/LZ4_decompress_faster.h rename dbms/src/{Common => IO}/LZ4_decompress_faster.cpp (88%) create mode 100644 dbms/src/IO/LZ4_decompress_faster.h diff --git 
a/dbms/src/Common/LZ4_decompress_faster.h b/dbms/src/Common/LZ4_decompress_faster.h deleted file mode 100644 index dfba8cb73f0..00000000000 --- a/dbms/src/Common/LZ4_decompress_faster.h +++ /dev/null @@ -1,37 +0,0 @@ -namespace LZ4 -{ - -/** This method dispatch to one of different implementations depending on compression ratio. - * 'dest' buffer must have at least 15 excessive bytes, that is allowed to overwrite with garbage. - */ -void decompress( - const char * const source, - char * const dest, - size_t source_size, - size_t dest_size); - -/** Obtain statistics about LZ4 block useful for development. - */ -struct Stat -{ - size_t num_tokens = 0; - size_t sum_literal_lengths = 0; - size_t sum_match_lengths = 0; - size_t sum_match_offsets = 0; - size_t count_match_offset_less_8 = 0; - size_t count_match_offset_less_16 = 0; - size_t count_match_replicate_itself = 0; - - void literal(size_t length); - void match(size_t length, size_t offset); - - void print() const; -}; - -Stat statistics( - const char * const source, - char * const dest, - size_t dest_size, - Stat & stat); - -} diff --git a/dbms/src/IO/CachedCompressedReadBuffer.cpp b/dbms/src/IO/CachedCompressedReadBuffer.cpp index 963f755361c..83eb6c23e28 100644 --- a/dbms/src/IO/CachedCompressedReadBuffer.cpp +++ b/dbms/src/IO/CachedCompressedReadBuffer.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB @@ -46,7 +47,7 @@ bool CachedCompressedReadBuffer::nextImpl() if (owned_cell->compressed_size) { - owned_cell->data.resize(size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); + owned_cell->data.resize(size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); decompress(owned_cell->data.m_data, size_decompressed, size_compressed_without_checksum); /// Put data into cache. diff --git a/dbms/src/IO/CompressedReadBuffer.cpp b/dbms/src/IO/CompressedReadBuffer.cpp index cd1da8e9935..368c033f0af 100644 --- a/dbms/src/IO/CompressedReadBuffer.cpp +++ b/dbms/src/IO/CompressedReadBuffer.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB @@ -13,7 +14,7 @@ bool CompressedReadBuffer::nextImpl() if (!size_compressed) return false; - memory.resize(size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); + memory.resize(size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); working_buffer = Buffer(&memory[0], &memory[size_decompressed]); decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum); @@ -39,7 +40,7 @@ size_t CompressedReadBuffer::readBig(char * to, size_t n) return bytes_read; /// If the decompressed block is placed entirely where it needs to be copied. 
- if (size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER <= n - bytes_read) + if (size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER <= n - bytes_read) { decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum); bytes_read += size_decompressed; @@ -48,7 +49,7 @@ size_t CompressedReadBuffer::readBig(char * to, size_t n) else { bytes += offset(); - memory.resize(size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); + memory.resize(size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); working_buffer = Buffer(&memory[0], &memory[size_decompressed]); pos = working_buffer.begin(); diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp index b20e49ca4d2..56ce982c26f 100644 --- a/dbms/src/IO/CompressedReadBufferBase.cpp +++ b/dbms/src/IO/CompressedReadBufferBase.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -70,7 +69,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, /// Is whole compressed block located in 'compressed_in' buffer? if (compressed_in->offset() >= COMPRESSED_BLOCK_HEADER_SIZE && - compressed_in->position() + size_compressed - COMPRESSED_BLOCK_HEADER_SIZE <= compressed_in->buffer().end()) + compressed_in->position() + size_compressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER - COMPRESSED_BLOCK_HEADER_SIZE <= compressed_in->buffer().end()) { compressed_in->position() -= COMPRESSED_BLOCK_HEADER_SIZE; compressed_buffer = compressed_in->position(); @@ -78,7 +77,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, } else { - own_compressed_buffer.resize(size_compressed); + own_compressed_buffer.resize(size_compressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); compressed_buffer = &own_compressed_buffer[0]; compressed_in->readStrict(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, size_compressed - COMPRESSED_BLOCK_HEADER_SIZE); } @@ -99,7 +98,7 @@ void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, s if (method == static_cast(CompressionMethodByte::LZ4)) { - LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed); + LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed, lz4_stat); } else if (method == static_cast(CompressionMethodByte::ZSTD)) { diff --git a/dbms/src/IO/CompressedReadBufferBase.h b/dbms/src/IO/CompressedReadBufferBase.h index 18ae060abc3..5b6d0e0ca47 100644 --- a/dbms/src/IO/CompressedReadBufferBase.h +++ b/dbms/src/IO/CompressedReadBufferBase.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -24,6 +25,8 @@ protected: /// Don't checksum on decompressing. bool disable_checksum = false; + LZ4::PerformanceStatistics lz4_stat; + /// Read compressed data into compressed_buffer. Get size of decompressed data from block header. Checksum if need. /// Returns number of compressed bytes read. 
diff --git a/dbms/src/IO/CompressedReadBufferFromFile.cpp b/dbms/src/IO/CompressedReadBufferFromFile.cpp index 57ac6ad0c63..fa9e69b82e5 100644 --- a/dbms/src/IO/CompressedReadBufferFromFile.cpp +++ b/dbms/src/IO/CompressedReadBufferFromFile.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB @@ -21,7 +22,7 @@ bool CompressedReadBufferFromFile::nextImpl() if (!size_compressed) return false; - memory.resize(size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); + memory.resize(size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); working_buffer = Buffer(&memory[0], &memory[size_decompressed]); decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum); @@ -89,7 +90,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) return bytes_read; /// If the decompressed block fits entirely where it needs to be copied. - if (size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER <= n - bytes_read) + if (size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER <= n - bytes_read) { decompress(to + bytes_read, size_decompressed, size_compressed_without_checksum); bytes_read += size_decompressed; @@ -99,7 +100,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) { size_compressed = new_size_compressed; bytes += offset(); - memory.resize(size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); + memory.resize(size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); working_buffer = Buffer(&memory[0], &memory[size_decompressed]); pos = working_buffer.begin(); diff --git a/dbms/src/IO/CompressedStream.h b/dbms/src/IO/CompressedStream.h index d884023c6d2..5a00db0201d 100644 --- a/dbms/src/IO/CompressedStream.h +++ b/dbms/src/IO/CompressedStream.h @@ -49,7 +49,4 @@ enum class CompressionMethodByte : uint8_t ZSTD = 0x90, }; -/// This is required for faster LZ4 decompression method. -constexpr size_t ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER = 15; - } diff --git a/dbms/src/Common/LZ4_decompress_faster.cpp b/dbms/src/IO/LZ4_decompress_faster.cpp similarity index 88% rename from dbms/src/Common/LZ4_decompress_faster.cpp rename to dbms/src/IO/LZ4_decompress_faster.cpp index 233af72a739..459647408f3 100644 --- a/dbms/src/Common/LZ4_decompress_faster.cpp +++ b/dbms/src/IO/LZ4_decompress_faster.cpp @@ -1,8 +1,11 @@ #include #include +#include +#include -#include +#include #include +#include #include #include @@ -278,10 +281,41 @@ void decompress( const char * const source, char * const dest, size_t source_size, - size_t dest_size) + size_t dest_size, + PerformanceStatistics & statistics) { - decompressImpl<16, true>(source, dest, dest_size); - (void) source_size; + if (source_size == 0 || dest_size == 0) + return; + + /// Don't run timer if the block is too small. + + if (dest_size >= 32768) + { + size_t best_variant = statistics.select(); + + /// Run the selected method and measure time. + + Stopwatch watch; + + if (best_variant == 0) + decompressImpl<8, false>(source, dest, dest_size); + if (best_variant == 1) + decompressImpl<8, true>(source, dest, dest_size); + if (best_variant == 2) + decompressImpl<16, false>(source, dest, dest_size); + if (best_variant == 3) + decompressImpl<16, true>(source, dest, dest_size); + + watch.stop(); + + /// Update performance statistics. 
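+ /// The measured time is normalized per byte inside 'update', so variants are compared by cost per byte rather than by raw duration.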
+ + statistics.data[best_variant].update(watch.elapsedSeconds(), dest_size); + } + else + { + decompressImpl<8, false>(source, dest, dest_size); + } /* if (dest_size / source_size >= 16) decompressImpl<16, true>(source, dest, dest_size); @@ -290,13 +324,13 @@ void decompress( } -void Stat::literal(size_t length) +void StreamStatistics::literal(size_t length) { ++num_tokens; sum_literal_lengths += length; } -void Stat::match(size_t length, size_t offset) +void StreamStatistics::match(size_t length, size_t offset) { ++num_tokens; sum_match_lengths += length; @@ -306,7 +340,7 @@ void Stat::match(size_t length, size_t offset) count_match_replicate_itself += offset < length; } -void Stat::print() const +void StreamStatistics::print() const { std::cerr << "Num tokens: " << num_tokens @@ -319,11 +353,11 @@ void Stat::print() const << "\n"; } -Stat statistics( +void statistics( const char * const source, char * const dest, size_t dest_size, - Stat & stat) + StreamStatistics & stat) { const UInt8 * ip = (UInt8 *)source; UInt8 * op = (UInt8 *)dest; @@ -354,7 +388,7 @@ Stat statistics( op += length; if (op > output_end) - return stat; + return; size_t offset = unalignedLoad(ip); ip += 2; diff --git a/dbms/src/IO/LZ4_decompress_faster.h b/dbms/src/IO/LZ4_decompress_faster.h new file mode 100644 index 00000000000..a15f2662aa3 --- /dev/null +++ b/dbms/src/IO/LZ4_decompress_faster.h @@ -0,0 +1,147 @@ +#pragma once + +#include +#include +#include + + +namespace LZ4 +{ + +/** There are many implementation details of LZ4 decompression loop, that affect performance. + * For example: copy by 8 or by 16 bytes at once; use shuffle instruction to replicate match or not. + * + * The optimal algorithm is dependent: + * + * - on CPU architecture + * (example: on Skylake it's almost always better to copy by 16 bytes and use shuffle, + * but on Westmere using shuffle is worse and copy by 16 bytes is better only for high compression ratios) + * + * - on data distribution + * (example: when compression ratio is higher than 10.20, + * it's usually better to copy by 16 bytes rather than 8). + * + * It's very difficult to test all combinations on different CPUs and to choose correct rule to select best variant. + * (Even if you do this, you have high chance to over-optimize for specific CPU while downgrading performance on another.) + * + * Instead of this, we choose best algorithm by using performance statistics + * with something like "Bayesian Bandits" method. + */ + + +/** Both buffers passed to 'decompress' function must have + * at least this amount of excessive bytes after end of data + * that is allowed to read/write. + * This value is a little overestimation. + */ +static constexpr size_t ADDITIONAL_BYTES_AT_END_OF_BUFFER = 32; + + +/** When decompressing uniform sequence of blocks (for example, blocks from one file), + * you can pass single PerformanceStatistics object to subsequent invocations of 'decompress' method. + * It will accumulate statistics and use it as a feedback to choose best specialization of algorithm at runtime. + * One PerformanceStatistics object cannot be used concurrently from different threads. + */ +struct PerformanceStatistics +{ + struct Element + { + double count = 0; + double sum = 0; + + double adjusted_count() const + { + return count - NUM_INVOCATIONS_TO_THROW_OFF; + } + + double mean() const + { + return sum / adjusted_count(); + } + + /// For better convergence, we don't use proper estimate of stddev. 
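+ /// (sigma() below is mean() / sqrt(adjusted_count()), so the estimated uncertainty steadily shrinks as observations accumulate.)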
+ /// We want to eventually choose between two algorithms even in case
+ /// when there is no statistically significant difference between them.
+ double sigma() const
+ {
+ return mean() / sqrt(adjusted_count());
+ }
+
+ void update(double seconds, double bytes)
+ {
+ ++count;
+
+ if (count > PerformanceStatistics::NUM_INVOCATIONS_TO_THROW_OFF)
+ sum += seconds / bytes;
+ }
+
+ double sample(pcg64 & rng) const
+ {
+ /// If there is a variant with not enough statistics, always choose it.
+ /// And in that case prefer the variant with the smallest number of invocations.
+
+ if (adjusted_count() < 2)
+ return adjusted_count() - 1;
+ else
+ return std::normal_distribution<>(mean(), sigma())(rng);
+ }
+ };
+
+ /// Number of different algorithms to select from.
+ static constexpr size_t NUM_ELEMENTS = 4;
+
+ /// Cold invocations may be affected by additional memory latencies. Don't take first invocations into account.
+ static constexpr double NUM_INVOCATIONS_TO_THROW_OFF = 2;
+
+ Element data[NUM_ELEMENTS];
+
+ pcg64 rng;
+
+ /// To select from different algorithms we use a kind of "bandits" algorithm.
+ /// Sample random values from estimated normal distributions and choose the minimal.
+ size_t select()
+ {
+ double samples[NUM_ELEMENTS];
+ for (size_t i = 0; i < NUM_ELEMENTS; ++i)
+ samples[i] = data[i].sample(rng);
+
+ return std::min_element(samples, samples + NUM_ELEMENTS) - samples;
+ }
+};
+
+
+/** This method dispatches to one of several implementations depending on performance statistics.
+ */
+void decompress(
+ const char * const source,
+ char * const dest,
+ size_t source_size,
+ size_t dest_size,
+ PerformanceStatistics & statistics);
+
+
+/** Obtain statistics about an LZ4 block, useful for development.
+ */
+struct StreamStatistics
+{
+ size_t num_tokens = 0;
+ size_t sum_literal_lengths = 0;
+ size_t sum_match_lengths = 0;
+ size_t sum_match_offsets = 0;
+ size_t count_match_offset_less_8 = 0;
+ size_t count_match_offset_less_16 = 0;
+ size_t count_match_replicate_itself = 0;
+
+ void literal(size_t length);
+ void match(size_t length, size_t offset);
+
+ void print() const;
+};
+
+void statistics(
+ const char * const source,
+ char * const dest,
+ size_t dest_size,
+ StreamStatistics & stat);
+
+}
diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp
index 5752f3d10fc..1bea7387853 100644
--- a/utils/compressor/decompress_perf.cpp
+++ b/utils/compressor/decompress_perf.cpp
@@ -10,12 +10,12 @@
#include
#include
#include
+#include
#include
#include
#include
#include
#include
-#include
#include

@@ -42,7 +42,8 @@ protected:
/// Points to memory, holding compressed block.
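/// It points either into 'compressed_in' (when the whole compressed block is already there) or into 'own_compressed_buffer'.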
char * compressed_buffer = nullptr; - LZ4::Stat stat; + LZ4::StreamStatistics stream_stat; + LZ4::PerformanceStatistics perf_stat; size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum) { @@ -82,7 +83,7 @@ protected: } else { - own_compressed_buffer.resize(size_compressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); + own_compressed_buffer.resize(size_compressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); compressed_buffer = &own_compressed_buffer[0]; compressed_in->readStrict(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, size_compressed - COMPRESSED_BLOCK_HEADER_SIZE); } @@ -97,7 +98,7 @@ protected: if (method == static_cast(CompressionMethodByte::LZ4)) { //LZ4::statistics(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed, stat); - LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed); + LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed, perf_stat); } else throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD); @@ -110,7 +111,8 @@ public: { } - LZ4::Stat getStatistics() const { return stat; } + LZ4::StreamStatistics getStreamStatistics() const { return stream_stat; } + LZ4::PerformanceStatistics getPerformanceStatistics() const { return perf_stat; } }; @@ -127,7 +129,7 @@ private: if (!size_compressed) return false; - memory.resize(size_decompressed + ADDITIONAL_BYTES_AT_END_OF_DECOMPRESSED_BUFFER); + memory.resize(size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); working_buffer = Buffer(&memory[0], &memory[size_decompressed]); decompress(working_buffer.begin(), size_decompressed, size_compressed_without_checksum); @@ -174,6 +176,19 @@ try // decompressing_in.getStatistics().print(); + LZ4::PerformanceStatistics perf_stat = decompressing_in.getPerformanceStatistics(); + + for (size_t i = 0; i < LZ4::PerformanceStatistics::NUM_ELEMENTS; ++i) + { + const LZ4::PerformanceStatistics::Element & elem = perf_stat.data[i]; + + std::cerr << "Variant " << i << ": " + << "count: " << elem.count + << ", mean ns/b: " << 1000000000.0 * elem.mean() + << ", sigma ns/b: " << 1000000000.0 * elem.sigma() + << "\n"; + } + return 0; } catch (...) From 846de8579916f54bc433518ca42d48c14971d98b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jan 2018 05:04:02 +0300 Subject: [PATCH 007/151] Continued experiments [#CLICKHOUSE-2] --- dbms/src/IO/LZ4_decompress_faster.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dbms/src/IO/LZ4_decompress_faster.h b/dbms/src/IO/LZ4_decompress_faster.h index a15f2662aa3..ecb7003c493 100644 --- a/dbms/src/IO/LZ4_decompress_faster.h +++ b/dbms/src/IO/LZ4_decompress_faster.h @@ -9,15 +9,15 @@ namespace LZ4 { /** There are many implementation details of LZ4 decompression loop, that affect performance. - * For example: copy by 8 or by 16 bytes at once; use shuffle instruction to replicate match or not. + * For example: copy by 8 or by 16 (SSE2) bytes at once; use shuffle (SSSE3) instruction to replicate match or not. * - * The optimal algorithm is dependent: + * The optimal algorithm is dependent on: * - * - on CPU architecture + * 1. CPU architecture. 
* (example: on Skylake it's almost always better to copy by 16 bytes and use shuffle,
* but on Westmere using shuffle is worse and copy by 16 bytes is better only for high compression ratios)
*
- * - on data distribution
+ * 2. Data distribution.
* (example: when compression ratio is higher than 10.20,
* it's usually better to copy by 16 bytes rather than 8).
*
@@ -49,22 +49,22 @@ struct PerformanceStatistics
double count = 0;
double sum = 0;

- double adjusted_count() const
+ double adjustedCount() const
{
return count - NUM_INVOCATIONS_TO_THROW_OFF;
}

double mean() const
{
- return sum / adjusted_count();
+ return sum / adjustedCount();
}

/// For better convergence, we don't use proper estimate of stddev.
- /// We want to eventually choose between two algorithms even in case
+ /// We want to eventually distinguish between two algorithms even in case
/// when there is no statistically significant difference between them.
double sigma() const
{
- return mean() / sqrt(adjusted_count());
+ return mean() / sqrt(adjustedCount());
}

void update(double seconds, double bytes)
{
@@ -80,8 +80,8 @@ struct PerformanceStatistics
/// If there is a variant with not enough statistics, always choose it.
/// And in that case prefer the variant with the smallest number of invocations.

- if (adjusted_count() < 2)
- return adjusted_count() - 1;
+ if (adjustedCount() < 2)
+ return adjustedCount() - 1;
else
return std::normal_distribution<>(mean(), sigma())(rng);
}

From eb8f41a3e343b3bddfd1bd2303a316ed627b6ca9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 16 Jan 2018 05:04:53 +0300
Subject: [PATCH 008/151] Continued experiments [#CLICKHOUSE-2]

---
 dbms/src/IO/LZ4_decompress_faster.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/dbms/src/IO/LZ4_decompress_faster.cpp b/dbms/src/IO/LZ4_decompress_faster.cpp
index 459647408f3..25316fee5a6 100644
--- a/dbms/src/IO/LZ4_decompress_faster.cpp
+++ b/dbms/src/IO/LZ4_decompress_faster.cpp
@@ -316,11 +316,6 @@ void decompress(
{
decompressImpl<8, false>(source, dest, dest_size);
}
-
-/* if (dest_size / source_size >= 16)
- decompressImpl<16, true>(source, dest, dest_size);
- else
- decompressImpl<8, true>(source, dest, dest_size);*/
}

From f830821c483944e3ca50938630242bf1ed7c2947 Mon Sep 17 00:00:00 2001
From: Jonatas Freitas
Date: Sun, 10 Jun 2018 20:39:15 -0300
Subject: [PATCH 009/151] Update third-party_gui.md

---
 docs/en/interfaces/third-party_gui.md | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/docs/en/interfaces/third-party_gui.md b/docs/en/interfaces/third-party_gui.md
index d6b7234e6d0..b97985ebdc7 100644
--- a/docs/en/interfaces/third-party_gui.md
+++ b/docs/en/interfaces/third-party_gui.md
@@ -19,11 +19,16 @@ Web interface for ClickHouse in the [Tabix](https://github.com/tabixio/tabix) pr

[HouseOps](https://github.com/HouseOps/HouseOps) is a unique Desktop ClickHouse Ops UI / IDE for OSX, Linux and Windows.

### Features:
-- Query builder;
+- Query builder with syntax highlighting; responses viewable as a table or a JSON object.
+- Export results in CSV and JSON formats.
+- Processes List with descriptions, Record mode and a Kill processes feature.
+- Database Graph with all tables and columns, with extra information.
+- Easily view your column sizes.
+- Server settings.
- Database manangement (soon);
- Users manangement (soon);
- Real-Time Data Analytics (soon);
- Cluster/Infra monitoring (soon);
- Cluster manangement (soon);
- Kafka and Replicated tables monitoring (soon);
-- And a lot of others features (soon) for you take a beautiful implementation of ClickHouse.
+- And a lot of other features for you to enjoy a beautiful implementation of ClickHouse.

From 619225323dfea59f180c099441fe885b54310515 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 11 Jun 2018 08:02:20 +0300
Subject: [PATCH 010/151] Added ASLR to aid debugging #1890

---
 dbms/src/Common/Allocator.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Common/Allocator.cpp b/dbms/src/Common/Allocator.cpp
index 5c653a9a1c9..74525a57c93 100644
--- a/dbms/src/Common/Allocator.cpp
+++ b/dbms/src/Common/Allocator.cpp
@@ -11,6 +11,7 @@
#include
#include
#include
+#include

#include

@@ -60,7 +61,10 @@ void * Allocator::alloc(size_t size, size_t alignment)
throw DB::Exception("Too large alignment " + formatReadableSizeWithBinarySuffix(alignment) + ": more than page size when allocating " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::BAD_ARGUMENTS);

- buf = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ /// ASLR by hand
+ void * address_hint = reinterpret_cast(0x100000000000UL + randomSeed() % 0x600000000000UL);
+
+ buf = mmap(address_hint, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno("Allocator: Cannot mmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);

From e9a0272f5656899c86fc974bad4e18cae5b16c70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=93=D0=B5=D0=BE=D1=80=D0=B3=D0=B8=D0=B9=20=D0=9A=D0=BE?= =?UTF-8?q?=D0=BD=D0=B4=D1=80=D0=B0=D1=82=D1=8C=D0=B5=D0=B2?=
Date: Mon, 11 Jun 2018 09:02:12 +0000
Subject: [PATCH 011/151] Search for mysqlclient in mysql subdirectories of default locations

---
 libs/libmysqlxx/cmake/find_mysqlclient.cmake | 29 ++++++++++----------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/libs/libmysqlxx/cmake/find_mysqlclient.cmake b/libs/libmysqlxx/cmake/find_mysqlclient.cmake
index d5167a78949..4b37aca436c 100644
--- a/libs/libmysqlxx/cmake/find_mysqlclient.cmake
+++ b/libs/libmysqlxx/cmake/find_mysqlclient.cmake
@@ -3,28 +3,27 @@ option (ENABLE_MYSQL "Enable MySQL" ON)
if (ENABLE_MYSQL)
set (MYSQL_LIB_PATHS
"/usr/local/opt/mysql/lib"
- "/usr/local/lib/mysql/"
- "/usr/local/lib/mysql"
- "/usr/local/lib64/mysql"
- "/usr/mysql/lib/mysql"
- "/usr/mysql/lib64/mysql"
- "/usr/lib/mysql"
- "/usr/lib64/mysql"
- "/lib/mysql"
- "/lib64/mysql")
+ "/usr/local/lib"
+ "/usr/local/lib64"
+ "/usr/mysql/lib"
+ "/usr/mysql/lib64"
+ "/usr/lib"
+ "/usr/lib64"
+ "/lib"
+ "/lib64")

set (MYSQL_INCLUDE_PATHS
"/usr/local/opt/mysql/include"
- "/usr/mysql/include/mysql"
- "/usr/local/include/mysql"
- "/usr/include/mysql")
+ "/usr/mysql/include"
+ "/usr/local/include"
+ "/usr/include")

find_path (MYSQL_INCLUDE_DIR NAMES mysql/mysql.h PATHS ${MYSQL_INCLUDE_PATHS} PATH_SUFFIXES mysql)
if (USE_STATIC_LIBRARIES)
- find_library (STATIC_MYSQLCLIENT_LIB mariadbclient mysqlclient PATHS ${MYSQL_LIB_PATHS})
+ find_library (STATIC_MYSQLCLIENT_LIB NAMES mariadbclient mysqlclient PATHS ${MYSQL_LIB_PATHS} PATH_SUFFIXES mysql)
else ()
- find_library (MYSQLCLIENT_LIBRARIES mariadbclient mysqlclient PATHS ${MYSQL_LIB_PATHS})
+
find_library (MYSQLCLIENT_LIBRARIES NAMES mariadbclient mysqlclient PATHS ${MYSQL_LIB_PATHS} PATH_SUFFIXES mysql) endif () if (MYSQL_INCLUDE_DIR AND (STATIC_MYSQLCLIENT_LIB OR MYSQLCLIENT_LIBRARIES)) From 0f529dbc6e949104625b91fefd32411893e6b51b Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 11 Jun 2018 15:13:00 +0300 Subject: [PATCH 012/151] Add table fuction url and storage URL --- dbms/src/Storages/StorageURL.cpp | 169 ++++++++++++++++++ dbms/src/Storages/StorageURL.h | 55 ++++++ dbms/src/Storages/registerStorages.cpp | 2 + .../TableFunctions/ITableFunctionFileLike.cpp | 70 ++++++++ .../TableFunctions/ITableFunctionFileLike.h | 19 ++ dbms/src/TableFunctions/TableFunctionFile.cpp | 91 ++-------- dbms/src/TableFunctions/TableFunctionFile.h | 24 +-- dbms/src/TableFunctions/TableFunctionURL.cpp | 19 ++ dbms/src/TableFunctions/TableFunctionURL.h | 26 +++ .../TableFunctions/registerTableFunctions.cpp | 2 + 10 files changed, 394 insertions(+), 83 deletions(-) create mode 100644 dbms/src/Storages/StorageURL.cpp create mode 100644 dbms/src/Storages/StorageURL.h create mode 100644 dbms/src/TableFunctions/ITableFunctionFileLike.cpp create mode 100644 dbms/src/TableFunctions/ITableFunctionFileLike.h create mode 100644 dbms/src/TableFunctions/TableFunctionURL.cpp create mode 100644 dbms/src/TableFunctions/TableFunctionURL.h diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp new file mode 100644 index 00000000000..6bce66a6711 --- /dev/null +++ b/dbms/src/Storages/StorageURL.cpp @@ -0,0 +1,169 @@ +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +}; + +StorageURL::StorageURL(const Poco::URI & uri_, + const std::string & table_name_, + const String & format_name_, + const ColumnsDescription & columns_, + Context & context_) + : IStorage(columns_), uri(uri_), format_name(format_name_), table_name(table_name_), context_global(context_) +{ +} + +namespace +{ + class StorageURLBlockInputStream : public IProfilingBlockInputStream + { + public: + StorageURLBlockInputStream(const Poco::URI & uri, + const String & format, + const String & name_, + const Block & sample_block, + const Context & context, + size_t max_block_size, + const ConnectionTimeouts & timeouts) + : name(name_) + { + read_buf = std::make_unique(uri, Poco::Net::HTTPRequest::HTTP_GET, nullptr, timeouts); + + reader = FormatFactory().getInput(format, *read_buf, sample_block, context, max_block_size); + } + + ~StorageURLBlockInputStream() override {} + + String getName() const override + { + return name; + } + + Block readImpl() override + { + return reader->read(); + } + + Block getHeader() const override + { + return reader->getHeader(); + } + void readPrefixImpl() override + { + reader->readPrefix(); + } + + void readSuffixImpl() override + { + reader->readSuffix(); + } + + private: + String name; + std::unique_ptr read_buf; + BlockInputStreamPtr reader; + }; + + class StorageURLBlockOutputStream : public IBlockOutputStream + { + public: + StorageURLBlockOutputStream(const Poco::URI & uri_, + const String & format_, + const Block & sample_block_, + Context & context_, + const ConnectionTimeouts & timeouts_) + : global_context(context_), uri(uri_), format(format_), sample_block(sample_block_), timeouts(timeouts_) + { + } + + ~StorageURLBlockOutputStream() {} + + Block getHeader() const override + { + return sample_block; + } + + void write(const 
Block & block) override
+ {
+ ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [&](std::ostream & ostr) {
+ WriteBufferFromOStream out_buffer(ostr);
+ auto writer = FormatFactory().getOutput(format, out_buffer, sample_block, global_context);
+ writer->writePrefix();
+ writer->write(block);
+ writer->writeSuffix();
+ writer->flush();
+ };
+ ReadWriteBufferFromHTTP(uri, Poco::Net::HTTPRequest::HTTP_POST, out_stream_callback, timeouts); // a temporary object, constructed just to issue the request
+ }
+
+ private:
+ Context & global_context;
+ Poco::URI uri;
+ String format;
+ Block sample_block;
+ ConnectionTimeouts timeouts;
+ };
+}
+BlockInputStreams StorageURL::read(const Names & /*column_names*/,
+ const SelectQueryInfo & /*query_info*/,
+ const Context & context,
+ QueryProcessingStage::Enum & /*processed_stage*/,
+ size_t max_block_size,
+ unsigned /*num_streams*/)
+{
+ return {std::make_shared(uri,
+ format_name,
+ getName(),
+ getSampleBlock(),
+ context,
+ max_block_size,
+ ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef()))};
+}
+
+ void StorageURL::rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & /*new_table_name*/) {}
+
+BlockOutputStreamPtr StorageURL::write(const ASTPtr & /*query*/, const Settings & /*settings*/)
+{
+ return std::make_shared(
+ uri, format_name, getSampleBlock(), context_global, ConnectionTimeouts::getHTTPTimeouts(context_global.getSettingsRef()));
+}
+void registerStorageURL(StorageFactory & factory)
+{
+ factory.registerStorage("URL", [](const StorageFactory::Arguments & args) {
+ ASTs & engine_args = args.engine_args;
+
+ if (engine_args.size() != 2)
+ throw Exception(
+ "Storage URL requires exactly 2 arguments: url and the name of the used format.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+ engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context);
+
+ String url = static_cast(*engine_args[0]).value.safeGet();
+ Poco::URI uri(url);
+
+ engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.local_context);
+
+ String format_name = static_cast(*engine_args[1]).value.safeGet();
+
+ return StorageURL::create(uri, args.table_name, format_name, args.columns, args.context);
+ });
+}
+}
diff --git a/dbms/src/Storages/StorageURL.h b/dbms/src/Storages/StorageURL.h
new file mode 100644
index 00000000000..80bec4e8d35
--- /dev/null
+++ b/dbms/src/Storages/StorageURL.h
@@ -0,0 +1,55 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+/**
+ * This class represents a table engine for external URLs.
+ * It sends an HTTP GET to the server when select is called and
+ * an HTTP POST when insert is called. In POST requests the data is sent
+ * using chunked transfer encoding, so the server has to support it.
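+ * Note: each block passed to 'write' is sent to the server as a separate POST request.
+ * Example (illustrative names): CREATE TABLE t (s String, n UInt32) ENGINE = URL('http://127.0.0.1:1234/', 'CSV')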
+ */ +class StorageURL : public ext::shared_ptr_helper, public IStorage +{ +public: + String getName() const override + { + return "URL"; + } + + String getTableName() const override + { + return table_name; + } + + BlockInputStreams read(const Names & column_names, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum & processed_stage, + size_t max_block_size, + unsigned num_streams) override; + + BlockOutputStreamPtr write(const ASTPtr & query, const Settings & settings) override; + + void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override; + +protected: + StorageURL(const Poco::URI & uri_, + const std::string & table_name_, + const String & format_name_, + const ColumnsDescription & columns_, + Context & context_); + +private: + Poco::URI uri; + String format_name; + String table_name; + Context & context_global; + + Logger * log = &Logger::get("StorageURL"); +}; +} diff --git a/dbms/src/Storages/registerStorages.cpp b/dbms/src/Storages/registerStorages.cpp index 651146eee99..182a3f8016a 100644 --- a/dbms/src/Storages/registerStorages.cpp +++ b/dbms/src/Storages/registerStorages.cpp @@ -17,6 +17,7 @@ void registerStorageBuffer(StorageFactory & factory); void registerStorageDistributed(StorageFactory & factory); void registerStorageMemory(StorageFactory & factory); void registerStorageFile(StorageFactory & factory); +void registerStorageURL(StorageFactory & factory); void registerStorageDictionary(StorageFactory & factory); void registerStorageSet(StorageFactory & factory); void registerStorageJoin(StorageFactory & factory); @@ -50,6 +51,7 @@ void registerStorages() registerStorageDistributed(factory); registerStorageMemory(factory); registerStorageFile(factory); + registerStorageURL(factory); registerStorageDictionary(factory); registerStorageSet(factory); registerStorageJoin(factory); diff --git a/dbms/src/TableFunctions/ITableFunctionFileLike.cpp b/dbms/src/TableFunctions/ITableFunctionFileLike.cpp new file mode 100644 index 00000000000..58fc1835403 --- /dev/null +++ b/dbms/src/TableFunctions/ITableFunctionFileLike.cpp @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & ast_function, const Context & context) const +{ + // Parse args + ASTs & args_func = typeid_cast(*ast_function).children; + + if (args_func.size() != 1) + throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); + + ASTs & args = typeid_cast(*args_func.at(0)).children; + + if (args.size() != 3) + throw Exception("Table function '" + getName() + "' requires exactly 3 arguments: source, format and structure.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + for (size_t i = 0; i < 3; ++i) + args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context); + + std::string source = static_cast(*args[0]).value.safeGet(); + std::string format = static_cast(*args[1]).value.safeGet(); + std::string structure = static_cast(*args[2]).value.safeGet(); + + // Create sample block + std::vector structure_vals; + boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); + + if (structure_vals.size() % 2 != 0) + throw Exception("Odd number of elements in 
section structure: must be a list of name type pairs", ErrorCodes::LOGICAL_ERROR); + + Block sample_block; + const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); + + for (size_t i = 0, size = structure_vals.size(); i < size; i += 2) + { + ColumnWithTypeAndName column; + column.name = structure_vals[i]; + column.type = data_type_factory.get(structure_vals[i + 1]); + column.column = column.type->createColumn(); + sample_block.insert(std::move(column)); + } + + // Create table + StoragePtr storage = getStorage(source, format, sample_block, const_cast(context)); + + storage->startup(); + + return storage; +} +} diff --git a/dbms/src/TableFunctions/ITableFunctionFileLike.h b/dbms/src/TableFunctions/ITableFunctionFileLike.h new file mode 100644 index 00000000000..70637946808 --- /dev/null +++ b/dbms/src/TableFunctions/ITableFunctionFileLike.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +/* + * function(source, format, structure) - creates a temporary storage from formated source + */ +class ITableFunctionFileLike : public ITableFunction +{ +private: + StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; + virtual StoragePtr getStorage( + const String & source, const String & format, const Block & sample_block, Context & global_context) const = 0; +}; +} diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index e10ebfe2a50..89531096d35 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -1,78 +1,23 @@ -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include +#include +#include namespace DB { - namespace ErrorCodes - { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int DATABASE_ACCESS_DENIED; - } - - StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const - { - // Parse args - ASTs & args_func = typeid_cast(*ast_function).children; - - if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); - - ASTs & args = typeid_cast(*args_func.at(0)).children; - - if (args.size() != 3) - throw Exception("Table function '" + getName() + "' requires exactly 3 arguments: path, format and structure.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - for (size_t i = 0; i < 3; ++i) - args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context); - - std::string path = static_cast(*args[0]).value.safeGet(); - std::string format = static_cast(*args[1]).value.safeGet(); - std::string structure = static_cast(*args[2]).value.safeGet(); - - // Create sample block - std::vector structure_vals; - boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); - - if (structure_vals.size() % 2 != 0) - throw Exception("Odd number of elements in section structure: must be a list of name type pairs", ErrorCodes::LOGICAL_ERROR); - - Block sample_block; - const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); - - for (size_t i = 0, size = structure_vals.size(); i < size; i += 2) - { - ColumnWithTypeAndName column; - column.name = structure_vals[i]; - column.type = data_type_factory.get(structure_vals[i + 1]); - column.column = column.type->createColumn(); - sample_block.insert(std::move(column)); - } - - // Create table - StoragePtr 
storage = StorageFile::create( - path, -1, context.getUserFilesPath(), getName(), format, - ColumnsDescription{sample_block.getNamesAndTypesList()}, const_cast(context)); - - storage->startup(); - - return storage; - } - - - void registerTableFunctionFile(TableFunctionFactory & factory) - { - factory.registerFunction(); - } - +StoragePtr TableFunctionFile::getStorage( + const String & source, const String & format, const Block & sample_block, Context & global_context) const +{ + return StorageFile::create(source, + -1, + global_context.getUserFilesPath(), + getName(), + format, + ColumnsDescription{sample_block.getNamesAndTypesList()}, + global_context); +} + +void registerTableFunctionFile(TableFunctionFactory & factory) +{ + factory.registerFunction(); +} } diff --git a/dbms/src/TableFunctions/TableFunctionFile.h b/dbms/src/TableFunctions/TableFunctionFile.h index d958a05937f..56cd5002ba1 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.h +++ b/dbms/src/TableFunctions/TableFunctionFile.h @@ -1,25 +1,29 @@ #pragma once -#include +#include +#include +#include namespace DB { - /* file(path, format, structure) - creates a temporary storage from file * * * The file must be in the clickhouse data directory. * The relative path begins with the clickhouse data directory. */ - class TableFunctionFile : public ITableFunction +class TableFunctionFile : public ITableFunctionFileLike +{ +public: + static constexpr auto name = "file"; + std::string getName() const override { - public: - static constexpr auto name = "file"; - std::string getName() const override { return name; } - private: - StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; - }; - + return name; + } +private: + StoragePtr getStorage( + const String & source, const String & format, const Block & sample_block, Context & global_context) const override; +}; } diff --git a/dbms/src/TableFunctions/TableFunctionURL.cpp b/dbms/src/TableFunctions/TableFunctionURL.cpp new file mode 100644 index 00000000000..f33e5a92cb3 --- /dev/null +++ b/dbms/src/TableFunctions/TableFunctionURL.cpp @@ -0,0 +1,19 @@ +#include +#include +#include +#include + +namespace DB +{ +StoragePtr TableFunctionURL::getStorage( + const String & source, const String & format, const Block & sample_block, Context & global_context) const +{ + Poco::URI uri(source); + return StorageURL::create(uri, getName(), format, ColumnsDescription{sample_block.getNamesAndTypesList()}, global_context); +} + +void registerTableFunctionURL(TableFunctionFactory & factory) +{ + factory.registerFunction(); +} +} diff --git a/dbms/src/TableFunctions/TableFunctionURL.h b/dbms/src/TableFunctions/TableFunctionURL.h new file mode 100644 index 00000000000..edc9a9fa4c8 --- /dev/null +++ b/dbms/src/TableFunctions/TableFunctionURL.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +/* url(source, format, structure) - creates a temporary storage from url + * + */ +class TableFunctionURL : public ITableFunctionFileLike +{ +public: + static constexpr auto name = "url"; + std::string getName() const override + { + return name; + } + +private: + StoragePtr getStorage( + const String & source, const String & format, const Block & sample_block, Context & global_context) const override; +}; +} diff --git a/dbms/src/TableFunctions/registerTableFunctions.cpp b/dbms/src/TableFunctions/registerTableFunctions.cpp index 0858b44cbb0..8e2128fe908 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.cpp +++ 
b/dbms/src/TableFunctions/registerTableFunctions.cpp @@ -12,6 +12,7 @@ void registerTableFunctionShardByHash(TableFunctionFactory & factory); void registerTableFunctionNumbers(TableFunctionFactory & factory); void registerTableFunctionCatBoostPool(TableFunctionFactory & factory); void registerTableFunctionFile(TableFunctionFactory & factory); +void registerTableFunctionURL(TableFunctionFactory & factory); #if USE_POCO_SQLODBC || USE_POCO_DATAODBC void registerTableFunctionODBC(TableFunctionFactory & factory); @@ -32,6 +33,7 @@ void registerTableFunctions() registerTableFunctionNumbers(factory); registerTableFunctionCatBoostPool(factory); registerTableFunctionFile(factory); + registerTableFunctionURL(factory); #if USE_POCO_SQLODBC || USE_POCO_DATAODBC registerTableFunctionODBC(factory); From eb83b2046b3ba14ef64b9973377ae4615c1477da Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Jun 2018 06:25:54 +0300 Subject: [PATCH 013/151] Fixed potential issue #1890 --- dbms/src/IO/CachedCompressedReadBuffer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/IO/CachedCompressedReadBuffer.cpp b/dbms/src/IO/CachedCompressedReadBuffer.cpp index 83eb6c23e28..a9f6a5d778c 100644 --- a/dbms/src/IO/CachedCompressedReadBuffer.cpp +++ b/dbms/src/IO/CachedCompressedReadBuffer.cpp @@ -48,20 +48,20 @@ bool CachedCompressedReadBuffer::nextImpl() if (owned_cell->compressed_size) { owned_cell->data.resize(size_decompressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); - decompress(owned_cell->data.m_data, size_decompressed, size_compressed_without_checksum); + decompress(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum); /// Put data into cache. cache->set(key, owned_cell); } } - if (owned_cell->data.m_size == 0) + if (owned_cell->data.size() == 0) { owned_cell = nullptr; return false; } - working_buffer = Buffer(owned_cell->data.m_data, owned_cell->data.m_data + owned_cell->data.m_size); + working_buffer = Buffer(owned_cell->data.data(), owned_cell->data.data() + owned_cell->data.size() - LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); file_pos += owned_cell->compressed_size; From 2a30db6b44ff26564c6f7e3aee974fd00bb0a4ba Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Jun 2018 06:26:24 +0300 Subject: [PATCH 014/151] Whitespace [#CLICKHOUSE-2] --- dbms/src/Interpreters/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 1b41c92333f..c4d0d7654e6 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -90,7 +90,7 @@ struct Settings \ M(SettingUInt64, merge_tree_min_rows_for_concurrent_read, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized.") \ M(SettingUInt64, merge_tree_min_rows_for_seek, 0, "You can skip reading more than that number of rows at the price of one seek per file.") \ - M(SettingUInt64, merge_tree_coarse_index_granularity, 8, "If the index segment can contain the required keys, divide it into as many parts and recursively check them. ") \ + M(SettingUInt64, merge_tree_coarse_index_granularity, 8, "If the index segment can contain the required keys, divide it into as many parts and recursively check them.") \ M(SettingUInt64, merge_tree_max_rows_to_use_cache, (1024 * 1024), "The maximum number of rows per request, to use the cache of uncompressed data. If the request is large, the cache is not used. 
(For large queries not to flush out the cache.)") \
\
M(SettingBool, merge_tree_uniform_read_distribution, true, "Distribute read from MergeTree over threads evenly, ensuring stable average execution time of each thread within one read operation.") \

From d560db65f666fae90ee591693dc60e6852d0ba97 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 12 Jun 2018 06:32:48 +0300
Subject: [PATCH 015/151] Fixed weird error #1890

---
 dbms/src/IO/LZ4_decompress_faster.cpp | 112 +++++++++++++++++++++++++-
 1 file changed, 108 insertions(+), 4 deletions(-)

diff --git a/dbms/src/IO/LZ4_decompress_faster.cpp b/dbms/src/IO/LZ4_decompress_faster.cpp
index 25316fee5a6..adddb075fa7 100644
--- a/dbms/src/IO/LZ4_decompress_faster.cpp
+++ b/dbms/src/IO/LZ4_decompress_faster.cpp
@@ -52,6 +52,8 @@ inline void wildCopy8(UInt8 * dst, const UInt8 * src, UInt8 * dst_end)
inline void copyOverlap8(UInt8 * op, const UInt8 *& match, const size_t offset)
{
/// 4 % n.
+ /// Or if 4 % n is zero, we use n.
+ /// It gives an equivalent result, but is more CPU-friendly for an unknown reason.
static constexpr int shift1[] = { 0, 1, 2, 1, 4, 4, 4, 4 };

/// 8 % n - 4 % n
@@ -67,11 +69,89 @@ inline void copyOverlap8(UInt8 * op, const UInt8 *& match, const size_t offset)
match += shift2[offset];
}

+
+/** We use 'xmm' (128bit SSE) registers here to shuffle 16 bytes.
+ *
+ * It is possible to use 'mm' (64bit MMX) registers to shuffle just 8 bytes as we need.
+ *
+ * There is a corresponding version of the 'pshufb' instruction that operates on 'mm' registers,
+ * (it operates on MMX registers although it is available in SSSE3)
+ * and the compiler library has the corresponding intrinsic: '_mm_shuffle_pi8'.
+ *
+ * It can be done like this:
+ *
+ * unalignedStore(op, _mm_shuffle_pi8(
+ * unalignedLoad<__m64>(match),
+ * unalignedLoad<__m64>(masks + 8 * offset)));
+ *
+ * This is perfectly correct and this code has the same or even better performance.
+ *
+ * But if we write code this way, it will lead to
+ * extremely weird and extremely non-obvious
+ * effects in completely unrelated parts of code.
+ *
+ * Because using MMX registers alters the mode of operation of the x87 FPU,
+ * and then operations with the FPU become broken.
+ *
+ * Example 1.
+ * Compile this code without optimizations:
+
 #include
 #include
 #include
 #include

 int main(int, char **)
 {
 [[maybe_unused]] __m64 shuffled = _mm_shuffle_pi8(__m64{}, __m64{});

 std::vector vec;
 std::unordered_set set(vec.begin(), vec.end());

 std::cerr << set.size() << "\n";
 return 0;
 }

 $ g++ -g -O0 -mssse3 -std=c++17 mmx_bug1.cpp && ./a.out
 terminate called after throwing an instance of 'std::bad_alloc'
 what(): std::bad_alloc

 Also reproduced with clang. But only with libstdc++, not with libc++.

+ * Example 2.

 #include
 #include
 #include

 int main(int, char **)
 {
 double max_fill = 1;

 std::cerr << (long double)max_fill << "\n";
 [[maybe_unused]] __m64 shuffled = _mm_shuffle_pi8(__m64{}, __m64{});
 std::cerr << (long double)max_fill << "\n";

 return 0;
 }

 $ g++ -g -O0 -mssse3 -std=c++17 mmx_bug2.cpp && ./a.out
 1
 -nan

+ * Explanation:
+ *
+ * https://stackoverflow.com/questions/33692969/assembler-mmx-errors
+ * https://software.intel.com/en-us/node/524274
+ *
+ * Actually it's possible to use the 'emms' instruction after the decompression routine.
+ * But it's easier to just use 'xmm' registers and avoid using 'mm' registers.
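+ * (The 'emms' instruction marks the x87 register stack empty again after MMX use; forgetting it is what makes subsequent floating point operations return garbage, as the -nan in Example 2 shows.)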
+ */ inline void copyOverlap8Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset) { #ifdef __SSSE3__ - static constexpr UInt8 __attribute__((__aligned__(8))) masks[] = + static constexpr UInt8 __attribute__((__aligned__(16))) masks[] = { 0, 1, 2, 2, 4, 3, 2, 1, /* offset = 0, not used as mask, but for shift amount instead */ 0, 0, 0, 0, 0, 0, 0, 0, /* offset = 1 */ @@ -81,11 +161,13 @@ inline void copyOverlap8Shuffle(UInt8 * op, const UInt8 *& match, const size_t o 0, 1, 2, 3, 4, 0, 1, 2, 0, 1, 2, 3, 4, 5, 0, 1, 0, 1, 2, 3, 4, 5, 6, 0, + 0, 0, 0, 0, 0, 0, 0, 0, /* this row is not used: padding to allow read 16 bytes starting at previous row */ }; - unalignedStore(op, _mm_shuffle_pi8( - unalignedLoad<__m64>(match), - unalignedLoad<__m64>(masks + 8 * offset))); + _mm_storeu_si128(reinterpret_cast<__m128i *>(op), + _mm_shuffle_epi8( + _mm_loadu_si128(reinterpret_cast(match)), + _mm_load_si128(reinterpret_cast(masks + 8 * offset)))); match += masks[offset]; @@ -225,6 +307,17 @@ void NO_INLINE decompressImpl( UInt8 * copy_end = op + length; + /// input: Hello, world + /// ^-ip + /// output: xyz + /// ^-op ^-copy_end + /// output: xyzHello, w + /// ^- excessive copied bytes due to "wildCopy" + /// input: Hello, world + /// ^-ip + /// output: xyzHello, w + /// ^-op (we will overwrite excessive bytes on next iteration) + wildCopy(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer. ip += length; @@ -256,6 +349,17 @@ void NO_INLINE decompressImpl( if (unlikely(offset < copy_amount)) { + /// output: Hello + /// ^-op + /// ^-match; offset = 5 + /// + /// output: Hello + /// [------] - copy_amount bytes + /// [------] - copy them here + /// + /// output: HelloHelloHel + /// ^-match ^-op + copyOverlap(op, match, offset); } else From 24b0f77a2cfbb3b910e720e9d38d2cea9f0354ca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Jun 2018 06:33:26 +0300 Subject: [PATCH 016/151] Revert "Added ASLR to aid debugging #1890" This reverts commit 619225323dfea59f180c099441fe885b54310515. 
--- dbms/src/Common/Allocator.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dbms/src/Common/Allocator.cpp b/dbms/src/Common/Allocator.cpp index 74525a57c93..5c653a9a1c9 100644 --- a/dbms/src/Common/Allocator.cpp +++ b/dbms/src/Common/Allocator.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include @@ -61,10 +60,7 @@ void * Allocator::alloc(size_t size, size_t alignment) throw DB::Exception("Too large alignment " + formatReadableSizeWithBinarySuffix(alignment) + ": more than page size when allocating " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::BAD_ARGUMENTS); - /// ASLR by hand - void * address_hint = reinterpret_cast(0x100000000000UL + randomSeed() % 0x600000000000UL); - - buf = mmap(address_hint, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + buf = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == buf) DB::throwFromErrno("Allocator: Cannot mmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); From 777ace12b40fd5825e725d6c5b901b53f84e8031 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Jun 2018 06:44:53 +0300 Subject: [PATCH 017/151] Added test #1890 --- dbms/tests/queries/0_stateless/00646_weird_mmx.reference | 1 + dbms/tests/queries/0_stateless/00646_weird_mmx.sql | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00646_weird_mmx.reference create mode 100644 dbms/tests/queries/0_stateless/00646_weird_mmx.sql diff --git a/dbms/tests/queries/0_stateless/00646_weird_mmx.reference b/dbms/tests/queries/0_stateless/00646_weird_mmx.reference new file mode 100644 index 00000000000..9cf791085ad --- /dev/null +++ b/dbms/tests/queries/0_stateless/00646_weird_mmx.reference @@ -0,0 +1 @@ +4105 diff --git a/dbms/tests/queries/0_stateless/00646_weird_mmx.sql b/dbms/tests/queries/0_stateless/00646_weird_mmx.sql new file mode 100644 index 00000000000..f4af386c5d8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00646_weird_mmx.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS test.weird_mmx; + +CREATE TABLE test.weird_mmx (x Array(UInt64)) ENGINE = TinyLog; +-- this triggers overlapping matches in LZ4 decompression routine; 915 is the minimum number +-- see comment in LZ4_decompression_faster.cpp about usage of MMX registers +INSERT INTO test.weird_mmx SELECT range(number % 10) FROM system.numbers LIMIT 915; +SELECT sum(length(*)) FROM test.weird_mmx; + +DROP TABLE test.weird_mmx; From cf94f609f69337b9302bbf155340beb8a5d8bb41 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Jun 2018 07:18:01 +0300 Subject: [PATCH 018/151] Fixed error #1890 --- dbms/src/IO/LZ4_decompress_faster.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/IO/LZ4_decompress_faster.cpp b/dbms/src/IO/LZ4_decompress_faster.cpp index adddb075fa7..fd52168982f 100644 --- a/dbms/src/IO/LZ4_decompress_faster.cpp +++ b/dbms/src/IO/LZ4_decompress_faster.cpp @@ -151,7 +151,7 @@ inline void copyOverlap8Shuffle(UInt8 * op, const UInt8 *& match, const size_t o { #ifdef __SSSE3__ - static constexpr UInt8 __attribute__((__aligned__(16))) masks[] = + static constexpr UInt8 __attribute__((__aligned__(8))) masks[] = { 0, 1, 2, 2, 4, 3, 2, 1, /* offset = 0, not used as mask, but for shift amount instead */ 0, 0, 0, 0, 0, 0, 0, 0, /* offset = 1 */ @@ -167,7 +167,7 @@ inline void copyOverlap8Shuffle(UInt8 * op, const UInt8 *& match, const size_t o 
_mm_storeu_si128(reinterpret_cast<__m128i *>(op), _mm_shuffle_epi8( _mm_loadu_si128(reinterpret_cast(match)), - _mm_load_si128(reinterpret_cast(masks + 8 * offset)))); + _mm_loadu_si128(reinterpret_cast(masks + 8 * offset)))); match += masks[offset]; From e3d5a2860e082a4a8746647ed4a804111dfe275c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Jun 2018 08:04:16 +0300 Subject: [PATCH 019/151] Updated LZ4 performance testing tool #1890 --- dbms/src/IO/LZ4_decompress_faster.h | 26 +++++++++++++---- utils/compressor/decompress_perf.cpp | 43 ++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/dbms/src/IO/LZ4_decompress_faster.h b/dbms/src/IO/LZ4_decompress_faster.h index ecb7003c493..d05614b6831 100644 --- a/dbms/src/IO/LZ4_decompress_faster.h +++ b/dbms/src/IO/LZ4_decompress_faster.h @@ -71,7 +71,7 @@ struct PerformanceStatistics { ++count; - if (count > PerformanceStatistics::NUM_INVOCATIONS_TO_THROW_OFF) + if (count > NUM_INVOCATIONS_TO_THROW_OFF) sum += seconds / bytes; } @@ -93,6 +93,12 @@ struct PerformanceStatistics /// Cold invocations may be affected by additional memory latencies. Don't take first invocations into account. static constexpr double NUM_INVOCATIONS_TO_THROW_OFF = 2; + /// How to select method to run. + /// -1 - automatically, based on statistics (default); + /// 0..3 - always choose specified method (for performance testing); + /// -2 - choose methods in round robin fashion (for performance testing). + ssize_t choose_method = -1; + Element data[NUM_ELEMENTS]; pcg64 rng; @@ -101,12 +107,22 @@ struct PerformanceStatistics /// Sample random values from estimated normal distributions and choose the minimal. size_t select() { - double samples[NUM_ELEMENTS]; - for (size_t i = 0; i < NUM_ELEMENTS; ++i) - samples[i] = data[i].sample(rng); + if (choose_method < 0) + { + double samples[NUM_ELEMENTS]; + for (size_t i = 0; i < NUM_ELEMENTS; ++i) + samples[i] = choose_method == -1 + ? data[i].sample(rng) + : data[i].adjustedCount(); - return std::min_element(samples, samples + NUM_ELEMENTS) - samples; + return std::min_element(samples, samples + NUM_ELEMENTS) - samples; + } + else + return choose_method; } + + PerformanceStatistics() {} + PerformanceStatistics(ssize_t choose_method) : choose_method(choose_method) {} }; diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp index 1bea7387853..add6d4be56d 100644 --- a/utils/compressor/decompress_perf.cpp +++ b/utils/compressor/decompress_perf.cpp @@ -42,6 +42,11 @@ protected: /// Points to memory, holding compressed block. char * compressed_buffer = nullptr; + ssize_t variant; + + /// Variant for reference implementation of LZ4. + static constexpr ssize_t LZ4_REFERENCE = -3; + LZ4::StreamStatistics stream_stat; LZ4::PerformanceStatistics perf_stat; @@ -83,7 +88,7 @@ protected: } else { - own_compressed_buffer.resize(size_compressed + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); + own_compressed_buffer.resize(size_compressed + variant == LZ4_REFERENCE ? 
0 : LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); compressed_buffer = &own_compressed_buffer[0]; compressed_in->readStrict(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, size_compressed - COMPRESSED_BLOCK_HEADER_SIZE); } @@ -98,7 +103,14 @@ protected: if (method == static_cast(CompressionMethodByte::LZ4)) { //LZ4::statistics(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed, stat); - LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed, perf_stat); + + if (variant == LZ4_REFERENCE) + { + if (LZ4_decompress_fast(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed) < 0) + throw Exception("Cannot LZ4_decompress_fast", ErrorCodes::CANNOT_DECOMPRESS); + } + else + LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed, perf_stat); } else throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD); @@ -106,8 +118,8 @@ protected: public: /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. - FasterCompressedReadBufferBase(ReadBuffer * in = nullptr) - : compressed_in(in), own_compressed_buffer(COMPRESSED_BLOCK_HEADER_SIZE) + FasterCompressedReadBufferBase(ReadBuffer * in, ssize_t variant) + : compressed_in(in), own_compressed_buffer(COMPRESSED_BLOCK_HEADER_SIZE), variant(variant), perf_stat(variant) { } @@ -138,8 +150,8 @@ private: } public: - FasterCompressedReadBuffer(ReadBuffer & in_) - : FasterCompressedReadBufferBase(&in_), BufferWithOwnMemory(0) + FasterCompressedReadBuffer(ReadBuffer & in_, ssize_t method) + : FasterCompressedReadBufferBase(&in_, method), BufferWithOwnMemory(0) { } }; @@ -147,21 +159,28 @@ public: } -int main(int, char **) +int main(int argc, char ** argv) try { using namespace DB; + /** -3 - use reference implementation of LZ4 + * -2 - run all algorithms in round robin fashion + * -1 - automatically detect best algorithm based on statistics + * 0..3 - run specified algorithm + */ + ssize_t variant = argc < 2 ? -1 : parse(argv[1]); + ReadBufferFromFileDescriptor in(STDIN_FILENO); - FasterCompressedReadBuffer decompressing_in(in); + FasterCompressedReadBuffer decompressing_in(in, variant); WriteBufferFromFileDescriptor out(STDOUT_FILENO); - HashingWriteBuffer hashing_out(out); +// HashingWriteBuffer hashing_out(out); Stopwatch watch; - copyData(decompressing_in, hashing_out); + copyData(decompressing_in, /*hashing_*/out); watch.stop(); - auto hash = hashing_out.getHash(); +// auto hash = hashing_out.getHash(); double seconds = watch.elapsedSeconds(); std::cerr << std::fixed << std::setprecision(3) @@ -171,7 +190,7 @@ try << ", ratio: " << static_cast(decompressing_in.count()) / in.count() << ", " << formatReadableSizeWithBinarySuffix(in.count() / seconds) << "/sec. compressed" << ", " << formatReadableSizeWithBinarySuffix(decompressing_in.count() / seconds) << "/sec. 
decompressed" - << ", checksum: " << hash.first << "_" << hash.second +// << ", checksum: " << hash.first << "_" << hash.second << "\n"; // decompressing_in.getStatistics().print(); From 755310d6ff939823cea509a50eacbebcd3ca3baf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Jun 2018 08:06:23 +0300 Subject: [PATCH 020/151] Updated LZ4 performance testing tool #1890 --- utils/compressor/decompress_perf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp index add6d4be56d..08ff8e91c4c 100644 --- a/utils/compressor/decompress_perf.cpp +++ b/utils/compressor/decompress_perf.cpp @@ -88,7 +88,7 @@ protected: } else { - own_compressed_buffer.resize(size_compressed + variant == LZ4_REFERENCE ? 0 : LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); + own_compressed_buffer.resize(size_compressed + (variant == LZ4_REFERENCE ? 0 : LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER)); compressed_buffer = &own_compressed_buffer[0]; compressed_in->readStrict(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, size_compressed - COMPRESSED_BLOCK_HEADER_SIZE); } From ceef414cb85d41aa668fda095a90137ce4df5092 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 12 Jun 2018 18:59:43 +0300 Subject: [PATCH 021/151] Add simple tests for table function url and table function engine --- .../0_stateless/00646_url_engine.python | 171 ++++++++++++++++++ .../0_stateless/00646_url_engine.reference | 1 + .../queries/0_stateless/00646_url_engine.sh | 8 + 3 files changed, 180 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00646_url_engine.python create mode 100644 dbms/tests/queries/0_stateless/00646_url_engine.reference create mode 100755 dbms/tests/queries/0_stateless/00646_url_engine.sh diff --git a/dbms/tests/queries/0_stateless/00646_url_engine.python b/dbms/tests/queries/0_stateless/00646_url_engine.python new file mode 100644 index 00000000000..488c929a210 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00646_url_engine.python @@ -0,0 +1,171 @@ +#!/usr/bin/env python +from __future__ import print_function +import csv +import tempfile +import threading +import os, urllib +from io import StringIO +from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer + + +SERVER_ADDRESS = ('127.0.0.1', 51234) +SERVER_ADDRESS_STR = 'http://' + ':'.join(str(s) for s in SERVER_ADDRESS) + "/" +CSV_DATA = os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) + + +def get_ch_answer(query): + return urllib.urlopen(os.environ.get('CLICKHOUSE_URL', 'http://localhost:' + os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')), data=query).read() + +def check_answers(query, answer): + ch_answer = get_ch_answer(query) + if ch_answer.strip() != answer.strip(): + print("FAIL on query:", query) + print("Expected answer:", answer) + print("Fetched answer :", ch_answer) + raise Exception("Fail on query") + +class CSVHTTPServer(BaseHTTPRequestHandler): + def _set_headers(self): + self.send_response(200) + self.send_header('Content-type', 'text/csv') + self.end_headers() + + def do_GET(self): + self._set_headers() + with open(CSV_DATA, 'r') as fl: + reader = csv.reader(fl, delimiter=',') + for row in reader: + self.wfile.write(', '.join(row) + '\n') + return + + def read_chunk(self): + msg = '' + while True: + sym = self.rfile.read(1) + if sym == '': + break + msg += sym.decode('utf-8') + if msg.endswith('\r\n'): + break + length = int(msg[:-2], 16) + if length == 0: + return '' + content = self.rfile.read(length) + 
self.rfile.read(2) # read sep \r\n + return content.decode('utf-8') + + def do_POST(self): + data = '' + while True: + chunk = self.read_chunk() + if not chunk: + break + data += chunk + text = "" + with StringIO(data) as fl: + reader = csv.reader(fl, delimiter=',') + with open(CSV_DATA, 'a') as d: + for row in reader: + d.write(','.join(row) + '\n') + self._set_headers() + self.wfile.write("ok") + + def log_message(self, format, *args): + return + +def start_server(requests_amount): + httpd = HTTPServer(SERVER_ADDRESS, CSVHTTPServer) + + def real_func(): + for i in xrange(requests_amount): + httpd.handle_request() + + t = threading.Thread(target=real_func) + return t + +# test section + +def test_select(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests=[], answers=[], test_data=""): + with open(CSV_DATA, 'w') as f: # clear file + f.write('') + + if test_data: + with open(CSV_DATA, 'w') as f: + f.write(test_data + "\n") + + if table_name: + get_ch_answer("drop table if exists {}".format(table_name)) + get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, SERVER_ADDRESS_STR)) + + for i in xrange(len(requests)): + tbl = table_name + if not tbl: + tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=SERVER_ADDRESS_STR, schema=schema) + check_answers(requests[i].format(tbl=tbl), answers[i]) + + if table_name: + get_ch_answer("drop table if exists {}".format(table_name)) + +def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests_insert=[], requests_select=[], answers=[]): + with open(CSV_DATA, 'w') as f: # flush test file + f.write('') + + if table_name: + get_ch_answer("drop table if exists {}".format(table_name)) + get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, SERVER_ADDRESS_STR)) + + for req in requests_insert: + tbl = table_name + if not tbl: + tbl = "table function url('{addr}', 'CSV', '{schema}')".format(addr=SERVER_ADDRESS_STR, schema=schema) + get_ch_answer(req.format(tbl=tbl)) + + + for i in xrange(len(requests_select)): + tbl = table_name + if not tbl: + tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=SERVER_ADDRESS_STR, schema=schema) + check_answers(requests_select[i].format(tbl=tbl), answers[i]) + + if table_name: + get_ch_answer("drop table if exists {}".format(table_name)) + + +def main(): + test_data = "Hello,2,-2,7.7\nWorld,2,-5,8.8" + select_table_input = { + "select str,numuint,numint,double from {tbl}" : test_data.replace(',', '\t'), + "select numuint, count(*) from {tbl} group by numuint" : "2\t2", + "select str,numuint,numint,double from {tbl} limit 1": test_data.split("\n")[0].replace(',', '\t'), + } + + insert_requests = [ + "insert into {tbl} values('Hello',10,-2,7.7)('World',10,-5,7.7)", + "insert into {tbl} select 'Buy', number, 9-number, 9.9 from system.numbers limit 10", + ] + + select_requests = { + "select distinct numuint from {tbl} order by numuint": '\n'.join([str(i) for i in xrange(11)]), + "select count(*) from {tbl}": '12', + 'select double, count(*) from {tbl} group by double': "7.7\t2\n9.9\t10" + } + + t = start_server(len(select_table_input) * 2 + (len(insert_requests) + len(select_requests)) * 2) + t.start() + # test table with url engine + test_select(table_name="test_table_select", requests=select_table_input.keys(), answers=select_table_input.values(), test_data=test_data) + # test table function url + test_select(requests=select_table_input.keys(), answers=select_table_input.values(), 
test_data=test_data) + #test insert into table with url engine + test_insert(table_name="test_table_insert", requests_insert=insert_requests, requests_select=select_requests.keys(), answers=select_requests.values()) + #test insert into table function url + test_insert(requests_insert=insert_requests, requests_select=select_requests.keys(), answers=select_requests.values()) + t.join() + print("PASSED") + + +if __name__ == "__main__": + try: + main() + except: + os._exit(1) diff --git a/dbms/tests/queries/0_stateless/00646_url_engine.reference b/dbms/tests/queries/0_stateless/00646_url_engine.reference new file mode 100644 index 00000000000..53cdf1e9393 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00646_url_engine.reference @@ -0,0 +1 @@ +PASSED diff --git a/dbms/tests/queries/0_stateless/00646_url_engine.sh b/dbms/tests/queries/0_stateless/00646_url_engine.sh new file mode 100755 index 00000000000..e218a41c28b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00646_url_engine.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python $CURDIR/00646_url_engine.python From 7f294007cbdc2df819ca1e58c4409eec7926e82f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Jun 2018 03:51:23 +0300 Subject: [PATCH 022/151] Removed string size overestimation #2428 --- dbms/src/Columns/ColumnString.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index ecb3bc6f62e..5ba5d49ed81 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -5,9 +5,6 @@ #include #include -/// Used in the `reserve` method, when the number of rows is known, but sizes of elements are not. 
-#define APPROX_STRING_SIZE 64
-
 namespace DB
 {

@@ -260,7 +257,6 @@ void ColumnString::gather(ColumnGathererStream & gatherer)
 void ColumnString::reserve(size_t n)
 {
     offsets.reserve(n);
-    chars.reserve(n * APPROX_STRING_SIZE);
 }


From a4939b9a98940553eb8290e401546c89463cea6b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 13 Jun 2018 04:44:13 +0300
Subject: [PATCH 023/151] Updated performance testing tool #1890

---
 utils/compressor/decompress_perf.cpp | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp
index 08ff8e91c4c..30318269d9a 100644
--- a/utils/compressor/decompress_perf.cpp
+++ b/utils/compressor/decompress_perf.cpp
@@ -1,5 +1,6 @@
 #include
 #include
+#include <optional>
 #include
 #include
@@ -197,17 +198,34 @@ try

     LZ4::PerformanceStatistics perf_stat = decompressing_in.getPerformanceStatistics();

+    std::optional<size_t> best_variant;
+    double best_variant_mean = 0;
+
     for (size_t i = 0; i < LZ4::PerformanceStatistics::NUM_ELEMENTS; ++i)
     {
         const LZ4::PerformanceStatistics::Element & elem = perf_stat.data[i];

-        std::cerr << "Variant " << i << ": "
-            << "count: " << elem.count
-            << ", mean ns/b: " << 1000000000.0 * elem.mean()
-            << ", sigma ns/b: " << 1000000000.0 * elem.sigma()
-            << "\n";
+        if (elem.count)
+        {
+            double mean = elem.mean();
+
+            std::cerr << "Variant " << i << ": "
+                << "count: " << elem.count
+                << ", mean ns/b: " << 1000000000.0 * mean << " (" << formatReadableSizeWithBinarySuffix(1 / mean) << ")"
+                << ", sigma ns/b: " << 1000000000.0 * elem.sigma()
+                << "\n";
+
+            if (!best_variant || mean < best_variant_mean)
+            {
+                best_variant_mean = mean;
+                best_variant = i;
+            }
+        }
     }

+    if (best_variant)
+        std::cerr << "Best variant: " << *best_variant << "\n";
+
     return 0;
 }
 catch (...)
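Note on the patch above: the variant selection it adds reduces to an argmin over the per-variant mean cost (ns per byte), skipping variants that never ran so that mean() cannot divide by zero. A condensed, self-contained sketch of the same idea; the VariantStats struct and the sample numbers are illustrative stand-ins, not ClickHouse code:

    #include <iostream>
    #include <optional>
    #include <vector>

    /// Illustrative stand-in for a per-variant timing accumulator.
    struct VariantStats
    {
        size_t count = 0;
        double total_ns_per_byte = 0;

        double mean() const { return total_ns_per_byte / count; }
    };

    int main()
    {
        /// Hypothetical measurements: variant 1 never ran and must be skipped.
        std::vector<VariantStats> stats{{10, 4.2}, {0, 0.0}, {10, 3.7}};

        std::optional<size_t> best_variant;
        double best_variant_mean = 0;

        for (size_t i = 0; i < stats.size(); ++i)
        {
            if (!stats[i].count)
                continue;   /// Never-exercised variants have no meaningful mean.

            double mean = stats[i].mean();
            if (!best_variant || mean < best_variant_mean)
            {
                best_variant_mean = mean;
                best_variant = i;
            }
        }

        if (best_variant)
            std::cout << "Best variant: " << *best_variant << "\n";   /// Prints 2.
        return 0;
    }

Printing the winner only when best_variant is set mirrors the patch: if no variant collected any samples, the std::optional stays empty and no misleading "best" is reported.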
From fd7949a5c6f16a320b8acc3a0e126ada6dc6a8fa Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 13 Jun 2018 04:46:23 +0300
Subject: [PATCH 024/151] Updated performance testing tool #1890

---
 utils/compressor/decompress_perf.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp
index 30318269d9a..a40ada173d2 100644
--- a/utils/compressor/decompress_perf.cpp
+++ b/utils/compressor/decompress_perf.cpp
@@ -211,7 +211,7 @@ try

             std::cerr << "Variant " << i << ": "
                 << "count: " << elem.count
-                << ", mean ns/b: " << 1000000000.0 * mean << " (" << formatReadableSizeWithBinarySuffix(1 / mean) << ")"
+                << ", mean ns/b: " << 1000000000.0 * mean << " (" << formatReadableSizeWithBinarySuffix(1 / mean) << "/sec.)"
                 << ", sigma ns/b: " << 1000000000.0 * elem.sigma()
                 << "\n";


From 4c8f2eb625fc3e99f9154b9620c943d22a141598 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 13 Jun 2018 04:50:33 +0300
Subject: [PATCH 025/151] Updated performance testing tool #1890

---
 utils/compressor/decompress_perf.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp
index a40ada173d2..a3dd8481e5d 100644
--- a/utils/compressor/decompress_perf.cpp
+++ b/utils/compressor/decompress_perf.cpp
@@ -174,11 +174,13 @@ try

     ReadBufferFromFileDescriptor in(STDIN_FILENO);
     FasterCompressedReadBuffer decompressing_in(in, variant);
-    WriteBufferFromFileDescriptor out(STDOUT_FILENO);
+//    WriteBufferFromFileDescriptor out(STDOUT_FILENO);
 //    HashingWriteBuffer hashing_out(out);

     Stopwatch watch;
-    copyData(decompressing_in, /*hashing_*/out);
+//    copyData(decompressing_in, /*hashing_*/out);
+    while (!in.eof())
+        in.next();
     watch.stop();

 //    auto hash = hashing_out.getHash();

From 880807da5d7514b2fa4933575380c71c1171c368 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 13 Jun 2018 04:51:15 +0300
Subject: [PATCH 026/151] Updated performance testing tool #1890

---
 utils/compressor/decompress_perf.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp
index a3dd8481e5d..4631c850a72 100644
--- a/utils/compressor/decompress_perf.cpp
+++ b/utils/compressor/decompress_perf.cpp
@@ -179,8 +179,8 @@ try

     Stopwatch watch;
 //    copyData(decompressing_in, /*hashing_*/out);
-    while (!in.eof())
-        in.next();
+    while (!decompressing_in.eof())
+        decompressing_in.next();
     watch.stop();

 //    auto hash = hashing_out.getHash();

From 37ffa4c948dabfa488a63bb0ed3e25ff9915fb73 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 13 Jun 2018 04:52:52 +0300
Subject: [PATCH 027/151] Updated performance testing tool #1890

---
 utils/compressor/decompress_perf.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp
index 4631c850a72..0c284c5e378 100644
--- a/utils/compressor/decompress_perf.cpp
+++ b/utils/compressor/decompress_perf.cpp
@@ -180,7 +180,10 @@ try
     Stopwatch watch;
 //    copyData(decompressing_in, /*hashing_*/out);
     while (!decompressing_in.eof())
+    {
+        decompressing_in.position() = decompressing_in.buffer().end();
         decompressing_in.next();
+    }
     watch.stop();

 //    auto hash = hashing_out.getHash();

From 397ac0fb245dd2aaa2db9d5818f3a7aa09ba2f56 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 13 Jun 2018 05:52:03 +0300
Subject: [PATCH 028/151] Updated performance testing tool #1890
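Note on the patch below: it introduces mmap-backed read buffers so the benchmark can treat the input file as an in-memory region instead of pulling it through a userspace buffer. A minimal POSIX sketch of the underlying system calls, with error handling reduced to perror; this is not the patch's class, only the skeleton it builds on:

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>

    #include <cstdio>

    int main(int argc, char ** argv)
    {
        if (argc < 2)
            return 1;

        int fd = open(argv[1], O_RDONLY);
        if (fd == -1)
            return std::perror("open"), 1;

        struct stat st {};
        if (0 != fstat(fd, &st))
            return std::perror("fstat"), 1;

        if (st.st_size > 0)   /// mmap of length 0 fails with EINVAL.
        {
            /// Map the whole file read-only. A nonzero offset passed to mmap
            /// must be page-aligned; mapping from offset 0 is the easy case.
            void * buf = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
            if (MAP_FAILED == buf)
                return std::perror("mmap"), 1;

            /// From here the file behaves like an in-memory array; a ReadBuffer
            /// wrapper only needs to point its begin/end at this region.
            std::fwrite(buf, 1, st.st_size, stdout);

            if (0 != munmap(buf, st.st_size))
                return std::perror("munmap"), 1;
        }

        close(fd);
        return 0;
    }

The header comment in the patch states the caveat plainly: with mmap the actual IO happens at page-fault time, outside the program's control, which is acceptable for a benchmark but rarely recommended otherwise.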
--- dbms/src/Common/ErrorCodes.cpp | 1 + dbms/src/IO/MMapReadBufferFromFile.cpp | 72 +++++++++++++++ dbms/src/IO/MMapReadBufferFromFile.h | 37 ++++++++ .../IO/MMapReadBufferFromFileDescriptor.cpp | 90 +++++++++++++++++++ .../src/IO/MMapReadBufferFromFileDescriptor.h | 39 ++++++++ dbms/src/IO/ReadBufferFromFileBase.h | 2 +- utils/compressor/decompress_perf.cpp | 4 +- 7 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 dbms/src/IO/MMapReadBufferFromFile.cpp create mode 100644 dbms/src/IO/MMapReadBufferFromFile.h create mode 100644 dbms/src/IO/MMapReadBufferFromFileDescriptor.cpp create mode 100644 dbms/src/IO/MMapReadBufferFromFileDescriptor.h diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 79301034abc..21e5f65cda3 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -374,6 +374,7 @@ namespace ErrorCodes extern const int QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW = 397; extern const int UNKNOWN_MUTATION_COMMAND = 398; extern const int FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT = 399; + extern const int CANNOT_STAT = 400; extern const int KEEPER_EXCEPTION = 999; diff --git a/dbms/src/IO/MMapReadBufferFromFile.cpp b/dbms/src/IO/MMapReadBufferFromFile.cpp new file mode 100644 index 00000000000..75968013302 --- /dev/null +++ b/dbms/src/IO/MMapReadBufferFromFile.cpp @@ -0,0 +1,72 @@ +#include +#include + +#include +#include +#include + + +namespace ProfileEvents +{ + extern const Event FileOpen; + extern const Event FileOpenFailed; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int FILE_DOESNT_EXIST; + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_CLOSE_FILE; +} + + +void MMapReadBufferFromFile::open(const std::string & file_name) +{ + ProfileEvents::increment(ProfileEvents::FileOpen); + + fd = ::open(file_name.c_str(), O_RDONLY); + + if (-1 == fd) + { + ProfileEvents::increment(ProfileEvents::FileOpenFailed); + throwFromErrno("Cannot open file " + file_name, errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + } +} + + +MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name, size_t offset, size_t length) +{ + open(file_name); + init(fd, offset, length); +} + + +MMapReadBufferFromFile::MMapReadBufferFromFile(const std::string & file_name, size_t offset) +{ + open(file_name); + init(fd, offset); +} + + +MMapReadBufferFromFile::~MMapReadBufferFromFile() +{ + if (fd != -1) + close(); /// Exceptions will lead to std::terminate and that's Ok. +} + + +void MMapReadBufferFromFile::close() +{ + finish(); + + if (0 != ::close(fd)) + throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); + + fd = -1; + metric_increment.destroy(); +} + +} diff --git a/dbms/src/IO/MMapReadBufferFromFile.h b/dbms/src/IO/MMapReadBufferFromFile.h new file mode 100644 index 00000000000..c1762bd54f5 --- /dev/null +++ b/dbms/src/IO/MMapReadBufferFromFile.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include + + +namespace CurrentMetrics +{ + extern const Metric OpenFileForRead; +} + + +namespace DB +{ + +class MMapReadBufferFromFile : public MMapReadBufferFromFileDescriptor +{ +public: + MMapReadBufferFromFile(const std::string & file_name, size_t offset, size_t length); + + /// Map till end of file. 
+ MMapReadBufferFromFile(const std::string & file_name, size_t offset); + + ~MMapReadBufferFromFile() override; + + void close(); + +private: + int fd = -1; + + CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead}; + + void open(const std::string & file_name); +}; + +} + diff --git a/dbms/src/IO/MMapReadBufferFromFileDescriptor.cpp b/dbms/src/IO/MMapReadBufferFromFileDescriptor.cpp new file mode 100644 index 00000000000..4643b9b626c --- /dev/null +++ b/dbms/src/IO/MMapReadBufferFromFileDescriptor.cpp @@ -0,0 +1,90 @@ +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_ALLOCATE_MEMORY; + extern const int CANNOT_MUNMAP; + extern const int CANNOT_STAT; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + + +void MMapReadBufferFromFileDescriptor::init(int fd_, size_t offset, size_t length_) +{ + fd = fd_; + length = length_; + + if (length) + { + void * buf = mmap(nullptr, length, PROT_READ, MAP_PRIVATE, fd, offset); + if (MAP_FAILED == buf) + throwFromErrno("MMapReadBufferFromFileDescriptor: Cannot mmap " + formatReadableSizeWithBinarySuffix(length) + ".", + ErrorCodes::CANNOT_ALLOCATE_MEMORY); + + BufferBase::set(static_cast(buf), length, 0); + } +} + +void MMapReadBufferFromFileDescriptor::init(int fd_, size_t offset) +{ + fd = fd_; + + struct stat stat_res {}; + if (0 != fstat(fd, &stat_res)) + throwFromErrno("MMapReadBufferFromFileDescriptor: Cannot fstat.", ErrorCodes::CANNOT_STAT); + + off_t file_size = stat_res.st_size; + + if (file_size < 0) + throw Exception("MMapReadBufferFromFileDescriptor: fstat returned negative file size", ErrorCodes::LOGICAL_ERROR); + + if (offset > static_cast(file_size)) + throw Exception("MMapReadBufferFromFileDescriptor: requested offset is greater than file size", ErrorCodes::BAD_ARGUMENTS); + + init(fd, offset, file_size - offset); +} + + +MMapReadBufferFromFileDescriptor::MMapReadBufferFromFileDescriptor(int fd, size_t offset, size_t length) + : MMapReadBufferFromFileDescriptor() +{ + init(fd, offset, length); +} + + +MMapReadBufferFromFileDescriptor::MMapReadBufferFromFileDescriptor(int fd, size_t offset) + : MMapReadBufferFromFileDescriptor() +{ + init(fd, offset); +} + + +MMapReadBufferFromFileDescriptor::~MMapReadBufferFromFileDescriptor() +{ + if (length) + finish(); /// Exceptions will lead to std::terminate and that's Ok. +} + + +void MMapReadBufferFromFileDescriptor::finish() +{ + if (0 != munmap(internalBuffer().begin(), length)) + throwFromErrno("MMapReadBufferFromFileDescriptor: Cannot munmap " + formatReadableSizeWithBinarySuffix(length) + ".", + ErrorCodes::CANNOT_MUNMAP); + + length = 0; +} + +} diff --git a/dbms/src/IO/MMapReadBufferFromFileDescriptor.h b/dbms/src/IO/MMapReadBufferFromFileDescriptor.h new file mode 100644 index 00000000000..90bfe640df6 --- /dev/null +++ b/dbms/src/IO/MMapReadBufferFromFileDescriptor.h @@ -0,0 +1,39 @@ +#pragma once + +#include + + +namespace DB +{ + +/** MMap range in a file and represent it as a ReadBuffer. + * Please note that mmap is not always the optimal way to read file. + * Also you cannot control whether and how long actual IO take place, + * so this method is not manageable and not recommended for anything except benchmarks. 
+ */ +class MMapReadBufferFromFileDescriptor : public ReadBuffer +{ +protected: + MMapReadBufferFromFileDescriptor() : ReadBuffer(nullptr, 0) {}; + + void init(int fd_, size_t offset, size_t length_); + void init(int fd_, size_t offset); + +public: + MMapReadBufferFromFileDescriptor(int fd, size_t offset, size_t length); + + /// Map till end of file. + MMapReadBufferFromFileDescriptor(int fd, size_t offset); + + ~MMapReadBufferFromFileDescriptor() override; + + /// unmap memory before call to destructor + void finish(); + +private: + size_t length = 0; + int fd = -1; +}; + +} + diff --git a/dbms/src/IO/ReadBufferFromFileBase.h b/dbms/src/IO/ReadBufferFromFileBase.h index dbbc667edff..bdadc0ffe68 100644 --- a/dbms/src/IO/ReadBufferFromFileBase.h +++ b/dbms/src/IO/ReadBufferFromFileBase.h @@ -19,7 +19,7 @@ class ReadBufferFromFileBase : public BufferWithOwnMemory public: ReadBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment); ReadBufferFromFileBase(ReadBufferFromFileBase &&) = default; - virtual ~ReadBufferFromFileBase(); + ~ReadBufferFromFileBase() override; off_t seek(off_t off, int whence = SEEK_SET); virtual off_t getPositionInFile() = 0; virtual std::string getFileName() const = 0; diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp index 0c284c5e378..e6234980b7d 100644 --- a/utils/compressor/decompress_perf.cpp +++ b/utils/compressor/decompress_perf.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -172,7 +173,8 @@ try */ ssize_t variant = argc < 2 ? -1 : parse(argv[1]); - ReadBufferFromFileDescriptor in(STDIN_FILENO); + MMapReadBufferFromFileDescriptor in(STDIN_FILENO, 0); +// ReadBufferFromFileDescriptor in(STDIN_FILENO); FasterCompressedReadBuffer decompressing_in(in, variant); // WriteBufferFromFileDescriptor out(STDOUT_FILENO); // HashingWriteBuffer hashing_out(out); From 3d8455de0083c678cee4760cd8c3c98cfa1cd05b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Jun 2018 09:04:04 +0300 Subject: [PATCH 029/151] Updated performance testing tool: output single number #1890 --- utils/compressor/decompress_perf.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp index e6234980b7d..259d812fb82 100644 --- a/utils/compressor/decompress_perf.cpp +++ b/utils/compressor/decompress_perf.cpp @@ -188,6 +188,11 @@ try } watch.stop(); + std::cout << std::fixed << std::setprecision(3) + << watch.elapsed() * 1000 / decompressing_in.count() + << '\n'; + +/* // auto hash = hashing_out.getHash(); double seconds = watch.elapsedSeconds(); @@ -232,6 +237,7 @@ try if (best_variant) std::cerr << "Best variant: " << *best_variant << "\n"; +*/ return 0; } From b42351d36c921f8319cabbae1f57246aceebb876 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Jun 2018 10:00:07 +0300 Subject: [PATCH 030/151] Fix format in storage url --- dbms/src/Storages/StorageURL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 6bce66a6711..f0bd6e8971c 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -138,7 +138,7 @@ BlockInputStreams StorageURL::read(const Names & /*column_names*/, ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef()))}; } - void StorageURL::rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & /*new_table_name*/) {} +void 
StorageURL::rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & /*new_table_name*/) {} BlockOutputStreamPtr StorageURL::write(const ASTPtr & /*query*/, const Settings & /*settings*/) { From 567bbb2bbbb9315c419f563c517817ed2dde52c6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Jun 2018 10:36:47 +0300 Subject: [PATCH 031/151] Fix header path and small fixes in tests --- dbms/src/Storages/StorageURL.cpp | 8 +++++--- .../queries/0_stateless/00646_url_engine.python | 15 ++++++++------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index f0bd6e8971c..2a0b6dfc9fe 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -9,7 +9,9 @@ #include #include -#include +#include + +#include #include #include @@ -47,7 +49,7 @@ namespace { read_buf = std::make_unique(uri, Poco::Net::HTTPRequest::HTTP_GET, nullptr, timeouts); - reader = FormatFactory().getInput(format, *read_buf, sample_block, context, max_block_size); + reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size); } ~StorageURLBlockInputStream() override {} @@ -105,7 +107,7 @@ namespace { ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [&](std::ostream & ostr) { WriteBufferFromOStream out_buffer(ostr); - auto writer = FormatFactory().getOutput(format, out_buffer, sample_block, global_context); + auto writer = FormatFactory::instance().getOutput(format, out_buffer, sample_block, global_context); writer->writePrefix(); writer->write(block); writer->writeSuffix(); diff --git a/dbms/tests/queries/0_stateless/00646_url_engine.python b/dbms/tests/queries/0_stateless/00646_url_engine.python index 488c929a210..fed6b2a4d84 100644 --- a/dbms/tests/queries/0_stateless/00646_url_engine.python +++ b/dbms/tests/queries/0_stateless/00646_url_engine.python @@ -1,6 +1,7 @@ #!/usr/bin/env python from __future__ import print_function import csv +import sys import tempfile import threading import os, urllib @@ -19,9 +20,9 @@ def get_ch_answer(query): def check_answers(query, answer): ch_answer = get_ch_answer(query) if ch_answer.strip() != answer.strip(): - print("FAIL on query:", query) - print("Expected answer:", answer) - print("Fetched answer :", ch_answer) + print("FAIL on query:", query, file=sys.stderr) + print("Expected answer:", answer, file=sys.stderr) + print("Fetched answer :", ch_answer, file=sys.stderr) raise Exception("Fail on query") class CSVHTTPServer(BaseHTTPRequestHandler): @@ -133,7 +134,7 @@ def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,do def main(): test_data = "Hello,2,-2,7.7\nWorld,2,-5,8.8" - select_table_input = { + select_only_requests = { "select str,numuint,numint,double from {tbl}" : test_data.replace(',', '\t'), "select numuint, count(*) from {tbl} group by numuint" : "2\t2", "select str,numuint,numint,double from {tbl} limit 1": test_data.split("\n")[0].replace(',', '\t'), @@ -150,12 +151,12 @@ def main(): 'select double, count(*) from {tbl} group by double': "7.7\t2\n9.9\t10" } - t = start_server(len(select_table_input) * 2 + (len(insert_requests) + len(select_requests)) * 2) + t = start_server(len(select_only_requests) * 2 + (len(insert_requests) + len(select_requests)) * 2) t.start() # test table with url engine - test_select(table_name="test_table_select", requests=select_table_input.keys(), answers=select_table_input.values(), test_data=test_data) + 
test_select(table_name="test_table_select", requests=select_only_requests.keys(), answers=select_only_requests.values(), test_data=test_data) # test table function url - test_select(requests=select_table_input.keys(), answers=select_table_input.values(), test_data=test_data) + test_select(requests=select_only_requests.keys(), answers=select_only_requests.values(), test_data=test_data) #test insert into table with url engine test_insert(table_name="test_table_insert", requests_insert=insert_requests, requests_select=select_requests.keys(), answers=select_requests.values()) #test insert into table function url From 14947b494f543a32b664ee6135628a1de408f16a Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Sat, 9 Jun 2018 18:53:14 +0300 Subject: [PATCH 032/151] make ASTAlterQuery::Parameters a real AST element ASTAlterCommand [#CLICKHOUSE-3747] --- dbms/src/Interpreters/DDLWorker.cpp | 22 +- .../Interpreters/InterpreterAlterQuery.cpp | 60 +-- dbms/src/Interpreters/InterpreterAlterQuery.h | 2 +- dbms/src/Parsers/ASTAlterQuery.cpp | 277 ++++++------ dbms/src/Parsers/ASTAlterQuery.h | 132 +++--- dbms/src/Parsers/ParserAlterQuery.cpp | 394 ++++++++++-------- dbms/src/Parsers/ParserAlterQuery.h | 17 + 7 files changed, 501 insertions(+), 403 deletions(-) diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp index 7fed4201b8c..1d46d33ac26 100644 --- a/dbms/src/Interpreters/DDLWorker.cpp +++ b/dbms/src/Interpreters/DDLWorker.cpp @@ -207,12 +207,12 @@ static std::unique_ptr createSimpleZooKeeperLock( static bool isSupportedAlterType(int type) { static const std::unordered_set supported_alter_types{ - ASTAlterQuery::ADD_COLUMN, - ASTAlterQuery::DROP_COLUMN, - ASTAlterQuery::MODIFY_COLUMN, - ASTAlterQuery::MODIFY_PRIMARY_KEY, - ASTAlterQuery::DROP_PARTITION, - ASTAlterQuery::DELETE, + ASTAlterCommand::ADD_COLUMN, + ASTAlterCommand::DROP_COLUMN, + ASTAlterCommand::MODIFY_COLUMN, + ASTAlterCommand::MODIFY_PRIMARY_KEY, + ASTAlterCommand::DROP_PARTITION, + ASTAlterCommand::DELETE, }; return supported_alter_types.count(type) != 0; @@ -621,13 +621,13 @@ void DDLWorker::processTaskAlter( bool execute_once_on_replica = storage->supportsReplication(); bool execute_on_leader_replica = false; - for (const auto & param : ast_alter->parameters) + for (const auto & command : ast_alter->command_list->commands) { - if (!isSupportedAlterType(param.type)) + if (!isSupportedAlterType(command->type)) throw Exception("Unsupported type of ALTER query", ErrorCodes::NOT_IMPLEMENTED); if (execute_once_on_replica) - execute_on_leader_replica |= param.type == ASTAlterQuery::DROP_PARTITION; + execute_on_leader_replica |= command->type == ASTAlterCommand::DROP_PARTITION; } const auto & shard_info = task.cluster->getShardsInfo().at(task.host_shard_num); @@ -1142,9 +1142,9 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & cont if (auto query_alter = dynamic_cast(query_ptr.get())) { - for (const auto & param : query_alter->parameters) + for (const auto & command : query_alter->command_list->commands) { - if (!isSupportedAlterType(param.type)) + if (!isSupportedAlterType(command->type)) throw Exception("Unsupported type of ALTER query", ErrorCodes::NOT_IMPLEMENTED); } } diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index 8934ef8f9eb..785edd2e229 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -51,7 +51,7 @@ BlockIO 
InterpreterAlterQuery::execute() AlterCommands alter_commands; PartitionCommands partition_commands; MutationCommands mutation_commands; - parseAlter(alter.parameters, alter_commands, partition_commands, mutation_commands); + parseAlter(alter.command_list->commands, alter_commands, partition_commands, mutation_commands); if (!mutation_commands.commands.empty()) { @@ -104,21 +104,21 @@ BlockIO InterpreterAlterQuery::execute() } void InterpreterAlterQuery::parseAlter( - const ASTAlterQuery::ParameterContainer & params_container, + const std::vector & command_asts, AlterCommands & out_alter_commands, PartitionCommands & out_partition_commands, MutationCommands & out_mutation_commands) { const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); - for (const auto & params : params_container) + for (const auto & command_ast : command_asts) { - if (params.type == ASTAlterQuery::ADD_COLUMN) + if (command_ast->type == ASTAlterCommand::ADD_COLUMN) { AlterCommand command; command.type = AlterCommand::ADD_COLUMN; - const auto & ast_col_decl = typeid_cast(*params.col_decl); + const auto & ast_col_decl = typeid_cast(*command_ast->col_decl); command.column_name = ast_col_decl.name; if (ast_col_decl.type) @@ -131,40 +131,40 @@ void InterpreterAlterQuery::parseAlter( command.default_expression = ast_col_decl.default_expression; } - if (params.column) - command.after_column = typeid_cast(*params.column).name; + if (command_ast->column) + command.after_column = typeid_cast(*command_ast->column).name; out_alter_commands.emplace_back(std::move(command)); } - else if (params.type == ASTAlterQuery::DROP_COLUMN) + else if (command_ast->type == ASTAlterCommand::DROP_COLUMN) { - if (params.partition) + if (command_ast->partition) { - if (!params.clear_column) + if (!command_ast->clear_column) throw Exception("Can't DROP COLUMN from partition. It is possible only CLEAR COLUMN in partition", ErrorCodes::BAD_ARGUMENTS); - const Field & column_name = typeid_cast(*(params.column)).name; + const Field & column_name = typeid_cast(*(command_ast->column)).name; - out_partition_commands.emplace_back(PartitionCommand::clearColumn(params.partition, column_name)); + out_partition_commands.emplace_back(PartitionCommand::clearColumn(command_ast->partition, column_name)); } else { - if (params.clear_column) + if (command_ast->clear_column) throw Exception("\"ALTER TABLE table CLEAR COLUMN column\" queries are not supported yet. 
Use \"CLEAR COLUMN column IN PARTITION\".", ErrorCodes::NOT_IMPLEMENTED); AlterCommand command; command.type = AlterCommand::DROP_COLUMN; - command.column_name = typeid_cast(*(params.column)).name; + command.column_name = typeid_cast(*(command_ast->column)).name; out_alter_commands.emplace_back(std::move(command)); } } - else if (params.type == ASTAlterQuery::MODIFY_COLUMN) + else if (command_ast->type == ASTAlterCommand::MODIFY_COLUMN) { AlterCommand command; command.type = AlterCommand::MODIFY_COLUMN; - const auto & ast_col_decl = typeid_cast(*params.col_decl); + const auto & ast_col_decl = typeid_cast(*command_ast->col_decl); command.column_name = ast_col_decl.name; if (ast_col_decl.type) @@ -180,37 +180,37 @@ void InterpreterAlterQuery::parseAlter( out_alter_commands.emplace_back(std::move(command)); } - else if (params.type == ASTAlterQuery::MODIFY_PRIMARY_KEY) + else if (command_ast->type == ASTAlterCommand::MODIFY_PRIMARY_KEY) { AlterCommand command; command.type = AlterCommand::MODIFY_PRIMARY_KEY; - command.primary_key = params.primary_key; + command.primary_key = command_ast->primary_key; out_alter_commands.emplace_back(std::move(command)); } - else if (params.type == ASTAlterQuery::DROP_PARTITION) + else if (command_ast->type == ASTAlterCommand::DROP_PARTITION) { - out_partition_commands.emplace_back(PartitionCommand::dropPartition(params.partition, params.detach)); + out_partition_commands.emplace_back(PartitionCommand::dropPartition(command_ast->partition, command_ast->detach)); } - else if (params.type == ASTAlterQuery::ATTACH_PARTITION) + else if (command_ast->type == ASTAlterCommand::ATTACH_PARTITION) { - out_partition_commands.emplace_back(PartitionCommand::attachPartition(params.partition, params.part)); + out_partition_commands.emplace_back(PartitionCommand::attachPartition(command_ast->partition, command_ast->part)); } - else if (params.type == ASTAlterQuery::REPLACE_PARTITION) + else if (command_ast->type == ASTAlterCommand::REPLACE_PARTITION) { out_partition_commands.emplace_back( - PartitionCommand::replacePartition(params.partition, params.replace, params.from_database, params.from_table)); + PartitionCommand::replacePartition(command_ast->partition, command_ast->replace, command_ast->from_database, command_ast->from_table)); } - else if (params.type == ASTAlterQuery::FETCH_PARTITION) + else if (command_ast->type == ASTAlterCommand::FETCH_PARTITION) { - out_partition_commands.emplace_back(PartitionCommand::fetchPartition(params.partition, params.from)); + out_partition_commands.emplace_back(PartitionCommand::fetchPartition(command_ast->partition, command_ast->from)); } - else if (params.type == ASTAlterQuery::FREEZE_PARTITION) + else if (command_ast->type == ASTAlterCommand::FREEZE_PARTITION) { - out_partition_commands.emplace_back(PartitionCommand::freezePartition(params.partition, params.with_name)); + out_partition_commands.emplace_back(PartitionCommand::freezePartition(command_ast->partition, command_ast->with_name)); } - else if (params.type == ASTAlterQuery::DELETE) + else if (command_ast->type == ASTAlterCommand::DELETE) { - out_mutation_commands.commands.emplace_back(MutationCommand::delete_(params.predicate)); + out_mutation_commands.commands.emplace_back(MutationCommand::delete_(command_ast->predicate)); } else throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.h b/dbms/src/Interpreters/InterpreterAlterQuery.h index ea9fe925a4a..31b89f74864 100644 --- 
a/dbms/src/Interpreters/InterpreterAlterQuery.h +++ b/dbms/src/Interpreters/InterpreterAlterQuery.h @@ -123,7 +123,7 @@ private: const Context & context; - static void parseAlter(const ASTAlterQuery::ParameterContainer & params, + static void parseAlter(const std::vector & commands, AlterCommands & out_alter_commands, PartitionCommands & out_partition_commands, MutationCommands & out_mutation_commands); diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 11c21ff134a..cbf1c934b05 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -10,45 +10,172 @@ namespace ErrorCodes extern const int UNEXPECTED_AST_STRUCTURE; } -ASTAlterQuery::Parameters::Parameters() {} - -void ASTAlterQuery::Parameters::clone(Parameters & p) const +ASTPtr ASTAlterCommand::clone() const { - p = *this; - if (col_decl) p.col_decl = col_decl->clone(); - if (column) p.column = column->clone(); - if (primary_key) p.primary_key = primary_key->clone(); - if (partition) p.partition = partition->clone(); - if (predicate) p.predicate = predicate->clone(); + auto res = std::make_shared(*this); + res->children.clear(); + + if (col_decl) + { + res->col_decl = col_decl->clone(); + res->children.push_back(res->col_decl); + } + if (column) + { + res->column = column->clone(); + res->children.push_back(res->column); + } + if (primary_key) + { + res->primary_key = primary_key->clone(); + res->children.push_back(res->primary_key); + } + if (partition) + { + res->partition = partition->clone(); + res->children.push_back(res->partition); + } + if (predicate) + { + res->predicate = predicate->clone(); + res->children.push_back(res->predicate); + } + + return res; } -void ASTAlterQuery::addParameters(const Parameters & params) +void ASTAlterCommand::formatImpl( + const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - parameters.push_back(params); - if (params.col_decl) - children.push_back(params.col_decl); - if (params.column) - children.push_back(params.column); - if (params.partition) - children.push_back(params.partition); - if (params.primary_key) - children.push_back(params.primary_key); - if (params.predicate) - children.push_back(params.predicate); + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + + if (type == ASTAlterCommand::ADD_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (settings.hilite ? hilite_none : ""); + col_decl->formatImpl(settings, state, frame); + + /// AFTER + if (column) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); + column->formatImpl(settings, state, frame); + } + } + else if (type == ASTAlterCommand::DROP_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str + << (clear_column ? "CLEAR " : "DROP ") << "COLUMN " << (settings.hilite ? hilite_none : ""); + column->formatImpl(settings, state, frame); + if (partition) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str<< " IN PARTITION " << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + } + } + else if (type == ASTAlterCommand::MODIFY_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (settings.hilite ? 
hilite_none : ""); + col_decl->formatImpl(settings, state, frame); + } + else if (type == ASTAlterCommand::MODIFY_PRIMARY_KEY) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY PRIMARY KEY " << (settings.hilite ? hilite_none : ""); + settings.ostr << "("; + primary_key->formatImpl(settings, state, frame); + settings.ostr << ")"; + } + else if (type == ASTAlterCommand::DROP_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (detach ? "DETACH" : "DROP") << " PARTITION " + << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + } + else if (type == ASTAlterCommand::ATTACH_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ATTACH " + << (part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + } + else if (type == ASTAlterCommand::REPLACE_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (replace ? "REPLACE" : "ATTACH") << " PARTITION " + << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : ""); + if (!from_database.empty()) + { + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(from_database) + << (settings.hilite ? hilite_none : "") << "."; + } + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(from_table) << (settings.hilite ? hilite_none : ""); + } + else if (type == ASTAlterCommand::FETCH_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FETCH " + << "PARTITION " << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + settings.ostr << (settings.hilite ? hilite_keyword : "") + << " FROM " << (settings.hilite ? hilite_none : "") << std::quoted(from, '\''); + } + else if (type == ASTAlterCommand::FREEZE_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FREEZE PARTITION " << (settings.hilite ? hilite_none : ""); + partition->formatImpl(settings, state, frame); + + if (!with_name.empty()) + { + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "") + << " " << std::quoted(with_name, '\''); + } + } + else if (type == ASTAlterCommand::DELETE) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DELETE WHERE " << (settings.hilite ? hilite_none : ""); + predicate->formatImpl(settings, state, frame); + } + else + throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); } + +ASTPtr ASTAlterCommandList::clone() const +{ + auto res = std::make_shared(); + for (ASTAlterCommand * command : commands) + res->add(command->clone()); + return res; +} + +void ASTAlterCommandList::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + + for (size_t i = 0; i < commands.size(); ++i) + { + static_cast(commands[i])->formatImpl(settings, state, frame); + + std::string comma = (i < (commands.size() - 1)) ? "," : ""; + settings.ostr << (settings.hilite ? hilite_keyword : "") << comma << (settings.hilite ? 
hilite_none : ""); + + settings.ostr << settings.nl_or_ws; + } +} + + /** Get the text that identifies this element. */ String ASTAlterQuery::getID() const { - return ("AlterQuery_" + database + "_" + table); + return "AlterQuery_" + database + "_" + table; } ASTPtr ASTAlterQuery::clone() const { auto res = std::make_shared(*this); - for (ParameterContainer::size_type i = 0; i < parameters.size(); ++i) - parameters[i].clone(res->parameters[i]); - cloneOutputOptions(*res); + res->children.clear(); + + if (command_list) + res->set(res->command_list, command_list->clone()); + return res; } @@ -84,102 +211,10 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState formatOnCluster(settings); settings.ostr << settings.nl_or_ws; - for (size_t i = 0; i < parameters.size(); ++i) - { - const ASTAlterQuery::Parameters & p = parameters[i]; - - if (p.type == ASTAlterQuery::ADD_COLUMN) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (settings.hilite ? hilite_none : ""); - p.col_decl->formatImpl(settings, state, frame); - - /// AFTER - if (p.column) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); - p.column->formatImpl(settings, state, frame); - } - } - else if (p.type == ASTAlterQuery::DROP_COLUMN) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str - << (p.clear_column ? "CLEAR " : "DROP ") << "COLUMN " << (settings.hilite ? hilite_none : ""); - p.column->formatImpl(settings, state, frame); - if (p.partition) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str<< " IN PARTITION " << (settings.hilite ? hilite_none : ""); - p.partition->formatImpl(settings, state, frame); - } - } - else if (p.type == ASTAlterQuery::MODIFY_COLUMN) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (settings.hilite ? hilite_none : ""); - p.col_decl->formatImpl(settings, state, frame); - } - else if (p.type == ASTAlterQuery::MODIFY_PRIMARY_KEY) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY PRIMARY KEY " << (settings.hilite ? hilite_none : ""); - settings.ostr << "("; - p.primary_key->formatImpl(settings, state, frame); - settings.ostr << ")"; - } - else if (p.type == ASTAlterQuery::DROP_PARTITION) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (p.detach ? "DETACH" : "DROP") << " PARTITION " - << (settings.hilite ? hilite_none : ""); - p.partition->formatImpl(settings, state, frame); - } - else if (p.type == ASTAlterQuery::ATTACH_PARTITION) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ATTACH " - << (p.part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : ""); - p.partition->formatImpl(settings, state, frame); - } - else if (p.type == ASTAlterQuery::REPLACE_PARTITION) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (p.replace ? "REPLACE" : "ATTACH") << " PARTITION " - << (settings.hilite ? hilite_none : ""); - p.partition->formatImpl(settings, state, frame); - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : ""); - if (!p.from_database.empty()) - { - settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(p.from_database) - << (settings.hilite ? hilite_none : "") << "."; - } - settings.ostr << (settings.hilite ? 
hilite_identifier : "") << backQuoteIfNeed(p.from_table) << (settings.hilite ? hilite_none : ""); - } - else if (p.type == ASTAlterQuery::FETCH_PARTITION) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FETCH " - << "PARTITION " << (settings.hilite ? hilite_none : ""); - p.partition->formatImpl(settings, state, frame); - settings.ostr << (settings.hilite ? hilite_keyword : "") - << " FROM " << (settings.hilite ? hilite_none : "") << std::quoted(p.from, '\''); - } - else if (p.type == ASTAlterQuery::FREEZE_PARTITION) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FREEZE PARTITION " << (settings.hilite ? hilite_none : ""); - p.partition->formatImpl(settings, state, frame); - - if (!p.with_name.empty()) - { - settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << "WITH NAME" << (settings.hilite ? hilite_none : "") - << " " << std::quoted(p.with_name, '\''); - } - } - else if (p.type == ASTAlterQuery::DELETE) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DELETE WHERE " << (settings.hilite ? hilite_none : ""); - p.predicate->formatImpl(settings, state, frame); - } - else - throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - - std::string comma = (i < (parameters.size() -1) ) ? "," : ""; - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << comma << (settings.hilite ? hilite_none : ""); - - settings.ostr << settings.nl_or_ws; - } + FormatStateStacked frame_nested = frame; + frame_nested.need_parens = false; + ++frame_nested.indent; + static_cast(command_list)->formatImpl(settings, state, frame_nested); } } diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index cdde19d061a..a97503305f5 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -16,10 +16,10 @@ namespace DB * DROP PARTITION partition, */ -class ASTAlterQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster +class ASTAlterCommand : public IAST { public: - enum ParameterType + enum Type { ADD_COLUMN, DROP_COLUMN, @@ -37,70 +37,88 @@ public: NO_TYPE, }; - struct Parameters + Type type = NO_TYPE; + + /** The ADD COLUMN query stores the name and type of the column to add + * This field is not used in the DROP query + * In MODIFY query, the column name and the new type are stored here + */ + ASTPtr col_decl; + + /** The ADD COLUMN query here optionally stores the name of the column following AFTER + * The DROP query stores the column name for deletion here + */ + ASTPtr column; + + /** For MODIFY PRIMARY KEY + */ + ASTPtr primary_key; + + /** Used in DROP PARTITION, RESHARD PARTITION and ATTACH PARTITION FROM queries. + * The value or ID of the partition is stored here. + */ + ASTPtr partition; + + /// For DELETE WHERE: the predicate that filters the rows to delete. + ASTPtr predicate; + + bool detach = false; /// true for DETACH PARTITION + + bool part = false; /// true for ATTACH PART + + bool do_copy = false; /// for RESHARD PARTITION + + bool clear_column = false; /// for CLEAR COLUMN (do not drop column from metadata) + + /** For FETCH PARTITION - the path in ZK to the shard, from which to download the partition. + */ + String from; + + /** For FREEZE PARTITION - place local backup to directory with specified name. 
+ */ + String with_name; + + /// REPLACE(ATTACH) PARTITION partition FROM db.table + String from_database; + String from_table; + /// To distinguish REPLACE and ATTACH PARTITION partition FROM db.table + bool replace = true; + + String getID() const override { return "AlterCommand_" + std::to_string(static_cast(type)); } + + ASTPtr clone() const override; + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +}; + +class ASTAlterCommandList : public IAST +{ +public: + std::vector commands; + + void add(const ASTPtr & command) { - Parameters(); + commands.push_back(static_cast(command.get())); + children.push_back(command); + } - int type = NO_TYPE; + String getID() const override { return "AlterCommandList"; } - /** The ADD COLUMN query stores the name and type of the column to add - * This field is not used in the DROP query - * In MODIFY query, the column name and the new type are stored here - */ - ASTPtr col_decl; + ASTPtr clone() const override; - /** The ADD COLUMN query here optionally stores the name of the column following AFTER - * The DROP query stores the column name for deletion here - */ - ASTPtr column; +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +}; - /** For MODIFY PRIMARY KEY - */ - ASTPtr primary_key; - - /** Used in DROP PARTITION, RESHARD PARTITION and ATTACH PARTITION FROM queries. - * The value or ID of the partition is stored here. - */ - ASTPtr partition; - - /// For DELETE WHERE: the predicate that filters the rows to delete. - ASTPtr predicate; - - bool detach = false; /// true for DETACH PARTITION - - bool part = false; /// true for ATTACH PART - - bool do_copy = false; /// for RESHARD PARTITION - - bool clear_column = false; /// for CLEAR COLUMN (do not drop column from metadata) - - /** For FETCH PARTITION - the path in ZK to the shard, from which to download the partition. - */ - String from; - - /** For FREEZE PARTITION - place local backup to directory with specified name. - */ - String with_name; - - /// REPLACE(ATTACH) PARTITION partition FROM db.table - String from_database; - String from_table; - /// To distinguish REPLACE and ATTACH PARTITION partition FROM db.table - bool replace = true; - - /// deep copy - void clone(Parameters & p) const; - }; - - using ParameterContainer = std::vector; - ParameterContainer parameters; +class ASTAlterQuery : public ASTQueryWithOutput, public ASTQueryWithOnCluster +{ +public: String database; String table; + ASTAlterCommandList * command_list = nullptr; - void addParameters(const Parameters & params); - - /** Get the text that identifies this element. 
*/ String getID() const override; ASTPtr clone() const override; diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 6715ada2ece..c592cb66d7d 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -12,9 +12,11 @@ namespace DB { -bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKeyword s_alter_table("ALTER TABLE"); + auto command = std::make_shared(); + node = command; + ParserKeyword s_add_column("ADD COLUMN"); ParserKeyword s_drop_column("DROP COLUMN"); ParserKeyword s_clear_column("CLEAR COLUMN"); @@ -37,207 +39,233 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_delete_where("DELETE WHERE"); - ParserToken s_dot(TokenType::Dot); - ParserToken s_comma(TokenType::Comma); - - ParserIdentifier table_parser; ParserCompoundIdentifier parser_name; + ParserStringLiteral parser_string_literal; ParserCompoundColumnDeclaration parser_col_decl; ParserPartition parser_partition; - ParserStringLiteral parser_string_literal; ParserExpression exp_elem; - String cluster_str; - ASTPtr col_type; - ASTPtr col_after; - ASTPtr col_drop; + if (s_add_column.ignore(pos, expected)) + { + if (!parser_col_decl.parse(pos, command->col_decl, expected)) + return false; + if (s_after.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->column, expected)) + return false; + } + + command->type = ASTAlterCommand::ADD_COLUMN; + } + else if (s_drop_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::DROP_PARTITION; + } + else if (s_drop_column.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->column, expected)) + return false; + + command->type = ASTAlterCommand::DROP_COLUMN; + command->detach = false; + } + else if (s_clear_column.ignore(pos, expected)) + { + if (!parser_name.parse(pos, command->column, expected)) + return false; + + command->type = ASTAlterCommand::DROP_COLUMN; + command->clear_column = true; + command->detach = false; + + if (s_in_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + } + } + else if (s_detach_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + command->type = ASTAlterCommand::DROP_PARTITION; + command->detach = true; + } + else if (s_attach_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + if (s_from.ignore(pos)) + { + if (!parseDatabaseAndTableName(pos, expected, command->from_database, command->from_table)) + return false; + + command->replace = false; + command->type = ASTAlterCommand::REPLACE_PARTITION; + } + else + { + command->type = ASTAlterCommand::ATTACH_PARTITION; + } + } + else if (ParserKeyword{"REPLACE PARTITION"}.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + if (!s_from.ignore(pos, expected)) + return false; + + if (!parseDatabaseAndTableName(pos, expected, command->from_database, command->from_table)) + return false; + + command->replace = true; + command->type = ASTAlterCommand::REPLACE_PARTITION; + } + else if (s_attach_part.ignore(pos, expected)) + { + if (!parser_string_literal.parse(pos, command->partition, 
expected)) + return false; + + command->part = true; + command->type = ASTAlterCommand::ATTACH_PARTITION; + } + else if (s_fetch_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + if (!s_from.ignore(pos, expected)) + return false; + + ASTPtr ast_from; + if (!parser_string_literal.parse(pos, ast_from, expected)) + return false; + + command->from = typeid_cast(*ast_from).value.get(); + command->type = ASTAlterCommand::FETCH_PARTITION; + } + else if (s_freeze_partition.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, command->partition, expected)) + return false; + + /// WITH NAME 'name' - place local backup to directory with specified name + if (s_with.ignore(pos, expected)) + { + if (!s_name.ignore(pos, expected)) + return false; + + ASTPtr ast_with_name; + if (!parser_string_literal.parse(pos, ast_with_name, expected)) + return false; + + command->with_name = typeid_cast(*ast_with_name).value.get(); + } + + command->type = ASTAlterCommand::FREEZE_PARTITION; + } + else if (s_modify_column.ignore(pos, expected)) + { + if (!parser_col_decl.parse(pos, command->col_decl, expected)) + return false; + + command->type = ASTAlterCommand::MODIFY_COLUMN; + } + else if (s_modify_primary_key.ignore(pos, expected)) + { + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ++pos; + + if (!ParserNotEmptyExpressionList(false).parse(pos, command->primary_key, expected)) + return false; + + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + + command->type = ASTAlterCommand::MODIFY_PRIMARY_KEY; + } + else if (s_delete_where.ignore(pos, expected)) + { + if (!exp_elem.parse(pos, command->predicate, expected)) + return false; + + command->type = ASTAlterCommand::DELETE; + } + else + return false; + + if (command->col_decl) + command->children.push_back(command->col_decl); + if (command->column) + command->children.push_back(command->column); + if (command->primary_key) + command->children.push_back(command->primary_key); + if (command->partition) + command->children.push_back(command->partition); + if (command->predicate) + command->children.push_back(command->predicate); + + return true; +} + + +bool ParserAlterCommandList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto command_list = std::make_shared(); + node = command_list; + + ParserToken s_comma(TokenType::Comma); + ParserAlterCommand p_command; + + do + { + ASTPtr command; + if (!p_command.parse(pos, command, expected)) + return false; + + command_list->add(command); + } + while (s_comma.ignore(pos, expected)); + + return true; +} + + +bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ auto query = std::make_shared(); + node = query; + ParserKeyword s_alter_table("ALTER TABLE"); if (!s_alter_table.ignore(pos, expected)) return false; if (!parseDatabaseAndTableName(pos, expected, query->database, query->table)) return false; + String cluster_str; if (ParserKeyword{"ON"}.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; } - - bool parsing_finished = false; - do - { - ASTAlterQuery::Parameters params; - - if (s_add_column.ignore(pos, expected)) - { - if (!parser_col_decl.parse(pos, params.col_decl, expected)) - return false; - - if (s_after.ignore(pos, expected)) - { - if (!parser_name.parse(pos, params.column, expected)) - return false; - } - - params.type = ASTAlterQuery::ADD_COLUMN; - } - else if (s_drop_partition.ignore(pos, 
expected)) - { - if (!parser_partition.parse(pos, params.partition, expected)) - return false; - - params.type = ASTAlterQuery::DROP_PARTITION; - } - else if (s_drop_column.ignore(pos, expected)) - { - if (!parser_name.parse(pos, params.column, expected)) - return false; - - params.type = ASTAlterQuery::DROP_COLUMN; - params.detach = false; - } - else if (s_clear_column.ignore(pos, expected)) - { - if (!parser_name.parse(pos, params.column, expected)) - return false; - - params.type = ASTAlterQuery::DROP_COLUMN; - params.clear_column = true; - params.detach = false; - - if (s_in_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, params.partition, expected)) - return false; - } - } - else if (s_detach_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, params.partition, expected)) - return false; - - params.type = ASTAlterQuery::DROP_PARTITION; - params.detach = true; - } - else if (s_attach_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, params.partition, expected)) - return false; - - if (s_from.ignore(pos)) - { - if (!parseDatabaseAndTableName(pos, expected, params.from_database, params.from_table)) - return false; - - params.replace = false; - params.type = ASTAlterQuery::REPLACE_PARTITION; - } - else - { - params.type = ASTAlterQuery::ATTACH_PARTITION; - } - } - else if (ParserKeyword{"REPLACE PARTITION"}.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, params.partition, expected)) - return false; - - if (!s_from.ignore(pos, expected)) - return false; - - if (!parseDatabaseAndTableName(pos, expected, params.from_database, params.from_table)) - return false; - - params.replace = true; - params.type = ASTAlterQuery::REPLACE_PARTITION; - } - else if (s_attach_part.ignore(pos, expected)) - { - if (!parser_string_literal.parse(pos, params.partition, expected)) - return false; - - params.part = true; - params.type = ASTAlterQuery::ATTACH_PARTITION; - } - else if (s_fetch_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, params.partition, expected)) - return false; - - if (!s_from.ignore(pos, expected)) - return false; - - ASTPtr ast_from; - if (!parser_string_literal.parse(pos, ast_from, expected)) - return false; - - params.from = typeid_cast(*ast_from).value.get(); - params.type = ASTAlterQuery::FETCH_PARTITION; - } - else if (s_freeze_partition.ignore(pos, expected)) - { - if (!parser_partition.parse(pos, params.partition, expected)) - return false; - - /// WITH NAME 'name' - place local backup to directory with specified name - if (s_with.ignore(pos, expected)) - { - if (!s_name.ignore(pos, expected)) - return false; - - ASTPtr ast_with_name; - if (!parser_string_literal.parse(pos, ast_with_name, expected)) - return false; - - params.with_name = typeid_cast(*ast_with_name).value.get(); - } - - params.type = ASTAlterQuery::FREEZE_PARTITION; - } - else if (s_modify_column.ignore(pos, expected)) - { - if (!parser_col_decl.parse(pos, params.col_decl, expected)) - return false; - - params.type = ASTAlterQuery::MODIFY_COLUMN; - } - else if (s_modify_primary_key.ignore(pos, expected)) - { - if (pos->type != TokenType::OpeningRoundBracket) - return false; - ++pos; - - if (!ParserNotEmptyExpressionList(false).parse(pos, params.primary_key, expected)) - return false; - - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; - - params.type = ASTAlterQuery::MODIFY_PRIMARY_KEY; - } - else if (s_delete_where.ignore(pos, expected)) - { - if (!exp_elem.parse(pos, params.predicate, 
expected)) - return false; - - params.type = ASTAlterQuery::DELETE; - } - else - return false; - - if (!s_comma.ignore(pos, expected)) - parsing_finished = true; - - query->addParameters(params); - } - while (!parsing_finished); - query->cluster = cluster_str; - node = query; + + ParserAlterCommandList p_command_list; + ASTPtr command_list; + if (!p_command_list.parse(pos, command_list, expected)) + return false; + + query->set(query->command_list, command_list); return true; } diff --git a/dbms/src/Parsers/ParserAlterQuery.h b/dbms/src/Parsers/ParserAlterQuery.h index 03c23c6f47f..c758e0304b4 100644 --- a/dbms/src/Parsers/ParserAlterQuery.h +++ b/dbms/src/Parsers/ParserAlterQuery.h @@ -18,6 +18,23 @@ namespace DB * [FREEZE PARTITION] * [DELETE WHERE ...] */ + +class ParserAlterCommand : public IParserBase +{ +protected: + const char * getName() const { return "ALTER command"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected); +}; + + +class ParserAlterCommandList : public IParserBase +{ +protected: + const char * getName() const { return "a list of ALTER commands"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected); +}; + + class ParserAlterQuery : public IParserBase { protected: From c5c601f6622e29e865f802dc172ccfd2067de906 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 13 Jun 2018 16:49:27 +0300 Subject: [PATCH 033/151] save serialized commands in ReplicatedMergeTreeMutationEntry; split InterpreterAlterQuery::parseAlter() [#CLICKHOUSE-3747] --- .../ApplyingMutationsBlockInputStream.cpp | 4 +- .../Interpreters/InterpreterAlterQuery.cpp | 165 ++---------------- dbms/src/Interpreters/InterpreterAlterQuery.h | 103 +---------- dbms/src/Storages/AlterCommands.cpp | 89 +++++++++- dbms/src/Storages/AlterCommands.h | 16 +- .../ReplicatedMergeTreeMutationEntry.cpp | 30 +++- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 6 +- dbms/src/Storages/MutationCommands.cpp | 91 ++-------- dbms/src/Storages/MutationCommands.h | 27 +-- dbms/src/Storages/PartitionCommands.cpp | 94 ++++++++++ dbms/src/Storages/PartitionCommands.h | 59 +++++++ .../Storages/StorageReplicatedMergeTree.cpp | 2 +- 12 files changed, 319 insertions(+), 367 deletions(-) create mode 100644 dbms/src/Storages/PartitionCommands.cpp create mode 100644 dbms/src/Storages/PartitionCommands.h diff --git a/dbms/src/DataStreams/ApplyingMutationsBlockInputStream.cpp b/dbms/src/DataStreams/ApplyingMutationsBlockInputStream.cpp index 51f155e0bfc..bfcfcb85418 100644 --- a/dbms/src/DataStreams/ApplyingMutationsBlockInputStream.cpp +++ b/dbms/src/DataStreams/ApplyingMutationsBlockInputStream.cpp @@ -2,6 +2,8 @@ #include #include #include +#include + namespace DB { @@ -32,7 +34,7 @@ ApplyingMutationsBlockInputStream::ApplyingMutationsBlockInputStream( break; } default: - throw Exception("Unsupported mutation cmd type: " + toString(static_cast(cmd.type)), + throw Exception("Unsupported mutation cmd type: " + toString(cmd.type), ErrorCodes::LOGICAL_ERROR); } } diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index 785edd2e229..c58d358dd63 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -1,21 +1,7 @@ #include #include #include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include +#include #include @@ -26,8 +12,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int 
ARGUMENT_OUT_OF_BOUND; - extern const int BAD_ARGUMENTS; extern const int ILLEGAL_COLUMN; } @@ -51,9 +35,19 @@ BlockIO InterpreterAlterQuery::execute() AlterCommands alter_commands; PartitionCommands partition_commands; MutationCommands mutation_commands; - parseAlter(alter.command_list->commands, alter_commands, partition_commands, mutation_commands); + for (ASTAlterCommand * command_ast : alter.command_list->commands) + { + if (auto alter_command = AlterCommand::parse(command_ast)) + alter_commands.emplace_back(std::move(*alter_command)); + else if (auto partition_command = PartitionCommand::parse(command_ast)) + partition_commands.emplace_back(std::move(*partition_command)); + else if (auto mut_command = MutationCommand::parse(command_ast)) + mutation_commands.emplace_back(std::move(*mut_command)); + else + throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); + } - if (!mutation_commands.commands.empty()) + if (!mutation_commands.empty()) { mutation_commands.validate(*table, context); table->mutate(mutation_commands, context); @@ -103,137 +97,4 @@ BlockIO InterpreterAlterQuery::execute() return {}; } -void InterpreterAlterQuery::parseAlter( - const std::vector & command_asts, - AlterCommands & out_alter_commands, - PartitionCommands & out_partition_commands, - MutationCommands & out_mutation_commands) -{ - const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); - - for (const auto & command_ast : command_asts) - { - if (command_ast->type == ASTAlterCommand::ADD_COLUMN) - { - AlterCommand command; - command.type = AlterCommand::ADD_COLUMN; - - const auto & ast_col_decl = typeid_cast(*command_ast->col_decl); - - command.column_name = ast_col_decl.name; - if (ast_col_decl.type) - { - command.data_type = data_type_factory.get(ast_col_decl.type); - } - if (ast_col_decl.default_expression) - { - command.default_kind = columnDefaultKindFromString(ast_col_decl.default_specifier); - command.default_expression = ast_col_decl.default_expression; - } - - if (command_ast->column) - command.after_column = typeid_cast(*command_ast->column).name; - - out_alter_commands.emplace_back(std::move(command)); - } - else if (command_ast->type == ASTAlterCommand::DROP_COLUMN) - { - if (command_ast->partition) - { - if (!command_ast->clear_column) - throw Exception("Can't DROP COLUMN from partition. It is possible only CLEAR COLUMN in partition", ErrorCodes::BAD_ARGUMENTS); - - const Field & column_name = typeid_cast(*(command_ast->column)).name; - - out_partition_commands.emplace_back(PartitionCommand::clearColumn(command_ast->partition, column_name)); - } - else - { - if (command_ast->clear_column) - throw Exception("\"ALTER TABLE table CLEAR COLUMN column\" queries are not supported yet. 
Use \"CLEAR COLUMN column IN PARTITION\".", ErrorCodes::NOT_IMPLEMENTED); - - AlterCommand command; - command.type = AlterCommand::DROP_COLUMN; - command.column_name = typeid_cast(*(command_ast->column)).name; - - out_alter_commands.emplace_back(std::move(command)); - } - } - else if (command_ast->type == ASTAlterCommand::MODIFY_COLUMN) - { - AlterCommand command; - command.type = AlterCommand::MODIFY_COLUMN; - - const auto & ast_col_decl = typeid_cast(*command_ast->col_decl); - - command.column_name = ast_col_decl.name; - if (ast_col_decl.type) - { - command.data_type = data_type_factory.get(ast_col_decl.type); - } - - if (ast_col_decl.default_expression) - { - command.default_kind = columnDefaultKindFromString(ast_col_decl.default_specifier); - command.default_expression = ast_col_decl.default_expression; - } - - out_alter_commands.emplace_back(std::move(command)); - } - else if (command_ast->type == ASTAlterCommand::MODIFY_PRIMARY_KEY) - { - AlterCommand command; - command.type = AlterCommand::MODIFY_PRIMARY_KEY; - command.primary_key = command_ast->primary_key; - out_alter_commands.emplace_back(std::move(command)); - } - else if (command_ast->type == ASTAlterCommand::DROP_PARTITION) - { - out_partition_commands.emplace_back(PartitionCommand::dropPartition(command_ast->partition, command_ast->detach)); - } - else if (command_ast->type == ASTAlterCommand::ATTACH_PARTITION) - { - out_partition_commands.emplace_back(PartitionCommand::attachPartition(command_ast->partition, command_ast->part)); - } - else if (command_ast->type == ASTAlterCommand::REPLACE_PARTITION) - { - out_partition_commands.emplace_back( - PartitionCommand::replacePartition(command_ast->partition, command_ast->replace, command_ast->from_database, command_ast->from_table)); - } - else if (command_ast->type == ASTAlterCommand::FETCH_PARTITION) - { - out_partition_commands.emplace_back(PartitionCommand::fetchPartition(command_ast->partition, command_ast->from)); - } - else if (command_ast->type == ASTAlterCommand::FREEZE_PARTITION) - { - out_partition_commands.emplace_back(PartitionCommand::freezePartition(command_ast->partition, command_ast->with_name)); - } - else if (command_ast->type == ASTAlterCommand::DELETE) - { - out_mutation_commands.commands.emplace_back(MutationCommand::delete_(command_ast->predicate)); - } - else - throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); - } -} - - -void InterpreterAlterQuery::PartitionCommands::validate(const IStorage & table) -{ - for (const PartitionCommand & command : *this) - { - if (command.type == PartitionCommand::CLEAR_COLUMN) - { - String column_name = command.column_name.safeGet(); - - if (!table.getColumns().hasPhysical(column_name)) - { - throw Exception("Wrong column name. Cannot find column " + column_name + " to clear it from partition", - DB::ErrorCodes::ILLEGAL_COLUMN); - } - } - } -} - - } diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.h b/dbms/src/Interpreters/InterpreterAlterQuery.h index 31b89f74864..3e4453608ca 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.h +++ b/dbms/src/Interpreters/InterpreterAlterQuery.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -22,111 +23,9 @@ public: BlockIO execute() override; private: - struct PartitionCommand - { - enum Type - { - DROP_PARTITION, - ATTACH_PARTITION, - REPLACE_PARTITION, - FETCH_PARTITION, - FREEZE_PARTITION, - CLEAR_COLUMN, - }; - - Type type; - - ASTPtr partition; - Field column_name; - - /// true for DETACH PARTITION. 
- bool detach = false; - - /// true for ATTACH PART (and false for PARTITION) - bool part = false; - - /// For ATTACH PARTITION partition FROM db.table - String from_database; - String from_table; - bool replace = true; - - /// For FETCH PARTITION - path in ZK to the shard, from which to download the partition. - String from_zookeeper_path; - - /// For FREEZE PARTITION - String with_name; - - static PartitionCommand dropPartition(const ASTPtr & partition, bool detach) - { - PartitionCommand res; - res.type = DROP_PARTITION; - res.partition = partition; - res.detach = detach; - return res; - } - - static PartitionCommand clearColumn(const ASTPtr & partition, const Field & column_name) - { - PartitionCommand res; - res.type = CLEAR_COLUMN; - res.partition = partition; - res.column_name = column_name; - return res; - } - - static PartitionCommand attachPartition(const ASTPtr & partition, bool part) - { - PartitionCommand res; - res.type = ATTACH_PARTITION; - res.partition = partition; - res.part = part; - return res; - } - - static PartitionCommand replacePartition(const ASTPtr & partition, bool replace, const String & from_database, const String & from_table) - { - PartitionCommand res; - res.type = REPLACE_PARTITION; - res.partition = partition; - res.replace = replace; - res.from_database = from_database; - res.from_table = from_table; - return res; - } - - static PartitionCommand fetchPartition(const ASTPtr & partition, const String & from) - { - PartitionCommand res; - res.type = FETCH_PARTITION; - res.partition = partition; - res.from_zookeeper_path = from; - return res; - } - - static PartitionCommand freezePartition(const ASTPtr & partition, const String & with_name) - { - PartitionCommand res; - res.type = FREEZE_PARTITION; - res.partition = partition; - res.with_name = with_name; - return res; - } - }; - - class PartitionCommands : public std::vector - { - public: - void validate(const IStorage & table); - }; - ASTPtr query_ptr; const Context & context; - - static void parseAlter(const std::vector & commands, - AlterCommands & out_alter_commands, - PartitionCommands & out_partition_commands, - MutationCommands & out_mutation_commands); }; } diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index cea0a6b68eb..9e6d525f685 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -9,6 +10,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -21,6 +25,83 @@ namespace ErrorCodes } +std::optional AlterCommand::parse(const ASTAlterCommand * command_ast) +{ + const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); + + if (command_ast->type == ASTAlterCommand::ADD_COLUMN) + { + AlterCommand command; + command.type = AlterCommand::ADD_COLUMN; + + const auto & ast_col_decl = typeid_cast(*command_ast->col_decl); + + command.column_name = ast_col_decl.name; + if (ast_col_decl.type) + { + command.data_type = data_type_factory.get(ast_col_decl.type); + } + if (ast_col_decl.default_expression) + { + command.default_kind = columnDefaultKindFromString(ast_col_decl.default_specifier); + command.default_expression = ast_col_decl.default_expression; + } + + if (command_ast->column) + command.after_column = typeid_cast(*command_ast->column).name; + + return command; + } + else if (command_ast->type == ASTAlterCommand::DROP_COLUMN && !command_ast->partition) + { + if (command_ast->clear_column) + throw Exception("\"ALTER 
TABLE table CLEAR COLUMN column\" queries are not supported yet. Use \"CLEAR COLUMN column IN PARTITION\".", ErrorCodes::NOT_IMPLEMENTED); + + AlterCommand command; + command.type = AlterCommand::DROP_COLUMN; + command.column_name = typeid_cast(*(command_ast->column)).name; + return command; + } + else if (command_ast->type == ASTAlterCommand::MODIFY_COLUMN) + { + AlterCommand command; + command.type = AlterCommand::MODIFY_COLUMN; + + const auto & ast_col_decl = typeid_cast(*command_ast->col_decl); + + command.column_name = ast_col_decl.name; + if (ast_col_decl.type) + { + command.data_type = data_type_factory.get(ast_col_decl.type); + } + + if (ast_col_decl.default_expression) + { + command.default_kind = columnDefaultKindFromString(ast_col_decl.default_specifier); + command.default_expression = ast_col_decl.default_expression; + } + + return command; + } + else if (command_ast->type == ASTAlterCommand::MODIFY_PRIMARY_KEY) + { + AlterCommand command; + command.type = AlterCommand::MODIFY_PRIMARY_KEY; + command.primary_key = command_ast->primary_key; + return command; + } + else + return {}; +} + + +/// the names are the same if they match the whole name or name_without_dot matches the part of the name up to the dot +static bool namesEqual(const String & name_without_dot, const DB::NameAndTypePair & name_type) +{ + String name_with_dot = name_without_dot + "."; + return (name_with_dot == name_type.name.substr(0, name_without_dot.length() + 1) || name_without_dot == name_type.name); +} + void AlterCommand::apply(ColumnsDescription & columns_description) const { if (type == ADD_COLUMN) @@ -187,7 +268,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context) { const auto & column_name = command.column_name; const auto column_it = std::find_if(std::begin(all_columns), std::end(all_columns), - std::bind(AlterCommand::namesEqual, std::cref(command.column_name), std::placeholders::_1)); + std::bind(namesEqual, std::cref(command.column_name), std::placeholders::_1)); if (command.type == AlterCommand::ADD_COLUMN) { @@ -251,7 +332,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context) auto found = false; for (auto it = std::begin(all_columns); it != std::end(all_columns);) { - if (AlterCommand::namesEqual(command.column_name, *it)) + if (namesEqual(command.column_name, *it)) { found = true; it = all_columns.erase(it); @@ -262,7 +343,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context) for (auto it = std::begin(defaults); it != std::end(defaults);) { - if (AlterCommand::namesEqual(command.column_name, { it->first, nullptr })) + if (namesEqual(command.column_name, { it->first, nullptr })) it = defaults.erase(it); else ++it; @@ -280,7 +361,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context) { const auto & column_name = col_def.first; const auto column_it = std::find_if(all_columns.begin(), all_columns.end(), [&] (const NameAndTypePair & name_type) - { return AlterCommand::namesEqual(column_name, name_type); }); + { return namesEqual(column_name, name_type); }); const auto tmp_column_name = column_name + "_tmp"; const auto & column_type_ptr = column_it->type; diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index 796f48eea1a..fca8a68f70d 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -6,6 +6,8 @@ namespace DB { +class ASTAlterCommand; + /// Operation from the ALTER query (except for manipulation with 
PART/PARTITION). Adding Nested columns is not expanded to add individual columns. struct AlterCommand { @@ -36,15 +38,6 @@ struct AlterCommand /// For MODIFY_PRIMARY_KEY ASTPtr primary_key; - /// the names are the same if they match the whole name or name_without_dot matches the part of the name up to the dot - static bool namesEqual(const String & name_without_dot, const DB::NameAndTypePair & name_type) - { - String name_with_dot = name_without_dot + "."; - return (name_with_dot == name_type.name.substr(0, name_without_dot.length() + 1) || name_without_dot == name_type.name); - } - - void apply(ColumnsDescription & columns_description) const; - AlterCommand() = default; AlterCommand(const Type type, const String & column_name, const DataTypePtr & data_type, const ColumnDefaultKind default_kind, const ASTPtr & default_expression, @@ -52,6 +45,11 @@ struct AlterCommand : type{type}, column_name{column_name}, data_type{data_type}, default_kind{default_kind}, default_expression{default_expression}, after_column{after_column} {} + + static std::optional parse(const ASTAlterCommand * command); + + void apply(ColumnsDescription & columns_description) const; + }; class IStorage; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp index a0bc59a0e69..739a9304b23 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp @@ -1,12 +1,21 @@ #include +#include +#include +#include #include #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int UNKNOWN_MUTATION_COMMAND; +} + void ReplicatedMergeTreeMutationEntry::writeText(WriteBuffer & out) const { out << "format version: 1\n" @@ -21,8 +30,9 @@ void ReplicatedMergeTreeMutationEntry::writeText(WriteBuffer & out) const out << partition_id << "\t" << number << "\n"; } - out << "mutation commands:\n"; - commands.writeText(out); + std::stringstream commands_ss; + formatAST(*commands.ast(), commands_ss, /* hilite = */ false, /* one_line = */ true); + out << "commands: " << escape << commands_ss.str(); } void ReplicatedMergeTreeMutationEntry::readText(ReadBuffer & in) @@ -45,8 +55,20 @@ void ReplicatedMergeTreeMutationEntry::readText(ReadBuffer & in) block_numbers[partition_id] = number; } - in >> "mutation commands:\n"; - commands.readText(in); + String commands_str; + in >> "commands: " >> escape >> commands_str; + + ParserAlterCommandList p_alter_commands; + auto commands_ast = parseQuery( + p_alter_commands, commands_str.data(), commands_str.data() + commands_str.length(), "mutation commands list", 0); + for (ASTAlterCommand * command_ast : typeid_cast(*commands_ast).commands) + { + auto command = MutationCommand::parse(command_ast); + if (!command) + throw Exception("Unknown mutation command type: " + DB::toString(command_ast->type), ErrorCodes::UNKNOWN_MUTATION_COMMAND); + commands.push_back(std::move(*command)); + } + } String ReplicatedMergeTreeMutationEntry::toString() const diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index b2dccae82bc..06a01933e9d 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -951,11 +951,11 @@ MutationCommands ReplicatedMergeTreeQueue::getMutationCommands( else ++end; - std::vector commands; + MutationCommands commands; for (auto it = begin; it != 
end; ++it) - commands.insert(commands.end(), it->second->commands.commands.begin(), it->second->commands.commands.end()); + commands.insert(commands.end(), it->second->commands.begin(), it->second->commands.end()); - return MutationCommands{commands}; + return commands; } void ReplicatedMergeTreeQueue::disableMergesInRange(const String & part_name) diff --git a/dbms/src/Storages/MutationCommands.cpp b/dbms/src/Storages/MutationCommands.cpp index 0119be9132c..ff36f6ed27e 100644 --- a/dbms/src/Storages/MutationCommands.cpp +++ b/dbms/src/Storages/MutationCommands.cpp @@ -11,64 +11,34 @@ namespace DB { -namespace ErrorCodes +std::optional MutationCommand::parse(ASTAlterCommand * command) { - extern const int UNKNOWN_MUTATION_COMMAND; -} - -static String typeToString(MutationCommand::Type type) -{ - switch (type) + if (command->type == ASTAlterCommand::DELETE) { - case MutationCommand::DELETE: return "DELETE"; - default: - throw Exception("Bad mutation type: " + toString(type), ErrorCodes::LOGICAL_ERROR); - } -} - -void MutationCommand::writeText(WriteBuffer & out) const -{ - out << typeToString(type) << "\n"; - - switch (type) - { - case MutationCommand::DELETE: - { - std::stringstream ss; - formatAST(*predicate, ss, /* hilite = */ false, /* one_line = */ true); - out << "predicate: " << escape << ss.str() << "\n"; - break; - } - default: - throw Exception("Bad mutation type: " + toString(type), ErrorCodes::LOGICAL_ERROR); - } -} - -void MutationCommand::readText(ReadBuffer & in) -{ - String type_str; - in >> type_str >> "\n"; - - if (type_str == "DELETE") - { - type = DELETE; - - String predicate_str; - in >> "predicate: " >> escape >> predicate_str >> "\n"; - ParserExpressionWithOptionalAlias p_expr(false); - predicate = parseQuery( - p_expr, predicate_str.data(), predicate_str.data() + predicate_str.length(), "mutation predicate", 0); + MutationCommand res; + res.ast = command->ptr(); + res.type = DELETE; + res.predicate = command->predicate; + return res; } else - throw Exception("Unknown mutation command: `" + type_str + "'", ErrorCodes::UNKNOWN_MUTATION_COMMAND); + return {}; } -void MutationCommands::validate(const IStorage & table, const Context & context) +std::shared_ptr MutationCommands::ast() const +{ + auto res = std::make_shared(); + for (const MutationCommand & command : *this) + res->add(command.ast->clone()); + return res; +} + +void MutationCommands::validate(const IStorage & table, const Context & context) const { auto all_columns = table.getColumns().getAll(); - for (const MutationCommand & command : commands) + for (const MutationCommand & command : *this) { switch (command.type) { @@ -86,29 +56,4 @@ void MutationCommands::validate(const IStorage & table, const Context & context) } } -void MutationCommands::writeText(WriteBuffer & out) const -{ - out << "format version: 1\n" - << "count: " << commands.size() << "\n"; - for (const MutationCommand & command : commands) - { - command.writeText(out); - } -} - -void MutationCommands::readText(ReadBuffer & in) -{ - in >> "format version: 1\n"; - - size_t count; - in >> "count: " >> count >> "\n"; - - for (size_t i = 0; i < count; ++i) - { - MutationCommand command; - command.readText(in); - commands.push_back(std::move(command)); - } -} - } diff --git a/dbms/src/Storages/MutationCommands.h b/dbms/src/Storages/MutationCommands.h index a7a2c24ef8f..0a382d2f80c 100644 --- a/dbms/src/Storages/MutationCommands.h +++ b/dbms/src/Storages/MutationCommands.h @@ -1,7 +1,7 @@ #pragma once -#include -#include +#include +#include namespace 
DB @@ -12,6 +12,8 @@ class Context; struct MutationCommand { + ASTPtr ast; /// The AST of the whole command + enum Type { EMPTY, /// Not used. @@ -22,26 +24,15 @@ struct MutationCommand ASTPtr predicate; - static MutationCommand delete_(const ASTPtr & predicate) - { - MutationCommand res; - res.type = DELETE; - res.predicate = predicate; - return res; - } - - void writeText(WriteBuffer & out) const; - void readText(ReadBuffer & in); + static std::optional parse(ASTAlterCommand * command); }; -struct MutationCommands +class MutationCommands : public std::vector { - std::vector commands; +public: + std::shared_ptr ast() const; - void validate(const IStorage & table, const Context & context); - - void writeText(WriteBuffer & out) const; - void readText(ReadBuffer & in); + void validate(const IStorage & table, const Context & context) const; }; } diff --git a/dbms/src/Storages/PartitionCommands.cpp b/dbms/src/Storages/PartitionCommands.cpp new file mode 100644 index 00000000000..e7daabb246c --- /dev/null +++ b/dbms/src/Storages/PartitionCommands.cpp @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; +} + +std::optional PartitionCommand::parse(const ASTAlterCommand * command_ast) +{ + if (command_ast->type == ASTAlterCommand::DROP_PARTITION) + { + PartitionCommand res; + res.type = DROP_PARTITION; + res.partition = command_ast->partition; + res.detach = command_ast->detach; + return res; + } + else if (command_ast->type == ASTAlterCommand::ATTACH_PARTITION) + { + PartitionCommand res; + res.type = ATTACH_PARTITION; + res.partition = command_ast->partition; + res.part = command_ast->part; + return res; + } + else if (command_ast->type == ASTAlterCommand::REPLACE_PARTITION) + { + PartitionCommand res; + res.type = REPLACE_PARTITION; + res.partition = command_ast->partition; + res.replace = command_ast->replace; + res.from_database = command_ast->from_database; + res.from_table = command_ast->from_table; + return res; + } + else if (command_ast->type == ASTAlterCommand::FETCH_PARTITION) + { + PartitionCommand res; + res.type = FETCH_PARTITION; + res.partition = command_ast->partition; + res.from_zookeeper_path = command_ast->from; + return res; + } + else if (command_ast->type == ASTAlterCommand::FREEZE_PARTITION) + { + PartitionCommand res; + res.type = FREEZE_PARTITION; + res.partition = command_ast->partition; + res.with_name = command_ast->with_name; + return res; + } + else if (command_ast->type == ASTAlterCommand::DROP_COLUMN && command_ast->partition) + { + if (!command_ast->clear_column) + throw Exception("Can't DROP COLUMN from partition. It is possible only CLEAR COLUMN in partition", ErrorCodes::BAD_ARGUMENTS); + + PartitionCommand res; + res.type = CLEAR_COLUMN; + res.partition = command_ast->partition; + const Field & column_name = typeid_cast(*(command_ast->column)).name; + res.column_name = column_name; + return res; + } + else + return {}; +} + +void PartitionCommands::validate(const IStorage & table) +{ + for (const PartitionCommand & command : *this) + { + if (command.type == PartitionCommand::CLEAR_COLUMN) + { + String column_name = command.column_name.safeGet(); + + if (!table.getColumns().hasPhysical(column_name)) + { + throw Exception("Wrong column name. 
Cannot find column " + column_name + " to clear it from partition", + DB::ErrorCodes::ILLEGAL_COLUMN); + } + } + } +} + +} diff --git a/dbms/src/Storages/PartitionCommands.h b/dbms/src/Storages/PartitionCommands.h new file mode 100644 index 00000000000..6fa127de899 --- /dev/null +++ b/dbms/src/Storages/PartitionCommands.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +class IStorage; +class ASTAlterCommand; + +struct PartitionCommand +{ + enum Type + { + DROP_PARTITION, + ATTACH_PARTITION, + REPLACE_PARTITION, + FETCH_PARTITION, + FREEZE_PARTITION, + CLEAR_COLUMN, + }; + + Type type; + + ASTPtr partition; + Field column_name; + + /// true for DETACH PARTITION. + bool detach = false; + + /// true for ATTACH PART (and false for PARTITION) + bool part = false; + + /// For ATTACH PARTITION partition FROM db.table + String from_database; + String from_table; + bool replace = true; + + /// For FETCH PARTITION - path in ZK to the shard, from which to download the partition. + String from_zookeeper_path; + + /// For FREEZE PARTITION + String with_name; + + static std::optional parse(const ASTAlterCommand * command); +}; + +class PartitionCommands : public std::vector +{ +public: + void validate(const IStorage & table); +}; + + +} diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index aaff1e3f97c..702cc81a7ec 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -1382,7 +1382,7 @@ bool StorageReplicatedMergeTree::tryExecutePartMutation(const StorageReplicatedM try { - new_part = merger_mutator.mutatePartToTemporaryPart(future_mutated_part, commands.commands, context); + new_part = merger_mutator.mutatePartToTemporaryPart(future_mutated_part, commands, context); data.renameTempPartAndReplace(new_part, nullptr, &transaction); try From 2c61a5940cf5d0383507991e46ce3f00ddb7d41f Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Thu, 7 Jun 2018 14:00:43 +0300 Subject: [PATCH 034/151] store mutation entries in shared_ptr --- .../ReplicatedMergeTreeMutationEntry.h | 2 ++ .../MergeTree/ReplicatedMergeTreeQueue.cpp | 18 ++++++++---------- .../MergeTree/ReplicatedMergeTreeQueue.h | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.h index e92230d3cc6..737270cb024 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.h @@ -29,4 +29,6 @@ struct ReplicatedMergeTreeMutationEntry MutationCommands commands; }; +using ReplicatedMergeTreeMutationEntryPtr = std::shared_ptr; + } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 06a01933e9d..b2349ea83a6 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -445,7 +445,7 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, z for (auto it = mutations_by_znode.begin(); it != mutations_by_znode.end(); ) { - const ReplicatedMergeTreeMutationEntry & entry = it->second; + const ReplicatedMergeTreeMutationEntry & entry = *it->second; if (!entries_in_zk_set.count(entry.znode_name)) { LOG_DEBUG(log, "Removing obsolete mutation " + entry.znode_name + " from local state."); @@ 
-478,25 +478,23 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, z for (const String & entry : entries_to_load) futures.emplace_back(zookeeper->asyncGet(zookeeper_path + "/mutations/" + entry)); - std::vector new_mutations; + std::vector new_mutations; for (size_t i = 0; i < entries_to_load.size(); ++i) { - new_mutations.push_back( - ReplicatedMergeTreeMutationEntry::parse(futures[i].get().data, entries_to_load[i])); + new_mutations.push_back(std::make_shared( + ReplicatedMergeTreeMutationEntry::parse(futures[i].get().data, entries_to_load[i]))); } { std::lock_guard lock(target_state_mutex); - for (ReplicatedMergeTreeMutationEntry & entry : new_mutations) + for (const ReplicatedMergeTreeMutationEntryPtr & entry : new_mutations) { - String znode = entry.znode_name; - const ReplicatedMergeTreeMutationEntry & inserted_entry = - mutations_by_znode.emplace(znode, std::move(entry)).first->second; + mutations_by_znode.emplace(entry->znode_name, entry); - for (const auto & partition_and_block_num : inserted_entry.block_numbers) + for (const auto & partition_and_block_num : entry->block_numbers) mutations_by_partition[partition_and_block_num.first].emplace( - partition_and_block_num.second, &inserted_entry); + partition_and_block_num.second, entry); } } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 13ed28f2b35..73bd6cc8feb 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -91,8 +91,8 @@ private: /// mutations_by_partition is an index partition ID -> block ID -> mutation into this list. /// Note that mutations are updated in such a way that they are always more recent than /// log_pointer (see pullLogsToQueue()). - std::map mutations_by_znode; - std::unordered_map> mutations_by_partition; + std::map mutations_by_znode; + std::unordered_map> mutations_by_partition; /// Provides only one simultaneous call to pullLogsToQueue. 
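The point of the shared_ptr change above is that each mutation entry is now referenced from two indexes at once: mutations_by_znode owns it by znode name, while mutations_by_partition points at the same object per (partition, block number). With shared ownership, erasing an entry from one index can no longer leave a dangling pointer in the other. A minimal sketch of the ownership pattern, using simplified stand-in types rather than the real ClickHouse ones:

    #include <map>
    #include <memory>
    #include <string>
    #include <unordered_map>

    struct Entry                                      /// stand-in for ReplicatedMergeTreeMutationEntry
    {
        std::string znode_name;
        std::map<std::string, long> block_numbers;    /// partition_id -> block number
    };
    using EntryPtr = std::shared_ptr<Entry>;

    int main()
    {
        std::map<std::string, EntryPtr> by_znode;
        std::unordered_map<std::string, std::map<long, EntryPtr>> by_partition;

        auto entry = std::make_shared<Entry>(Entry{"0000000000", {{"201806", 42}}});

        by_znode.emplace(entry->znode_name, entry);
        for (const auto & kv : entry->block_numbers)
            by_partition[kv.first].emplace(kv.second, entry);

        /// Removing the entry from one index keeps it alive for the other.
        by_znode.erase("0000000000");
        return 0;
    }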
From 4ee581117b59acb4ad011e1cf1ac30b211974b71 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Thu, 7 Jun 2018 16:28:39 +0300 Subject: [PATCH 035/151] system.mutations table skeleton [#CLICKHOUSE-3747] --- .../MergeTree/MergeTreeMutationStatus.h | 18 +++ .../ReplicatedMergeTreeMutationEntry.h | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 28 ++++ .../MergeTree/ReplicatedMergeTreeQueue.h | 3 + .../Storages/StorageReplicatedMergeTree.cpp | 5 + .../src/Storages/StorageReplicatedMergeTree.h | 2 + .../System/StorageSystemMutations.cpp | 123 ++++++++++++++++++ .../Storages/System/StorageSystemMutations.h | 36 +++++ .../Storages/System/attachSystemTables.cpp | 2 + 9 files changed, 218 insertions(+), 1 deletion(-) create mode 100644 dbms/src/Storages/MergeTree/MergeTreeMutationStatus.h create mode 100644 dbms/src/Storages/System/StorageSystemMutations.cpp create mode 100644 dbms/src/Storages/System/StorageSystemMutations.h diff --git a/dbms/src/Storages/MergeTree/MergeTreeMutationStatus.h b/dbms/src/Storages/MergeTree/MergeTreeMutationStatus.h new file mode 100644 index 00000000000..6df3bf58d20 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeMutationStatus.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +struct MergeTreeMutationStatus +{ + String id; + String command; + time_t create_time = 0; + std::map block_numbers; +}; + +} diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.h index 737270cb024..68aab6fa021 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.h @@ -25,7 +25,7 @@ struct ReplicatedMergeTreeMutationEntry time_t create_time = 0; String source_replica; - std::unordered_map block_numbers; + std::map block_numbers; MutationCommands commands; }; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index b2349ea83a6..1c468f859c1 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1043,6 +1043,33 @@ void ReplicatedMergeTreeQueue::getInsertTimes(time_t & out_min_unprocessed_inser } +std::vector ReplicatedMergeTreeQueue::getMutationsStatus() const +{ + std::lock_guard lock(target_state_mutex); + + std::vector result; + for (const auto & pair : mutations_by_znode) + { + const ReplicatedMergeTreeMutationEntry & entry = *pair.second; + + for (const MutationCommand & command : entry.commands) + { + std::stringstream ss; + formatAST(*command.ast, ss, false, true); + result.push_back(MergeTreeMutationStatus + { + entry.znode_name, + ss.str(), + entry.create_time, + entry.block_numbers, + }); + } + } + + return result; +} + + ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( ReplicatedMergeTreeQueue & queue_, zkutil::ZooKeeperPtr & zookeeper) : queue(queue_) @@ -1346,4 +1373,5 @@ String padIndex(Int64 index) String index_str = toString(index); return std::string(10 - index_str.size(), '0') + index_str; } + } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 73bd6cc8feb..5a6a99e7da3 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -304,6 +305,8 @@ public: /// Get 
information about the insertion times. void getInsertTimes(time_t & out_min_unprocessed_insert_time, time_t & out_max_processed_insert_time) const; + + std::vector getMutationsStatus() const; }; class ReplicatedMergeTreeMergePredicate diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 702cc81a7ec..837da9625e8 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -4050,6 +4050,11 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, const } } +std::vector StorageReplicatedMergeTree::getMutationsStatus() const +{ + return queue.getMutationsStatus(); +} + void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK() { diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index bb9d0056a50..b8b670fbdd2 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -125,6 +125,8 @@ public: void mutate(const MutationCommands & commands, const Context & context) override; + std::vector getMutationsStatus() const; + /** Removes a replica from ZooKeeper. If there are no other replicas, it deletes the entire table from ZooKeeper. */ void drop() override; diff --git a/dbms/src/Storages/System/StorageSystemMutations.cpp b/dbms/src/Storages/System/StorageSystemMutations.cpp new file mode 100644 index 00000000000..90fce84d709 --- /dev/null +++ b/dbms/src/Storages/System/StorageSystemMutations.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +StorageSystemMutations::StorageSystemMutations(const std::string & name_) + : name(name_) +{ + setColumns(ColumnsDescription({ + { "database", std::make_shared() }, + { "table", std::make_shared() }, + { "mutation_id", std::make_shared() }, + { "command", std::make_shared() }, + { "create_time", std::make_shared() }, + { "block_numbers.partition_id", std::make_shared( + std::make_shared()) }, + { "block_numbers.number", std::make_shared( + std::make_shared()) }, + })); +} + + +BlockInputStreams StorageSystemMutations::read( + const Names & column_names, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum & processed_stage, + const size_t /*max_block_size*/, + const unsigned /*num_streams*/) +{ + check(column_names); + processed_stage = QueryProcessingStage::FetchColumns; + + /// Collect a set of replicated tables. + std::map> replicated_tables; + for (const auto & db : context.getDatabases()) + for (auto iterator = db.second->getIterator(context); iterator->isValid(); iterator->next()) + if (dynamic_cast(iterator->table().get())) + replicated_tables[db.first][iterator->name()] = iterator->table(); + + MutableColumnPtr col_database_mut = ColumnString::create(); + MutableColumnPtr col_table_mut = ColumnString::create(); + + for (auto & db : replicated_tables) + { + for (auto & table : db.second) + { + col_database_mut->insert(db.first); + col_table_mut->insert(table.first); + } + } + + ColumnPtr col_database = std::move(col_database_mut); + ColumnPtr col_table = std::move(col_table_mut); + + /// Determine what tables are needed by the conditions in the query. 
+ { + Block filtered_block + { + { col_database, std::make_shared(), "database" }, + { col_table, std::make_shared(), "table" }, + }; + + VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + + if (!filtered_block.rows()) + return BlockInputStreams(); + + col_database = filtered_block.getByName("database").column; + col_table = filtered_block.getByName("table").column; + } + + MutableColumns res_columns = getSampleBlock().cloneEmptyColumns(); + for (size_t i_storage = 0; i_storage < col_database->size(); ++i_storage) + { + auto database = (*col_database)[i_storage].safeGet(); + auto table = (*col_table)[i_storage].safeGet(); + + std::vector states = + dynamic_cast(*replicated_tables[database][table]) + .getMutationsStatus(); + + for (const MergeTreeMutationStatus & status : states) + { + Array block_partition_ids; + block_partition_ids.reserve(status.block_numbers.size()); + Array block_numbers; + block_numbers.reserve(status.block_numbers.size()); + for (const auto & pair : status.block_numbers) + { + block_partition_ids.emplace_back(pair.first); + block_numbers.emplace_back(pair.second); + } + + size_t col_num = 0; + res_columns[col_num++]->insert(database); + res_columns[col_num++]->insert(table); + + res_columns[col_num++]->insert(status.id); + res_columns[col_num++]->insert(status.command); + res_columns[col_num++]->insert(UInt64(status.create_time)); + res_columns[col_num++]->insert(block_partition_ids); + res_columns[col_num++]->insert(block_numbers); + } + } + + Block res = getSampleBlock().cloneEmpty(); + for (size_t i_col = 0; i_col < res.columns(); ++i_col) + res.getByPosition(i_col).column = std::move(res_columns[i_col]); + + return BlockInputStreams(1, std::make_shared(res)); +} + +} diff --git a/dbms/src/Storages/System/StorageSystemMutations.h b/dbms/src/Storages/System/StorageSystemMutations.h new file mode 100644 index 00000000000..3b82f3f46be --- /dev/null +++ b/dbms/src/Storages/System/StorageSystemMutations.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class Context; + + +/// Implements the `mutations` system table, which provides information about the status of mutations +/// in the MergeTree tables. 
+class StorageSystemMutations : public ext::shared_ptr_helper, public IStorage +{ +public: + String getName() const override { return "SystemMutations"; } + String getTableName() const override { return name; } + + BlockInputStreams read( + const Names & column_names, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum & processed_stage, + size_t max_block_size, + unsigned num_streams) override; + +private: + const String name; + +protected: + StorageSystemMutations(const String & name_); +}; + +} diff --git a/dbms/src/Storages/System/attachSystemTables.cpp b/dbms/src/Storages/System/attachSystemTables.cpp index 93d4809b3c7..705d01fb9c2 100644 --- a/dbms/src/Storages/System/attachSystemTables.cpp +++ b/dbms/src/Storages/System/attachSystemTables.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,7 @@ void attachSystemTablesServer(IDatabase & system_database, bool has_zookeeper) system_database.attachTable("processes", StorageSystemProcesses::create("processes")); system_database.attachTable("metrics", StorageSystemMetrics::create("metrics")); system_database.attachTable("merges", StorageSystemMerges::create("merges")); + system_database.attachTable("mutations", StorageSystemMutations::create("mutations")); system_database.attachTable("replicas", StorageSystemReplicas::create("replicas")); system_database.attachTable("replication_queue", StorageSystemReplicationQueue::create("replication_queue")); system_database.attachTable("dictionaries", StorageSystemDictionaries::create("dictionaries")); From b60a2a90fab820a7bcbec6f5dc85df4b3a77b041 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 6 Jun 2018 22:15:10 +0300 Subject: [PATCH 036/151] rename method for clarity --- dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 4 ++-- dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 1c468f859c1..50f2b9c3ffa 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -195,7 +195,7 @@ void ReplicatedMergeTreeQueue::updateTimesInZooKeeper( } -void ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, LogEntryPtr & entry) +void ReplicatedMergeTreeQueue::removeProcessedEntry(zkutil::ZooKeeperPtr zookeeper, LogEntryPtr & entry) { auto code = zookeeper->tryRemove(replica_path + "/queue/" + entry->znode_name); @@ -891,7 +891,7 @@ bool ReplicatedMergeTreeQueue::processEntry( try { if (func(entry)) - remove(get_zookeeper(), entry); + removeProcessedEntry(get_zookeeper(), entry); } catch (...) { diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 5a6a99e7da3..cacdab7c288 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -139,7 +139,7 @@ private: std::lock_guard & target_state_lock, std::lock_guard & queue_lock); - void remove(zkutil::ZooKeeperPtr zookeeper, LogEntryPtr & entry); + void removeProcessedEntry(zkutil::ZooKeeperPtr zookeeper, LogEntryPtr & entry); /** Can I now try this action. If not, you need to leave it in the queue and try another one. * Called under the queue_mutex. 
From a3bf3e6d20030d002415827881a0a17bf3056dc0 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 13 Jun 2018 18:47:40 +0300 Subject: [PATCH 037/151] add missing columns to system.parts [#CLICKHOUSE-3747] --- dbms/src/Storages/System/StorageSystemParts.cpp | 4 ++++ dbms/src/Storages/System/StorageSystemPartsColumns.cpp | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/dbms/src/Storages/System/StorageSystemParts.cpp b/dbms/src/Storages/System/StorageSystemParts.cpp index b2f3d2208c7..a5fe5e5285d 100644 --- a/dbms/src/Storages/System/StorageSystemParts.cpp +++ b/dbms/src/Storages/System/StorageSystemParts.cpp @@ -31,9 +31,11 @@ StorageSystemParts::StorageSystemParts(const std::string & name) {"refcount", std::make_shared()}, {"min_date", std::make_shared()}, {"max_date", std::make_shared()}, + {"partition_id", std::make_shared()}, {"min_block_number", std::make_shared()}, {"max_block_number", std::make_shared()}, {"level", std::make_shared()}, + {"data_version", std::make_shared()}, {"primary_key_bytes_in_memory", std::make_shared()}, {"primary_key_bytes_in_memory_allocated", std::make_shared()}, @@ -80,9 +82,11 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns, const Stor columns[i++]->insert(static_cast(part->getMinDate())); columns[i++]->insert(static_cast(part->getMaxDate())); + columns[i++]->insert(part->info.partition_id); columns[i++]->insert(part->info.min_block); columns[i++]->insert(part->info.max_block); columns[i++]->insert(static_cast(part->info.level)); + columns[i++]->insert(static_cast(part->info.getDataVersion())); columns[i++]->insert(static_cast(part->getIndexSizeInBytes())); columns[i++]->insert(static_cast(part->getIndexSizeInAllocatedBytes())); diff --git a/dbms/src/Storages/System/StorageSystemPartsColumns.cpp b/dbms/src/Storages/System/StorageSystemPartsColumns.cpp index 921b32b6c0a..964c256b492 100644 --- a/dbms/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/dbms/src/Storages/System/StorageSystemPartsColumns.cpp @@ -32,9 +32,11 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const std::string & name) {"refcount", std::make_shared()}, {"min_date", std::make_shared()}, {"max_date", std::make_shared()}, + {"partition_id", std::make_shared()}, {"min_block_number", std::make_shared()}, {"max_block_number", std::make_shared()}, {"level", std::make_shared()}, + {"data_version", std::make_shared()}, {"primary_key_bytes_in_memory", std::make_shared()}, {"primary_key_bytes_in_memory_allocated", std::make_shared()}, @@ -123,9 +125,11 @@ void StorageSystemPartsColumns::processNextStorage(MutableColumns & columns, con columns[j++]->insert(static_cast(min_date)); columns[j++]->insert(static_cast(max_date)); + columns[j++]->insert(part->info.partition_id); columns[j++]->insert(part->info.min_block); columns[j++]->insert(part->info.max_block); columns[j++]->insert(static_cast(part->info.level)); + columns[j++]->insert(static_cast(part->info.getDataVersion())); columns[j++]->insert(static_cast(index_size_in_bytes)); columns[j++]->insert(static_cast(index_size_in_allocated_bytes)); From f546169a4dced9b3883d76c8ccf272660517c9c7 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 13 Jun 2018 21:51:56 +0300 Subject: [PATCH 038/151] Update StorageURL.cpp --- dbms/src/Storages/StorageURL.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 2a0b6dfc9fe..a88d8d9e6d0 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -68,6 
+68,7 @@ namespace { return reader->getHeader(); } + void readPrefixImpl() override { reader->readPrefix(); From 931ccf988f86d637ce2c0c66c38d2036986c5e24 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 13 Jun 2018 21:52:17 +0300 Subject: [PATCH 039/151] Update TableFunctionURL.h --- dbms/src/TableFunctions/TableFunctionURL.h | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionURL.h b/dbms/src/TableFunctions/TableFunctionURL.h index edc9a9fa4c8..6382beee836 100644 --- a/dbms/src/TableFunctions/TableFunctionURL.h +++ b/dbms/src/TableFunctions/TableFunctionURL.h @@ -8,7 +8,6 @@ namespace DB { /* url(source, format, structure) - creates a temporary storage from url - * */ class TableFunctionURL : public ITableFunctionFileLike { From ca883a699d83c15b87cf06ce4a981b28759cbc90 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 13 Jun 2018 21:54:32 +0300 Subject: [PATCH 040/151] Update StorageURL.cpp --- dbms/src/Storages/StorageURL.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index a88d8d9e6d0..9c5a6031b50 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -52,8 +52,6 @@ namespace reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size); } - ~StorageURLBlockInputStream() override {} - String getName() const override { return name; @@ -97,8 +95,6 @@ namespace { } - ~StorageURLBlockOutputStream() {} - Block getHeader() const override { return sample_block; @@ -106,7 +102,8 @@ namespace void write(const Block & block) override { - ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [&](std::ostream & ostr) { + ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [&](std::ostream & ostr) + { WriteBufferFromOStream out_buffer(ostr); auto writer = FormatFactory::instance().getOutput(format, out_buffer, sample_block, global_context); writer->writePrefix(); @@ -132,7 +129,8 @@ BlockInputStreams StorageURL::read(const Names & /*column_names*/, size_t max_block_size, unsigned /*num_streams*/) { - return {std::make_shared(uri, + return {std::make_shared( + uri, format_name, getName(), getSampleBlock(), @@ -148,9 +146,11 @@ BlockOutputStreamPtr StorageURL::write(const ASTPtr & /*query*/, const Settings return std::make_shared( uri, format_name, getSampleBlock(), context_global, ConnectionTimeouts::getHTTPTimeouts(context_global.getSettingsRef())); } + void registerStorageURL(StorageFactory & factory) { - factory.registerStorage("URL", [](const StorageFactory::Arguments & args) { + factory.registerStorage("URL", [](const StorageFactory::Arguments & args) + { ASTs & engine_args = args.engine_args; if (!(engine_args.size() == 1 || engine_args.size() == 2)) From cedf7a309e2a55fe0946929462bcc7688712f0df Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Jun 2018 22:24:54 +0300 Subject: [PATCH 041/151] Use zookeeper session timeout from zookeeper server. 
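The handshake patch below matters because a ZooKeeper server may grant a session timeout different from the one the client requested; the old code treated that mismatch as a marshalling error, whereas the client now continues with the server's value. In outline, with illustrative variable names rather than the real fields:

    #include <cstdint>
    #include <iostream>

    int main()
    {
        int64_t requested_ms = 30000;    /// timeout the client proposed in the handshake
        int64_t granted_ms = 20000;      /// timeout the server actually returned

        /// Old behaviour: if (granted_ms != requested_ms) throw ZMARSHALLINGERROR.
        /// New behaviour: adopt the negotiated value and carry on.
        int64_t session_timeout_ms = granted_ms;

        std::cout << "session timeout: " << session_timeout_ms << " ms\n";
        return 0;
    }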
--- dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 0d8bc8de725..251235654b1 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -677,7 +677,8 @@ void ZooKeeper::receiveHandshake() read(timeout); if (timeout != session_timeout.totalMilliseconds()) - throw Exception("Received different session timeout from server: " + toString(timeout), ZMARSHALLINGERROR); + /// Use timeout from server. + session_timeout = timeout * Poco::Timespan::MILLISECONDS; read(session_id); read(passwd); From 95e4bcdb56dc793a858ad0fb0868a6ed5adb32f6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Jun 2018 22:42:04 +0300 Subject: [PATCH 042/151] Fixed non-deterministic test #2352 --- .../queries/0_stateless/00632_aggregation_window_funnel.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00632_aggregation_window_funnel.sql b/dbms/tests/queries/0_stateless/00632_aggregation_window_funnel.sql index 338c201e59d..e985956d272 100644 --- a/dbms/tests/queries/0_stateless/00632_aggregation_window_funnel.sql +++ b/dbms/tests/queries/0_stateless/00632_aggregation_window_funnel.sql @@ -18,7 +18,7 @@ select 5 = windowFunnel(4)(timestamp, event = 1003, event = 1004, event = 1005, drop table if exists funnel_test2; create table funnel_test2 (uid UInt32 default 1,timestamp DateTime, event UInt32) engine=Memory; -insert into funnel_test2(timestamp, event) values (now() + 1,1001),(now() + 2,1002),(now() + 3,1003),(now() + 4,1004),(now() + 5,1005),(now() + 6,1006),(now() + 7,1007),(now() + 8,1008); +insert into funnel_test2(timestamp, event) values ('2018-01-01 01:01:01',1001),('2018-01-01 01:01:02',1002),('2018-01-01 01:01:03',1003),('2018-01-01 01:01:04',1004),('2018-01-01 01:01:05',1005),('2018-01-01 01:01:06',1006),('2018-01-01 01:01:07',1007),('2018-01-01 01:01:08',1008); select 5 = windowFunnel(4)(timestamp, event = 1003, event = 1004, event = 1005, event = 1006, event = 1007) from funnel_test2; @@ -27,4 +27,4 @@ select 1 = windowFunnel(10000)(timestamp, event = 1008, event = 1001) from funne select 5 = windowFunnel(4)(timestamp, event = 1003, event = 1004, event = 1005, event = 1006, event = 1007) from funnel_test2; drop table funnel_test; -drop table funnel_test2; \ No newline at end of file +drop table funnel_test2; From c1910fef377e01114e94f5b3e3c68e262fa0c40a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 13 Jun 2018 23:00:10 +0300 Subject: [PATCH 043/151] Update ASTAlterQuery.h --- dbms/src/Parsers/ASTAlterQuery.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index a97503305f5..683d0780876 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -54,7 +54,7 @@ public: */ ASTPtr primary_key; - /** Used in DROP PARTITION, RESHARD PARTITION and ATTACH PARTITION FROM queries. + /** Used in DROP PARTITION and ATTACH PARTITION FROM queries. * The value or ID of the partition is stored here. */ ASTPtr partition; @@ -66,8 +66,6 @@ public: bool part = false; /// true for ATTACH PART - bool do_copy = false; /// for RESHARD PARTITION - bool clear_column = false; /// for CLEAR COLUMN (do not drop column from metadata) /** For FETCH PARTITION - the path in ZK to the shard, from which to download the partition. 
From 1e60b733f4d5d928d0d202643a2ea50ec3901148 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Jun 2018 23:02:27 +0300 Subject: [PATCH 044/151] Fixed warning #2504 --- dbms/src/Storages/IStorage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 174286b1bc1..c580bd1f749 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -43,7 +43,7 @@ using StorageWeakPtr = std::weak_ptr; struct Settings; class AlterCommands; -struct MutationCommands; +class MutationCommands; /** Does not allow changing the table description (including rename and delete the table). From 94afe26d4cecf4d1e309a8e0fd878ca610505f18 Mon Sep 17 00:00:00 2001 From: KochetovNicolai Date: Thu, 14 Jun 2018 14:07:57 +0300 Subject: [PATCH 045/151] Update summingmergetree.md Added linefeed. --- docs/ru/table_engines/summingmergetree.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/ru/table_engines/summingmergetree.md b/docs/ru/table_engines/summingmergetree.md index da49ae0b257..6b3302a6017 100644 --- a/docs/ru/table_engines/summingmergetree.md +++ b/docs/ru/table_engines/summingmergetree.md @@ -24,7 +24,9 @@ SummingMergeTree(EventDate, (OrderID, EventDate, BannerID, ...), 8192, (Shows, C Если название вложенной таблицы заканчивается на Map и она содержит не менее двух столбцов, удовлетворяющих следующим критериям: - первый столбец - числовой ((U)IntN, Date, DateTime), назовем его условно key, -- остальные столбцы - арифметические ((U)IntN, Float32/64), условно (values...), то такая вложенная таблица воспринимается как отображение key =`>` (values...) и при слиянии ее строк выполняется слияние элементов двух множеств по key со сложением соответствующих (values...). +- остальные столбцы - арифметические ((U)IntN, Float32/64), условно (values...), + +то такая вложенная таблица воспринимается как отображение key `=>` (values...) и при слиянии ее строк выполняется слияние элементов двух множеств по key со сложением соответствующих (values...). Примеры: From 29c7c36957a19f57d33a6d464ff5f3a55618aca0 Mon Sep 17 00:00:00 2001 From: KochetovNicolai Date: Thu, 14 Jun 2018 14:09:28 +0300 Subject: [PATCH 046/151] Update summingmergetree.md Added linefeed. --- docs/en/table_engines/summingmergetree.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/en/table_engines/summingmergetree.md b/docs/en/table_engines/summingmergetree.md index 19f690ef9c1..32f97cfb744 100644 --- a/docs/en/table_engines/summingmergetree.md +++ b/docs/en/table_engines/summingmergetree.md @@ -24,7 +24,9 @@ In addition, a table can have nested data structures that are processed in a spe If the name of a nested table ends in 'Map' and it contains at least two columns that meet the following criteria: - The first table is numeric ((U)IntN, Date, DateTime), which we'll refer to as the 'key'. -- The other columns are arithmetic ((U)IntN, Float32/64), which we'll refer to as '(values...)'. Then this nested table is interpreted as a mapping of key =`>` (values...), and when merging its rows, the elements of two data sets are merged by 'key' with a summation of the corresponding (values...). +- The other columns are arithmetic ((U)IntN, Float32/64), which we'll refer to as '(values...)'. + +Then this nested table is interpreted as a mapping of key `=>` (values...), and when merging its rows, the elements of two data sets are merged by 'key' with a summation of the corresponding (values...). 
 Examples:

From 3178f57f54c2b2cd335af051488f72aaa0aacdbb Mon Sep 17 00:00:00 2001
From: ezhaka
Date: Thu, 14 Jun 2018 19:29:09 +0300
Subject: [PATCH 047/151] Do not use exact chown path

We ran into the problem that chown is located at ``/bin/chown`` instead of
``/usr/bin/chown``. We worked around it with a symlink, but it would be
better to fix the unit file itself.

---
 debian/clickhouse-server.service | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/debian/clickhouse-server.service b/debian/clickhouse-server.service
index b91de6f56eb..b2e041fce86 100644
--- a/debian/clickhouse-server.service
+++ b/debian/clickhouse-server.service
@@ -8,7 +8,7 @@ Group=clickhouse
 PermissionsStartOnly=true
 Restart=always
 RestartSec=30
-ExecStartPre=/usr/bin/chown clickhouse:clickhouse -R /etc/clickhouse-server
+ExecStartPre=chown clickhouse:clickhouse -R /etc/clickhouse-server
 ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml
 LimitCORE=infinity
 LimitNOFILE=500000

From 27c6df7e208c5ce12b83569755d9847b56d1a8f8 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 14 Jun 2018 22:15:35 +0300
Subject: [PATCH 048/151] Trim exit string before checking.

#2510
---
 dbms/programs/client/Client.cpp             | 2 +-
 dbms/src/Common/StringUtils/StringUtils.cpp | 8 ++++++++
 dbms/src/Common/StringUtils/StringUtils.h   | 1 +
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp
index 748bcd49838..b056f82d1a7 100644
--- a/dbms/programs/client/Client.cpp
+++ b/dbms/programs/client/Client.cpp
@@ -731,7 +731,7 @@ private:

     bool processSingleQuery(const String & line, ASTPtr parsed_query_ = nullptr)
     {
-        if (exit_strings.end() != exit_strings.find(line))
+        if (exit_strings.end() != exit_strings.find(trim(line, [](char c){ return isWhitespaceASCII(c) || c == ';'; })))
             return false;

         resetOutput();
diff --git a/dbms/src/Common/StringUtils/StringUtils.cpp b/dbms/src/Common/StringUtils/StringUtils.cpp
index 676b00ce9ac..572905d21dc 100644
--- a/dbms/src/Common/StringUtils/StringUtils.cpp
+++ b/dbms/src/Common/StringUtils/StringUtils.cpp
@@ -1,4 +1,5 @@
 #include "StringUtils.h"
+#include

 namespace detail
 {
@@ -14,3 +15,10 @@ bool endsWith(const std::string & s, const char * suffix, size_t suffix_size)
 }

 }
+
+std::string trim(const std::string & str, const std::function<bool(char)> & predicate)
+{
+    std::string trimmed = str;
+    boost::trim_if(trimmed, predicate);
+    return trimmed;
+}
diff --git a/dbms/src/Common/StringUtils/StringUtils.h b/dbms/src/Common/StringUtils/StringUtils.h
index 8043b88a89e..ed34150930b 100644
--- a/dbms/src/Common/StringUtils/StringUtils.h
+++ b/dbms/src/Common/StringUtils/StringUtils.h
@@ -133,3 +133,4 @@ inline bool equalsCaseInsensitive(char a, char b)
     return a == b || (isAlphaASCII(a) && alternateCaseIfAlphaASCII(a) == b);
 }

+std::string trim(const std::string & str, const std::function<bool(char)> & predicate);

From fd406f30ada0dc646eaec7a434dde18796a00772 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 14 Jun 2018 22:21:35 +0300
Subject: [PATCH 049/151] Trim exit string before checking.
#2510 --- dbms/src/Common/StringUtils/StringUtils.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Common/StringUtils/StringUtils.h b/dbms/src/Common/StringUtils/StringUtils.h index ed34150930b..49c1c2d40a2 100644 --- a/dbms/src/Common/StringUtils/StringUtils.h +++ b/dbms/src/Common/StringUtils/StringUtils.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace detail From a2fdda4231be0877e428f8c4fdaa50924d6fb1c0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 14 Jun 2018 22:54:11 +0300 Subject: [PATCH 050/151] Fixed warning on platforms where char is unsigned [#CLICKHOUSE-2] --- dbms/src/Common/StringUtils/StringUtils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/StringUtils/StringUtils.h b/dbms/src/Common/StringUtils/StringUtils.h index 8043b88a89e..db7e8cc89f7 100644 --- a/dbms/src/Common/StringUtils/StringUtils.h +++ b/dbms/src/Common/StringUtils/StringUtils.h @@ -109,7 +109,7 @@ inline bool isWhitespaceASCII(char c) inline bool isControlASCII(char c) { - return c >= 0 && c <= 31; + return static_cast(c) <= 31; } /// Works assuming isAlphaASCII. From 86053c754f9354cdf1469cec8441c285ed14ba0a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 14 Jun 2018 22:54:11 +0300 Subject: [PATCH 051/151] Fixed warning on platforms where char is unsigned [#CLICKHOUSE-2] --- dbms/src/Common/StringUtils/StringUtils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/StringUtils/StringUtils.h b/dbms/src/Common/StringUtils/StringUtils.h index 8043b88a89e..db7e8cc89f7 100644 --- a/dbms/src/Common/StringUtils/StringUtils.h +++ b/dbms/src/Common/StringUtils/StringUtils.h @@ -109,7 +109,7 @@ inline bool isWhitespaceASCII(char c) inline bool isControlASCII(char c) { - return c >= 0 && c <= 31; + return static_cast(c) <= 31; } /// Works assuming isAlphaASCII. From e27af42041ad9bd22fa9715e4fe903483fe09445 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 14 Jun 2018 23:04:48 +0300 Subject: [PATCH 052/151] Fallback for non x86 [#CLICKHOUSE-2] --- dbms/src/IO/LZ4_decompress_faster.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/IO/LZ4_decompress_faster.cpp b/dbms/src/IO/LZ4_decompress_faster.cpp index fd52168982f..2a8acb5444c 100644 --- a/dbms/src/IO/LZ4_decompress_faster.cpp +++ b/dbms/src/IO/LZ4_decompress_faster.cpp @@ -391,8 +391,8 @@ void decompress( if (source_size == 0 || dest_size == 0) return; +#if __SSE2__ /// Don't run timer if the block is too small. 
- if (dest_size >= 32768) { size_t best_variant = statistics.select(); @@ -420,6 +420,9 @@ void decompress( { decompressImpl<8, false>(source, dest, dest_size); } +#else + decompressImpl<8, false>(source, dest, dest_size); +#endif } From 177251790b501d40b55be1d22e38cabec0d80193 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 14 Jun 2018 23:05:20 +0300 Subject: [PATCH 053/151] Fallback for non x86 [#CLICKHOUSE-2] --- dbms/src/IO/LZ4_decompress_faster.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/IO/LZ4_decompress_faster.cpp b/dbms/src/IO/LZ4_decompress_faster.cpp index 2a8acb5444c..6e661c1e408 100644 --- a/dbms/src/IO/LZ4_decompress_faster.cpp +++ b/dbms/src/IO/LZ4_decompress_faster.cpp @@ -386,7 +386,7 @@ void decompress( char * const dest, size_t source_size, size_t dest_size, - PerformanceStatistics & statistics) + PerformanceStatistics & [[maybe_unused]] statistics) { if (source_size == 0 || dest_size == 0) return; From cc4f896a9d793a9f61bd4a39df4f282d82ca875a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 14 Jun 2018 23:05:52 +0300 Subject: [PATCH 054/151] Fallback for non x86 [#CLICKHOUSE-2] --- dbms/src/IO/LZ4_decompress_faster.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/IO/LZ4_decompress_faster.cpp b/dbms/src/IO/LZ4_decompress_faster.cpp index 6e661c1e408..a5bcffd50e6 100644 --- a/dbms/src/IO/LZ4_decompress_faster.cpp +++ b/dbms/src/IO/LZ4_decompress_faster.cpp @@ -386,7 +386,7 @@ void decompress( char * const dest, size_t source_size, size_t dest_size, - PerformanceStatistics & [[maybe_unused]] statistics) + PerformanceStatistics & statistics [[maybe_unused]]) { if (source_size == 0 || dest_size == 0) return; From af5875960f371315f03b006765c8bd87fed95e18 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 14 Jun 2018 23:09:05 +0300 Subject: [PATCH 055/151] Fallback for non x86 [#CLICKHOUSE-2] --- contrib/capnproto | 2 +- contrib/librdkafka | 2 +- contrib/poco | 2 +- contrib/zstd | 2 +- dbms/src/Common/tests/int_hashes_perf.cpp | 14 ++++++-------- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/contrib/capnproto b/contrib/capnproto index 7173ab638fd..c949a18da5f 160000 --- a/contrib/capnproto +++ b/contrib/capnproto @@ -1 +1 @@ -Subproject commit 7173ab638fdf144032411dc69fb1082cd473e08f +Subproject commit c949a18da5f041a36cc218c5c4b79c7705999b4f diff --git a/contrib/librdkafka b/contrib/librdkafka index 7478b5ef16a..c3d50eb6137 160000 --- a/contrib/librdkafka +++ b/contrib/librdkafka @@ -1 +1 @@ -Subproject commit 7478b5ef16aadd6543fe38bc6a2deb895c70da98 +Subproject commit c3d50eb613704fb9c8ab3bce95a88275cb5875b7 diff --git a/contrib/poco b/contrib/poco index 3a2d0a833a2..81d4fdfcb88 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 3a2d0a833a22ef5e1164a9ada54e3253cb038904 +Subproject commit 81d4fdfcb887f89b0f7b1e9b503cbe63e6d8366b diff --git a/contrib/zstd b/contrib/zstd index 255597502c3..f4340f46b23 160000 --- a/contrib/zstd +++ b/contrib/zstd @@ -1 +1 @@ -Subproject commit 255597502c3a4ef150abc964e376d4202a8c2929 +Subproject commit f4340f46b2387bc8de7d5320c0b83bb1499933ad diff --git a/dbms/src/Common/tests/int_hashes_perf.cpp b/dbms/src/Common/tests/int_hashes_perf.cpp index 71263955678..1174f27a4a4 100644 --- a/dbms/src/Common/tests/int_hashes_perf.cpp +++ b/dbms/src/Common/tests/int_hashes_perf.cpp @@ -113,15 +113,15 @@ static inline size_t murmurMix(UInt64 x) return x; } + +#if __x86_64__ static inline size_t crc32Hash(UInt64 x) { UInt64 
crc = -1ULL; -#if __x86_64__ asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x)); -#endif return crc; } - +#endif static inline size_t mulShift(UInt64 x) { @@ -279,11 +279,6 @@ static inline void test(size_t n, const UInt64 * data, const char * name) int main(int argc, char ** argv) { - -#if !__x86_64__ - std::cerr << "Only for x86_64 arch" << std::endl; -#endif - const size_t BUF_SIZE = 1024; size_t n = (atoi(argv[1]) + (BUF_SIZE - 1)) / BUF_SIZE * BUF_SIZE; @@ -321,7 +316,10 @@ int main(int argc, char ** argv) if (!method || method == 7) test (n, &data[0], "6: murmur64 mixer"); if (!method || method == 8) test (n, &data[0], "7: mulShift"); if (!method || method == 9) test(n, &data[0], "8: tabulation"); + +#if __x86_64__ if (!method || method == 10) test (n, &data[0], "9: crc32"); +#endif return 0; } From c9789f985ac15cf23729903d4db9cfd8945502ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 14 Jun 2018 23:16:57 +0300 Subject: [PATCH 056/151] Fallback for non x86 [#CLICKHOUSE-2] --- dbms/src/Interpreters/tests/hash_map_string.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/tests/hash_map_string.cpp b/dbms/src/Interpreters/tests/hash_map_string.cpp index c5454db77b9..e81a177288e 100644 --- a/dbms/src/Interpreters/tests/hash_map_string.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string.cpp @@ -128,11 +128,11 @@ struct FastHash64 }; +#if __x86_64__ struct CrapWow { size_t operator() (CompactStringRef x) const { -#if __x86_64__ const char * key = x.data(); size_t len = x.size; size_t seed = 0; @@ -196,11 +196,9 @@ struct CrapWow : "%r12", "%r13", "%r14", "%r15", "cc" ); return hash; -#else - return 0; -#endif } }; +#endif struct SimpleHash @@ -380,6 +378,7 @@ int main(int argc, char ** argv) << std::endl; } +#if __x86_64__ if (!m || m == 3) { Stopwatch watch; @@ -408,6 +407,7 @@ int main(int argc, char ** argv) #endif << std::endl; } +#endif if (!m || m == 4) { From d4aa1dffa2a43212aaa8812f501c56cbab9487ed Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 00:13:13 +0300 Subject: [PATCH 057/151] Attempt to use ARM Neon [#CLICKHOUSE-2] --- dbms/src/IO/LZ4_decompress_faster.cpp | 84 +++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/dbms/src/IO/LZ4_decompress_faster.cpp b/dbms/src/IO/LZ4_decompress_faster.cpp index a5bcffd50e6..62bd487bd61 100644 --- a/dbms/src/IO/LZ4_decompress_faster.cpp +++ b/dbms/src/IO/LZ4_decompress_faster.cpp @@ -19,9 +19,10 @@ #include #endif +#if __aarch64__ +#include +#endif -/** for i in *.bin; do ./decompress_perf < $i > /dev/null; done - */ namespace LZ4 { @@ -70,6 +71,8 @@ inline void copyOverlap8(UInt8 * op, const UInt8 *& match, const size_t offset) } +#ifdef __x86_64__ + /** We use 'xmm' (128bit SSE) registers here to shuffle 16 bytes. * * It is possible to use 'mm' (64bit MMX) registers to shuffle just 8 bytes as we need. 
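
[Editorial note: the shuffle tables added below encode, for each overlap
offset N, the byte pattern i % N, so a single vector table-lookup extends a
repeating N-byte pattern; row 0 is not a mask but the amount by which `match`
must advance, which can be any value congruent to the copy size modulo N.
A scalar equivalent of the 8-byte case, as an illustration only:]

    /// What copyOverlap8Shuffle computes, written out byte by byte (1 <= offset <= 7).
    inline void copyOverlap8Scalar(UInt8 * op, const UInt8 *& match, const size_t offset)
    {
        for (size_t i = 0; i < 8; ++i)
            op[i] = match[i % offset];  /// extend the repeating pattern of `offset` bytes

        /// Keep the pattern phase consistent for subsequent copies: any advance
        /// congruent to 8 modulo `offset` works; the tables store one such value.
        match += 8 % offset;
    }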
@@ -176,20 +179,51 @@ inline void copyOverlap8Shuffle(UInt8 * op, const UInt8 *& match, const size_t o #endif } +#endif + + +#ifdef __aarch64__ + +inline void copyOverlap8Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset) +{ + static constexpr UInt8 __attribute__((__aligned__(8))) masks[] = + { + 0, 1, 2, 2, 4, 3, 2, 1, /* offset = 0, not used as mask, but for shift amount instead */ + 0, 0, 0, 0, 0, 0, 0, 0, /* offset = 1 */ + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 2, 0, 1, 2, 0, 1, + 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 4, 0, 1, 2, + 0, 1, 2, 3, 4, 5, 0, 1, + 0, 1, 2, 3, 4, 5, 6, 0, + }; + + unalignedStore(op, vtbl1_u8(unalignedLoad(match), unalignedLoad(masks + 8 * offset))); + match += masks[offset]; +} + +#endif + + + template <> void inline copy<8>(UInt8 * dst, const UInt8 * src) { copy8(dst, src); }; template <> void inline wildCopy<8>(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { wildCopy8(dst, src, dst_end); }; template <> void inline copyOverlap<8, false>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap8(op, match, offset); }; template <> void inline copyOverlap<8, true>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap8Shuffle(op, match, offset); }; -#if __SSE2__ - inline void copy16(UInt8 * dst, const UInt8 * src) { +#if __SSE2__ _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), _mm_loadu_si128(reinterpret_cast(src))); +#else + memcpy(dst, src, 16); +#endif } + + inline void wildCopy16(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { do @@ -226,6 +260,9 @@ inline void copyOverlap16(UInt8 * op, const UInt8 *& match, const size_t offset) match += shift3[offset]; } + +#ifdef __x86_64__ + inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset) { #ifdef __SSSE3__ @@ -262,12 +299,49 @@ inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t #endif } +#endif + +#ifdef __aarch64__ + +inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset) +{ + static constexpr UInt8 __attribute__((__aligned__(16))) masks[] = + { + 0, 1, 2, 1, 4, 1, 4, 2, 8, 7, 6, 5, 4, 3, 2, 1, /* offset = 0, not used as mask, but for shift amount instead */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* offset = 1 */ + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, + 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, + 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 2, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, + }; + + unalignedStore(op, + vtbl2_u8(unalignedLoad(src), unalignedLoad(masks + 16 * offset))); + + unalignedStore(op + 8, + vtbl2_u8(unalignedLoad(src), unalignedLoad(masks + 16 * offset + 8))); + + match += masks[offset]; +} + +#endif + + template <> void inline copy<16>(UInt8 * dst, const UInt8 * src) { copy16(dst, src); }; template <> void inline wildCopy<16>(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { wildCopy16(dst, src, dst_end); }; template <> void inline copyOverlap<16, false>(UInt8 * op, const UInt8 *& match, const size_t offset) { 
copyOverlap16(op, match, offset); }; template <> void inline copyOverlap<16, true>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap16Shuffle(op, match, offset); }; -#endif /// See also https://stackoverflow.com/a/30669632 From 0c467158f9d7c5c67b08597e72e94227a45a563e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 00:14:08 +0300 Subject: [PATCH 058/151] Attempt to use ARM Neon [#CLICKHOUSE-2] --- dbms/src/IO/LZ4_decompress_faster.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/IO/LZ4_decompress_faster.cpp b/dbms/src/IO/LZ4_decompress_faster.cpp index 62bd487bd61..27b222bba6f 100644 --- a/dbms/src/IO/LZ4_decompress_faster.cpp +++ b/dbms/src/IO/LZ4_decompress_faster.cpp @@ -326,10 +326,10 @@ inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t }; unalignedStore(op, - vtbl2_u8(unalignedLoad(src), unalignedLoad(masks + 16 * offset))); + vtbl2_u8(unalignedLoad(match), unalignedLoad(masks + 16 * offset))); unalignedStore(op + 8, - vtbl2_u8(unalignedLoad(src), unalignedLoad(masks + 16 * offset + 8))); + vtbl2_u8(unalignedLoad(match), unalignedLoad(masks + 16 * offset + 8))); match += masks[offset]; } From 148cbe8058b67c1c5f61c34c1f1abe0ee348893a Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 15 Jun 2018 00:20:39 +0300 Subject: [PATCH 059/151] Added WriteBufferFromHTTP and used it in StorageURL. Also small refactoring. --- dbms/src/IO/HTTPCommon.cpp | 51 +++++++++++++++++++ dbms/src/IO/HTTPCommon.h | 19 +++++++ dbms/src/IO/ReadWriteBufferFromHTTP.cpp | 45 ++-------------- dbms/src/IO/ReadWriteBufferFromHTTP.h | 5 -- dbms/src/IO/WriteBufferFromOStream.h | 26 ++++++---- .../Storages/MergeTree/DataPartsExchange.cpp | 1 + dbms/src/Storages/StorageURL.cpp | 39 +++++++------- 7 files changed, 110 insertions(+), 76 deletions(-) diff --git a/dbms/src/IO/HTTPCommon.cpp b/dbms/src/IO/HTTPCommon.cpp index 84ee03b679d..eb81610c17d 100644 --- a/dbms/src/IO/HTTPCommon.cpp +++ b/dbms/src/IO/HTTPCommon.cpp @@ -1,9 +1,12 @@ #include +#include +#include #include #if USE_POCO_NETSSL #include #include +#include #include #include #include @@ -15,6 +18,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER; + extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS; +} + void setResponseDefaultHeaders(Poco::Net::HTTPServerResponse & response, unsigned keep_alive_timeout) { if (!response.getKeepAlive()) @@ -34,4 +43,46 @@ void SSLInit() Poco::Net::initializeSSL(); #endif } + + +std::unique_ptr getPreparedSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts) +{ + bool is_ssl = static_cast(uri.getScheme() == "https"); + std::unique_ptr session( +#if USE_POCO_NETSSL + is_ssl ? 
new Poco::Net::HTTPSClientSession : +#endif + new Poco::Net::HTTPClientSession); + + session->setHost(DNSResolver::instance().resolveHost(uri.getHost()).toString()); + session->setPort(uri.getPort()); + +#if POCO_CLICKHOUSE_PATCH || POCO_VERSION >= 0x02000000 + session->setTimeout(timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout); +#else + session->setTimeout(timeouts.connection_timeout); +#endif + + return session; +} + + +std::istream * makeRequest( + Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response) +{ + auto istr = &session.receiveResponse(response); + auto status = response.getStatus(); + + if (status != Poco::Net::HTTPResponse::HTTP_OK) + { + std::stringstream error_message; + error_message << "Received error from remote server " << request.getURI() << ". HTTP status code: " << status << " " + << response.getReason() << ", body: " << istr->rdbuf(); + + throw Exception(error_message.str(), + status == HTTP_TOO_MANY_REQUESTS ? ErrorCodes::RECEIVED_ERROR_TOO_MANY_REQUESTS + : ErrorCodes::RECEIVED_ERROR_FROM_REMOTE_IO_SERVER); + } + return istr; +} } diff --git a/dbms/src/IO/HTTPCommon.h b/dbms/src/IO/HTTPCommon.h index c7fe7d5e7f8..28bdd86bee4 100644 --- a/dbms/src/IO/HTTPCommon.h +++ b/dbms/src/IO/HTTPCommon.h @@ -1,6 +1,16 @@ #pragma once #include +#include +#include + +#include +#include +#include +#include + + +#include namespace Poco { @@ -14,9 +24,18 @@ namespace Poco namespace DB { + +const int HTTP_TOO_MANY_REQUESTS = 429; + void setResponseDefaultHeaders(Poco::Net::HTTPServerResponse & response, unsigned keep_alive_timeout); extern std::once_flag ssl_init_once; void SSLInit(); +std::unique_ptr getPreparedSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts); + +/* Function makes HTTP-request from prepared structures and returns response istream + * in case of HTTP_OK and throws exception with details in case of not HTTP_OK + */ +std::istream* makeRequest(Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response); } diff --git a/dbms/src/IO/ReadWriteBufferFromHTTP.cpp b/dbms/src/IO/ReadWriteBufferFromHTTP.cpp index dfd3cfbdbde..c707ed5eaf0 100644 --- a/dbms/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/dbms/src/IO/ReadWriteBufferFromHTTP.cpp @@ -8,19 +8,10 @@ #include #include #include - -#if USE_POCO_NETSSL -#include -#endif - +#include namespace DB { -namespace ErrorCodes -{ - extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER; - extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS; -} ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP(const Poco::URI & uri, @@ -31,25 +22,8 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP(const Poco::URI & uri, : ReadBuffer(nullptr, 0), uri{uri}, method{!method_.empty() ? method_ : out_stream_callback ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET}, - timeouts{timeouts}, - is_ssl{uri.getScheme() == "https"}, - session + session{getPreparedSession(uri, timeouts)} { - std::unique_ptr( -#if USE_POCO_NETSSL - is_ssl ? 
new Poco::Net::HTTPSClientSession : -#endif - new Poco::Net::HTTPClientSession) -} -{ - session->setHost(DNSResolver::instance().resolveHost(uri.getHost()).toString()); - session->setPort(uri.getPort()); - -#if POCO_CLICKHOUSE_PATCH || POCO_VERSION >= 0x02000000 - session->setTimeout(timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout); -#else - session->setTimeout(timeouts.connection_timeout); -#endif Poco::Net::HTTPRequest request(method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); request.setHost(uri.getHost()); // use original, not resolved host name in header @@ -66,20 +40,7 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP(const Poco::URI & uri, if (out_stream_callback) out_stream_callback(stream_out); - istr = &session->receiveResponse(response); - - auto status = response.getStatus(); - - if (status != Poco::Net::HTTPResponse::HTTP_OK) - { - std::stringstream error_message; - error_message << "Received error from remote server " << uri.toString() << ". HTTP status code: " << status << " " - << response.getReason() << ", body: " << istr->rdbuf(); - - throw Exception(error_message.str(), - status == HTTP_TOO_MANY_REQUESTS ? ErrorCodes::RECEIVED_ERROR_TOO_MANY_REQUESTS - : ErrorCodes::RECEIVED_ERROR_FROM_REMOTE_IO_SERVER); - } + istr = makeRequest(*session, request, response); impl = std::make_unique(*istr, buffer_size_); } diff --git a/dbms/src/IO/ReadWriteBufferFromHTTP.h b/dbms/src/IO/ReadWriteBufferFromHTTP.h index 88230ac2079..93a8232f93d 100644 --- a/dbms/src/IO/ReadWriteBufferFromHTTP.h +++ b/dbms/src/IO/ReadWriteBufferFromHTTP.h @@ -12,9 +12,6 @@ namespace DB { -const int HTTP_TOO_MANY_REQUESTS = 429; - - /** Perform HTTP POST request and provide response to read. */ class ReadWriteBufferFromHTTP : public ReadBuffer @@ -22,9 +19,7 @@ class ReadWriteBufferFromHTTP : public ReadBuffer private: Poco::URI uri; std::string method; - ConnectionTimeouts timeouts; - bool is_ssl; std::unique_ptr session; std::istream * istr; /// owned by session std::unique_ptr impl; diff --git a/dbms/src/IO/WriteBufferFromOStream.h b/dbms/src/IO/WriteBufferFromOStream.h index b5a5cbf62f6..355e4fb7e46 100644 --- a/dbms/src/IO/WriteBufferFromOStream.h +++ b/dbms/src/IO/WriteBufferFromOStream.h @@ -18,28 +18,34 @@ namespace ErrorCodes class WriteBufferFromOStream : public BufferWithOwnMemory { -private: - std::ostream & ostr; +protected: + std::ostream * ostr; void nextImpl() override { if (!offset()) return; - ostr.write(working_buffer.begin(), offset()); - ostr.flush(); + ostr->write(working_buffer.begin(), offset()); + ostr->flush(); - if (!ostr.good()) + if (!ostr->good()) throw Exception("Cannot write to ostream", ErrorCodes::CANNOT_WRITE_TO_OSTREAM); } + WriteBufferFromOStream(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) + : BufferWithOwnMemory(size, existing_memory, alignment) + { + } + + public: WriteBufferFromOStream( - std::ostream & ostr_, - size_t size = DBMS_DEFAULT_BUFFER_SIZE, - char * existing_memory = nullptr, - size_t alignment = 0) - : BufferWithOwnMemory(size, existing_memory, alignment), ostr(ostr_) {} + std::ostream & ostr_, + size_t size = DBMS_DEFAULT_BUFFER_SIZE, + char * existing_memory = nullptr, + size_t alignment = 0) + : BufferWithOwnMemory(size, existing_memory, alignment), ostr(&ostr_) {} ~WriteBufferFromOStream() override { diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp index ed6c4388efd..15d1c56b051 100644 --- 
a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 2a0b6dfc9fe..a4d46c1efc9 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include @@ -87,13 +87,15 @@ namespace class StorageURLBlockOutputStream : public IBlockOutputStream { public: - StorageURLBlockOutputStream(const Poco::URI & uri_, - const String & format_, + StorageURLBlockOutputStream(const Poco::URI & uri, + const String & format, const Block & sample_block_, - Context & context_, - const ConnectionTimeouts & timeouts_) - : global_context(context_), uri(uri_), format(format_), sample_block(sample_block_), timeouts(timeouts_) + Context & context, + const ConnectionTimeouts & timeouts) + : sample_block(sample_block_) { + write_buf = std::make_unique(uri, Poco::Net::HTTPRequest::HTTP_POST, timeouts); + writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context); } ~StorageURLBlockOutputStream() {} @@ -105,23 +107,22 @@ namespace void write(const Block & block) override { - ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [&](std::ostream & ostr) { - WriteBufferFromOStream out_buffer(ostr); - auto writer = FormatFactory::instance().getOutput(format, out_buffer, sample_block, global_context); - writer->writePrefix(); - writer->write(block); - writer->writeSuffix(); - writer->flush(); - }; - ReadWriteBufferFromHTTP(uri, Poco::Net::HTTPRequest::HTTP_POST, out_stream_callback, timeouts); // just for request + writer->write(block); + } + void writePrefix() override { + writer->writePrefix(); + } + + void writeSuffix() override { + writer->writeSuffix(); + writer->flush(); + write_buf->finalize(); } private: - Context & global_context; - Poco::URI uri; - String format; Block sample_block; - ConnectionTimeouts timeouts; + std::unique_ptr write_buf; + BlockOutputStreamPtr writer; }; } BlockInputStreams StorageURL::read(const Names & /*column_names*/, From 233906f61b1585c10e95647317994ccc84092614 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 00:58:48 +0300 Subject: [PATCH 060/151] Enabled ARM Neon [#CLICKHOUSE-2] --- dbms/src/IO/LZ4_decompress_faster.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dbms/src/IO/LZ4_decompress_faster.cpp b/dbms/src/IO/LZ4_decompress_faster.cpp index 27b222bba6f..28fe324c9ed 100644 --- a/dbms/src/IO/LZ4_decompress_faster.cpp +++ b/dbms/src/IO/LZ4_decompress_faster.cpp @@ -465,7 +465,6 @@ void decompress( if (source_size == 0 || dest_size == 0) return; -#if __SSE2__ /// Don't run timer if the block is too small. 
if (dest_size >= 32768) { @@ -494,9 +493,6 @@ void decompress( { decompressImpl<8, false>(source, dest, dest_size); } -#else - decompressImpl<8, false>(source, dest, dest_size); -#endif } From b68c68a414ccd2c940d7df21de3d35b7005d5bce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 02:29:29 +0300 Subject: [PATCH 061/151] Fixed build [#CLICKHOUSE-2] --- cmake/find_llvm.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/find_llvm.cmake b/cmake/find_llvm.cmake index 89c5f835ce9..3b04af7be34 100644 --- a/cmake/find_llvm.cmake +++ b/cmake/find_llvm.cmake @@ -31,6 +31,8 @@ if (ENABLE_EMBEDDED_COMPILER) option(LLVM_HAS_RTTI "Enable if LLVM was build with RTTI enabled" ON) set (USE_EMBEDDED_COMPILER 1) + else() + set (USE_EMBEDDED_COMPILER 0) endif() else() set (LLVM_FOUND 1) From 14c16f67ec38756af79288f871bc152251809f48 Mon Sep 17 00:00:00 2001 From: Jason Date: Fri, 15 Jun 2018 09:13:39 +0800 Subject: [PATCH 062/151] Update system.zookeeper add description 'This table presents when ZooKeeper is configured' at first. --- docs/en/system_tables/system.zookeeper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/system_tables/system.zookeeper.md b/docs/en/system_tables/system.zookeeper.md index d20f7620b38..ec3046ebc16 100644 --- a/docs/en/system_tables/system.zookeeper.md +++ b/docs/en/system_tables/system.zookeeper.md @@ -1,6 +1,6 @@ # system.zookeeper -Allows reading data from the ZooKeeper cluster defined in the config. +This table presents when ZooKeeper is configured. It allows reading data from the ZooKeeper cluster defined in the config. The query must have a 'path' equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for. The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node. From 6fd915b5321425289b418a8dc7a8b315429dcd72 Mon Sep 17 00:00:00 2001 From: Jason Date: Fri, 15 Jun 2018 09:15:36 +0800 Subject: [PATCH 063/151] Update system.parts add description 'Low versions must be YYYYMM format, and now it can be any' in partition column description. --- docs/en/system_tables/system.parts.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/system_tables/system.parts.md b/docs/en/system_tables/system.parts.md index 402908e42f2..0fb5aff568d 100644 --- a/docs/en/system_tables/system.parts.md +++ b/docs/en/system_tables/system.parts.md @@ -6,7 +6,7 @@ Each row describes one part of the data. Columns: -- partition (String) – The partition name. YYYYMM format. To learn what a partition is, see the description of the [ALTER](../query_language/queries.md#query_language_queries_alter) query. +- partition (String) – The partition name. Low versions must be YYYYMM format, and now it can be any. To learn what a partition is, see the description of the [ALTER](../query_language/queries.md#query_language_queries_alter) query. - name (String) – Name of the data part. - active (UInt8) – Indicates whether the part is active. If a part is active, it is used in a table; otherwise, it will be deleted. Inactive data parts remain after merging. - marks (UInt64) – The number of marks. To get the approximate number of rows in a data part, multiply ``marks`` by the index granularity (usually 8192). 
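
[Editorial note on PATCH 060 above: with the #if __SSE2__ guard removed, the
timing-based choice between decompression variants now runs on every
platform. The pattern is sketched below in a simplified, hypothetical form;
the actual PerformanceStatistics used by LZ4_decompress_faster is more
elaborate (smoothed estimates with random perturbation), so treat this as an
illustration of the idea only:]

    #include <cstddef>
    #include <random>

    struct VariantStatsSketch
    {
        static constexpr size_t NUM_VARIANTS = 4;
        double bytes[NUM_VARIANTS] = {};
        double nanoseconds[NUM_VARIANTS] = {};
        std::minstd_rand rng{42};

        /// Choose the variant to time on the next sufficiently large block.
        size_t select()
        {
            for (size_t i = 0; i < NUM_VARIANTS; ++i)
                if (nanoseconds[i] == 0)        /// try each variant at least once
                    return i;

            if (rng() % 16 == 0)                /// occasional probe lets a slow variant recover
                return rng() % NUM_VARIANTS;

            size_t best = 0;                    /// otherwise take the best observed throughput
            for (size_t i = 1; i < NUM_VARIANTS; ++i)
                if (bytes[i] / nanoseconds[i] > bytes[best] / nanoseconds[best])
                    best = i;
            return best;
        }

        void update(size_t variant, double bytes_decompressed, double elapsed_ns)
        {
            bytes[variant] += bytes_decompressed;
            nanoseconds[variant] += elapsed_ns;
        }
    };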
From c375814e4448325ef80f36aebae32674ee5c2ad1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 08:21:12 +0300 Subject: [PATCH 064/151] Added support for intHashCRC32 on AArch64 [#CLICKHOUSE-2] --- dbms/src/Common/HashTable/Hash.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dbms/src/Common/HashTable/Hash.h b/dbms/src/Common/HashTable/Hash.h index e3b900d85cf..7e414dbe1af 100644 --- a/dbms/src/Common/HashTable/Hash.h +++ b/dbms/src/Common/HashTable/Hash.h @@ -39,10 +39,17 @@ inline DB::UInt64 intHash64(DB::UInt64 x) #include #endif +#if __aarch64__ +#include +#include +#endif + inline DB::UInt64 intHashCRC32(DB::UInt64 x) { #if __SSE4_2__ return _mm_crc32_u64(-1ULL, x); +#elif __aarch64__ + return __crc32cd(-1ULL, x); #else /// On other platforms we do not have CRC32. NOTE This can be confusing. return intHash64(x); From 1c4eb2c5865d1c171176569920fb8e682fc3ce58 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 08:33:39 +0300 Subject: [PATCH 065/151] Data obfuscator: development [#CLICKHOUSE-2] --- dbms/programs/CMakeLists.txt | 1 + dbms/programs/anonymizer/CMakeLists.txt | 2 + dbms/programs/anonymizer/main.cpp | 674 ++++++++++++++++++++++++ dbms/src/Core/Block.cpp | 31 ++ dbms/src/Core/Block.h | 4 + 5 files changed, 712 insertions(+) create mode 100644 dbms/programs/anonymizer/CMakeLists.txt create mode 100644 dbms/programs/anonymizer/main.cpp diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index 775013c7402..13651f74ecd 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -25,6 +25,7 @@ add_subdirectory (compressor) add_subdirectory (copier) add_subdirectory (format) add_subdirectory (clang) +add_subdirectory (anonymizer) if (CLICKHOUSE_SPLIT_BINARY) set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-performance-test diff --git a/dbms/programs/anonymizer/CMakeLists.txt b/dbms/programs/anonymizer/CMakeLists.txt new file mode 100644 index 00000000000..4b84e81b828 --- /dev/null +++ b/dbms/programs/anonymizer/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable (clickhouse-anonymizer main.cpp) +target_link_libraries (clickhouse-anonymizer dbms ${Boost_PROGRAM_OPTIONS_LIBRARY}) diff --git a/dbms/programs/anonymizer/main.cpp b/dbms/programs/anonymizer/main.cpp new file mode 100644 index 00000000000..583b333cb26 --- /dev/null +++ b/dbms/programs/anonymizer/main.cpp @@ -0,0 +1,674 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + + +/// Model is used to transform columns with source data to columns +/// with similar by structure and by probability distributions but anonymized data. +class IModel +{ +public: + /// Call train iteratively for each block to train a model. + virtual void train(const IColumn & column); + + /// Call finalize one time after training before generating. + virtual void finalize(); + + /// Call generate: pass source data column to obtain a column with anonymized data as a result. + virtual ColumnPtr generate(const IColumn & column); + + virtual ~IModel() {} +}; + +using ModelPtr = std::unique_ptr; + + +template +UInt64 hash(Ts... 
xs) +{ + SipHash hash; + (hash.update(xs), ...); + return hash.get64(); +} + + +UInt64 maskBits(UInt64 x, size_t num_bits) +{ + return x & ((1 << num_bits) - 1); +} + + +/// Apply Feistel network round to least significant num_bits part of x. +UInt64 feistelRound(UInt64 x, size_t num_bits, UInt64 seed, size_t round) +{ + size_t num_bits_right_half = num_bits / 2; + size_t num_bits_left_half = num_bits - num_bits_right_half; + + UInt64 right_half = maskBits(x, num_bits_right_half); + UInt64 left_half = maskBits(x >> num_bits_right_half, num_bits_left_half); + + UInt64 new_left_half = right_half; + UInt64 new_right_half = left_half ^ hash(right_half, seed, round); + + return (new_left_half << num_bits_right_half) ^ new_right_half; +} + + +/// Apply Feistel network with num_rounds to least significant num_bits part of x. +UInt64 feistelNetwork(UInt64 x, size_t num_bits, UInt64 seed, size_t num_rounds = 4) +{ + for (size_t i = 0; i < num_rounds; ++i) + x = feistelRound(x, num_bits, seed, i); + return x; +} + + +/// Pseudorandom permutation within set of numbers with the same log2(x). +UInt64 transform(UInt64 x, UInt64 seed) +{ + /// Keep 0 and 1 as is. + if (x == 0 || x == 1) + return x; + + /// Pseudorandom permutation of two elements. + if (x == 2 || x == 3) + return x ^ (seed & 1); + + size_t num_leading_zeros = __builtin_clzll(x); + return feistelNetwork(x, 64 - num_leading_zeros - 1, seed); +} + + +class UnsignedIntegerModel : public IModel +{ +private: + const UInt64 seed; + +public: + UnsignedIntegerModel(UInt64 seed) : seed(seed) {} + + void train(const IColumn &) override {} + void finalize() override {} + + ColumnPtr generate(const IColumn & column) override + { + MutableColumnPtr res = column.cloneEmpty(); + + size_t size = column.size(); + res->reserve(size); + + for (size_t i = 0; i < size; ++i) + res->insert(transform(column.getUInt(i), seed)); + + return res; + } +}; + + +/// Keep sign and apply pseudorandom permutation after converting to unsigned as above. +Int64 transformSigned(Int64 x, UInt64 seed) +{ + if (x >= 0) + return transform(x, seed); + else + return -transform(-x, seed); /// It works Ok even for minimum signed number. +} + + +class SignedIntegerModel : public IModel +{ +private: + const UInt64 seed; + +public: + SignedIntegerModel(UInt64 seed) : seed(seed) {} + + void train(const IColumn &) override {} + void finalize() override {} + + ColumnPtr generate(const IColumn & column) override + { + MutableColumnPtr res = column.cloneEmpty(); + + size_t size = column.size(); + res->reserve(size); + + for (size_t i = 0; i < size; ++i) + res->insert(transformSigned(column.getInt(i), seed)); + + return res; + } +}; + + +/// Pseudorandom permutation of mantissa. +template +Float transformFloatMantissa(Float x, UInt64 seed) +{ + using UInt = std::conditional_t, UInt32, UInt64>; + constexpr size_t mantissa_num_bits = std::is_same_v ? 23 : 52; + + UInt x_uint = ext::bit_cast(x); + x_uint = feistelNetwork(x_uint, mantissa_num_bits, seed); + return ext::bit_cast(x_uint); +} + + +/// Transform difference from previous number by applying pseudorandom permutation to mantissa part of it. +/// It allows to retain some continuouty property of source data. 
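+/// (Only the mantissa bits of each delta are permuted: the sign and exponent are
+/// kept intact, so a transformed delta stays within the order of magnitude of the
+/// original one, and the overall shape of the sequence is preserved.)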
+template +class FloatModel : public IModel +{ +private: + const UInt64 seed; + Float src_prev_value = 0; + Float res_prev_value = 0; + +public: + FloatModel(UInt64 seed) : seed(seed) {} + + void train(const IColumn &) override {} + void finalize() override {} + + ColumnPtr generate(const IColumn & column) override + { + const auto & src_data = static_cast &>(column).getData(); + size_t size = src_data.size(); + + auto res_column = ColumnVector::create(size); + auto & res_data = static_cast &>(*res_column).getData(); + + for (size_t i = 0; i < size; ++i) + { + res_data[i] = res_prev_value + transformFloatMantissa(src_data[i] - src_prev_value, seed); + src_prev_value = src_data[i]; + res_prev_value = res_data[i]; + } + + return res_column; + } +}; + + +/// Leave all data as is. For example, it is used for columns of type Date. +class IdentityModel : public IModel +{ +public: + void train(const IColumn &) override {} + void finalize() override {} + + ColumnPtr generate(const IColumn & column) override + { + return column.cloneResized(column.size()); + } +}; + + +/// Leave date part as is and apply pseudorandom permutation to time difference with previous value within the same log2 class. +class DateTimeModel : public IModel +{ +private: + const UInt64 seed; + UInt32 src_prev_value = 0; + UInt32 res_prev_value = 0; + + const DateLUTImpl & date_lut; + +public: + DateTimeModel(UInt64 seed) : seed(seed), date_lut(DateLUT::instance()) {} + + void train(const IColumn &) override {} + void finalize() override {} + + ColumnPtr generate(const IColumn & column) override + { + const auto & src_data = static_cast &>(column).getData(); + size_t size = src_data.size(); + + auto res_column = ColumnVector::create(size); + auto & res_data = static_cast &>(*res_column).getData(); + + for (size_t i = 0; i < size; ++i) + { + UInt32 src_time = src_data[i]; + UInt32 src_date = date_lut.toDate(src_time); + + Int32 src_diff = src_time - src_prev_value; + Int32 res_diff = transform(src_diff, seed); + + UInt32 new_time = res_prev_value + res_diff; + res_data[i] = src_date + new_time % 86400; /// Don't care about tz changes and daylight saving time. + + src_prev_value = src_time; + res_prev_value = res_data[i]; + } + + return res_column; + } +}; + + +class MarkovModel +{ +private: + using CodePoint = UInt32; + using NGramHash = UInt32; + + struct HistogramElement + { + CodePoint code; + UInt64 count; + }; + + struct Histogram + { + UInt32 total = 0; + std::vector data; + + void add(CodePoint code) + { + ++total; + + for (auto & elem : data) + { + if (elem.code == code) + { + ++elem.count; + return; + } + } + + data.emplace_back(HistogramElement{.code = code, .count = 1}); + } + + UInt8 sample(UInt64 random) const + { + random %= total; + + UInt64 sum = 0; + for (const auto & elem : data) + { + sum += elem.count; + if (sum > random) + return elem.code; + } + + __builtin_unreachable(); + } + }; + + using Table = HashMap; + Table table; + + size_t order; + + std::vector code_points; + + + NGramHash hashContext(const CodePoint * begin, const CodePoint * end) const + { + return CRC32Hash()(StringRef(reinterpret_cast(begin), (end - begin) * sizeof(CodePoint))); + } + + /// By the way, we don't have to use actual Unicode numbers. We use just arbitary bijective mapping. 
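+    /// Reads one UTF-8 sequence (1..4 bytes, truncated at `end`) and packs its raw
+    /// bytes into a UInt32 via memcpy; writeCodePoint below performs the inverse.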
+ CodePoint readCodePoint(const char *& pos, const char * end) + { + size_t length = UTF8::seqLength(*pos); + if (pos + length > end) + length = end - pos; + + CodePoint res = 0; + memcpy(&res, pos, length); + return res; + } + + bool writeCodePoint(CodePoint code, char *& pos, char * end) + { + size_t length + = (code & 0xFF000000) ? 4 + : (code & 0xFFFF0000) ? 3 + : (code & 0xFFFFFF00) ? 2 + : 1; + + if (pos + length > end) + return false; + + memcpy(pos, &code, length); + pos += length; + return true; + } + +public: + explicit MarkovModel(size_t order) : order(order) {} + + void consume(const char * data, size_t size) + { + code_points.clear(); + + const char * pos = data; + const char * end = data + size; + + while (pos < end) + { + code_points.push_back(readCodePoint(pos, end)); + + for (size_t context_size = 0; context_size < order; ++context_size) + { + if (code_points.size() <= context_size) + break; + + table[hashContext(&code_points.back() - context_size, &code_points.back())].add(code_points.back()); + } + } + } + + + void finalize() + { + /// TODO: Clean low frequencies. + } + + + size_t generate(char * data, size_t size, + UInt64 seed, const char * determinator_data, size_t determinator_size) + { + code_points.clear(); + + char * pos = data; + char * end = data + size; + + while (pos < end) + { + Table::iterator it = table.end(); + + size_t context_size = std::min(order, code_points.size()); + while (true) + { + it = table.find(hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size())); + if (table.end() != it) + break; + + if (context_size == 0) + break; + --context_size; + } + + if (table.end() == it) + throw Exception("Logical error in markov model"); + + size_t offset_from_begin_of_string = pos - data; + constexpr size_t determinator_sliding_window_size = 8; + + size_t determinator_sliding_window_overflow = offset_from_begin_of_string + determinator_sliding_window_size > determinator_size + ? offset_from_begin_of_string + determinator_sliding_window_size - determinator_size : 0; + + const char * determinator_sliding_window_begin = determinator_data + offset_from_begin_of_string - determinator_sliding_window_overflow; + + SipHash hash; + hash.update(seed); + hash.update(determinator_sliding_window_begin, determinator_sliding_window_size); + hash.update(determinator_sliding_window_overflow); + UInt64 determinator = hash.get64(); + + CodePoint code = it->second.sample(determinator); + code_points.push_back(code); + + if (!writeCodePoint(code, pos, end)) + break; + } + + return pos - data; + } +}; + + +/// Generate length of strings as above. +/// To generate content of strings, use +/// order-N Markov model on Unicode code points, +/// and to generate next code point use deterministic RNG +/// determined by hash of 8-byte sliding window of source string. +/// This is intended to generate locally-similar strings from locally-similar sources. 
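+/// Generation is also deterministic: the output depends only on the seed, the
+/// trained model and the source string (through its length and the 8-byte
+/// sliding windows used as determinators), so equal inputs obfuscate equally.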
+class StringModel : public IModel +{ +private: + UInt64 seed; + MarkovModel markov_model{3}; + +public: + StringModel(UInt64 seed) : seed(seed) {} + + void train(const IColumn & column) override + { + const ColumnString & column_string = static_cast(column); + size_t size = column_string.size(); + + for (size_t i = 0; i < size; ++i) + { + StringRef string = column_string.getDataAt(i); + markov_model.consume(string.data, string.size); + } + } + + void finalize() override + { + /// TODO cut low frequencies + } + + ColumnPtr generate(const IColumn & column) override + { + const ColumnString & column_string = static_cast(column); + size_t size = column_string.size(); + + auto res_column = ColumnString::create(); + res_column->reserve(size); + + std::string new_string; + for (size_t i = 0; i < size; ++i) + { + StringRef src_string = column_string.getDataAt(i); + size_t desired_string_size = transform(src_string.size, seed); + new_string.resize(desired_string_size); + + size_t actual_size = markov_model.generate(new_string.data(), desired_string_size, seed, src_string.data, src_string.size); + + res_column->insertData(new_string.data(), actual_size); + } + + return res_column; + } +}; + + +class ModelFactory +{ +public: + ModelPtr get(const IDataType & data_type, UInt64 seed) const + { + if (data_type.isInteger()) + { + if (data_type.isUnsignedInteger()) + return std::make_unique(seed); + else + return std::make_unique(seed); + } + if (typeid_cast(&data_type)) + return std::make_unique>(seed); + if (typeid_cast(&data_type)) + return std::make_unique>(seed); + if (typeid_cast(&data_type)) + return std::make_unique(); + if (typeid_cast(&data_type)) + return std::make_unique(seed); + if (typeid_cast(&data_type)) + return std::make_unique(seed); + throw Exception("Unsupported data type"); + } +}; + + +class Anonymizer +{ +private: + std::vector models; + +public: + Anonymizer(const Block & header, UInt64 seed) + { + ModelFactory factory; + + size_t columns = header.columns(); + models.reserve(columns); + + for (size_t i = 0; i < columns; ++i) + models.emplace_back(factory.get(*header.getByPosition(i).type, hash(seed, i))); + } + + void train(const Columns & columns) + { + size_t size = columns.size(); + for (size_t i = 0; i < size; ++i) + models[i]->train(*columns[i]); + } + + void finalize() + { + for (auto & model : models) + model->finalize(); + } + + Columns generate(const Columns & columns) + { + size_t size = columns.size(); + Columns res(size); + for (size_t i = 0; i < size; ++i) + res[i] = models[i]->generate(*columns[i]); + return res; + } +}; + +} + + +int main(int argc, char ** argv) +{ + using namespace DB; + namespace po = boost::program_options; + + po::options_description description("Main options"); + description.add_options() + ("help", "produce help message") + ("structure,S", po::value(), "structure of the initial table (list of column and type names)") + ("input-format", po::value(), "input format of the initial table data") + ("output-format", po::value(), "default output format") + ("seed", po::value(), "seed (arbitary string), must be random string with at least 10 bytes length") + ; + + po::parsed_options parsed = po::command_line_parser(argc, argv).options(description).run(); + po::variables_map options; + po::store(parsed, options); + + if (options.count("help")) + { + /// TODO + return 0; + } + + UInt64 seed = sipHash64(options["seed"].as()); + + std::string structure = options["structure"].as(); + std::string input_format = options["input-format"].as(); + std::string 
output_format = options["output-format"].as(); + + // Create header block + std::vector structure_vals; + boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); + + if (structure_vals.size() % 2 != 0) + throw Exception("Odd number of elements in section structure: must be a list of name type pairs", ErrorCodes::LOGICAL_ERROR); + + Block header; + const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); + + for (size_t i = 0, size = structure_vals.size(); i < size; i += 2) + { + ColumnWithTypeAndName column; + column.name = structure_vals[i]; + column.type = data_type_factory.get(structure_vals[i + 1]); + column.column = column.type->createColumn(); + header.insert(std::move(column)); + } + + Context context = Context::createGlobal(); + + /// stdin must be seekable + ReadBufferFromFileDescriptor file_in(STDIN_FILENO); + WriteBufferFromFileDescriptor file_out(STDOUT_FILENO); + + Anonymizer anonymizer(header, seed); + + size_t max_block_size = 8192; + + /// Train step + { + BlockInputStreamPtr input = context.getInputFormat(input_format, file_in, header, max_block_size); + + input->readPrefix(); + while (Block block = input->read()) + anonymizer.train(block.getColumns()); + input->readSuffix(); + } + + anonymizer.finalize(); + + /// Generation step + { + file_in.seek(0); + + BlockInputStreamPtr input = context.getInputFormat(input_format, file_in, header, max_block_size); + BlockOutputStreamPtr output = context.getOutputFormat(output_format, file_out, header); + + input->readPrefix(); + output->writePrefix(); + while (Block block = input->read()) + { + Columns columns = anonymizer.generate(block.getColumns()); + output->write(header.cloneWithColumns(columns)); + } + output->writeSuffix(); + input->readSuffix(); + } + + return 0; +} diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 1cae4ba252e..514289ee325 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -307,6 +307,16 @@ MutableColumns Block::cloneEmptyColumns() const } +Columns Block::getColumns() const +{ + size_t num_columns = data.size(); + Columns columns(num_columns); + for (size_t i = 0; i < num_columns; ++i) + columns[i] = data[i].column; + return columns; +} + + MutableColumns Block::mutateColumns() const { size_t num_columns = data.size(); @@ -325,6 +335,15 @@ void Block::setColumns(MutableColumns && columns) } +void Block::setColumns(const Columns & columns) +{ + size_t num_columns = data.size(); + for (size_t i = 0; i < num_columns; ++i) + data[i].column = columns[i]; +} + + + Block Block::cloneWithColumns(MutableColumns && columns) const { Block res; @@ -337,6 +356,18 @@ Block Block::cloneWithColumns(MutableColumns && columns) const } +Block Block::cloneWithColumns(const Columns & columns) const +{ + Block res; + + size_t num_columns = data.size(); + for (size_t i = 0; i < num_columns; ++i) + res.insert({ columns[i], data[i].type, data[i].name }); + + return res; +} + + Block Block::sortColumns() const { Block sorted_block; diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index ea4a4c00cef..94a4147aac2 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -106,6 +106,10 @@ public: /** Get the same block, but empty. */ Block cloneEmpty() const; + Columns getColumns() const; + void setColumns(const Columns & columns); + Block cloneWithColumns(const Columns & columns) const; + /** Get empty columns with the same types as in block. 
*/ MutableColumns cloneEmptyColumns() const; From e1b85c84f7eceffe1b686aa3e0e533006cb8c328 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 09:14:39 +0300 Subject: [PATCH 066/151] Data obfuscator: development [#CLICKHOUSE-2] --- dbms/programs/anonymizer/main.cpp | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/dbms/programs/anonymizer/main.cpp b/dbms/programs/anonymizer/main.cpp index 583b333cb26..366516aad3a 100644 --- a/dbms/programs/anonymizer/main.cpp +++ b/dbms/programs/anonymizer/main.cpp @@ -25,6 +25,8 @@ #include #include +#include + namespace DB { @@ -73,25 +75,26 @@ UInt64 maskBits(UInt64 x, size_t num_bits) /// Apply Feistel network round to least significant num_bits part of x. UInt64 feistelRound(UInt64 x, size_t num_bits, UInt64 seed, size_t round) { - size_t num_bits_right_half = num_bits / 2; - size_t num_bits_left_half = num_bits - num_bits_right_half; + size_t num_bits_left_half = num_bits / 2; + size_t num_bits_right_half = num_bits - num_bits_left_half; - UInt64 right_half = maskBits(x, num_bits_right_half); UInt64 left_half = maskBits(x >> num_bits_right_half, num_bits_left_half); + UInt64 right_half = maskBits(x, num_bits_right_half); UInt64 new_left_half = right_half; - UInt64 new_right_half = left_half ^ hash(right_half, seed, round); + UInt64 new_right_half = left_half ^ maskBits(hash(right_half, seed, round), num_bits_left_half); - return (new_left_half << num_bits_right_half) ^ new_right_half; + return (new_left_half << num_bits_left_half) ^ new_right_half; } /// Apply Feistel network with num_rounds to least significant num_bits part of x. UInt64 feistelNetwork(UInt64 x, size_t num_bits, UInt64 seed, size_t num_rounds = 4) { + UInt64 bits = maskBits(x, num_bits); for (size_t i = 0; i < num_rounds; ++i) - x = feistelRound(x, num_bits, seed, i); - return x; + bits = feistelRound(bits, num_bits, seed, i); + return (x & ~((1 << num_bits) - 1)) ^ bits; } @@ -107,6 +110,7 @@ UInt64 transform(UInt64 x, UInt64 seed) return x ^ (seed & 1); size_t num_leading_zeros = __builtin_clzll(x); + return feistelNetwork(x, 64 - num_leading_zeros - 1, seed); } @@ -351,6 +355,7 @@ private: CodePoint res = 0; memcpy(&res, pos, length); + pos += length; return res; } @@ -476,6 +481,7 @@ public: for (size_t i = 0; i < size; ++i) { + std::cerr << i << "\n"; StringRef string = column_string.getDataAt(i); markov_model.consume(string.data, string.size); } @@ -582,6 +588,7 @@ public: int main(int argc, char ** argv) +try { using namespace DB; namespace po = boost::program_options; @@ -672,3 +679,9 @@ int main(int argc, char ** argv) return 0; } +catch (...) +{ + std::cerr << DB::getCurrentExceptionMessage(true) << "\n"; + auto code = DB::getCurrentExceptionCode(); + return code ? 
code : 1; +} From 5fa604db7b715e4530d9cfea9bfcf36576ad4028 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 09:27:16 +0300 Subject: [PATCH 067/151] Data obfuscator: development [#CLICKHOUSE-2] --- dbms/programs/anonymizer/main.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/dbms/programs/anonymizer/main.cpp b/dbms/programs/anonymizer/main.cpp index 366516aad3a..323b376e017 100644 --- a/dbms/programs/anonymizer/main.cpp +++ b/dbms/programs/anonymizer/main.cpp @@ -376,11 +376,11 @@ private: } public: - explicit MarkovModel(size_t order) : order(order) {} + explicit MarkovModel(size_t order) : order(order), code_points(order, -1) {} void consume(const char * data, size_t size) { - code_points.clear(); + code_points.resize(order); const char * pos = data; const char * end = data + size; @@ -390,12 +390,7 @@ public: code_points.push_back(readCodePoint(pos, end)); for (size_t context_size = 0; context_size < order; ++context_size) - { - if (code_points.size() <= context_size) - break; - table[hashContext(&code_points.back() - context_size, &code_points.back())].add(code_points.back()); - } } } @@ -409,7 +404,7 @@ public: size_t generate(char * data, size_t size, UInt64 seed, const char * determinator_data, size_t determinator_size) { - code_points.clear(); + code_points.resize(order); char * pos = data; char * end = data + size; @@ -418,7 +413,7 @@ public: { Table::iterator it = table.end(); - size_t context_size = std::min(order, code_points.size()); + size_t context_size = order; while (true) { it = table.find(hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size())); From 5bc0f26496874eec5f9b19a9d599ae6819eda238 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 15 Jun 2018 10:05:14 +0300 Subject: [PATCH 068/151] Add missed file --- dbms/src/IO/WriteBufferFromHTTP.cpp | 25 ++++++++++++++++++++++ dbms/src/IO/WriteBufferFromHTTP.h | 33 +++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 dbms/src/IO/WriteBufferFromHTTP.cpp create mode 100644 dbms/src/IO/WriteBufferFromHTTP.h diff --git a/dbms/src/IO/WriteBufferFromHTTP.cpp b/dbms/src/IO/WriteBufferFromHTTP.cpp new file mode 100644 index 00000000000..adc4574eaf8 --- /dev/null +++ b/dbms/src/IO/WriteBufferFromHTTP.cpp @@ -0,0 +1,25 @@ +#include + +#include +#include + +namespace DB +{ +WriteBufferFromHTTP::WriteBufferFromHTTP( + const Poco::URI & uri, const std::string & method, const ConnectionTimeouts & timeouts, size_t buffer_size_) + : WriteBufferFromOStream(buffer_size_) + , session{getPreparedSession(uri, timeouts)} + , request{method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1} +{ + request.setHost(uri.getHost()); + request.setChunkedTransferEncoding(true); + + LOG_TRACE((&Logger::get("WriteBufferToHTTP")), "Sending request to " << uri.toString()); + + ostr = &session->sendRequest(request); +} +void WriteBufferFromHTTP::finalize() +{ + makeRequest(*session, request, response); +} +} diff --git a/dbms/src/IO/WriteBufferFromHTTP.h b/dbms/src/IO/WriteBufferFromHTTP.h new file mode 100644 index 00000000000..66c1e6786bd --- /dev/null +++ b/dbms/src/IO/WriteBufferFromHTTP.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +/* Perform HTTP POST/PUT request. 
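+ *
+ * Editorial usage sketch (illustrative only; the URL is an assumed example,
+ * everything else uses only names declared in this header):
+ *
+ *     WriteBufferFromHTTP out(Poco::URI("http://localhost:8123/"));  /// POST by default
+ *     out.write(body.data(), body.size());  /// inherited WriteBuffer interface
+ *     out.next();                           /// flush into the chunked request body
+ *     out.finalize();                       /// performs the request and reads the response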
+ */
+class WriteBufferFromHTTP : public WriteBufferFromOStream
+{
+private:
+    std::unique_ptr session;
+    Poco::Net::HTTPRequest request;
+    Poco::Net::HTTPResponse response;
+
+public:
+    explicit WriteBufferFromHTTP(const Poco::URI & uri,
+        const std::string & method = Poco::Net::HTTPRequest::HTTP_POST, // POST or PUT only
+        const ConnectionTimeouts & timeouts = {},
+        size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE);
+
+    ~WriteBufferFromHTTP() override {}
+
+    // This method has to be called to make the actual request.
+    void finalize();
+};
+}

From 43a98634a9d77ca3511f58485addb99a98050ca9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 15 Jun 2018 10:09:54 +0300
Subject: [PATCH 069/151] Data obfuscator: development [#CLICKHOUSE-2]

---
 dbms/programs/anonymizer/main.cpp | 152 +++++++++++++++++++++++++++++-
 1 file changed, 151 insertions(+), 1 deletion(-)

diff --git a/dbms/programs/anonymizer/main.cpp b/dbms/programs/anonymizer/main.cpp
index 323b376e017..4e522a7f0f2 100644
--- a/dbms/programs/anonymizer/main.cpp
+++ b/dbms/programs/anonymizer/main.cpp
@@ -1,11 +1,17 @@
 #include
 #include
 #include
+#include
+#include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -240,6 +246,67 @@ public:
 };
 
 
+/// Just pseudorandom function.
+void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UInt64 seed)
+{
+    {
+        SipHash hash;
+        hash.update(seed);
+        hash.update(reinterpret_cast(src), size);
+        seed = hash.get64();
+    }
+
+    UInt8 * pos = dst;
+    UInt8 * end = dst + size;
+
+    size_t i = 0;
+    while (pos < end)
+    {
+        SipHash hash;
+        hash.update(seed);
+        hash.update(i);
+
+        char * dst = reinterpret_cast(std::min(pos, end - 16));
+        hash.get128(dst);
+
+        pos += 16;
+        ++i;
+    }
+}
+
+
+class FixedStringModel : public IModel
+{
+private:
+    const UInt64 seed;
+
+public:
+    FixedStringModel(UInt64 seed) : seed(seed) {}
+
+    void train(const IColumn &) override {}
+    void finalize() override {}
+
+    ColumnPtr generate(const IColumn & column) override
+    {
+        const ColumnFixedString & column_fixed_string = static_cast(column);
+        const size_t string_size = column_fixed_string.getN();
+
+        const auto & src_data = column_fixed_string.getChars();
+        size_t size = column_fixed_string.size();
+
+        auto res_column = ColumnFixedString::create(string_size);
+        auto & res_data = res_column->getChars();
+
+        res_data.resize(src_data.size());
+
+        for (size_t i = 0; i < size; ++i)
+            transformFixedString(&src_data[i * string_size], &res_data[i * string_size], string_size, seed);
+
+        return res_column;
+    }
+};
+
+
 /// Leave date part as is and apply pseudorandom permutation to time difference with previous value within the same log2 class.
class DateTimeModel : public IModel { @@ -502,7 +569,9 @@ public: size_t desired_string_size = transform(src_string.size, seed); new_string.resize(desired_string_size); - size_t actual_size = markov_model.generate(new_string.data(), desired_string_size, seed, src_string.data, src_string.size); + size_t actual_size = 0; + if (desired_string_size != 0) + actual_size = markov_model.generate(new_string.data(), desired_string_size, seed, src_string.data, src_string.size); res_column->insertData(new_string.data(), actual_size); } @@ -512,6 +581,72 @@ public: }; +class ArrayModel : public IModel +{ +private: + ModelPtr nested_model; + +public: + ArrayModel(ModelPtr nested_model) : nested_model(std::move(nested_model)) {} + + void train(const IColumn & column) override + { + const ColumnArray & column_array = static_cast(column); + const IColumn & nested_column = column_array.getData(); + + nested_model->train(nested_column); + } + + void finalize() override + { + nested_model->finalize(); + } + + ColumnPtr generate(const IColumn & column) override + { + const ColumnArray & column_array = static_cast(column); + const IColumn & nested_column = column_array.getData(); + + ColumnPtr new_nested_column = nested_model->generate(nested_column); + + return ColumnArray::create((*std::move(new_nested_column)).mutate(), (*std::move(column_array.getOffsetsPtr())).mutate()); + } +}; + + +class NullableModel : public IModel +{ +private: + ModelPtr nested_model; + +public: + NullableModel(ModelPtr nested_model) : nested_model(std::move(nested_model)) {} + + void train(const IColumn & column) override + { + const ColumnNullable & column_nullable = static_cast(column); + const IColumn & nested_column = column_nullable.getNestedColumn(); + + nested_model->train(nested_column); + } + + void finalize() override + { + nested_model->finalize(); + } + + ColumnPtr generate(const IColumn & column) override + { + const ColumnNullable & column_nullable = static_cast(column); + const IColumn & nested_column = column_nullable.getNestedColumn(); + + ColumnPtr new_nested_column = nested_model->generate(nested_column); + + return ColumnNullable::create((*std::move(new_nested_column)).mutate(), (*std::move(column_nullable.getNullMapColumnPtr())).mutate()); + } +}; + + class ModelFactory { public: @@ -524,16 +659,31 @@ public: else return std::make_unique(seed); } + if (typeid_cast(&data_type)) return std::make_unique>(seed); + if (typeid_cast(&data_type)) return std::make_unique>(seed); + if (typeid_cast(&data_type)) return std::make_unique(); + if (typeid_cast(&data_type)) return std::make_unique(seed); + if (typeid_cast(&data_type)) return std::make_unique(seed); + + if (typeid_cast(&data_type)) + return std::make_unique(seed); + + if (auto type = typeid_cast(&data_type)) + return std::make_unique(get(*type->getNestedType(), seed)); + + if (auto type = typeid_cast(&data_type)) + return std::make_unique(get(*type->getNestedType(), seed)); + throw Exception("Unsupported data type"); } }; From ffad002bf5d76f89bf0201bc4b0cfa59dd93be8c Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 15 Jun 2018 10:42:57 +0300 Subject: [PATCH 070/151] Add missed stl header --- dbms/src/IO/HTTPCommon.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/IO/HTTPCommon.cpp b/dbms/src/IO/HTTPCommon.cpp index eb81610c17d..579755309f7 100644 --- a/dbms/src/IO/HTTPCommon.cpp +++ b/dbms/src/IO/HTTPCommon.cpp @@ -15,6 +15,7 @@ #include #include +#include namespace DB { From bd5247864b043cf705b1376a44fd152bace89e63 Mon Sep 17 00:00:00 2001 From: 
Alexey Milovidov Date: Fri, 15 Jun 2018 11:53:06 +0300 Subject: [PATCH 071/151] Data obfuscator: development [#CLICKHOUSE-2] --- dbms/programs/anonymizer/main.cpp | 145 +++++++++++++++++++++++------- 1 file changed, 115 insertions(+), 30 deletions(-) diff --git a/dbms/programs/anonymizer/main.cpp b/dbms/programs/anonymizer/main.cpp index 4e522a7f0f2..d2ae40b588e 100644 --- a/dbms/programs/anonymizer/main.cpp +++ b/dbms/programs/anonymizer/main.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -357,16 +358,19 @@ private: using CodePoint = UInt32; using NGramHash = UInt32; - struct HistogramElement + struct Bucket { CodePoint code; UInt64 count; + + Bucket(CodePoint code) : code(code), count(1) {} }; struct Histogram { - UInt32 total = 0; - std::vector data; + UInt64 total = 0; /// Not including count_end. + UInt64 count_end = 0; + std::vector data; void add(CodePoint code) { @@ -381,12 +385,21 @@ private: } } - data.emplace_back(HistogramElement{.code = code, .count = 1}); + data.emplace_back(code); } - UInt8 sample(UInt64 random) const + void addEnd() { - random %= total; + ++count_end; + } + + CodePoint sample(UInt64 random, double end_multiplier) const + { + UInt64 range = total + UInt64(count_end * end_multiplier); + if (range == 0) + return END; + + random %= range; UInt64 sum = 0; for (const auto & elem : data) @@ -396,7 +409,7 @@ private: return elem.code; } - __builtin_unreachable(); + return END; } }; @@ -404,9 +417,13 @@ private: Table table; size_t order; + size_t frequency_cutoff; std::vector code_points; + static constexpr CodePoint BEGIN = -1; + static constexpr CodePoint END = -2; + NGramHash hashContext(const CodePoint * begin, const CodePoint * end) const { @@ -443,7 +460,8 @@ private: } public: - explicit MarkovModel(size_t order) : order(order), code_points(order, -1) {} + explicit MarkovModel(size_t order, size_t frequency_cutoff) + : order(order), frequency_cutoff(frequency_cutoff), code_points(order, BEGIN) {} void consume(const char * data, size_t size) { @@ -452,29 +470,80 @@ public: const char * pos = data; const char * end = data + size; - while (pos < end) + while (true) { - code_points.push_back(readCodePoint(pos, end)); + bool inside = pos < end; + + CodePoint next_code_point; + + if (inside) + next_code_point = readCodePoint(pos, end); for (size_t context_size = 0; context_size < order; ++context_size) - table[hashContext(&code_points.back() - context_size, &code_points.back())].add(code_points.back()); + { + NGramHash context_hash = hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size()); + + if (inside) + table[context_hash].add(next_code_point); + else /// if (context_size != 0 || order == 0) /// Don't allow to break string without context (except order-0 model). + table[context_hash].addEnd(); + } + + if (inside) + code_points.push_back(next_code_point); + else + break; } } void finalize() { - /// TODO: Clean low frequencies. 
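+        /// Editorial note (not part of the original patch): the intent of the code
+        /// below is that a context observed fewer than frequency_cutoff times carries
+        /// unreliable statistics, so its whole histogram is dropped; for contexts that
+        /// survive, only the individual rare buckets are pruned.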
+ if (frequency_cutoff == 0) + return; + + // size_t total_buckets = 0; + // size_t erased_buckets = 0; + + for (auto & elem : table) + { + Histogram & histogram = elem.second; + // total_buckets += histogram.data.size(); + + if (histogram.total + histogram.count_end < frequency_cutoff) + { + // erased_buckets += histogram.data.size(); + + histogram.data.clear(); + histogram.total = 0; + } + else + { + auto erased = std::remove_if(histogram.data.begin(), histogram.data.end(), + [frequency_cutoff=frequency_cutoff](const Bucket & bucket) { return bucket.count < frequency_cutoff; }); + + UInt64 erased_count = 0; + for (auto it = erased; it < histogram.data.end(); ++it) + erased_count += it->count; + + // erased_buckets += histogram.data.end() - erased; + + histogram.data.erase(erased, histogram.data.end()); + histogram.total -= erased_count; + } + } + + // std::cerr << "Erased " << erased_buckets << " out of " << total_buckets << " buckets\n"; } - size_t generate(char * data, size_t size, + size_t generate(char * data, size_t desired_size, size_t buffer_size, UInt64 seed, const char * determinator_data, size_t determinator_size) { code_points.resize(order); char * pos = data; - char * end = data + size; + char * end = data + buffer_size; while (pos < end) { @@ -484,7 +553,7 @@ public: while (true) { it = table.find(hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size())); - if (table.end() != it) + if (table.end() != it && it->second.total + it->second.count_end != 0) break; if (context_size == 0) @@ -509,11 +578,21 @@ public: hash.update(determinator_sliding_window_overflow); UInt64 determinator = hash.get64(); - CodePoint code = it->second.sample(determinator); - code_points.push_back(code); + /// If string is greater than desired_size, increase probability of end. 
+                double end_probability_multiplier = 0;
+                Int64 num_bytes_after_desired_size = (pos - data) - desired_size;
+                if (num_bytes_after_desired_size > 0)
+                    end_probability_multiplier = std::pow(1.25, num_bytes_after_desired_size);
+
+                CodePoint code = it->second.sample(determinator, end_probability_multiplier);
+
+                if (code == END)
+                    break;
 
                 if (!writeCodePoint(code, pos, end))
                     break;
+
+                code_points.push_back(code);
             }
 
             return pos - data;
@@ -531,10 +610,10 @@ class StringModel : public IModel
 {
 private:
     UInt64 seed;
-    MarkovModel markov_model{3};
+    MarkovModel markov_model;
 
 public:
-    StringModel(UInt64 seed) : seed(seed) {}
+    StringModel(UInt64 seed, UInt8 order, UInt64 frequency_cutoff) : seed(seed), markov_model(order, frequency_cutoff) {}
 
     void train(const IColumn & column) override
     {
@@ -543,7 +622,6 @@ public:
         for (size_t i = 0; i < size; ++i)
         {
-            std::cerr << i << "\n";
             StringRef string = column_string.getDataAt(i);
             markov_model.consume(string.data, string.size);
         }
@@ -551,7 +629,7 @@ public:
 
     void finalize() override
     {
-        /// TODO cut low frequencies
+        markov_model.finalize();
     }
 
     ColumnPtr generate(const IColumn & column) override
@@ -567,11 +645,11 @@ public:
         {
             StringRef src_string = column_string.getDataAt(i);
             size_t desired_string_size = transform(src_string.size, seed);
-            new_string.resize(desired_string_size);
+            new_string.resize(desired_string_size * 2);
 
             size_t actual_size = 0;
             if (desired_string_size != 0)
-                actual_size = markov_model.generate(new_string.data(), desired_string_size, seed, src_string.data, src_string.size);
+                actual_size = markov_model.generate(new_string.data(), desired_string_size, new_string.size(), seed, src_string.data, src_string.size);
 
             res_column->insertData(new_string.data(), actual_size);
         }
@@ -582,7 +660,7 @@ public:
 class ModelFactory
 {
 public:
-    ModelPtr get(const IDataType & data_type, UInt64 seed) const
+    ModelPtr get(const IDataType & data_type, UInt64 seed, UInt8 markov_model_order, UInt64 frequency_cutoff) const
     {
         if (data_type.isInteger())
         {
@@ -673,16 +751,16 @@ public:
             return std::make_unique(seed);
 
         if (typeid_cast(&data_type))
-            return std::make_unique(seed);
+            return std::make_unique(seed, markov_model_order, frequency_cutoff);
 
         if (typeid_cast(&data_type))
             return std::make_unique(seed);
 
         if (auto type = typeid_cast(&data_type))
-            return std::make_unique(get(*type->getNestedType(), seed));
+            return std::make_unique(get(*type->getNestedType(), seed, markov_model_order, frequency_cutoff));
 
         if (auto type = typeid_cast(&data_type))
-            return std::make_unique(get(*type->getNestedType(), seed));
+            return std::make_unique(get(*type->getNestedType(), seed, markov_model_order, frequency_cutoff));
 
         throw Exception("Unsupported data type");
     }
 };
@@ -695,7 +773,7 @@ private:
     std::vector models;
 
 public:
-    Anonymizer(const Block & header, UInt64 seed)
+    Anonymizer(const Block & header, UInt64 seed, UInt8 markov_model_order, UInt64 frequency_cutoff)
     {
         ModelFactory factory;
 
@@ -703,7 +781,7 @@ public:
 
         for (size_t i = 0; i < columns; ++i)
-            models.emplace_back(factory.get(*header.getByPosition(i).type, hash(seed, i)));
+            models.emplace_back(factory.get(*header.getByPosition(i).type, hash(seed, i), markov_model_order, frequency_cutoff));
     }
 
     void train(const Columns & columns)
@@ -745,6 +823,8 @@ try
         ("input-format", po::value(), "input format of the initial table data")
         ("output-format", po::value(), "default output format")
         ("seed", po::value(), "seed (arbitrary string), must be random string with at least 10 bytes length")
        ("order",
po::value()->default_value(5), "order of markov model to generate strings") + ("cutoff", po::value()->default_value(5), "frequency cutoff for markov model") ; po::parsed_options parsed = po::command_line_parser(argc, argv).options(description).run(); @@ -763,6 +843,9 @@ try std::string input_format = options["input-format"].as(); std::string output_format = options["output-format"].as(); + UInt64 markov_model_order = options["order"].as(); + UInt64 frequency_cutoff = options["cutoff"].as(); + // Create header block std::vector structure_vals; boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); @@ -788,11 +871,12 @@ try ReadBufferFromFileDescriptor file_in(STDIN_FILENO); WriteBufferFromFileDescriptor file_out(STDOUT_FILENO); - Anonymizer anonymizer(header, seed); + Anonymizer anonymizer(header, seed, markov_model_order, frequency_cutoff); size_t max_block_size = 8192; /// Train step + std::cerr << "Training models\n"; { BlockInputStreamPtr input = context.getInputFormat(input_format, file_in, header, max_block_size); @@ -805,6 +889,7 @@ try anonymizer.finalize(); /// Generation step + std::cerr << "Generating data\n"; { file_in.seek(0); From 3de1efa2c26e481f6591a5dcfd28693e8fb91e9e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 12:07:42 +0300 Subject: [PATCH 072/151] Data obfuscator: development [#CLICKHOUSE-2] --- dbms/programs/anonymizer/main.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/dbms/programs/anonymizer/main.cpp b/dbms/programs/anonymizer/main.cpp index d2ae40b588e..49656735e14 100644 --- a/dbms/programs/anonymizer/main.cpp +++ b/dbms/programs/anonymizer/main.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -247,7 +248,7 @@ public: }; -/// Just pseudorandom function. +/// Pseudorandom function, but keep word characters as word characters. void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UInt64 seed) { { @@ -273,6 +274,15 @@ void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UInt64 se pos += 16; ++i; } + + for (size_t j = 0; j < size; ++j) + { + if (isWordCharASCII(src[j])) + { + static constexpr char word_chars[] = "_01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + dst[j] = word_chars[dst[j] % sizeof(word_chars)]; + } + } } From 47bded3520cf459e8e8afa6cd490859471d8a222 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 12:21:53 +0300 Subject: [PATCH 073/151] Data obfuscator: development [#CLICKHOUSE-2] --- dbms/programs/anonymizer/main.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/dbms/programs/anonymizer/main.cpp b/dbms/programs/anonymizer/main.cpp index 49656735e14..22113b660df 100644 --- a/dbms/programs/anonymizer/main.cpp +++ b/dbms/programs/anonymizer/main.cpp @@ -344,16 +344,17 @@ public: for (size_t i = 0; i < size; ++i) { - UInt32 src_time = src_data[i]; - UInt32 src_date = date_lut.toDate(src_time); + UInt32 src_datetime = src_data[i]; + UInt32 src_date = date_lut.toDate(src_datetime); - Int32 src_diff = src_time - src_prev_value; - Int32 res_diff = transform(src_diff, seed); + Int32 src_diff = src_datetime - src_prev_value; + Int32 res_diff = transformSigned(src_diff, seed); - UInt32 new_time = res_prev_value + res_diff; - res_data[i] = src_date + new_time % 86400; /// Don't care about tz changes and daylight saving time. 
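+                /// Editorial note (not part of the original patch): the fix below
+                /// recomputes the time-of-day from new_datetime, so the result stays
+                /// consistent with its own date component instead of bolting a raw
+                /// diff modulo 86400 onto the source date.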
+ UInt32 new_datetime = res_prev_value + res_diff; + UInt32 new_time = new_datetime - date_lut.toDate(new_datetime); + res_data[i] = src_date + new_time; - src_prev_value = src_time; + src_prev_value = src_datetime; res_prev_value = res_data[i]; } @@ -482,9 +483,9 @@ public: while (true) { - bool inside = pos < end; + const bool inside = pos < end; - CodePoint next_code_point; + CodePoint next_code_point {}; if (inside) next_code_point = readCodePoint(pos, end); From 108cca6ac167bddc2dee7bf803e364ffd3abf0d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 12:40:40 +0300 Subject: [PATCH 074/151] Data obfuscator: development [#CLICKHOUSE-2] --- dbms/programs/anonymizer/main.cpp | 56 ++++++++++--------------------- 1 file changed, 17 insertions(+), 39 deletions(-) diff --git a/dbms/programs/anonymizer/main.cpp b/dbms/programs/anonymizer/main.cpp index 22113b660df..c44d01455c2 100644 --- a/dbms/programs/anonymizer/main.cpp +++ b/dbms/programs/anonymizer/main.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include @@ -369,34 +370,17 @@ private: using CodePoint = UInt32; using NGramHash = UInt32; - struct Bucket - { - CodePoint code; - UInt64 count; - - Bucket(CodePoint code) : code(code), count(1) {} - }; - struct Histogram { UInt64 total = 0; /// Not including count_end. UInt64 count_end = 0; - std::vector data; + using Buckets = boost::container::flat_map; + Buckets buckets; void add(CodePoint code) { ++total; - - for (auto & elem : data) - { - if (elem.code == code) - { - ++elem.count; - return; - } - } - - data.emplace_back(code); + ++buckets[code]; } void addEnd() @@ -413,11 +397,11 @@ private: random %= range; UInt64 sum = 0; - for (const auto & elem : data) + for (const auto & elem : buckets) { - sum += elem.count; + sum += elem.second; if (sum > random) - return elem.code; + return elem.first; } return END; @@ -513,38 +497,32 @@ public: if (frequency_cutoff == 0) return; - // size_t total_buckets = 0; - // size_t erased_buckets = 0; - for (auto & elem : table) { Histogram & histogram = elem.second; - // total_buckets += histogram.data.size(); if (histogram.total + histogram.count_end < frequency_cutoff) { - // erased_buckets += histogram.data.size(); - - histogram.data.clear(); + histogram.buckets.clear(); histogram.total = 0; } else { - auto erased = std::remove_if(histogram.data.begin(), histogram.data.end(), - [frequency_cutoff=frequency_cutoff](const Bucket & bucket) { return bucket.count < frequency_cutoff; }); - + Histogram::Buckets new_buckets; UInt64 erased_count = 0; - for (auto it = erased; it < histogram.data.end(); ++it) - erased_count += it->count; - // erased_buckets += histogram.data.end() - erased; + for (const auto & bucket : histogram.buckets) + { + if (bucket.second >= frequency_cutoff) + new_buckets.emplace(bucket); + else + erased_count += bucket.second; + } - histogram.data.erase(erased, histogram.data.end()); + histogram.buckets.swap(new_buckets); histogram.total -= erased_count; } } - - // std::cerr << "Erased " << erased_buckets << " out of " << total_buckets << " buckets\n"; } From ddd10d956b18d69b7b4425b76074fc42343807aa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 12:44:14 +0300 Subject: [PATCH 075/151] Data obfuscator: development [#CLICKHOUSE-2] --- dbms/programs/anonymizer/main.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dbms/programs/anonymizer/main.cpp b/dbms/programs/anonymizer/main.cpp index c44d01455c2..bad2c7088a8 100644 --- a/dbms/programs/anonymizer/main.cpp +++ 
b/dbms/programs/anonymizer/main.cpp @@ -869,9 +869,14 @@ try { BlockInputStreamPtr input = context.getInputFormat(input_format, file_in, header, max_block_size); + UInt64 processed_rows = 0; input->readPrefix(); while (Block block = input->read()) + { anonymizer.train(block.getColumns()); + processed_rows += block.rows(); + std::cerr << "Processed " << processed_rows << " rows\n"; + } input->readSuffix(); } @@ -885,12 +890,15 @@ try BlockInputStreamPtr input = context.getInputFormat(input_format, file_in, header, max_block_size); BlockOutputStreamPtr output = context.getOutputFormat(output_format, file_out, header); + UInt64 processed_rows = 0; input->readPrefix(); output->writePrefix(); while (Block block = input->read()) { Columns columns = anonymizer.generate(block.getColumns()); output->write(header.cloneWithColumns(columns)); + processed_rows += block.rows(); + std::cerr << "Processed " << processed_rows << " rows\n"; } output->writeSuffix(); input->readSuffix(); From 9043e36fa17bbe1c6a9fb0679a512b135504d962 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 13:03:02 +0300 Subject: [PATCH 076/151] Data obfuscator: development [#CLICKHOUSE-2] --- dbms/programs/anonymizer/main.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dbms/programs/anonymizer/main.cpp b/dbms/programs/anonymizer/main.cpp index bad2c7088a8..0948aed5e98 100644 --- a/dbms/programs/anonymizer/main.cpp +++ b/dbms/programs/anonymizer/main.cpp @@ -34,8 +34,6 @@ #include #include -#include - namespace DB { @@ -820,9 +818,12 @@ try po::variables_map options; po::store(parsed, options); - if (options.count("help")) + if (options.count("help") || !options.count("seed")) { - /// TODO + std::cout << "Usage: " << argv[0] << " [options] < in > out\n" + << "\nInput must be seekable file (it will be read twice).\n" + << "\n" << description << "\n" + << "\nExample:\n " << argv[0] << " --seed $RANDOM --order 5 --cutoff 5 --input-format TSV --output-format TSV --structure 'CounterID UInt32, URLDomain String, URL String, SearchPhrase String, Title String' < stats.tsv\n"; return 0; } From 16c16dcc2d99757ad6244aa72818de8e14004125 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Jun 2018 13:42:13 +0300 Subject: [PATCH 077/151] Data obfuscator: development [#CLICKHOUSE-2] --- dbms/programs/anonymizer/main.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/dbms/programs/anonymizer/main.cpp b/dbms/programs/anonymizer/main.cpp index 0948aed5e98..3d6396a5d45 100644 --- a/dbms/programs/anonymizer/main.cpp +++ b/dbms/programs/anonymizer/main.cpp @@ -267,8 +267,17 @@ void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UInt64 se hash.update(seed); hash.update(i); - char * dst = reinterpret_cast(std::min(pos, end - 16)); - hash.get128(dst); + if (size >= 16) + { + char * dst = reinterpret_cast(std::min(pos, end - 16)); + hash.get128(dst); + } + else + { + char value[16]; + hash.get128(value); + memcpy(dst, value, end - dst); + } pos += 16; ++i; @@ -453,7 +462,7 @@ private: } public: - explicit MarkovModel(size_t order, size_t frequency_cutoff) + MarkovModel(size_t order, size_t frequency_cutoff) : order(order), frequency_cutoff(frequency_cutoff), code_points(order, BEGIN) {} void consume(const char * data, size_t size) @@ -803,7 +812,7 @@ try using namespace DB; namespace po = boost::program_options; - po::options_description description("Main options"); + po::options_description description("Options"); description.add_options() 
("help", "produce help message") ("structure,S", po::value(), "structure of the initial table (list of column and type names)") From 80ed67c8ee5f5cbfd50e9cd20863621de160b537 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 02:54:33 +0300 Subject: [PATCH 078/151] Data obfuscator: development [#CLICKHOUSE-2] --- dbms/programs/anonymizer/main.cpp | 69 +++++++++++++++++++------------ 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/dbms/programs/anonymizer/main.cpp b/dbms/programs/anonymizer/main.cpp index 3d6396a5d45..8493964799d 100644 --- a/dbms/programs/anonymizer/main.cpp +++ b/dbms/programs/anonymizer/main.cpp @@ -371,6 +371,13 @@ public: }; +struct MarkovModelParameters +{ + size_t order; + size_t frequency_cutoff; +}; + + class MarkovModel { private: @@ -418,12 +425,13 @@ private: using Table = HashMap; Table table; - size_t order; - size_t frequency_cutoff; + MarkovModelParameters params; std::vector code_points; + /// Special code point to form context before beginning of string. static constexpr CodePoint BEGIN = -1; + /// Special code point to indicate end of string. static constexpr CodePoint END = -2; @@ -462,12 +470,13 @@ private: } public: - MarkovModel(size_t order, size_t frequency_cutoff) - : order(order), frequency_cutoff(frequency_cutoff), code_points(order, BEGIN) {} + MarkovModel(MarkovModelParameters params) + : params(std::move(params)), code_points(params.order, BEGIN) {} void consume(const char * data, size_t size) { - code_points.resize(order); + /// First 'order' number of code points are pre-filled with BEGIN. + code_points.resize(params.order); const char * pos = data; const char * end = data + size; @@ -481,7 +490,7 @@ public: if (inside) next_code_point = readCodePoint(pos, end); - for (size_t context_size = 0; context_size < order; ++context_size) + for (size_t context_size = 0; context_size < params.order; ++context_size) { NGramHash context_hash = hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size()); @@ -501,14 +510,14 @@ public: void finalize() { - if (frequency_cutoff == 0) + if (params.frequency_cutoff == 0) return; for (auto & elem : table) { Histogram & histogram = elem.second; - if (histogram.total + histogram.count_end < frequency_cutoff) + if (histogram.total + histogram.count_end < params.frequency_cutoff) { histogram.buckets.clear(); histogram.total = 0; @@ -520,7 +529,7 @@ public: for (const auto & bucket : histogram.buckets) { - if (bucket.second >= frequency_cutoff) + if (bucket.second >= params.frequency_cutoff) new_buckets.emplace(bucket); else erased_count += bucket.second; @@ -536,7 +545,7 @@ public: size_t generate(char * data, size_t desired_size, size_t buffer_size, UInt64 seed, const char * determinator_data, size_t determinator_size) { - code_points.resize(order); + code_points.resize(params.order); char * pos = data; char * end = data + buffer_size; @@ -545,7 +554,7 @@ public: { Table::iterator it = table.end(); - size_t context_size = order; + size_t context_size = params.order; while (true) { it = table.find(hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size())); @@ -609,7 +618,7 @@ private: MarkovModel markov_model; public: - StringModel(UInt64 seed, UInt8 order, UInt64 frequency_cutoff) : seed(seed), markov_model(order, frequency_cutoff) {} + StringModel(UInt64 seed, MarkovModelParameters params) : seed(seed), markov_model(std::move(params)) {} void train(const IColumn & column) override 
{ @@ -724,7 +733,7 @@ public: class ModelFactory { public: - ModelPtr get(const IDataType & data_type, UInt64 seed, UInt8 markov_model_order, UInt64 frequency_cutoff) const + ModelPtr get(const IDataType & data_type, UInt64 seed, MarkovModelParameters markov_model_params) const { if (data_type.isInteger()) { @@ -747,29 +756,29 @@ public: return std::make_unique(seed); if (typeid_cast(&data_type)) - return std::make_unique(seed, markov_model_order, frequency_cutoff); + return std::make_unique(seed, markov_model_params); if (typeid_cast(&data_type)) return std::make_unique(seed); if (auto type = typeid_cast(&data_type)) - return std::make_unique(get(*type->getNestedType(), seed, markov_model_order, frequency_cutoff)); + return std::make_unique(get(*type->getNestedType(), seed, markov_model_params)); if (auto type = typeid_cast(&data_type)) - return std::make_unique(get(*type->getNestedType(), seed, markov_model_order, frequency_cutoff)); + return std::make_unique(get(*type->getNestedType(), seed, markov_model_params)); throw Exception("Unsupported data type"); } }; -class Anonymizer +class Obfuscator { private: std::vector models; public: - Anonymizer(const Block & header, UInt64 seed, UInt8 markov_model_order, UInt64 frequency_cutoff) + Obfuscator(const Block & header, UInt64 seed, MarkovModelParameters markov_model_params) { ModelFactory factory; @@ -777,7 +786,7 @@ public: models.reserve(columns); for (size_t i = 0; i < columns; ++i) - models.emplace_back(factory.get(*header.getByPosition(i).type, hash(seed, i), markov_model_order, frequency_cutoff)); + models.emplace_back(factory.get(*header.getByPosition(i).type, hash(seed, i), markov_model_params)); } void train(const Columns & columns) @@ -827,12 +836,16 @@ try po::variables_map options; po::store(parsed, options); - if (options.count("help") || !options.count("seed")) + if (options.count("help") + || !options.count("seed") + || !options.count("structure") + || !options.count("input-format") + || !options.count("output-format")) { std::cout << "Usage: " << argv[0] << " [options] < in > out\n" << "\nInput must be seekable file (it will be read twice).\n" << "\n" << description << "\n" - << "\nExample:\n " << argv[0] << " --seed $RANDOM --order 5 --cutoff 5 --input-format TSV --output-format TSV --structure 'CounterID UInt32, URLDomain String, URL String, SearchPhrase String, Title String' < stats.tsv\n"; + << "\nExample:\n " << argv[0] << " --seed \"$(head -c16 /dev/urandom)\" --order 5 --cutoff 5 --input-format TSV --output-format TSV --structure 'CounterID UInt32, URLDomain String, URL String, SearchPhrase String, Title String' < stats.tsv\n"; return 0; } @@ -842,8 +855,10 @@ try std::string input_format = options["input-format"].as(); std::string output_format = options["output-format"].as(); - UInt64 markov_model_order = options["order"].as(); - UInt64 frequency_cutoff = options["cutoff"].as(); + MarkovModelParameters markov_model_params; + + markov_model_params.order = options["order"].as(); + markov_model_params.frequency_cutoff = options["cutoff"].as(); // Create header block std::vector structure_vals; @@ -870,7 +885,7 @@ try ReadBufferFromFileDescriptor file_in(STDIN_FILENO); WriteBufferFromFileDescriptor file_out(STDOUT_FILENO); - Anonymizer anonymizer(header, seed, markov_model_order, frequency_cutoff); + Obfuscator obfuscator(header, seed, markov_model_params); size_t max_block_size = 8192; @@ -883,14 +898,14 @@ try input->readPrefix(); while (Block block = input->read()) { - anonymizer.train(block.getColumns()); + 
obfuscator.train(block.getColumns()); processed_rows += block.rows(); std::cerr << "Processed " << processed_rows << " rows\n"; } input->readSuffix(); } - anonymizer.finalize(); + obfuscator.finalize(); /// Generation step std::cerr << "Generating data\n"; @@ -905,7 +920,7 @@ try output->writePrefix(); while (Block block = input->read()) { - Columns columns = anonymizer.generate(block.getColumns()); + Columns columns = obfuscator.generate(block.getColumns()); output->write(header.cloneWithColumns(columns)); processed_rows += block.rows(); std::cerr << "Processed " << processed_rows << " rows\n"; From 4e02a4c81f6d5dba700e284664660eac0d85b826 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 03:27:59 +0300 Subject: [PATCH 079/151] Data obfuscator: development [#CLICKHOUSE-2] --- dbms/programs/CMakeLists.txt | 42 ++++++++++++------- dbms/programs/anonymizer/CMakeLists.txt | 2 - dbms/programs/config_tools.h.in | 1 + dbms/programs/main.cpp | 6 +++ dbms/programs/obfuscator/CMakeLists.txt | 8 ++++ .../main.cpp => obfuscator/Obfuscator.cpp} | 40 +++++++++++++++--- .../obfuscator/clickhouse-obfuscator.cpp | 3 ++ 7 files changed, 78 insertions(+), 24 deletions(-) delete mode 100644 dbms/programs/anonymizer/CMakeLists.txt create mode 100644 dbms/programs/obfuscator/CMakeLists.txt rename dbms/programs/{anonymizer/main.cpp => obfuscator/Obfuscator.cpp} (96%) create mode 100644 dbms/programs/obfuscator/clickhouse-obfuscator.cpp diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index 13651f74ecd..0a680ce87bc 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -3,15 +3,16 @@ # each of them is built and linked as a separate library, defined below. option (ENABLE_CLICKHOUSE_ALL "Enable all tools" ON) -option (ENABLE_CLICKHOUSE_SERVER "Enable server" ${ENABLE_CLICKHOUSE_ALL}) -option (ENABLE_CLICKHOUSE_CLIENT "Enable client" ${ENABLE_CLICKHOUSE_ALL}) -option (ENABLE_CLICKHOUSE_LOCAL "Enable local" ${ENABLE_CLICKHOUSE_ALL}) -option (ENABLE_CLICKHOUSE_BENCHMARK "Enable benchmark" ${ENABLE_CLICKHOUSE_ALL}) -option (ENABLE_CLICKHOUSE_PERFORMANCE "Enable performance" ${ENABLE_CLICKHOUSE_ALL}) -option (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG "Enable extract-from-config" ${ENABLE_CLICKHOUSE_ALL}) -option (ENABLE_CLICKHOUSE_COMPRESSOR "Enable compressor" ${ENABLE_CLICKHOUSE_ALL}) -option (ENABLE_CLICKHOUSE_COPIER "Enable copier" ${ENABLE_CLICKHOUSE_ALL}) -option (ENABLE_CLICKHOUSE_FORMAT "Enable format" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_SERVER "Enable clickhouse-server" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_CLIENT "Enable clickhouse-client" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_LOCAL "Enable clickhouse-local" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_BENCHMARK "Enable clickhouse-benchmark" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_PERFORMANCE "Enable clickhouse-performance-test" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG "Enable clickhouse-extract-from-config" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_COMPRESSOR "Enable clickhouse-compressor" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_COPIER "Enable clickhouse-copier" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_FORMAT "Enable clickhouse-format" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_OBFUSCATOR "Enable clickhouse-obfuscator" ${ENABLE_CLICKHOUSE_ALL}) configure_file (config_tools.h.in ${CMAKE_CURRENT_BINARY_DIR}/config_tools.h) @@ -25,7 +26,7 @@ 
add_subdirectory (compressor)
 add_subdirectory (copier)
 add_subdirectory (format)
 add_subdirectory (clang)
-add_subdirectory (anonymizer)
+add_subdirectory (obfuscator)
 
 if (CLICKHOUSE_SPLIT_BINARY)
     set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-performance-test
@@ -47,9 +48,6 @@ else ()
     target_include_directories (clickhouse BEFORE PRIVATE ${COMMON_INCLUDE_DIR})
     target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
 
-    if (USE_EMBEDDED_COMPILER)
-        target_link_libraries (clickhouse clickhouse-compiler-lib)
-    endif ()
     if (ENABLE_CLICKHOUSE_SERVER)
         target_link_libraries (clickhouse clickhouse-server-lib)
     endif ()
@@ -77,6 +75,12 @@ else ()
     if (ENABLE_CLICKHOUSE_FORMAT)
         target_link_libraries (clickhouse clickhouse-format-lib)
     endif ()
+    if (ENABLE_CLICKHOUSE_OBFUSCATOR)
+        target_link_libraries (clickhouse clickhouse-obfuscator-lib)
+    endif ()
+    if (USE_EMBEDDED_COMPILER)
+        target_link_libraries (clickhouse clickhouse-compiler-lib)
+    endif ()
 
     set (CLICKHOUSE_BUNDLE)
     if (ENABLE_CLICKHOUSE_SERVER)
@@ -104,6 +108,11 @@ else ()
         install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-performance-test DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
         list(APPEND CLICKHOUSE_BUNDLE clickhouse-performance-test)
     endif ()
+    if (ENABLE_CLICKHOUSE_COPIER)
+        add_custom_target (clickhouse-copier ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-copier DEPENDS clickhouse)
+        install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-copier DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
+        list(APPEND CLICKHOUSE_BUNDLE clickhouse-copier)
+    endif ()
     if (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG)
         add_custom_target (clickhouse-extract-from-config ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-extract-from-config DEPENDS clickhouse)
         install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-extract-from-config DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
@@ -120,10 +129,11 @@ else ()
         list(APPEND CLICKHOUSE_BUNDLE clickhouse-format)
     endif ()
-    if (ENABLE_CLICKHOUSE_COPIER)
-        add_custom_target (clickhouse-copier ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-copier DEPENDS clickhouse)
-        install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-copier DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
-        list(APPEND CLICKHOUSE_BUNDLE clickhouse-copier)
+    if (ENABLE_CLICKHOUSE_OBFUSCATOR)
+        add_custom_target (clickhouse-obfuscator ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-obfuscator DEPENDS clickhouse)
+        install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
+        list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator)
     endif ()
+
+    # Install always, because the debian package wants these files:
     add_custom_target (clickhouse-clang ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-clang DEPENDS clickhouse)
     add_custom_target (clickhouse-lld ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-lld DEPENDS clickhouse)
diff --git a/dbms/programs/anonymizer/CMakeLists.txt b/dbms/programs/anonymizer/CMakeLists.txt
deleted file mode 100644
index 4b84e81b828..00000000000
--- a/dbms/programs/anonymizer/CMakeLists.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-add_executable (clickhouse-anonymizer main.cpp)
-target_link_libraries (clickhouse-anonymizer dbms ${Boost_PROGRAM_OPTIONS_LIBRARY})
diff --git a/dbms/programs/config_tools.h.in b/dbms/programs/config_tools.h.in
index 6bdc4df942f..a7a538d9f1d 100644
--- a/dbms/programs/config_tools.h.in
+++ b/dbms/programs/config_tools.h.in
@@ -11,3 +11,4 @@
 #cmakedefine01 ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG
 #cmakedefine01 ENABLE_CLICKHOUSE_COMPRESSOR
 #cmakedefine01 ENABLE_CLICKHOUSE_FORMAT
+#cmakedefine01 ENABLE_CLICKHOUSE_OBFUSCATOR
diff --git a/dbms/programs/main.cpp b/dbms/programs/main.cpp
index f46e6f682a9..26001b3f307 100644
--- a/dbms/programs/main.cpp
+++ b/dbms/programs/main.cpp
@@ -46,6 +46,9 @@ int mainEntryClickHouseFormat(int argc, char ** argv);
 #if ENABLE_CLICKHOUSE_COPIER
 int mainEntryClickHouseClusterCopier(int argc, char ** argv);
 #endif
+#if ENABLE_CLICKHOUSE_OBFUSCATOR
+int mainEntryClickHouseObfuscator(int argc, char ** argv);
+#endif
 
 #if USE_EMBEDDED_COMPILER
 int mainEntryClickHouseClang(int argc, char ** argv);
@@ -88,6 +91,9 @@ std::pair clickhouse_applications[] =
 #if ENABLE_CLICKHOUSE_COPIER
     {"copier", mainEntryClickHouseClusterCopier},
 #endif
+#if ENABLE_CLICKHOUSE_OBFUSCATOR
+    {"obfuscator", mainEntryClickHouseObfuscator},
+#endif
 #if USE_EMBEDDED_COMPILER
     {"clang", mainEntryClickHouseClang},
     {"clang++", mainEntryClickHouseClang},
diff --git a/dbms/programs/obfuscator/CMakeLists.txt b/dbms/programs/obfuscator/CMakeLists.txt
new file mode 100644
index 00000000000..b62063eaa7d
--- /dev/null
+++ b/dbms/programs/obfuscator/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_library (clickhouse-obfuscator-lib Obfuscator.cpp)
+target_link_libraries (clickhouse-obfuscator-lib dbms ${Boost_PROGRAM_OPTIONS_LIBRARY})
+
+if (CLICKHOUSE_SPLIT_BINARY)
+    add_executable (clickhouse-obfuscator clickhouse-obfuscator.cpp)
+    set_target_properties(clickhouse-obfuscator PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
+    target_link_libraries (clickhouse-obfuscator clickhouse-obfuscator-lib)
+endif ()
diff --git a/dbms/programs/anonymizer/main.cpp b/dbms/programs/obfuscator/Obfuscator.cpp
similarity index 96%
rename from dbms/programs/anonymizer/main.cpp
rename to dbms/programs/obfuscator/Obfuscator.cpp
index 8493964799d..313593ea2e7 100644
--- a/dbms/programs/anonymizer/main.cpp
+++ b/dbms/programs/obfuscator/Obfuscator.cpp
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -815,7 +816,7 @@ public:
 }
 
 
-int main(int argc, char ** argv)
+int mainEntryClickHouseObfuscator(int argc, char ** argv)
 try
 {
     using namespace DB;
@@ -828,6 +829,8 @@ try
         ("input-format", po::value(), "input format of the initial table data")
         ("output-format", po::value(), "default output format")
         ("seed", po::value(), "seed (arbitrary string), must be random string with at least 10 bytes length")
+        ("limit", po::value(), "if specified - stop after generating that number of rows")
+        ("silent", po::value()->default_value(false), "don't print information messages to stderr")
         ("order", po::value()->default_value(5), "order of markov model to generate strings")
         ("cutoff", po::value()->default_value(5), "frequency cutoff for markov model")
     ;
@@ -855,6 +858,12 @@ try
     std::string input_format = options["input-format"].as();
     std::string output_format = options["output-format"].as();
 
+    std::optional limit;
+    if (options.count("limit"))
+        limit = options["limit"].as();
+
+    bool silent = options["silent"].as();
+
     MarkovModelParameters markov_model_params;
 
     markov_model_params.order = options["order"].as();
@@ -881,17 +890,29 @@ try
 
     Context context = Context::createGlobal();
 
-    /// stdin must be seekable
     ReadBufferFromFileDescriptor file_in(STDIN_FILENO);
     WriteBufferFromFileDescriptor file_out(STDOUT_FILENO);
 
+    try
+    {
+        /// stdin must be seekable
+        file_in.seek(0);
+    }
+    catch
(Exception & e) + { + e.addMessage("Input must be seekable file (it will be read twice)."); + throw; + } + Obfuscator obfuscator(header, seed, markov_model_params); size_t max_block_size = 8192; /// Train step - std::cerr << "Training models\n"; { + if (!silent) + std::cerr << "Training models\n"; + BlockInputStreamPtr input = context.getInputFormat(input_format, file_in, header, max_block_size); UInt64 processed_rows = 0; @@ -900,7 +921,8 @@ try { obfuscator.train(block.getColumns()); processed_rows += block.rows(); - std::cerr << "Processed " << processed_rows << " rows\n"; + if (!silent) + std::cerr << "Processed " << processed_rows << " rows\n"; } input->readSuffix(); } @@ -908,13 +930,18 @@ try obfuscator.finalize(); /// Generation step - std::cerr << "Generating data\n"; { + if (!silent) + std::cerr << "Generating data\n"; + file_in.seek(0); BlockInputStreamPtr input = context.getInputFormat(input_format, file_in, header, max_block_size); BlockOutputStreamPtr output = context.getOutputFormat(output_format, file_out, header); + if (limit) + input = std::make_shared(input, *limit, 0); + UInt64 processed_rows = 0; input->readPrefix(); output->writePrefix(); @@ -923,7 +950,8 @@ try Columns columns = obfuscator.generate(block.getColumns()); output->write(header.cloneWithColumns(columns)); processed_rows += block.rows(); - std::cerr << "Processed " << processed_rows << " rows\n"; + if (!silent) + std::cerr << "Processed " << processed_rows << " rows\n"; } output->writeSuffix(); input->readSuffix(); diff --git a/dbms/programs/obfuscator/clickhouse-obfuscator.cpp b/dbms/programs/obfuscator/clickhouse-obfuscator.cpp new file mode 100644 index 00000000000..e57fa6d1b54 --- /dev/null +++ b/dbms/programs/obfuscator/clickhouse-obfuscator.cpp @@ -0,0 +1,3 @@ +int mainEntryClickHouseObfuscator(int argc, char ** argv); +int main(int argc_, char ** argv_) { return mainEntryClickHouseObfuscator(argc_, argv_); } + From 116c91c9ca9f145a785e0353e194783b7080a2fe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 03:50:52 +0300 Subject: [PATCH 080/151] Data obfuscator: development [#CLICKHOUSE-2] --- dbms/programs/obfuscator/Obfuscator.cpp | 110 +++++++++++++++++++----- 1 file changed, 87 insertions(+), 23 deletions(-) diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index 313593ea2e7..6b6e0ef2366 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -376,9 +376,20 @@ struct MarkovModelParameters { size_t order; size_t frequency_cutoff; + size_t num_buckets_cutoff; + size_t frequency_add; + double frequency_desaturate; }; +/** Actually it's not an order-N model, but a mix of order-{0..N} models. + * + * We calculate code point counts for every context of 0..N previous code points. + * Then throw off some context with low amount of statistics. + * + * When generating data, we try to find statistics for a context of maximum order. + * And if not found - use context of smaller order, up to 0. 
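+ *
+ * Editorial illustration (assumed example, not from the original patch): with
+ * order = 3, to pick the code point that follows "cli" in generated text, the
+ * context "cli" is looked up first; if its histogram was pruned by the cutoffs,
+ * "li" is tried, then "i", then the empty order-0 context.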
+ */ class MarkovModel { private: @@ -511,33 +522,80 @@ public: void finalize() { - if (params.frequency_cutoff == 0) - return; - - for (auto & elem : table) + if (params.num_buckets_cutoff) { - Histogram & histogram = elem.second; - - if (histogram.total + histogram.count_end < params.frequency_cutoff) + for (auto & elem : table) { - histogram.buckets.clear(); - histogram.total = 0; - } - else - { - Histogram::Buckets new_buckets; - UInt64 erased_count = 0; + Histogram & histogram = elem.second; - for (const auto & bucket : histogram.buckets) + if (histogram.buckets.size() < params.num_buckets_cutoff) { - if (bucket.second >= params.frequency_cutoff) - new_buckets.emplace(bucket); - else - erased_count += bucket.second; + histogram.buckets.clear(); + histogram.total = 0; + } + } + } + + if (params.frequency_cutoff) + { + for (auto & elem : table) + { + Histogram & histogram = elem.second; + + if (histogram.total + histogram.count_end < params.frequency_cutoff) + { + histogram.buckets.clear(); + histogram.total = 0; + } + else + { + Histogram::Buckets new_buckets; + UInt64 erased_count = 0; + + for (const auto & bucket : histogram.buckets) + { + if (bucket.second >= params.frequency_cutoff) + new_buckets.emplace(bucket); + else + erased_count += bucket.second; + } + + histogram.buckets.swap(new_buckets); + histogram.total -= erased_count; + } + } + } + + if (params.frequency_add) + { + for (auto & elem : table) + { + Histogram & histogram = elem.second; + + for (auto & bucket : histogram.buckets) + bucket.second += params.frequency_add; + + histogram.count_end += params.frequency_add; + histogram.total += params.frequency_add * histogram.buckets.size(); + } + } + + if (params.frequency_desaturate) + { + for (auto & elem : table) + { + Histogram & histogram = elem.second; + + double average = histogram.total / histogram.buckets.size(); + + UInt64 new_total = 0; + for (auto & bucket : histogram.buckets) + { + bucket.second = bucket.second * (1 - params.frequency_desaturate) + average * params.frequency_desaturate; + new_total += bucket.second; } - histogram.buckets.swap(new_buckets); - histogram.total -= erased_count; + histogram.total = new_total; } } } @@ -832,7 +890,10 @@ try ("limit", po::value(), "if specified - stop after generating that number of rows") ("silent", po::value()->default_value(false), "don't print information messages to stderr") ("order", po::value()->default_value(5), "order of markov model to generate strings") - ("cutoff", po::value()->default_value(5), "frequency cutoff for markov model") + ("frequency-cutoff", po::value()->default_value(5), "frequency cutoff for markov model: remove all buckets with count less than specified") + ("num-buckets-cutoff", po::value()->default_value(2), "cutoff for number of different possible continuations for a context: remove all histograms with less than specified number of buckets") + ("frequency-add", po::value()->default_value(0), "add a constant to every count to lower probability distribution skew") + ("frequency-desaturate", po::value()->default_value(0), "0..1 - move every frequency towards average to lower probability distribution skew") ; po::parsed_options parsed = po::command_line_parser(argc, argv).options(description).run(); @@ -867,7 +928,10 @@ try MarkovModelParameters markov_model_params; markov_model_params.order = options["order"].as(); - markov_model_params.frequency_cutoff = options["cutoff"].as(); + markov_model_params.frequency_cutoff = options["frequency-cutoff"].as(); + markov_model_params.num_buckets_cutoff 
= options["num-buckets-cutoff"].as();
+    markov_model_params.frequency_add = options["frequency-add"].as();
+    markov_model_params.frequency_desaturate = options["frequency-desaturate"].as();
 
     // Create header block
     std::vector structure_vals;

From 914b1fc9679657638005e3aad15edccc38d299a9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 16 Jun 2018 03:52:27 +0300
Subject: [PATCH 081/151] Data obfuscator: development [#CLICKHOUSE-2]

---
 dbms/programs/obfuscator/Obfuscator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp
index 6b6e0ef2366..15256712248 100644
--- a/dbms/programs/obfuscator/Obfuscator.cpp
+++ b/dbms/programs/obfuscator/Obfuscator.cpp
@@ -909,7 +909,7 @@ try
         std::cout << "Usage: " << argv[0] << " [options] < in > out\n"
             << "\nInput must be seekable file (it will be read twice).\n"
             << "\n" << description << "\n"
-            << "\nExample:\n    " << argv[0] << " --seed \"$(head -c16 /dev/urandom)\" --order 5 --cutoff 5 --input-format TSV --output-format TSV --structure 'CounterID UInt32, URLDomain String, URL String, SearchPhrase String, Title String' < stats.tsv\n";
+            << "\nExample:\n    " << argv[0] << " --seed \"$(head -c16 /dev/urandom)\" --input-format TSV --output-format TSV --structure 'CounterID UInt32, URLDomain String, URL String, SearchPhrase String, Title String' < stats.tsv\n";
         return 0;
     }

From 9d137881a9593f161dfba7ad8a6d3a032251adb9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 16 Jun 2018 04:44:25 +0300
Subject: [PATCH 082/151] Data obfuscator: added documentation [#CLICKHOUSE-2]

---
 dbms/programs/obfuscator/Obfuscator.cpp | 63 +++++++++++++++++++++++--
 1 file changed, 60 insertions(+), 3 deletions(-)

diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp
index 15256712248..308567f01ad 100644
--- a/dbms/programs/obfuscator/Obfuscator.cpp
+++ b/dbms/programs/obfuscator/Obfuscator.cpp
@@ -35,6 +35,51 @@
 #include
 #include
 
+#include
+
+
+static const char * documentation = R"(
+Simple tool for table data obfuscation.
+
+It reads an input table and produces an output table that retains some properties of the input, but contains different data.
+It allows publishing almost-real production data for use in benchmarks.
+
+It is designed to retain the following properties of data:
+- cardinalities of values (number of distinct values) for every column and for every tuple of columns;
+- conditional cardinalities: number of distinct values of one column under condition on value of another column;
+- probability distributions of absolute value of integers; sign of signed integers; exponent and sign for floats;
+- probability distributions of length of strings;
+- probability of zero values of numbers; empty strings and arrays, NULLs;
+- data compression ratio when compressed with LZ77 and entropy family of codecs;
+- continuity (magnitude of difference) of time values across table; continuity of floating point values;
+- date component of DateTime values;
+- UTF-8 validity of string values;
+- string values continue to look somewhat natural.
+
+Most of the properties above are viable for performance testing:
+- reading data, filtering, aggregation and sorting will work at almost the same speed
+    as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
+
+It works in a deterministic fashion: you define a seed value, and the transform is fully determined by the input data and the seed.
+Some transforms are one-to-one and could be reversed, so you need a large enough seed, and you must keep it secret.
+
+It uses some cryptographic primitives to transform data, but from the cryptographic point of view,
+    it doesn't do anything properly, and you should never consider the result secure unless you have other reasons for it.
+
+It may retain some data you don't want to publish.
+
+It always leaves the numbers 0, 1 and -1 as is. It also leaves dates, lengths of arrays and null flags exactly as in the source data.
+For example, if you have a column IsMobile in your table with values 0 and 1, then in the transformed data it will have the same values.
+So, the user will be able to count the exact ratio of mobile traffic.
+
+Another example: suppose you have some private data in your table, like a user email, and you don't want to publish any single email address.
+If your table is large enough and contains multiple different emails, and no email has a much higher frequency than all the others,
+    it will anonymize all the data perfectly. But if you have a small number of distinct values in a column, it can possibly reproduce some of them.
+In that case, you should take care and look at the exact algorithm of how this tool works, and probably fine-tune some of its command line parameters.
+
+This tool works fine only with a reasonable amount of data (at least thousands of rows).
+)";
 
 
 namespace DB
 {
 
@@ -379,6 +424,7 @@ struct MarkovModelParameters
     size_t order;
     size_t frequency_cutoff;
     size_t num_buckets_cutoff;
     size_t frequency_add;
     double frequency_desaturate;
+    size_t determinator_sliding_window_size;
 };
 
 
@@ -541,6 +587,8 @@ public:
         for (auto & elem : table)
         {
             Histogram & histogram = elem.second;
+            if (!histogram.total)
+                continue;
 
             if (histogram.total + histogram.count_end < params.frequency_cutoff)
             {
@@ -571,6 +619,8 @@ public:
         for (auto & elem : table)
         {
             Histogram & histogram = elem.second;
+            if (!histogram.total)
+                continue;
 
             for (auto & bucket : histogram.buckets)
                 bucket.second += params.frequency_add;
 
@@ -585,13 +635,15 @@ public:
         for (auto & elem : table)
        {
             Histogram & histogram = elem.second;
+            if (!histogram.total)
+                continue;
 
-            double average = histogram.total / histogram.buckets.size();
+            double average = double(histogram.total) / histogram.buckets.size();
 
             UInt64 new_total = 0;
             for (auto & bucket : histogram.buckets)
             {
-                bucket.second = bucket.second * (1 - params.frequency_desaturate) + average * params.frequency_desaturate;
+                bucket.second = bucket.second * (1.0 - params.frequency_desaturate) + average * params.frequency_desaturate;
                 new_total += bucket.second;
             }
 
@@ -629,7 +681,9 @@ public:
                 throw Exception("Logical error in markov model");
 
             size_t offset_from_begin_of_string = pos - data;
-            constexpr size_t determinator_sliding_window_size = 8;
+            size_t determinator_sliding_window_size = params.determinator_sliding_window_size;
+            if (determinator_sliding_window_size > determinator_size)
+                determinator_sliding_window_size = determinator_size;
 
             size_t determinator_sliding_window_overflow = offset_from_begin_of_string + determinator_sliding_window_size > determinator_size ?
offset_from_begin_of_string + determinator_sliding_window_size - determinator_size : 0;
@@ -894,6 +948,7 @@ try
         ("num-buckets-cutoff", po::value()->default_value(2), "cutoff for number of different possible continuations for a context: remove all histograms with less than specified number of buckets")
         ("frequency-add", po::value()->default_value(0), "add a constant to every count to lower probability distribution skew")
         ("frequency-desaturate", po::value()->default_value(0), "0..1 - move every frequency towards average to lower probability distribution skew")
+        ("determinator-sliding-window-size", po::value()->default_value(8), "size of a sliding window in a source string - its hash is used as a seed for RNG in markov model")
         ;
 
     po::parsed_options parsed = po::command_line_parser(argc, argv).options(description).run();
@@ -906,7 +961,8 @@ try
         || !options.count("input-format")
         || !options.count("output-format"))
     {
-        std::cout << "Usage: " << argv[0] << " [options] < in > out\n"
+        std::cout << documentation << "\n"
+            << "\nUsage: " << argv[0] << " [options] < in > out\n"
             << "\nInput must be seekable file (it will be read twice).\n"
             << "\n" << description << "\n"
             << "\nExample:\n    " << argv[0] << " --seed \"$(head -c16 /dev/urandom)\" --input-format TSV --output-format TSV --structure 'CounterID UInt32, URLDomain String, URL String, SearchPhrase String, Title String' < stats.tsv\n";
 
         return 0;
     }
@@ -932,6 +988,7 @@ try
     markov_model_params.num_buckets_cutoff = options["num-buckets-cutoff"].as();
     markov_model_params.frequency_add = options["frequency-add"].as();
     markov_model_params.frequency_desaturate = options["frequency-desaturate"].as();
+    markov_model_params.determinator_sliding_window_size = options["determinator-sliding-window-size"].as();
 
     // Create header block
     std::vector structure_vals;
From fae4359ee14daddaea8dbedbee4416a6db7c7e0d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 16 Jun 2018 04:45:19 +0300
Subject: [PATCH 083/151] Data obfuscator: added documentation [#CLICKHOUSE-2]

---
 dbms/programs/obfuscator/Obfuscator.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp
index 308567f01ad..5f573399a34 100644
--- a/dbms/programs/obfuscator/Obfuscator.cpp
+++ b/dbms/programs/obfuscator/Obfuscator.cpp
@@ -35,8 +35,6 @@
 #include
 #include
-#include
-
 
 static const char * documentation = R"(
 Simple tool for table data obfuscation.
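
The determinator mechanism added in the two patches above is easiest to see in isolation: hash a small sliding window of the source string, seed an RNG with that hash, and let the RNG choose the next generated symbol. The following standalone sketch is not code from this patch series; it substitutes std::hash and std::mt19937_64 for the CityHash-based determinator of Obfuscator.cpp, and a fixed alphabet for the trained Markov model. It only illustrates why equal source windows yield equal generated fragments, i.e. why the transform is deterministic and locally similar.

#include <cstdint>
#include <functional>
#include <iostream>
#include <random>
#include <string>

/// Hash of the window of up to `window_size` source bytes ending at `pos`.
/// An illustrative stand-in for the CityHash-based determinator.
static uint64_t windowHash(const std::string & src, size_t pos, size_t window_size)
{
    size_t begin = pos >= window_size ? pos - window_size : 0;
    return std::hash<std::string>{}(src.substr(begin, pos - begin));
}

/// Generate a same-length string. Each output byte is chosen by an RNG
/// seeded from the sliding-window hash, so equal source contexts produce
/// equal output fragments. The real tool samples a trained Markov model
/// here instead of a fixed alphabet.
static std::string obfuscate(const std::string & src, size_t window_size)
{
    static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz";
    std::string result;
    result.reserve(src.size());

    for (size_t pos = 0; pos < src.size(); ++pos)
    {
        std::mt19937_64 rng(windowHash(src, pos, window_size));
        /// sizeof - 1 skips the terminating zero byte (the same off-by-one
        /// that the word_chars fix in a later patch addresses).
        result += alphabet[rng() % (sizeof(alphabet) - 1)];
    }

    return result;
}

int main()
{
    /// The two inputs agree on a long prefix, so the outputs agree on a
    /// prefix as well: with the default 8-byte window, any two inputs that
    /// match for a window's worth of preceding bytes generate the same
    /// next symbol.
    std::cout << obfuscate("http://example.com/foo", 8) << '\n';
    std::cout << obfuscate("http://example.com/bar", 8) << '\n';
}
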
From bc68116deb96dfbc37a235873198778e8c7f0b97 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 04:53:26 +0300 Subject: [PATCH 084/151] Reverted submodule [#CLICKHOUSE-2] --- contrib/poco | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/poco b/contrib/poco index 81d4fdfcb88..3a2d0a833a2 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 81d4fdfcb887f89b0f7b1e9b503cbe63e6d8366b +Subproject commit 3a2d0a833a22ef5e1164a9ada54e3253cb038904 From 2c72b8ae66b39c589f453c1fb71bdc26b89436fd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 04:55:52 +0300 Subject: [PATCH 085/151] Reverted submodule [#CLICKHOUSE-2] --- contrib/capnproto | 2 +- contrib/librdkafka | 2 +- contrib/zstd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/capnproto b/contrib/capnproto index c949a18da5f..7173ab638fd 160000 --- a/contrib/capnproto +++ b/contrib/capnproto @@ -1 +1 @@ -Subproject commit c949a18da5f041a36cc218c5c4b79c7705999b4f +Subproject commit 7173ab638fdf144032411dc69fb1082cd473e08f diff --git a/contrib/librdkafka b/contrib/librdkafka index c3d50eb6137..7478b5ef16a 160000 --- a/contrib/librdkafka +++ b/contrib/librdkafka @@ -1 +1 @@ -Subproject commit c3d50eb613704fb9c8ab3bce95a88275cb5875b7 +Subproject commit 7478b5ef16aadd6543fe38bc6a2deb895c70da98 diff --git a/contrib/zstd b/contrib/zstd index f4340f46b23..255597502c3 160000 --- a/contrib/zstd +++ b/contrib/zstd @@ -1 +1 @@ -Subproject commit f4340f46b2387bc8de7d5320c0b83bb1499933ad +Subproject commit 255597502c3a4ef150abc964e376d4202a8c2929 From e6aa58e66799fca3673b84cd9e8ed515aa3bb3b8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 05:04:46 +0300 Subject: [PATCH 086/151] Fixed error [#CLICKHOUSE-2] --- dbms/programs/obfuscator/Obfuscator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index 5f573399a34..5e13d4ee08d 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -332,7 +332,7 @@ void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UInt64 se if (isWordCharASCII(src[j])) { static constexpr char word_chars[] = "_01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; - dst[j] = word_chars[dst[j] % sizeof(word_chars)]; + dst[j] = word_chars[dst[j] % (sizeof(word_chars) - 1)]; } } } From e66cdfe429d96c14570d12a78293a1b637975f1e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 05:06:51 +0300 Subject: [PATCH 087/151] Fixed error [#CLICKHOUSE-2] --- dbms/programs/obfuscator/Obfuscator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index 5e13d4ee08d..9648c614364 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -720,7 +720,7 @@ public: /// To generate content of strings, use /// order-N Markov model on Unicode code points, /// and to generate next code point use deterministic RNG -/// determined by hash of 8-byte sliding window of source string. +/// determined by hash of a sliding window (default 8 bytes) of source string. /// This is intended to generate locally-similar strings from locally-similar sources. 
class StringModel : public IModel
{
From 55a712beab02a3cd334aae017f97597b940ee580 Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Sat, 16 Jun 2018 05:11:11 +0300
Subject: [PATCH 088/151] Update system.parts.md

---
 docs/en/system_tables/system.parts.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/system_tables/system.parts.md b/docs/en/system_tables/system.parts.md
index 0fb5aff568d..7e56d139219 100644
--- a/docs/en/system_tables/system.parts.md
+++ b/docs/en/system_tables/system.parts.md
@@ -6,7 +6,7 @@ Each row describes one part of the data.
 
 Columns:
 
-- partition (String) – The partition name. Low versions must be YYYYMM format, and now it can be any. To learn what a partition is, see the description of the [ALTER](../query_language/queries.md#query_language_queries_alter) query.
+- partition (String) – The partition name. It's in YYYYMM format in case of old-style partitioning and is an arbitrary serialized value in case of custom partitioning. To learn what a partition is, see the description of the [ALTER](../query_language/queries.md#query_language_queries_alter) query.
 - name (String) – Name of the data part.
 - active (UInt8) – Indicates whether the part is active. If a part is active, it is used in a table; otherwise, it will be deleted. Inactive data parts remain after merging.
 - marks (UInt64) – The number of marks. To get the approximate number of rows in a data part, multiply ``marks`` by the index granularity (usually 8192).
From 0272fc996af9136bbf81562f8ac4e04469d1aea6 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 16 Jun 2018 05:13:54 +0300
Subject: [PATCH 089/151] Fixed build [#CLICKHOUSE-2]

---
 dbms/src/Storages/AlterCommands.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h
index fca8a68f70d..87b3866e061 100644
--- a/dbms/src/Storages/AlterCommands.h
+++ b/dbms/src/Storages/AlterCommands.h
@@ -1,8 +1,10 @@
 #pragma once
 
+#include
 #include
 #include
 
+
 namespace DB
 {
 
From d578bcbd5c384a80c2922bdd779469dd507e4ad2 Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Sat, 16 Jun 2018 05:14:37 +0300
Subject: [PATCH 090/151] Update StringUtils.h

---
 dbms/src/Common/StringUtils/StringUtils.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dbms/src/Common/StringUtils/StringUtils.h b/dbms/src/Common/StringUtils/StringUtils.h
index 49c1c2d40a2..136a709b389 100644
--- a/dbms/src/Common/StringUtils/StringUtils.h
+++ b/dbms/src/Common/StringUtils/StringUtils.h
@@ -134,4 +134,6 @@ inline bool equalsCaseInsensitive(char a, char b)
     return a == b || (isAlphaASCII(a) && alternateCaseIfAlphaASCII(a) == b);
 }
 
+
+/// Inefficient.
std::string trim(const std::string & str, const std::function & predicate); From 4e8082a69927466c6c3fa87cb0a68392adb638f0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 05:16:19 +0300 Subject: [PATCH 091/151] Fixed build [#CLICKHOUSE-2] --- dbms/src/Storages/PartitionCommands.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Storages/PartitionCommands.h b/dbms/src/Storages/PartitionCommands.h index 6fa127de899..2f2479eccc9 100644 --- a/dbms/src/Storages/PartitionCommands.h +++ b/dbms/src/Storages/PartitionCommands.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB From e4776e8b5d00fe6cd927ac09b056253291725fe9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 05:31:41 +0300 Subject: [PATCH 092/151] Fixed build (no include path was specified) #2513 --- dbms/src/Common/StringUtils/StringUtils.cpp | 9 +----- dbms/src/Common/StringUtils/StringUtils.h | 31 +++++++++++++++++++-- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/dbms/src/Common/StringUtils/StringUtils.cpp b/dbms/src/Common/StringUtils/StringUtils.cpp index 572905d21dc..8a0b25dbdad 100644 --- a/dbms/src/Common/StringUtils/StringUtils.cpp +++ b/dbms/src/Common/StringUtils/StringUtils.cpp @@ -1,5 +1,5 @@ #include "StringUtils.h" -#include + namespace detail { @@ -15,10 +15,3 @@ bool endsWith(const std::string & s, const char * suffix, size_t suffix_size) } } - -std::string trim(const std::string & str, const std::function & predicate) -{ - std::string trimmed = str; - boost::trim_if(trimmed, predicate); - return trimmed; -} diff --git a/dbms/src/Common/StringUtils/StringUtils.h b/dbms/src/Common/StringUtils/StringUtils.h index 8b5930aacb1..ffb4c0d85c1 100644 --- a/dbms/src/Common/StringUtils/StringUtils.h +++ b/dbms/src/Common/StringUtils/StringUtils.h @@ -4,7 +4,6 @@ #include #include #include -#include namespace detail @@ -135,5 +134,31 @@ inline bool equalsCaseInsensitive(char a, char b) } -/// Inefficient. 
-std::string trim(const std::string & str, const std::function & predicate); +template +std::string trim(const std::string & str, F && predicate) +{ + size_t cut_front = 0; + size_t cut_back = 0; + size_t size = str.size(); + + for (size_t i = 0; i < size; ++i) + { + if (predicate(str[i])) + ++cut_front; + else + break; + } + + if (cut_front == size) + return {}; + + for (auto it = str.rbegin(); it != str.rend(); ++it) + { + if (predicate(*it)) + ++cut_back; + else + break; + } + + return str.substr(cut_front, size - cut_front - cut_back); +} From d8eb8c8ddf75eaf415a234f848f6bb0bbe747caf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 06:31:49 +0300 Subject: [PATCH 093/151] Fixed error [#CLICKHOUSE-2] --- dbms/programs/obfuscator/Obfuscator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index 9648c614364..b935e266c95 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -963,7 +963,7 @@ try << "\nUsage: " << argv[0] << " [options] < in > out\n" << "\nInput must be seekable file (it will be read twice).\n" << "\n" << description << "\n" - << "\nExample:\n " << argv[0] << " --seed \"$(head -c16 /dev/urandom)\" --input-format TSV --output-format TSV --structure 'CounterID UInt32, URLDomain String, URL String, SearchPhrase String, Title String' < stats.tsv\n"; + << "\nExample:\n " << argv[0] << " --seed \"$(head -c16 /dev/urandom | base64)\" --input-format TSV --output-format TSV --structure 'CounterID UInt32, URLDomain String, URL String, SearchPhrase String, Title String' < stats.tsv\n"; return 0; } From 52755f248aead4af9a4b6067100d9ac795716085 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 06:35:23 +0300 Subject: [PATCH 094/151] Fixed error [#CLICKHOUSE-2] --- dbms/programs/obfuscator/Obfuscator.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index b935e266c95..56724332c16 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -85,6 +86,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int CANNOT_SEEK_THROUGH_FILE; } @@ -1012,15 +1014,11 @@ try ReadBufferFromFileDescriptor file_in(STDIN_FILENO); WriteBufferFromFileDescriptor file_out(STDOUT_FILENO); - try { /// stdin must be seekable - file_in.seek(0); - } - catch (Exception & e) - { - e.addMessage("Input must be seekable file (it will be read twice)."); - throw; + auto res = lseek(file_in.getFD(), 0, SEEK_SET); + if (-1 == res) + throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); } Obfuscator obfuscator(header, seed, markov_model_params); From 5f26c3753f123390da2591fe1965ce1de64db207 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=93=D0=B5=D0=BE=D1=80=D0=B3=D0=B8=D0=B9=20=D0=9A=D0=BE?= =?UTF-8?q?=D0=BD=D0=B4=D1=80=D0=B0=D1=82=D1=8C=D0=B5=D0=B2?= Date: Sat, 16 Jun 2018 03:31:39 +0000 Subject: [PATCH 095/151] Accept libtinfo for libtermcap --- cmake/find_readline_edit.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/find_readline_edit.cmake b/cmake/find_readline_edit.cmake index 5ec30b8e314..50373f11952 100644 --- a/cmake/find_readline_edit.cmake +++ b/cmake/find_readline_edit.cmake 
@@ -10,7 +10,7 @@ endif () list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.2) -find_library (TERMCAP_LIB NAMES termcap) +find_library (TERMCAP_LIB NAMES termcap tinfo) find_library (EDIT_LIB NAMES edit) set(READLINE_INCLUDE_PATHS "/usr/local/opt/readline/include") From 05d31f5e46e3eb96ad7f8bc3a7912df51f013462 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 07:48:37 +0300 Subject: [PATCH 096/151] LZ4_decompress_faster: reordered variants and removed variant that is always worse [#CLICKHOUSE-3773] --- dbms/src/IO/LZ4_decompress_faster.cpp | 10 ++++------ dbms/src/IO/LZ4_decompress_faster.h | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/dbms/src/IO/LZ4_decompress_faster.cpp b/dbms/src/IO/LZ4_decompress_faster.cpp index 28fe324c9ed..be8501200a3 100644 --- a/dbms/src/IO/LZ4_decompress_faster.cpp +++ b/dbms/src/IO/LZ4_decompress_faster.cpp @@ -475,13 +475,11 @@ void decompress( Stopwatch watch; if (best_variant == 0) - decompressImpl<8, false>(source, dest, dest_size); - if (best_variant == 1) - decompressImpl<8, true>(source, dest, dest_size); - if (best_variant == 2) - decompressImpl<16, false>(source, dest, dest_size); - if (best_variant == 3) decompressImpl<16, true>(source, dest, dest_size); + if (best_variant == 1) + decompressImpl<16, false>(source, dest, dest_size); + if (best_variant == 2) + decompressImpl<8, true>(source, dest, dest_size); watch.stop(); diff --git a/dbms/src/IO/LZ4_decompress_faster.h b/dbms/src/IO/LZ4_decompress_faster.h index d05614b6831..f44d72a8248 100644 --- a/dbms/src/IO/LZ4_decompress_faster.h +++ b/dbms/src/IO/LZ4_decompress_faster.h @@ -88,7 +88,7 @@ struct PerformanceStatistics }; /// Number of different algorithms to select from. - static constexpr size_t NUM_ELEMENTS = 4; + static constexpr size_t NUM_ELEMENTS = 3; /// Cold invocations may be affected by additional memory latencies. Don't take first invocations into account. 
static constexpr double NUM_INVOCATIONS_TO_THROW_OFF = 2; From 2298f1c68b31940812e50a6687ca2831bb52f49a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 08:54:06 +0300 Subject: [PATCH 097/151] Fixed strange code #2501 --- dbms/programs/server/Server.cpp | 5 ++- dbms/src/Common/ErrorCodes.cpp | 1 + .../Dictionaries/DictionarySourceFactory.cpp | 5 +-- dbms/src/IO/HTTPCommon.cpp | 32 ++++++++++++++----- dbms/src/IO/HTTPCommon.h | 22 ++++++++----- dbms/src/IO/ReadWriteBufferFromHTTP.cpp | 6 ++-- dbms/src/IO/WriteBufferFromHTTP.cpp | 9 ++++-- dbms/src/IO/WriteBufferFromHTTP.h | 9 +++--- dbms/src/Storages/StorageURL.cpp | 15 ++++++--- 9 files changed, 68 insertions(+), 36 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 368a8cdfa18..7a393d4246a 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -430,8 +430,7 @@ int Server::main(const std::vector & /*args*/) if (config().has("https_port")) { #if USE_POCO_NETSSL - std::call_once(ssl_init_once, SSLInit); - + initSSL(); Poco::Net::SecureServerSocket socket; auto address = socket_bind_listen(socket, listen_host, config().getInt("https_port"), /* secure = */ true); socket.setReceiveTimeout(settings.http_receive_timeout); @@ -452,7 +451,6 @@ int Server::main(const std::vector & /*args*/) /// TCP if (config().has("tcp_port")) { - std::call_once(ssl_init_once, SSLInit); Poco::Net::ServerSocket socket; auto address = socket_bind_listen(socket, listen_host, config().getInt("tcp_port")); socket.setReceiveTimeout(settings.receive_timeout); @@ -470,6 +468,7 @@ int Server::main(const std::vector & /*args*/) if (config().has("tcp_port_secure")) { #if USE_POCO_NETSSL + initSSL(); Poco::Net::SecureServerSocket socket; auto address = socket_bind_listen(socket, listen_host, config().getInt("tcp_port_secure"), /* secure = */ true); socket.setReceiveTimeout(settings.receive_timeout); diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 21e5f65cda3..70d0afe8c8a 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -375,6 +375,7 @@ namespace ErrorCodes extern const int UNKNOWN_MUTATION_COMMAND = 398; extern const int FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT = 399; extern const int CANNOT_STAT = 400; + extern const int FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME = 401; extern const int KEEPER_EXCEPTION = 999; diff --git a/dbms/src/Dictionaries/DictionarySourceFactory.cpp b/dbms/src/Dictionaries/DictionarySourceFactory.cpp index 963a51c7923..d4788d0c25e 100644 --- a/dbms/src/Dictionaries/DictionarySourceFactory.cpp +++ b/dbms/src/Dictionaries/DictionarySourceFactory.cpp @@ -168,11 +168,8 @@ DictionarySourcePtr DictionarySourceFactory::create( if (dict_struct.has_expressions) throw Exception{"Dictionary source of type `http` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; -#if USE_POCO_NETSSL // Used for https queries - std::call_once(ssl_init_once, SSLInit); -#endif - + initSSL(); return std::make_unique(dict_struct, config, config_prefix + ".http", sample_block, context); } else if ("library" == source_type) diff --git a/dbms/src/IO/HTTPCommon.cpp b/dbms/src/IO/HTTPCommon.cpp index 579755309f7..0f4da941132 100644 --- a/dbms/src/IO/HTTPCommon.cpp +++ b/dbms/src/IO/HTTPCommon.cpp @@ -19,12 +19,15 @@ namespace DB { + namespace ErrorCodes { extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER; extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS; + extern const int FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME; } + 
void setResponseDefaultHeaders(Poco::Net::HTTPServerResponse & response, unsigned keep_alive_timeout)
 {
     if (!response.getKeepAlive())
@@ -35,25 +38,37 @@ void setResponseDefaultHeaders(Poco::Net::HTTPServerResponse & response, unsigne
         response.set("Keep-Alive", "timeout=" + std::to_string(timeout.totalSeconds()));
 }
 
-std::once_flag ssl_init_once;
 
-void SSLInit()
+void initSSL()
 {
 // http://stackoverflow.com/questions/18315472/https-request-in-c-using-poco
 #if USE_POCO_NETSSL
-    Poco::Net::initializeSSL();
+    struct Initializer
+    {
+        Initializer()
+        {
+            Poco::Net::initializeSSL();
+        }
+    };
+
+    static Initializer initializer;
 #endif
 }
 
-std::unique_ptr getPreparedSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts)
+std::unique_ptr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts)
 {
     bool is_ssl = static_cast(uri.getScheme() == "https");
-    std::unique_ptr session(
+    std::unique_ptr session;
+
+    if (is_ssl)
 #if USE_POCO_NETSSL
-        is_ssl ? new Poco::Net::HTTPSClientSession :
+        session = std::make_unique();
+#else
+        throw Exception("ClickHouse was built without HTTPS support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME);
 #endif
-        new Poco::Net::HTTPClientSession);
+    else
+        session = std::make_unique();
 
     session->setHost(DNSResolver::instance().resolveHost(uri.getHost()).toString());
     session->setPort(uri.getPort());
@@ -68,7 +83,7 @@ std::unique_ptr getPreparedSession(const Poco::URI
 }
 
-std::istream * makeRequest(
+std::istream * receiveResponse(
     Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response)
 {
     auto istr = &session.receiveResponse(response);
@@ -86,4 +101,5 @@
     }
     return istr;
 }
+
 }
diff --git a/dbms/src/IO/HTTPCommon.h b/dbms/src/IO/HTTPCommon.h
index 28bdd86bee4..473740d1666 100644
--- a/dbms/src/IO/HTTPCommon.h
+++ b/dbms/src/IO/HTTPCommon.h
@@ -24,18 +24,24 @@ namespace Poco
 namespace DB
 {
-
 const int HTTP_TOO_MANY_REQUESTS = 429;
 
 void setResponseDefaultHeaders(Poco::Net::HTTPServerResponse & response, unsigned keep_alive_timeout);
 
-extern std::once_flag ssl_init_once;
-void SSLInit();
+/// Call this method if you are going to make HTTPS requests. It's safe to call it many times from different threads.
+void initSSL();
+
+
+/// Create a session object to perform requests and set the required parameters.
+std::unique_ptr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts);
+
+
+/** Used to receive a response (response headers and possibly body)
+  * after sending data (request headers and possibly body).
+  * Throws an exception in case of a non-HTTP_OK (200) response code.
+  * Returned istream lives in 'session' object.
+ */ +std::istream * receiveResponse(Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response); -/* Function makes HTTP-request from prepared structures and returns response istream - * in case of HTTP_OK and throws exception with details in case of not HTTP_OK - */ -std::istream* makeRequest(Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response); } diff --git a/dbms/src/IO/ReadWriteBufferFromHTTP.cpp b/dbms/src/IO/ReadWriteBufferFromHTTP.cpp index c707ed5eaf0..af0f34babbf 100644 --- a/dbms/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/dbms/src/IO/ReadWriteBufferFromHTTP.cpp @@ -22,9 +22,8 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP(const Poco::URI & uri, : ReadBuffer(nullptr, 0), uri{uri}, method{!method_.empty() ? method_ : out_stream_callback ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET}, - session{getPreparedSession(uri, timeouts)} + session{makeHTTPSession(uri, timeouts)} { - Poco::Net::HTTPRequest request(method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); request.setHost(uri.getHost()); // use original, not resolved host name in header @@ -40,7 +39,7 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP(const Poco::URI & uri, if (out_stream_callback) out_stream_callback(stream_out); - istr = makeRequest(*session, request, response); + istr = receiveResponse(*session, request, response); impl = std::make_unique(*istr, buffer_size_); } @@ -54,4 +53,5 @@ bool ReadWriteBufferFromHTTP::nextImpl() working_buffer = internal_buffer; return true; } + } diff --git a/dbms/src/IO/WriteBufferFromHTTP.cpp b/dbms/src/IO/WriteBufferFromHTTP.cpp index adc4574eaf8..51472de85e0 100644 --- a/dbms/src/IO/WriteBufferFromHTTP.cpp +++ b/dbms/src/IO/WriteBufferFromHTTP.cpp @@ -3,12 +3,14 @@ #include #include + namespace DB { + WriteBufferFromHTTP::WriteBufferFromHTTP( const Poco::URI & uri, const std::string & method, const ConnectionTimeouts & timeouts, size_t buffer_size_) : WriteBufferFromOStream(buffer_size_) - , session{getPreparedSession(uri, timeouts)} + , session{makeHTTPSession(uri, timeouts)} , request{method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1} { request.setHost(uri.getHost()); @@ -18,8 +20,11 @@ WriteBufferFromHTTP::WriteBufferFromHTTP( ostr = &session->sendRequest(request); } + void WriteBufferFromHTTP::finalize() { - makeRequest(*session, request, response); + receiveResponse(*session, request, response); + /// TODO: Response body is ignored. } + } diff --git a/dbms/src/IO/WriteBufferFromHTTP.h b/dbms/src/IO/WriteBufferFromHTTP.h index 66c1e6786bd..5a64266e5a8 100644 --- a/dbms/src/IO/WriteBufferFromHTTP.h +++ b/dbms/src/IO/WriteBufferFromHTTP.h @@ -8,8 +8,10 @@ #include #include + namespace DB { + /* Perform HTTP POST/PUT request. */ class WriteBufferFromHTTP : public WriteBufferFromOStream @@ -20,14 +22,13 @@ private: Poco::Net::HTTPResponse response; public: - explicit WriteBufferFromHTTP(const Poco::URI & uri, + WriteBufferFromHTTP(const Poco::URI & uri, const std::string & method = Poco::Net::HTTPRequest::HTTP_POST, // POST or PUT only const ConnectionTimeouts & timeouts = {}, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE); - ~WriteBufferFromHTTP() override {} - - // This method have to be called, to make actual request + /// Receives response from the server after sending all data. 
void finalize(); }; + } diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 50aed748d1a..1c3b8246492 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -106,11 +106,14 @@ namespace { writer->write(block); } - void writePrefix() override { + + void writePrefix() override + { writer->writePrefix(); } - void writeSuffix() override { + void writeSuffix() override + { writer->writeSuffix(); writer->flush(); write_buf->finalize(); @@ -122,7 +125,10 @@ namespace BlockOutputStreamPtr writer; }; } -BlockInputStreams StorageURL::read(const Names & /*column_names*/, + + +BlockInputStreams StorageURL::read( + const Names & /*column_names*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum & /*processed_stage*/, @@ -146,7 +152,7 @@ BlockOutputStreamPtr StorageURL::write(const ASTPtr & /*query*/, const Settings return std::make_shared( uri, format_name, getSampleBlock(), context_global, ConnectionTimeouts::getHTTPTimeouts(context_global.getSettingsRef())); } - + void registerStorageURL(StorageFactory & factory) { factory.registerStorage("URL", [](const StorageFactory::Arguments & args) @@ -169,4 +175,5 @@ void registerStorageURL(StorageFactory & factory) return StorageURL::create(uri, args.table_name, format_name, args.columns, args.context); }); } + } From 6173e4b71f940ad5e0b12bccfd4060de7d7102c2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jun 2018 09:00:51 +0300 Subject: [PATCH 098/151] Returned back "explicit" #2501 --- dbms/src/IO/WriteBufferFromHTTP.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/IO/WriteBufferFromHTTP.h b/dbms/src/IO/WriteBufferFromHTTP.h index 5a64266e5a8..12eed48021a 100644 --- a/dbms/src/IO/WriteBufferFromHTTP.h +++ b/dbms/src/IO/WriteBufferFromHTTP.h @@ -22,7 +22,7 @@ private: Poco::Net::HTTPResponse response; public: - WriteBufferFromHTTP(const Poco::URI & uri, + explicit WriteBufferFromHTTP(const Poco::URI & uri, const std::string & method = Poco::Net::HTTPRequest::HTTP_POST, // POST or PUT only const ConnectionTimeouts & timeouts = {}, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE); From 3ea8ce1c0a944d59ac46f079c09f957269ee9870 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Jun 2018 02:34:34 +0300 Subject: [PATCH 099/151] Obfuscator: better default parameter #2518 --- dbms/programs/obfuscator/Obfuscator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index 56724332c16..c555ff2b6eb 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -945,7 +945,7 @@ try ("silent", po::value()->default_value(false), "don't print information messages to stderr") ("order", po::value()->default_value(5), "order of markov model to generate strings") ("frequency-cutoff", po::value()->default_value(5), "frequency cutoff for markov model: remove all buckets with count less than specified") - ("num-buckets-cutoff", po::value()->default_value(2), "cutoff for number of different possible continuations for a context: remove all histograms with less than specified number of buckets") + ("num-buckets-cutoff", po::value()->default_value(0), "cutoff for number of different possible continuations for a context: remove all histograms with less than specified number of buckets") ("frequency-add", po::value()->default_value(0), "add a constant to every count to lower probability 
distribution skew") ("frequency-desaturate", po::value()->default_value(0), "0..1 - move every frequency towards average to lower probability distribution skew") ("determinator-sliding-window-size", po::value()->default_value(8), "size of a sliding window in a source string - its hash is used as a seed for RNG in markov model") From 56d7e870d25a22a3f48dda499e3bfc9c09dee17b Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Mon, 18 Jun 2018 01:47:17 +0800 Subject: [PATCH 100/151] Add a new JDBC driver implementation to the document --- docs/en/interfaces/jdbc.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/en/interfaces/jdbc.md b/docs/en/interfaces/jdbc.md index 08b6c6b055d..36994ce97a0 100644 --- a/docs/en/interfaces/jdbc.md +++ b/docs/en/interfaces/jdbc.md @@ -2,3 +2,6 @@ There is an official JDBC driver for ClickHouse. See [here](https://github.com/yandex/clickhouse-jdbc) . +JDBC drivers implemented by other organizations: + +- [ClickHouse-Native-JDBC](https://github.com/housepower/ClickHouse-Native-JDBC) From 82102c689689d90d615b075e6801aefb4366fdfc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 Jun 2018 04:33:34 +0300 Subject: [PATCH 101/151] Fixed conflicting headers on Fedora Rawhide while using Linux native AIO #2520 --- dbms/src/Common/AIO.h | 227 --------------------------------- dbms/src/Common/ErrorCodes.cpp | 5 +- dbms/src/IO/AIO.cpp | 56 ++++++++ dbms/src/IO/AIO.h | 32 +++++ dbms/src/IO/AIOContextPool.cpp | 155 ++++++++++++++++++++++ dbms/src/IO/AIOContextPool.h | 53 ++++++++ dbms/src/IO/ReadBufferAIO.cpp | 1 + dbms/src/IO/ReadBufferAIO.h | 3 +- dbms/src/IO/WriteBufferAIO.cpp | 15 ++- dbms/src/IO/WriteBufferAIO.h | 7 +- 10 files changed, 313 insertions(+), 241 deletions(-) delete mode 100644 dbms/src/Common/AIO.h create mode 100644 dbms/src/IO/AIO.cpp create mode 100644 dbms/src/IO/AIO.h create mode 100644 dbms/src/IO/AIOContextPool.cpp create mode 100644 dbms/src/IO/AIOContextPool.h diff --git a/dbms/src/Common/AIO.h b/dbms/src/Common/AIO.h deleted file mode 100644 index 133da5f04eb..00000000000 --- a/dbms/src/Common/AIO.h +++ /dev/null @@ -1,227 +0,0 @@ -#pragma once - -#if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/** Small wrappers for asynchronous I/O. 
- */ - - -inline int io_setup(unsigned nr, aio_context_t * ctxp) -{ - return syscall(__NR_io_setup, nr, ctxp); -} - -inline int io_destroy(aio_context_t ctx) -{ - return syscall(__NR_io_destroy, ctx); -} - -/// last argument is an array of pointers technically speaking -inline int io_submit(aio_context_t ctx, long nr, struct iocb * iocbpp[]) -{ - return syscall(__NR_io_submit, ctx, nr, iocbpp); -} - -inline int io_getevents(aio_context_t ctx, long min_nr, long max_nr, io_event *events, struct timespec * timeout) -{ - return syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout); -} - - -struct AIOContext : private boost::noncopyable -{ - aio_context_t ctx; - - AIOContext(unsigned int nr_events = 128) - { - ctx = 0; - if (io_setup(nr_events, &ctx) < 0) - DB::throwFromErrno("io_setup failed"); - } - - ~AIOContext() - { - io_destroy(ctx); - } -}; - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int AIO_COMPLETION_ERROR; - extern const int AIO_SUBMIT_ERROR; -} - - -class AIOContextPool : public ext::singleton -{ - friend class ext::singleton; - - static const auto max_concurrent_events = 128; - static const auto timeout_sec = 1; - - AIOContext aio_context{max_concurrent_events}; - - using ID = size_t; - using BytesRead = ssize_t; - - /// Autoincremental id used to identify completed requests - ID id{}; - mutable std::mutex mutex; - mutable std::condition_variable have_resources; - std::map> promises; - - std::atomic cancelled{false}; - std::thread io_completion_monitor{&AIOContextPool::doMonitor, this}; - - ~AIOContextPool() - { - cancelled.store(true, std::memory_order_relaxed); - io_completion_monitor.join(); - } - - void doMonitor() - { - /// continue checking for events unless cancelled - while (!cancelled.load(std::memory_order_relaxed)) - waitForCompletion(); - - /// wait until all requests have been completed - while (!promises.empty()) - waitForCompletion(); - } - - void waitForCompletion() - { - /// array to hold completion events - io_event events[max_concurrent_events]; - - try - { - const auto num_events = getCompletionEvents(events, max_concurrent_events); - fulfillPromises(events, num_events); - notifyProducers(num_events); - } - catch (...) 
- { - /// there was an error, log it, return to any producer and continue - reportExceptionToAnyProducer(); - tryLogCurrentException("AIOContextPool::waitForCompletion()"); - } - } - - int getCompletionEvents(io_event events[], const int max_events) - { - timespec timeout{timeout_sec, 0}; - - auto num_events = 0; - - /// request 1 to `max_events` events - while ((num_events = io_getevents(aio_context.ctx, 1, max_events, events, &timeout)) < 0) - if (errno != EINTR) - throwFromErrno("io_getevents: Failed to wait for asynchronous IO completion", - ErrorCodes::AIO_COMPLETION_ERROR, errno); - - return num_events; - } - - void fulfillPromises(const io_event events[], const int num_events) - { - if (num_events == 0) - return; - - const std::lock_guard lock{mutex}; - - /// look at returned events and find corresponding promise, set result and erase promise from map - for (const auto & event : boost::make_iterator_range(events, events + num_events)) - { - /// get id from event - const auto id = event.data; - - /// set value via promise and release it - const auto it = promises.find(id); - if (it == std::end(promises)) - { - LOG_ERROR(&Poco::Logger::get("AIOcontextPool"), "Found io_event with unknown id " << id); - continue; - } - - it->second.set_value(event.res); - promises.erase(it); - } - } - - void notifyProducers(const int num_producers) const - { - if (num_producers == 0) - return; - - if (num_producers > 1) - have_resources.notify_all(); - else - have_resources.notify_one(); - } - - void reportExceptionToAnyProducer() - { - const std::lock_guard lock{mutex}; - - const auto any_promise_it = std::begin(promises); - any_promise_it->second.set_exception(std::current_exception()); - } - -public: - /// Request AIO read operation for iocb, returns a future with number of bytes read - std::future post(struct iocb & iocb) - { - std::unique_lock lock{mutex}; - - /// get current id and increment it by one - const auto request_id = id++; - - /// create a promise and put request in "queue" - promises.emplace(request_id, std::promise{}); - /// store id in AIO request for further identification - iocb.aio_data = request_id; - - auto num_requests = 0; - struct iocb * requests[] { &iocb }; - - /// submit a request - while ((num_requests = io_submit(aio_context.ctx, 1, requests)) < 0) - { - if (errno == EAGAIN) - /// wait until at least one event has been completed (or a spurious wakeup) and try again - have_resources.wait(lock); - else if (errno != EINTR) - throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", - ErrorCodes::AIO_SUBMIT_ERROR, errno); - } - - return promises[request_id].get_future(); - } -}; - - -} - -#endif diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 70d0afe8c8a..a1662563a1f 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -273,8 +273,8 @@ namespace ErrorCodes extern const int INFINITE_LOOP = 269; extern const int CANNOT_COMPRESS = 270; extern const int CANNOT_DECOMPRESS = 271; - extern const int AIO_SUBMIT_ERROR = 272; - extern const int AIO_COMPLETION_ERROR = 273; + extern const int CANNOT_IO_SUBMIT = 272; + extern const int CANNOT_IO_GETEVENTS = 273; extern const int AIO_READ_ERROR = 274; extern const int AIO_WRITE_ERROR = 275; extern const int INDEX_NOT_USED = 277; @@ -376,6 +376,7 @@ namespace ErrorCodes extern const int FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT = 399; extern const int CANNOT_STAT = 400; extern const int FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME = 401; + extern const int CANNOT_IOSETUP = 
402; extern const int KEEPER_EXCEPTION = 999; diff --git a/dbms/src/IO/AIO.cpp b/dbms/src/IO/AIO.cpp new file mode 100644 index 00000000000..9c8160919f1 --- /dev/null +++ b/dbms/src/IO/AIO.cpp @@ -0,0 +1,56 @@ +#if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) + +#include +#include +#include +#include + +#include + + +/** Small wrappers for asynchronous I/O. + */ + +namespace DB +{ + namespace ErrorCodes + { + extern const int CANNOT_IOSETUP; + } +} + + +int io_setup(unsigned nr, aio_context_t * ctxp) +{ + return syscall(__NR_io_setup, nr, ctxp); +} + +int io_destroy(aio_context_t ctx) +{ + return syscall(__NR_io_destroy, ctx); +} + +int io_submit(aio_context_t ctx, long nr, struct iocb * iocbpp[]) +{ + return syscall(__NR_io_submit, ctx, nr, iocbpp); +} + +int io_getevents(aio_context_t ctx, long min_nr, long max_nr, io_event * events, struct timespec * timeout) +{ + return syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout); +} + + +AIOContext::AIOContext(unsigned int nr_events) +{ + ctx = 0; + if (io_setup(nr_events, &ctx) < 0) + DB::throwFromErrno("io_setup failed", DB::ErrorCodes::CANNOT_IOSETUP); +} + +AIOContext::~AIOContext() +{ + io_destroy(ctx); +} + +#endif diff --git a/dbms/src/IO/AIO.h b/dbms/src/IO/AIO.h new file mode 100644 index 00000000000..1cb55ad1f96 --- /dev/null +++ b/dbms/src/IO/AIO.h @@ -0,0 +1,32 @@ +#pragma once + +#if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) + +/// https://stackoverflow.com/questions/20759750/resolving-redefinition-of-timespec-in-time-h +#define timespec linux_timespec +#include +#undef timespec + + +/** Small wrappers for asynchronous I/O. + */ + +int io_setup(unsigned nr, aio_context_t * ctxp); + +int io_destroy(aio_context_t ctx); + +/// last argument is an array of pointers technically speaking +int io_submit(aio_context_t ctx, long nr, struct iocb * iocbpp[]); + +int io_getevents(aio_context_t ctx, long min_nr, long max_nr, io_event * events, struct timespec * timeout); + + +struct AIOContext : private boost::noncopyable +{ + aio_context_t ctx; + + AIOContext(unsigned int nr_events = 128); + ~AIOContext(); +}; + +#endif diff --git a/dbms/src/IO/AIOContextPool.cpp b/dbms/src/IO/AIOContextPool.cpp new file mode 100644 index 00000000000..888cad09a90 --- /dev/null +++ b/dbms/src/IO/AIOContextPool.cpp @@ -0,0 +1,155 @@ +#if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) + +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_IO_SUBMIT; + extern const int CANNOT_IO_GETEVENTS; +} + + +AIOContextPool::~AIOContextPool() +{ + cancelled.store(true, std::memory_order_relaxed); + io_completion_monitor.join(); +} + + +void AIOContextPool::doMonitor() +{ + /// continue checking for events unless cancelled + while (!cancelled.load(std::memory_order_relaxed)) + waitForCompletion(); + + /// wait until all requests have been completed + while (!promises.empty()) + waitForCompletion(); +} + + +void AIOContextPool::waitForCompletion() +{ + /// array to hold completion events + io_event events[max_concurrent_events]; + + try + { + const auto num_events = getCompletionEvents(events, max_concurrent_events); + fulfillPromises(events, num_events); + notifyProducers(num_events); + } + catch (...) 
+ { + /// there was an error, log it, return to any producer and continue + reportExceptionToAnyProducer(); + tryLogCurrentException("AIOContextPool::waitForCompletion()"); + } +} + + +int AIOContextPool::getCompletionEvents(io_event events[], const int max_events) +{ + timespec timeout{timeout_sec, 0}; + + auto num_events = 0; + + /// request 1 to `max_events` events + while ((num_events = io_getevents(aio_context.ctx, 1, max_events, events, &timeout)) < 0) + if (errno != EINTR) + throwFromErrno("io_getevents: Failed to wait for asynchronous IO completion", ErrorCodes::CANNOT_IO_GETEVENTS, errno); + + return num_events; +} + + +void AIOContextPool::fulfillPromises(const io_event events[], const int num_events) +{ + if (num_events == 0) + return; + + const std::lock_guard lock{mutex}; + + /// look at returned events and find corresponding promise, set result and erase promise from map + for (const auto & event : boost::make_iterator_range(events, events + num_events)) + { + /// get id from event + const auto id = event.data; + + /// set value via promise and release it + const auto it = promises.find(id); + if (it == std::end(promises)) + { + LOG_ERROR(&Poco::Logger::get("AIOcontextPool"), "Found io_event with unknown id " << id); + continue; + } + + it->second.set_value(event.res); + promises.erase(it); + } +} + + +void AIOContextPool::notifyProducers(const int num_producers) const +{ + if (num_producers == 0) + return; + + if (num_producers > 1) + have_resources.notify_all(); + else + have_resources.notify_one(); +} + + +void AIOContextPool::reportExceptionToAnyProducer() +{ + const std::lock_guard lock{mutex}; + + const auto any_promise_it = std::begin(promises); + any_promise_it->second.set_exception(std::current_exception()); +} + + +std::future AIOContextPool::post(struct iocb & iocb) +{ + std::unique_lock lock{mutex}; + + /// get current id and increment it by one + const auto request_id = id++; + + /// create a promise and put request in "queue" + promises.emplace(request_id, std::promise{}); + /// store id in AIO request for further identification + iocb.aio_data = request_id; + + auto num_requests = 0; + struct iocb * requests[] { &iocb }; + + /// submit a request + while ((num_requests = io_submit(aio_context.ctx, 1, requests)) < 0) + { + if (errno == EAGAIN) + /// wait until at least one event has been completed (or a spurious wakeup) and try again + have_resources.wait(lock); + else if (errno != EINTR) + throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); + } + + return promises[request_id].get_future(); +} + +} + +#endif diff --git a/dbms/src/IO/AIOContextPool.h b/dbms/src/IO/AIOContextPool.h new file mode 100644 index 00000000000..3e1c4a039d7 --- /dev/null +++ b/dbms/src/IO/AIOContextPool.h @@ -0,0 +1,53 @@ +#pragma once + +#if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +class AIOContextPool : public ext::singleton +{ + friend class ext::singleton; + + static const auto max_concurrent_events = 128; + static const auto timeout_sec = 1; + + AIOContext aio_context{max_concurrent_events}; + + using ID = size_t; + using BytesRead = ssize_t; + + /// Autoincremental id used to identify completed requests + ID id{}; + mutable std::mutex mutex; + mutable std::condition_variable have_resources; + std::map> promises; + + std::atomic cancelled{false}; + std::thread io_completion_monitor{&AIOContextPool::doMonitor, this}; + + 
~AIOContextPool(); + + void doMonitor(); + void waitForCompletion(); + int getCompletionEvents(io_event events[], const int max_events); + void fulfillPromises(const io_event events[], const int num_events); + void notifyProducers(const int num_producers) const; + void reportExceptionToAnyProducer(); + +public: + /// Request AIO read operation for iocb, returns a future with number of bytes read + std::future post(struct iocb & iocb); +}; + +} + +#endif diff --git a/dbms/src/IO/ReadBufferAIO.cpp b/dbms/src/IO/ReadBufferAIO.cpp index 8225f27ecd4..9243b65e48e 100644 --- a/dbms/src/IO/ReadBufferAIO.cpp +++ b/dbms/src/IO/ReadBufferAIO.cpp @@ -1,6 +1,7 @@ #if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) #include +#include #include #include #include diff --git a/dbms/src/IO/ReadBufferAIO.h b/dbms/src/IO/ReadBufferAIO.h index 9f8451338ac..77e35f8e35a 100644 --- a/dbms/src/IO/ReadBufferAIO.h +++ b/dbms/src/IO/ReadBufferAIO.h @@ -5,11 +5,12 @@ #include #include #include +#include #include -#include #include #include #include +#include #include #include diff --git a/dbms/src/IO/WriteBufferAIO.cpp b/dbms/src/IO/WriteBufferAIO.cpp index ffd2cdfa0cf..dcd42e3c8fe 100644 --- a/dbms/src/IO/WriteBufferAIO.cpp +++ b/dbms/src/IO/WriteBufferAIO.cpp @@ -32,9 +32,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int ARGUMENT_OUT_OF_BOUND; extern const int AIO_READ_ERROR; - extern const int AIO_SUBMIT_ERROR; extern const int AIO_WRITE_ERROR; - extern const int AIO_COMPLETION_ERROR; + extern const int CANNOT_IO_SUBMIT; + extern const int CANNOT_IO_GETEVENTS; extern const int CANNOT_TRUNCATE_FILE; extern const int CANNOT_FSYNC; } @@ -119,12 +119,12 @@ void WriteBufferAIO::nextImpl() request.aio_offset = region_aligned_begin; /// Send the request. - while (io_submit(aio_context.ctx, request_ptrs.size(), request_ptrs.data()) < 0) + while (io_submit(aio_context.ctx, 1, &request_ptr) < 0) { if (errno != EINTR) { aio_failed = true; - throw Exception("Cannot submit request for asynchronous IO on file " + filename, ErrorCodes::AIO_SUBMIT_ERROR); + throw Exception("Cannot submit request for asynchronous IO on file " + filename, ErrorCodes::CANNOT_IO_SUBMIT); } } @@ -184,17 +184,18 @@ bool WriteBufferAIO::waitForAIOCompletion() CurrentMetrics::Increment metric_increment{CurrentMetrics::Write}; - while (io_getevents(aio_context.ctx, events.size(), events.size(), events.data(), nullptr) < 0) + io_event event; + while (io_getevents(aio_context.ctx, 1, 1, &event, nullptr) < 0) { if (errno != EINTR) { aio_failed = true; - throw Exception("Failed to wait for asynchronous IO completion on file " + filename, ErrorCodes::AIO_COMPLETION_ERROR); + throw Exception("Failed to wait for asynchronous IO completion on file " + filename, ErrorCodes::CANNOT_IO_GETEVENTS); } } is_pending_write = false; - bytes_written = events[0].res; + bytes_written = event.res; ProfileEvents::increment(ProfileEvents::WriteBufferAIOWrite); ProfileEvents::increment(ProfileEvents::WriteBufferAIOWriteBytes, bytes_written); diff --git a/dbms/src/IO/WriteBufferAIO.h b/dbms/src/IO/WriteBufferAIO.h index 3c99eb40eed..f5b01637471 100644 --- a/dbms/src/IO/WriteBufferAIO.h +++ b/dbms/src/IO/WriteBufferAIO.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include @@ -58,9 +58,8 @@ private: BufferWithOwnMemory flush_buffer; /// Description of the asynchronous write request. 
- iocb request = {}; - std::vector request_ptrs{&request}; - std::vector events{1}; + iocb request{}; + iocb * request_ptr{&request}; AIOContext aio_context{1}; From ea01f6a3ff20fa76c67cc064ba44455aa0fe80f4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 Jun 2018 05:12:19 +0300 Subject: [PATCH 102/151] Addition to prev. revision #2520 --- dbms/src/IO/AIOContextPool.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/IO/AIOContextPool.cpp b/dbms/src/IO/AIOContextPool.cpp index 888cad09a90..336c03be7dd 100644 --- a/dbms/src/IO/AIOContextPool.cpp +++ b/dbms/src/IO/AIOContextPool.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include From 4b3b6b0d9b2a724cf7f475838814266c6f810044 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 Jun 2018 05:14:29 +0300 Subject: [PATCH 103/151] Addition to prev. revision #2520 --- utils/iotest/iotest_aio.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/utils/iotest/iotest_aio.cpp b/utils/iotest/iotest_aio.cpp index bfab8249d19..098da2f6170 100644 --- a/utils/iotest/iotest_aio.cpp +++ b/utils/iotest/iotest_aio.cpp @@ -22,7 +22,10 @@ #include #include #if !defined(__APPLE__) && !defined(__FreeBSD__) -#include + /// https://stackoverflow.com/questions/20759750/resolving-redefinition-of-timespec-in-time-h + #define timespec linux_timespec + #include + #undef timespec #endif #include From 447ffe32a96302f9400216d48e6f72754b17e138 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 Jun 2018 05:15:19 +0300 Subject: [PATCH 104/151] Addition to prev. revision #2520 --- dbms/src/IO/AIO.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/IO/AIO.h b/dbms/src/IO/AIO.h index 1cb55ad1f96..7ed767b1c7f 100644 --- a/dbms/src/IO/AIO.h +++ b/dbms/src/IO/AIO.h @@ -4,8 +4,10 @@ /// https://stackoverflow.com/questions/20759750/resolving-redefinition-of-timespec-in-time-h #define timespec linux_timespec +#define timeval linux_timeval #include #undef timespec +#undef timeval /** Small wrappers for asynchronous I/O. From 4871fb3626e2b61fb60d35ffbbf5094b223b6833 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 Jun 2018 05:16:00 +0300 Subject: [PATCH 105/151] Addition to prev. revision #2520 --- dbms/src/IO/AIO.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/IO/AIO.h b/dbms/src/IO/AIO.h index 7ed767b1c7f..150d5d107b2 100644 --- a/dbms/src/IO/AIO.h +++ b/dbms/src/IO/AIO.h @@ -5,9 +5,11 @@ /// https://stackoverflow.com/questions/20759750/resolving-redefinition-of-timespec-in-time-h #define timespec linux_timespec #define timeval linux_timeval +#define itimerspec linux_itimerspec #include #undef timespec #undef timeval +#undef itimerspec /** Small wrappers for asynchronous I/O. From 187d93ea82409c61161ea7f6c0af936f7c1f5f58 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 Jun 2018 05:17:00 +0300 Subject: [PATCH 106/151] Addition to prev. revision #2520 --- dbms/src/IO/AIO.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/IO/AIO.h b/dbms/src/IO/AIO.h index 150d5d107b2..e21a316a770 100644 --- a/dbms/src/IO/AIO.h +++ b/dbms/src/IO/AIO.h @@ -6,10 +6,14 @@ #define timespec linux_timespec #define timeval linux_timeval #define itimerspec linux_itimerspec +#define sigset_t linux_sigset_t + #include + #undef timespec #undef timeval #undef itimerspec +#undef sigset_t /** Small wrappers for asynchronous I/O. 
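
Taken together, the relocated wrappers are consumed roughly as follows. The sketch below is an editor-provided illustration, not code from the patches: it assumes a Linux host, the AIO.h wrapper introduced above available on the include path, and a preexisting input file named data.bin (a hypothetical name). O_DIRECT is used because Linux native AIO behaves asynchronously only for suitably aligned direct I/O.

#include <fcntl.h>
#include <unistd.h>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <IO/AIO.h>  /// the wrapper from this patch series (assumed include path)

int main()
{
    /// 'data.bin' is a hypothetical input file used only for illustration.
    int fd = open("data.bin", O_RDONLY | O_DIRECT);
    if (fd < 0)
    {
        perror("open");
        return 1;
    }

    /// O_DIRECT requires the buffer, size and offset to be block-aligned.
    constexpr size_t buf_size = 4096;
    void * buf = nullptr;
    if (posix_memalign(&buf, buf_size, buf_size) != 0)
        return 1;

    AIOContext ctx(1);  /// calls io_setup, throws on failure

    /// Describe one read request; iocb fields come from linux/aio_abi.h.
    iocb request{};
    request.aio_lio_opcode = IOCB_CMD_PREAD;
    request.aio_fildes = fd;
    request.aio_buf = reinterpret_cast<uintptr_t>(buf);
    request.aio_nbytes = buf_size;
    request.aio_offset = 0;

    iocb * requests[] = { &request };
    if (io_submit(ctx.ctx, 1, requests) < 0)
    {
        perror("io_submit");
        return 1;
    }

    /// Block until the single request completes; event.res holds the number
    /// of bytes read (or a negative errno).
    io_event event{};
    if (io_getevents(ctx.ctx, 1, 1, &event, nullptr) < 0)
    {
        perror("io_getevents");
        return 1;
    }

    printf("read %lld bytes\n", static_cast<long long>(event.res));

    free(buf);
    close(fd);
    return 0;
}

The AIOContextPool moved earlier in this series wraps this same submit/getevents pair behind a std::future-based interface (see AIOContextPool::post above), so that many threads can share one context.
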
From eefc4fd9c56df6350b88776d639760d563735384 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 Jun 2018 05:19:42 +0300 Subject: [PATCH 107/151] Addition to prev. revision #2520 --- utils/iotest/iotest_aio.cpp | 50 +++---------------------------------- 1 file changed, 4 insertions(+), 46 deletions(-) diff --git a/utils/iotest/iotest_aio.cpp b/utils/iotest/iotest_aio.cpp index 098da2f6170..57d7a3bcf11 100644 --- a/utils/iotest/iotest_aio.cpp +++ b/utils/iotest/iotest_aio.cpp @@ -13,45 +13,21 @@ #include #include #include -#if !defined(__APPLE__) && !defined(__FreeBSD__) -#include -#endif #include #include #include #include #include +#include + #if !defined(__APPLE__) && !defined(__FreeBSD__) - /// https://stackoverflow.com/questions/20759750/resolving-redefinition-of-timespec-in-time-h - #define timespec linux_timespec - #include - #undef timespec + #include #endif #include using DB::throwFromErrno; -inline int io_setup(unsigned nr, aio_context_t *ctxp) -{ - return syscall(__NR_io_setup, nr, ctxp); -} - -inline int io_destroy(aio_context_t ctx) -{ - return syscall(__NR_io_destroy, ctx); -} - -inline int io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp) -{ - return syscall(__NR_io_submit, ctx, nr, iocbpp); -} - -inline int io_getevents(aio_context_t ctx, long min_nr, long max_nr, - struct io_event *events, struct timespec *timeout) -{ - return syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout); -} enum Mode { @@ -60,27 +36,9 @@ enum Mode }; -struct AioContext -{ - aio_context_t ctx; - - AioContext() - { - ctx = 0; - if (io_setup(128, &ctx) < 0) - throwFromErrno("io_setup failed"); - } - - ~AioContext() - { - io_destroy(ctx); - } -}; - - void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block_size, size_t buffers_count, size_t count) { - AioContext ctx; + AIOContext ctx; std::vector buffers(buffers_count); for (size_t i = 0; i < buffers_count; ++i) From a107f113f1731d24c9d0e20993502da34a3f32ad Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 Jun 2018 05:32:49 +0300 Subject: [PATCH 108/151] CI scripts: added support for yum #2520 --- ci/install-os-packages.sh | 52 ++++++++++++++++++++++++++++++++++ ci/prepare-toolchain.sh | 1 + libs/libmysqlxx/CMakeLists.txt | 1 - 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/ci/install-os-packages.sh b/ci/install-os-packages.sh index e3e7e88044a..4aae6268aa1 100755 --- a/ci/install-os-packages.sh +++ b/ci/install-os-packages.sh @@ -34,6 +34,9 @@ case $PACKAGE_MANAGER in cmake) $SUDO apt-get install -y cmake3 || $SUDO apt-get install -y cmake ;; + ninja) + $SUDO apt-get install -y ninja-build + ;; curl) $SUDO apt-get install -y curl ;; @@ -69,6 +72,52 @@ case $PACKAGE_MANAGER in ;; esac ;; + yum) + case $WHAT in + prepare) + ;; + svn) + $SUDO yum install -y subversion + ;; + gcc*) + $SUDO yum install -y gcc gcc-c++ libstdc++-static + ;; + git) + $SUDO yum install -y git + ;; + cmake) + $SUDO yum install -y cmake + ;; + ninja) + $SUDO yum install -y ninja-build + ;; + curl) + $SUDO yum install -y curl + ;; + jq) + $SUDO yum install -y jq + ;; + libssl-dev) + $SUDO yum install -y openssl-devel + ;; + libicu-dev) + $SUDO yum install -y libicu-devel + ;; + libreadline-dev) + $SUDO yum install -y readline-devel + ;; + libunixodbc-dev) + $SUDO yum install -y unixODBC-devel libtool-ltdl-devel + ;; + libmariadbclient-dev) + echo "There is no package with static mysqlclient library"; echo 1; + #$SUDO yum install -y mariadb-connector-c-devel + ;; + *) + echo "Unknown package"; exit 
1;
+          ;;
+    esac
+    ;;
   pkg)
     case $WHAT in
       prepare)
@@ -88,6 +137,9 @@ case $PACKAGE_MANAGER in
       cmake)
         $SUDO pkg install -y cmake
         ;;
+      ninja)
+        $SUDO pkg install -y ninja-build
+        ;;
       curl)
         $SUDO pkg install -y curl
         ;;
diff --git a/ci/prepare-toolchain.sh b/ci/prepare-toolchain.sh
index 4718a854860..5edb19cc430 100755
--- a/ci/prepare-toolchain.sh
+++ b/ci/prepare-toolchain.sh
@@ -4,6 +4,7 @@ set -e -x
 source default-config
 
 ./install-os-packages.sh cmake
+./install-os-packages.sh ninja
 
 if [[ "$COMPILER_INSTALL_METHOD" == "packages" ]]; then
     . install-compiler-from-packages.sh
diff --git a/libs/libmysqlxx/CMakeLists.txt b/libs/libmysqlxx/CMakeLists.txt
index d7a608994b4..6aa4dbe6a37 100644
--- a/libs/libmysqlxx/CMakeLists.txt
+++ b/libs/libmysqlxx/CMakeLists.txt
@@ -1,4 +1,3 @@
-
 add_library (mysqlxx ${SPLIT_SHARED}
     src/Connection.cpp
     src/Exception.cpp
From 8eee32cdd250d34499a60fb90eb72a528695c8b2 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin
Date: Thu, 14 Jun 2018 17:47:46 +0300
Subject: [PATCH 109/151] add ru changelog draft for the upcoming release

---
 CHANGELOG.draft.md | 11 ---------
 CHANGELOG_RU.md    | 57 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 11 deletions(-)
 delete mode 100644 CHANGELOG.draft.md

diff --git a/CHANGELOG.draft.md b/CHANGELOG.draft.md
deleted file mode 100644
index ccef5afadf6..00000000000
--- a/CHANGELOG.draft.md
+++ /dev/null
@@ -1,11 +0,0 @@
-en:
-
-## Improvements:
-* `clickhouse-client`: option --ask-password for interactively asking for credentials #1044
-
-
-
-ru:
-
-## Improvements:
-* `clickhouse-client`: the --ask-password option for entering the password interactively #1044
diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md
index c05bfcdb4fb..09087467692 100644
--- a/CHANGELOG_RU.md
+++ b/CHANGELOG_RU.md
@@ -1,3 +1,60 @@
+# ClickHouse release 1.1.5438x, 2018-06-xx
+
+## New features:
+* Added the ability to evaluate arguments of the `and` function only where they are needed ([Анастасия Царькова](https://github.com/yandex/ClickHouse/pull/2272))
+* Added the ability to JIT-compile some expressions to native code ([pyos](https://github.com/yandex/ClickHouse/pull/2277)).
+* Added the `windowFunnel` aggregate function ([sundy-li](https://github.com/yandex/ClickHouse/pull/2352)).
+* Added the ability to write to a table with the MySQL engine, and the corresponding table function ([sundy-li](https://github.com/yandex/ClickHouse/pull/2294)).
+* Added support for the `ALTER TABLE t DELETE WHERE` query for replicated tables, and the `system.mutations` table.
+* Added support for the `ALTER TABLE t [REPLACE|ATTACH] PARTITION` query for replicated tables.
+* Added the ability to enter the password interactively in `clickhouse-client`.
+* Added the ability to send server logs to syslog ([Александр Крашенинников](https://github.com/yandex/ClickHouse/pull/2459)).
+* Added several new `SYSTEM` queries for replicated tables (`RESTART REPLICAS`, `SYNC REPLICA`, `[STOP|START] [MERGES|FETCHES|REPLICATED SENDS|REPLICATION QUEUES]`).
+* Added the `startsWith` and `endsWith` functions for strings ([Вадим Плахтинский](https://github.com/yandex/ClickHouse/pull/2429)).
+* Added support for the `TRUNCATE TABLE` query ([Winter Zhang](https://github.com/yandex/ClickHouse/pull/2260))
+* Added support for logging in dictionaries with a shared library source ([Александр Сапин](https://github.com/yandex/ClickHouse/pull/2472)).
+* Added support for an arbitrary delimiter in the CSV format ([Иван Жуков](https://github.com/yandex/ClickHouse/pull/2263))
+* Added the `date_time_input_format` setting. If you switch this setting to `'best_effort'`, DateTime values will be read in a wide range of formats.
+
+## Bug fixes:
+* Fixed an error when reading an array column from a Nested structure ([#2066](https://github.com/yandex/ClickHouse/issues/2066)).
+* Fixed type matching in the ODBC table function ([sundy-li](https://github.com/yandex/ClickHouse/pull/2268)).
+* Fixed applying settings from command-line parameters in the clickhouse-local program.
+* Fixed an error when analyzing queries with a HAVING clause of the form `HAVING tuple IN (...)`.
+* Fixed an error when analyzing queries with recursive aliases.
+* Fixed queries with `ARRAY JOIN` and `arrayFilter` that returned an incorrect result.
+* Fixed incorrect comparison of `DateTime` values with and without a time zone ([Александр Бочаров](https://github.com/yandex/ClickHouse/pull/2400)).
+* Merges are now possible in the corresponding partition after `CLEAR COLUMN IN PARTITION` ([#2315](https://github.com/yandex/ClickHouse/issues/2315)).
+* Fixed the appearance of duplicates in a query with `DISTINCT` and `ORDER BY`.
+* Fixed insertion into a materialized view when the view's table engine is Distributed ([Babacar Diassé](https://github.com/yandex/ClickHouse/pull/2411)).
+* Fixed pruning of unneeded parts for queries with a condition on the partitioning key columns ([#2342](https://github.com/yandex/ClickHouse/issues/2342)).
+* User profile settings were not applied when using sessions in the HTTP interface.
+* Fixed SSRF in the remote() table function.
+* The ZooKeeper client library now uses the session timeout received from the server.
+* Fixed parsing and formatting of the `CAST` operator.
+* Fixed a race condition when writing data from the `Kafka` engine to materialized views ([Yangkuan Liu](https://github.com/yandex/ClickHouse/pull/2448)).
+
+## Improvements:
+* A server with replicated tables can now start even if ZooKeeper is not configured.
+* cgroups limits are now taken into account when calculating the number of available CPU cores ([Atri Sharma](https://github.com/yandex/ClickHouse/pull/2325)).
+* The DNS cache is now updated automatically after a large number of network errors.
+* Background tasks in replicated tables are now performed in a thread pool rather than in separate threads ([Silviu Caragea](https://github.com/yandex/ClickHouse/pull/1722))
+* An insert into a table no longer happens if an insert into one of its materialized views is impossible because it has too many parts.
+* Expressions of the form `tuple IN (SELECT tuple)` are allowed if the tuple types match.
+* Sped up the analysis of queries with a large number of JOINs and subqueries.
+* Fixed a discrepancy in the values of the `Query`, `SelectQuery`, and `InsertQuery` event counters.
+
+## Build changes:
+* The librdkafka library has been updated to v0.11.4.
+* Fixed the build when using the vectorclass library ([Babacar Diassé](https://github.com/yandex/ClickHouse/pull/2274)).
+* Added the ability to build with the gcc8 compiler.
+* Added the ability to build llvm from a submodule.
+* CMake now generates files for ninja by default (as when using `-G Ninja`).
+
+## Backward incompatible changes:
+* Removed escaping in the `Vertical` and `Pretty*` formats.
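As an illustration of the new `windowFunnel` aggregate listed above, here is a deliberately simplified, hypothetical C++ sketch of the funnel-level idea; the `funnelLevel` helper, its signature, and the single-chain greedy scan are assumptions of this example, not ClickHouse's actual implementation:

```cpp
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

/// Level reached by a funnel: how many consecutive steps 0..steps-1 occurred,
/// in order, with the whole chain fitting into `window` seconds of step 0.
int funnelLevel(std::vector<std::pair<uint64_t, int>> events, uint64_t window, int steps)
{
    if (steps <= 0)
        return 0;
    std::sort(events.begin(), events.end());    /// order events by timestamp
    std::vector<bool> reached(steps, false);
    std::vector<uint64_t> first_time(steps, 0); /// when each step was first reached
    for (const auto & [time, step] : events)
    {
        if (step == 0 && !reached[0])
        {
            reached[0] = true;
            first_time[0] = time;
        }
        else if (step > 0 && step < steps && reached[step - 1] && !reached[step]
                 && time - first_time[0] <= window)
        {
            reached[step] = true;
            first_time[step] = time;
        }
    }
    int level = 0;
    while (level < steps && reached[level])
        ++level;
    return level;
}
```

The real aggregate takes a timestamp column plus one condition per step, so this sketch only mirrors the level-counting semantics, not the SQL interface.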
+
+
 # ClickHouse release 1.1.54385, 2018-06-01

 ## Bug fixes:
 * Fixed an error that in some cases caused ZooKeeper operations to block.

From a4e6f726e8251fa107dc1ed430cfdcd16b015769 Mon Sep 17 00:00:00 2001
From: proller
Date: Tue, 19 Jun 2018 00:13:11 +0300
Subject: [PATCH 110/151] Build and test fixes

---
 cmake/find_llvm.cmake                               |  4 ++--
 dbms/tests/clickhouse-test                         |  5 +++++
 dbms/tests/queries/0_stateless/00505_secure.sh     | 10 +++++-----
 dbms/tests/queries/0_stateless/00638_remote_ssrf.sh |  6 ++++++
 dbms/tests/queries/0_stateless/99999_prepare.sql   |  1 -
 debian/pbuilder-hooks/B90test-server               |  9 ++++++---
 6 files changed, 24 insertions(+), 11 deletions(-)
 delete mode 100644 dbms/tests/queries/0_stateless/99999_prepare.sql

diff --git a/cmake/find_llvm.cmake b/cmake/find_llvm.cmake
index 3b04af7be34..6e45f715552 100644
--- a/cmake/find_llvm.cmake
+++ b/cmake/find_llvm.cmake
@@ -26,7 +26,7 @@ if (ENABLE_EMBEDDED_COMPILER)

     if (LLVM_FOUND)
         # Remove dynamically-linked zlib and libedit from LLVM's dependencies:
-        set_target_properties(LLVMSupport PROPERTIES INTERFACE_LINK_LIBRARIES "-lpthread;LLVMDemangle")
+        set_target_properties(LLVMSupport PROPERTIES INTERFACE_LINK_LIBRARIES "-lpthread;LLVMDemangle;${ZLIB_LIBRARIES}")
         set_target_properties(LLVMLineEditor PROPERTIES INTERFACE_LINK_LIBRARIES "LLVMSupport")

         option(LLVM_HAS_RTTI "Enable if LLVM was built with RTTI enabled" ON)
@@ -65,6 +65,6 @@ function(llvm_libs_all REQUIRED_LLVM_LIBRARIES)
     if (TERMCAP_LIBRARY)
         list (APPEND result ${TERMCAP_LIBRARY})
     endif ()
-    list (APPEND result ${CMAKE_DL_LIBS})
+    list (APPEND result ${CMAKE_DL_LIBS} ${ZLIB_LIBRARIES})
     set (${REQUIRED_LLVM_LIBRARIES} ${result} PARENT_SCOPE)
 endfunction()
diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test
index 37e1b53c117..f734b784f9b 100755
--- a/dbms/tests/clickhouse-test
+++ b/dbms/tests/clickhouse-test
@@ -77,6 +77,8 @@ def main(args):
     os.environ.setdefault("CLICKHOUSE_BINARY", args.binary)
     os.environ.setdefault("CLICKHOUSE_CLIENT", args.client)
     os.environ.setdefault("CLICKHOUSE_CONFIG", args.configserver)
+    if args.configclient:
+        os.environ.setdefault("CLICKHOUSE_CONFIG_CLIENT", args.configclient)
     os.environ.setdefault("CLICKHOUSE_TMP", tmp_dir)

     # TODO ! use clickhouse-extract-from-config here:
@@ -101,6 +103,9 @@ def main(args):
     skipped_total = 0
     failures_total = 0

+    clickhouse_proc_create = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
+    clickhouse_proc_create.communicate("CREATE DATABASE IF NOT EXISTS test")
+
     for suite in sorted(os.listdir(base_dir)):
         if SERVER_DIED:
             break
diff --git a/dbms/tests/queries/0_stateless/00505_secure.sh b/dbms/tests/queries/0_stateless/00505_secure.sh
index 80da07d3986..4394d23444d 100755
--- a/dbms/tests/queries/0_stateless/00505_secure.sh
+++ b/dbms/tests/queries/0_stateless/00505_secure.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash

-#set -x
+# set -x

 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . $CURDIR/../shell_config.sh
@@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)


 if [ -n $CLICKHOUSE_CONFIG_CLIENT ]; then
-    USE_CONFIG="--config $CLICKHOUSE_CONFIG_CLIENT"
+    USE_CONFIG="--config-file $CLICKHOUSE_CONFIG_CLIENT"
 fi


@@ -19,13 +19,13 @@
 if [ -z $tcp_port_secure ]; then
     cat $CURDIR/00505_secure.reference
 else

+    CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:="${CLICKHOUSE_BINARY}-client"}
     if [[ $CLICKHOUSE_CLIENT != *"--port"* ]]; then
-        CLICKHOUSE_CLIENT_SECURE=${CLICKHOUSE_CLIENT_SECURE:="$CLICKHOUSE_CLIENT $USE_CONFIG --secure --port=$CLICKHOUSE_PORT_TCP_SECURE"}
+        CLICKHOUSE_CLIENT_SECURE=${CLICKHOUSE_CLIENT_SECURE:="$CLICKHOUSE_CLIENT_BINARY $USE_CONFIG --secure --port=$CLICKHOUSE_PORT_TCP_SECURE"}
         # Auto port detect. Can't test with ports re-defined via the command line.
-        $CLICKHOUSE_CLIENT $USE_CONFIG --secure -q "SELECT 1";
+        $CLICKHOUSE_CLIENT_BINARY $USE_CONFIG --secure -q "SELECT 1";
     else
-        CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:="${CLICKHOUSE_BINARY}-client"}
         CLICKHOUSE_CLIENT_SECURE=${CLICKHOUSE_CLIENT_SECURE:="$CLICKHOUSE_CLIENT_BINARY $USE_CONFIG --secure --port=$CLICKHOUSE_PORT_TCP_SECURE"}
         echo 1
     fi
diff --git a/dbms/tests/queries/0_stateless/00638_remote_ssrf.sh b/dbms/tests/queries/0_stateless/00638_remote_ssrf.sh
index b998424b530..6d9a87f7235 100755
--- a/dbms/tests/queries/0_stateless/00638_remote_ssrf.sh
+++ b/dbms/tests/queries/0_stateless/00638_remote_ssrf.sh
@@ -10,10 +10,14 @@

 url="${CLICKHOUSE_PORT_HTTP_PROTO}://$address:$port/"

 # Port is arbitrary
+# nc does not support the -w option together with -l and will wait forever for the first packet
 nc -l -p 61845 -q 0 > /dev/null &

 ${CLICKHOUSE_CURL} -sS $url --data-binary "SELECT * FROM remote('localhost:61845', system.one, 'user', 'password')" > /dev/null 2>&1

+# Send a packet to close the listening nc (if clickhouse fails to send one).
+( echo -e "Finish him\n" | nc localhost 61845 ) 2>/dev/null || true + wait nc -l -p 61846 -q 0 > /dev/null & @@ -21,4 +25,6 @@ nc -l -p 61846 -q 0 > /dev/null & ${CLICKHOUSE_CURL} -sS $url --data-binary "SELECT * FROM remote('localhost:61846', system.one, 'user', 'passw ord')" 2>&1 | grep -o 'must not contain ASCII control characters' +( echo -e "Finish him\n" | nc localhost 61846 ) 2>/dev/null || true + wait diff --git a/dbms/tests/queries/0_stateless/99999_prepare.sql b/dbms/tests/queries/0_stateless/99999_prepare.sql deleted file mode 100644 index e68c2efea8d..00000000000 --- a/dbms/tests/queries/0_stateless/99999_prepare.sql +++ /dev/null @@ -1 +0,0 @@ -CREATE DATABASE IF NOT EXISTS test; diff --git a/debian/pbuilder-hooks/B90test-server b/debian/pbuilder-hooks/B90test-server index 041efdf041d..c4524a6ef6f 100755 --- a/debian/pbuilder-hooks/B90test-server +++ b/debian/pbuilder-hooks/B90test-server @@ -44,16 +44,19 @@ if [ "${TEST_CONNECT}" ]; then [ "${TEST_PORT_RANDOM}" ] && echo "${CLICKHOUSE_PORT_HTTP}${CLICKHOUSE_PORT_TCP}${CLICKHOUSE_PORT_INTERSERVER}" > /etc/clickhouse-server/config.d/port.xml if [ "${TEST_SSL}" ]; then - [ "${TEST_PORT_RANDOM}" ] && echo "${CLICKHOUSE_PORT_HTTPS}${CLICKHOUSE_PORT_TCP_SECURE}" > /etc/clickhouse-server/config.d/ssl.xml - echo "noneAcceptCertificateHandler" > /etc/clickhouse-client/config.d/ssl.xml + echo "${CLICKHOUSE_PORT_HTTPS}${CLICKHOUSE_PORT_TCP_SECURE}" > /etc/clickhouse-server/config.d/ssl.xml + echo "${CLICKHOUSE_PORT_TCP_SECURE}${CLICKHOUSE_PORT_TCP}noneAcceptCertificateHandler" > /etc/clickhouse-client/config.xml openssl dhparam -out /etc/clickhouse-server/dhparam.pem 256 openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/clickhouse-server/server.key -out /etc/clickhouse-server/server.crt - chmod a+r /etc/clickhouse-server/* + chmod a+r /etc/clickhouse-server/* /etc/clickhouse-client/* CLIENT_ADD+="--secure --port $CLICKHOUSE_PORT_TCP_SECURE" else CLIENT_ADD+="--port $CLICKHOUSE_PORT_TCP" fi + # For debug + # tail -n +1 -- /etc/clickhouse-server/*.xml /etc/clickhouse-server/config.d/*.xml || true + function finish { service clickhouse-server stop tail -n 100 /var/log/clickhouse-server/*.log /var/log/clickhouse-server/stderr || true From 8eda273b0203ef3eee5c79ff9e0103c8c1ea0463 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 19 Jun 2018 00:14:17 +0300 Subject: [PATCH 111/151] Fixed build [#CLICKHOUSE-2] --- libs/libglibc-compatibility/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libs/libglibc-compatibility/CMakeLists.txt b/libs/libglibc-compatibility/CMakeLists.txt index 3aeea0ffd05..2085a148bad 100644 --- a/libs/libglibc-compatibility/CMakeLists.txt +++ b/libs/libglibc-compatibility/CMakeLists.txt @@ -1,5 +1,9 @@ enable_language (ASM) +if (COMPILER_CLANG) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-builtin-requires-header") +endif () + add_library (glibc-compatibility glibc-compatibility.c musl/pipe2.c musl/fallocate.c From 16565bf111c9bc3bdfab2c252dd8a6f7c45d3278 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 19 Jun 2018 00:15:35 +0300 Subject: [PATCH 112/151] Fixed build [#CLICKHOUSE-2] --- libs/libglibc-compatibility/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/libglibc-compatibility/CMakeLists.txt b/libs/libglibc-compatibility/CMakeLists.txt index 2085a148bad..bb72155b605 100644 --- a/libs/libglibc-compatibility/CMakeLists.txt +++ b/libs/libglibc-compatibility/CMakeLists.txt @@ -1,7 +1,7 @@ 
enable_language (ASM) if (COMPILER_CLANG) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-builtin-requires-header") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-builtin-requires-header") endif () add_library (glibc-compatibility glibc-compatibility.c From fbe7df73a24733f4ce0b133cb83b8acf962398a3 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 19 Jun 2018 21:09:09 +0300 Subject: [PATCH 113/151] CLICKHOUSE-3762 Build fixes (#2488) CONTRIB-254 --- cmake/dbms_generate_function.cmake | 6 +-- cmake/find_gtest.cmake | 16 ++---- cmake/find_readline_edit.cmake | 14 ++++-- cmake/lib_name.cmake | 2 +- contrib/CMakeLists.txt | 2 +- .../CMakeLists.txt | 0 contrib/{libcityhash => cityhash102}/COPYING | 0 .../include/city.h | 0 .../include/citycrc.h | 0 .../{libcityhash => cityhash102}/src/city.cc | 0 .../{libcityhash => cityhash102}/src/config.h | 0 contrib/libfarmhash/CMakeLists.txt | 2 +- contrib/libmetrohash/CMakeLists.txt | 2 +- dbms/CMakeLists.txt | 2 - dbms/programs/CMakeLists.txt | 2 + dbms/programs/clang/CMakeLists.txt | 1 - dbms/programs/client/CMakeLists.txt | 1 - dbms/programs/compressor/CMakeLists.txt | 1 - dbms/programs/local/CMakeLists.txt | 1 - dbms/programs/main.cpp | 49 +++++++++++-------- dbms/programs/obfuscator/Obfuscator.cpp | 1 + dbms/programs/server/CMakeLists.txt | 1 - dbms/src/Client/ConnectionPool.h | 19 ++++--- dbms/src/Common/SipHash.h | 5 +- dbms/src/Common/StackTrace.cpp | 9 +++- dbms/src/Common/Stopwatch.h | 5 +- dbms/src/Common/Throttler.h | 1 + dbms/src/Common/randomSeed.cpp | 4 +- dbms/src/Common/tests/Random.cpp | 4 +- dbms/src/Common/tests/int_hashes_perf.cpp | 7 +-- .../tests/integer_hash_tables_and_hashes.cpp | 4 +- dbms/src/Core/NamesAndTypes.cpp | 2 +- dbms/src/Core/tests/string_pool.cpp | 4 +- dbms/src/DataStreams/BlockStreamProfileInfo.h | 5 -- dbms/src/Functions/CMakeLists.txt | 8 +-- dbms/src/Functions/FunctionsHashing.h | 7 +-- dbms/src/Functions/FunctionsURL.h | 5 +- dbms/src/Functions/registerFunction.cpp.in | 2 +- dbms/src/IO/CachedCompressedReadBuffer.h | 2 +- dbms/src/IO/CompressedReadBufferFromFile.h | 2 +- dbms/src/IO/ReadBufferFromFileBase.h | 5 +- dbms/src/Interpreters/Aggregator.cpp | 6 +++ dbms/src/Interpreters/AsynchronousMetrics.cpp | 2 + dbms/src/Interpreters/Compiler.cpp | 12 ++++- dbms/src/Interpreters/DDLWorker.cpp | 8 --- dbms/src/Interpreters/tests/CMakeLists.txt | 2 - dbms/src/Interpreters/tests/hash_map.cpp | 8 +-- .../Interpreters/tests/hash_map_string_3.cpp | 2 +- dbms/src/Storages/ITableDeclaration.cpp | 8 +-- dbms/src/Storages/MergeTree/MergeTreeReader.h | 1 + debian/rules | 5 +- libs/libcommon/CMakeLists.txt | 20 ++------ .../include/common/config_common.h.in | 1 - libs/libcommon/include/common/readline_use.h | 2 + .../{common/apple_rt.h => port/clock.h} | 23 +++++++-- .../apple_memrchr.h => port/memrchr.h} | 10 +++- libs/libcommon/src/apple_rt.cpp | 6 ++- libs/libdaemon/cmake/find_unwind.cmake | 4 +- utils/check_include.sh | 2 +- utils/iotest/CMakeLists.txt | 6 +-- utils/iotest/iotest.cpp | 4 +- utils/iotest/iotest_aio.cpp | 5 ++ utils/iotest/iotest_nonblock.cpp | 4 +- 63 files changed, 181 insertions(+), 163 deletions(-) rename contrib/{libcityhash => cityhash102}/CMakeLists.txt (100%) rename contrib/{libcityhash => cityhash102}/COPYING (100%) rename contrib/{libcityhash => cityhash102}/include/city.h (100%) rename contrib/{libcityhash => cityhash102}/include/citycrc.h (100%) rename contrib/{libcityhash => cityhash102}/src/city.cc (100%) rename contrib/{libcityhash => cityhash102}/src/config.h (100%) rename 
libs/libcommon/include/{common/apple_rt.h => port/clock.h} (56%) rename libs/libcommon/include/{common/apple_memrchr.h => port/memrchr.h} (95%) diff --git a/cmake/dbms_generate_function.cmake b/cmake/dbms_generate_function.cmake index ef35a623e43..ae701f798fe 100644 --- a/cmake/dbms_generate_function.cmake +++ b/cmake/dbms_generate_function.cmake @@ -1,10 +1,10 @@ function(generate_function_register FUNCTION_AREA) foreach(FUNCTION IN LISTS ARGN) - configure_file (registerFunction.h.in register${FUNCTION}.h) - configure_file (registerFunction.cpp.in register${FUNCTION}.cpp) + configure_file (registerFunction.h.in ${FUNCTIONS_GENERATED_DIR}register${FUNCTION}.h) + configure_file (registerFunction.cpp.in ${FUNCTIONS_GENERATED_DIR}register${FUNCTION}.cpp) set(REGISTER_HEADERS "${REGISTER_HEADERS}#include \"register${FUNCTION}.h\"\n") set(REGISTER_FUNCTIONS "${REGISTER_FUNCTIONS} register${FUNCTION}(factory);\n") endforeach() - configure_file (registerFunctions_area.cpp.in registerFunctions${FUNCTION_AREA}.cpp) + configure_file (registerFunctions_area.cpp.in ${FUNCTIONS_GENERATED_DIR}registerFunctions${FUNCTION_AREA}.cpp) endfunction() diff --git a/cmake/find_gtest.cmake b/cmake/find_gtest.cmake index 5a861e7ec24..ce0a3d32785 100644 --- a/cmake/find_gtest.cmake +++ b/cmake/find_gtest.cmake @@ -9,20 +9,12 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest/CMakeList endif () if (NOT USE_INTERNAL_GTEST_LIBRARY) - find_path (GTEST_INCLUDE_DIR NAMES /gtest/gtest.h PATHS ${GTEST_INCLUDE_PATHS}) - find_path (GTEST_ROOT NAMES src/gtest-all.cc PATHS /usr/src/googletest/googletest /usr/src/gtest) + find_package (GTest) endif () -if (GTEST_INCLUDE_DIR AND GTEST_ROOT) - # googletest package have no lib - add_library(gtest ${GTEST_ROOT}/src/gtest-all.cc) - add_library(gtest_main ${GTEST_ROOT}/src/gtest_main.cc) - target_include_directories(gtest PRIVATE ${GTEST_ROOT}) - target_link_libraries(gtest_main gtest) - set (GTEST_LIBRARY gtest_main) -elseif (NOT MISSING_INTERNAL_GTEST_LIBRARY) +if (NOT GTEST_INCLUDE_DIRS AND NOT MISSING_INTERNAL_GTEST_LIBRARY) set (USE_INTERNAL_GTEST_LIBRARY 1) - set (GTEST_LIBRARY gtest_main) + set (GTEST_MAIN_LIBRARIES gtest_main) endif () -message (STATUS "Using gtest: ${GTEST_INCLUDE_DIR} : ${GTEST_LIBRARY}") +message (STATUS "Using gtest: ${GTEST_INCLUDE_DIRS} : ${GTEST_MAIN_LIBRARIES}") diff --git a/cmake/find_readline_edit.cmake b/cmake/find_readline_edit.cmake index 50373f11952..c2bba6cbfab 100644 --- a/cmake/find_readline_edit.cmake +++ b/cmake/find_readline_edit.cmake @@ -1,6 +1,9 @@ include (CMakePushCheckState) cmake_push_check_state () +option (ENABLE_READLINE "Enable readline" 1) +if (ENABLE_READLINE) + set (READLINE_PATHS "/usr/local/opt/readline/lib") # First try find custom lib for macos users (default lib without history support) find_library (READLINE_LIB NAMES readline PATHS ${READLINE_PATHS} NO_DEFAULT_PATH) @@ -10,30 +13,31 @@ endif () list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.2) -find_library (TERMCAP_LIB NAMES termcap tinfo) find_library (EDIT_LIB NAMES edit) set(READLINE_INCLUDE_PATHS "/usr/local/opt/readline/include") -if (READLINE_LIB AND TERMCAP_LIB) +if (READLINE_LIB AND TERMCAP_LIBRARY) find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS} NO_DEFAULT_PATH) if (NOT READLINE_INCLUDE_DIR) find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS}) endif () if (READLINE_INCLUDE_DIR AND READLINE_LIB) set (USE_READLINE 1) - set (LINE_EDITING_LIBS ${READLINE_LIB} 
${TERMCAP_LIB}) + set (LINE_EDITING_LIBS ${READLINE_LIB} ${TERMCAP_LIBRARY}) message (STATUS "Using line editing libraries (readline): ${READLINE_INCLUDE_DIR} : ${LINE_EDITING_LIBS}") endif () -elseif (EDIT_LIB AND TERMCAP_LIB) +elseif (EDIT_LIB AND TERMCAP_LIBRARY) find_library (CURSES_LIB NAMES curses) find_path (READLINE_INCLUDE_DIR NAMES editline/readline.h PATHS ${READLINE_INCLUDE_PATHS}) if (CURSES_LIB AND READLINE_INCLUDE_DIR) set (USE_LIBEDIT 1) - set (LINE_EDITING_LIBS ${EDIT_LIB} ${CURSES_LIB} ${TERMCAP_LIB}) + set (LINE_EDITING_LIBS ${EDIT_LIB} ${CURSES_LIB} ${TERMCAP_LIBRARY}) message (STATUS "Using line editing libraries (edit): ${READLINE_INCLUDE_DIR} : ${LINE_EDITING_LIBS}") endif () endif () +endif () + if (LINE_EDITING_LIBS AND READLINE_INCLUDE_DIR) include (CheckCXXSourceRuns) diff --git a/cmake/lib_name.cmake b/cmake/lib_name.cmake index 79e1b3e19e4..b49276fc279 100644 --- a/cmake/lib_name.cmake +++ b/cmake/lib_name.cmake @@ -1,5 +1,5 @@ set(DIVIDE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libdivide) -set(CITYHASH_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcityhash/include) +set(CITYHASH_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/cityhash102/include) set(COMMON_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/libs/libcommon/include ${ClickHouse_BINARY_DIR}/libs/libcommon/include) set(DBMS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/dbms/src ${ClickHouse_BINARY_DIR}/dbms/src) set(DOUBLE_CONVERSION_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/double-conversion) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index ef6bb617861..2f5e003fc2f 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -26,7 +26,7 @@ if (USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY) endif () if (USE_INTERNAL_CITYHASH_LIBRARY) - add_subdirectory (libcityhash) + add_subdirectory (cityhash102) endif () if (USE_INTERNAL_FARMHASH_LIBRARY) diff --git a/contrib/libcityhash/CMakeLists.txt b/contrib/cityhash102/CMakeLists.txt similarity index 100% rename from contrib/libcityhash/CMakeLists.txt rename to contrib/cityhash102/CMakeLists.txt diff --git a/contrib/libcityhash/COPYING b/contrib/cityhash102/COPYING similarity index 100% rename from contrib/libcityhash/COPYING rename to contrib/cityhash102/COPYING diff --git a/contrib/libcityhash/include/city.h b/contrib/cityhash102/include/city.h similarity index 100% rename from contrib/libcityhash/include/city.h rename to contrib/cityhash102/include/city.h diff --git a/contrib/libcityhash/include/citycrc.h b/contrib/cityhash102/include/citycrc.h similarity index 100% rename from contrib/libcityhash/include/citycrc.h rename to contrib/cityhash102/include/citycrc.h diff --git a/contrib/libcityhash/src/city.cc b/contrib/cityhash102/src/city.cc similarity index 100% rename from contrib/libcityhash/src/city.cc rename to contrib/cityhash102/src/city.cc diff --git a/contrib/libcityhash/src/config.h b/contrib/cityhash102/src/config.h similarity index 100% rename from contrib/libcityhash/src/config.h rename to contrib/cityhash102/src/config.h diff --git a/contrib/libfarmhash/CMakeLists.txt b/contrib/libfarmhash/CMakeLists.txt index 16471da3f2f..20bba58cde7 100644 --- a/contrib/libfarmhash/CMakeLists.txt +++ b/contrib/libfarmhash/CMakeLists.txt @@ -6,4 +6,4 @@ if (MSVC) target_compile_definitions (farmhash PRIVATE FARMHASH_NO_BUILTIN_EXPECT=1) endif () -target_include_directories (farmhash PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories (farmhash PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git 
a/contrib/libmetrohash/CMakeLists.txt b/contrib/libmetrohash/CMakeLists.txt index 6947b92e054..2bd5628d0f8 100644 --- a/contrib/libmetrohash/CMakeLists.txt +++ b/contrib/libmetrohash/CMakeLists.txt @@ -10,4 +10,4 @@ add_library(metrohash src/metrohash128.cpp ${SOURCES_SSE42_ONLY}) -target_include_directories(metrohash PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(metrohash PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 2c7939a2b77..119072158e7 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -101,7 +101,6 @@ endif () if (USE_EMBEDDED_COMPILER) llvm_libs_all(REQUIRED_LLVM_LIBRARIES) - target_link_libraries (dbms ${REQUIRED_LLVM_LIBRARIES}) target_include_directories (dbms SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) endif () @@ -242,7 +241,6 @@ target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_C target_include_directories (clickhouse_common_io BEFORE PRIVATE ${COMMON_INCLUDE_DIR}) add_subdirectory (programs) - add_subdirectory (tests) if (ENABLE_TESTS) diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index 0a680ce87bc..20baa6b039c 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -36,6 +36,8 @@ if (CLICKHOUSE_SPLIT_BINARY) list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-clang clickhouse-lld) endif () + set_target_properties(${CLICKHOUSE_ALL_TARGETS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) + add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_ALL_TARGETS}) add_custom_target (clickhouse ALL DEPENDS clickhouse-bundle) else () diff --git a/dbms/programs/clang/CMakeLists.txt b/dbms/programs/clang/CMakeLists.txt index ab5934e071c..00d7215e74c 100644 --- a/dbms/programs/clang/CMakeLists.txt +++ b/dbms/programs/clang/CMakeLists.txt @@ -10,7 +10,6 @@ if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-lld clickhouse-lld.cpp) target_link_libraries (clickhouse-lld clickhouse-compiler-lib) install (TARGETS clickhouse-clang clickhouse-lld RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - set_target_properties(clickhouse-clang clickhouse-lld PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) endif () endif () diff --git a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index 89c34669a6b..c7d2311b11e 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -4,7 +4,6 @@ target_include_directories (clickhouse-client-lib SYSTEM PRIVATE ${READLINE_INCL if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-client clickhouse-client.cpp) - set_target_properties(clickhouse-client PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) target_link_libraries (clickhouse-client clickhouse-client-lib) endif () diff --git a/dbms/programs/compressor/CMakeLists.txt b/dbms/programs/compressor/CMakeLists.txt index 59f1b2563d1..aa48a8b5f0e 100644 --- a/dbms/programs/compressor/CMakeLists.txt +++ b/dbms/programs/compressor/CMakeLists.txt @@ -4,6 +4,5 @@ target_link_libraries (clickhouse-compressor-lib clickhouse_common_io ${Boost_PR if (CLICKHOUSE_SPLIT_BINARY) # Also in utils add_executable (clickhouse-compressor clickhouse-compressor.cpp) - set_target_properties(clickhouse-compressor PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) 
target_link_libraries (clickhouse-compressor clickhouse-compressor-lib) endif () diff --git a/dbms/programs/local/CMakeLists.txt b/dbms/programs/local/CMakeLists.txt index c68887b129e..ae964f0c91a 100644 --- a/dbms/programs/local/CMakeLists.txt +++ b/dbms/programs/local/CMakeLists.txt @@ -3,6 +3,5 @@ target_link_libraries (clickhouse-local-lib clickhouse-server-lib clickhouse_fun if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-local clickhouse-local.cpp) - set_target_properties(clickhouse-local PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) target_link_libraries (clickhouse-local clickhouse-local-lib) endif () diff --git a/dbms/programs/main.cpp b/dbms/programs/main.cpp index 26001b3f307..36ab99a7f8e 100644 --- a/dbms/programs/main.cpp +++ b/dbms/programs/main.cpp @@ -1,11 +1,18 @@ -#include -#include -#include #include #include #include #include /// pair +#if __has_include("config_tools.h") +#include "config_tools.h" +#endif +#if __has_include() /// "Arcadia" build system lacks configure files. +#include +#endif +#if __has_include() +#include +#endif + #if USE_TCMALLOC #include #endif @@ -19,31 +26,31 @@ #include /// Universal executable for various clickhouse applications -#if ENABLE_CLICKHOUSE_SERVER +#if ENABLE_CLICKHOUSE_SERVER || !defined(ENABLE_CLICKHOUSE_SERVER) int mainEntryClickHouseServer(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_CLIENT +#if ENABLE_CLICKHOUSE_CLIENT || !defined(ENABLE_CLICKHOUSE_CLIENT) int mainEntryClickHouseClient(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_LOCAL +#if ENABLE_CLICKHOUSE_LOCAL || !defined(ENABLE_CLICKHOUSE_LOCAL) int mainEntryClickHouseLocal(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_BENCHMARK +#if ENABLE_CLICKHOUSE_BENCHMARK || !defined(ENABLE_CLICKHOUSE_BENCHMARK) int mainEntryClickHouseBenchmark(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_PERFORMANCE +#if ENABLE_CLICKHOUSE_PERFORMANCE || !defined(ENABLE_CLICKHOUSE_PERFORMANCE) int mainEntryClickHousePerformanceTest(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG +#if ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG || !defined(ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG) int mainEntryClickHouseExtractFromConfig(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_COMPRESSOR +#if ENABLE_CLICKHOUSE_COMPRESSOR || !defined(ENABLE_CLICKHOUSE_COMPRESSOR) int mainEntryClickHouseCompressor(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_FORMAT +#if ENABLE_CLICKHOUSE_FORMAT || !defined(ENABLE_CLICKHOUSE_FORMAT) int mainEntryClickHouseFormat(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_COPIER +#if ENABLE_CLICKHOUSE_COPIER || !defined(ENABLE_CLICKHOUSE_COPIER) int mainEntryClickHouseClusterCopier(int argc, char ** argv); #endif #if ENABLE_CLICKHOUSE_OBFUSCATOR @@ -64,31 +71,31 @@ using MainFunc = int (*)(int, char**); /// Add an item here to register new application std::pair clickhouse_applications[] = { -#if ENABLE_CLICKHOUSE_LOCAL +#if ENABLE_CLICKHOUSE_LOCAL || !defined(ENABLE_CLICKHOUSE_LOCAL) {"local", mainEntryClickHouseLocal}, #endif -#if ENABLE_CLICKHOUSE_CLIENT +#if ENABLE_CLICKHOUSE_CLIENT || !defined(ENABLE_CLICKHOUSE_CLIENT) {"client", mainEntryClickHouseClient}, #endif -#if ENABLE_CLICKHOUSE_BENCHMARK +#if ENABLE_CLICKHOUSE_BENCHMARK || !defined(ENABLE_CLICKHOUSE_BENCHMARK) {"benchmark", mainEntryClickHouseBenchmark}, #endif -#if ENABLE_CLICKHOUSE_SERVER +#if ENABLE_CLICKHOUSE_SERVER || !defined(ENABLE_CLICKHOUSE_SERVER) {"server", mainEntryClickHouseServer}, #endif -#if ENABLE_CLICKHOUSE_PERFORMANCE +#if 
ENABLE_CLICKHOUSE_PERFORMANCE || !defined(ENABLE_CLICKHOUSE_PERFORMANCE) {"performance-test", mainEntryClickHousePerformanceTest}, #endif -#if ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG +#if ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG || !defined(ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG) {"extract-from-config", mainEntryClickHouseExtractFromConfig}, #endif -#if ENABLE_CLICKHOUSE_COMPRESSOR +#if ENABLE_CLICKHOUSE_COMPRESSOR || !defined(ENABLE_CLICKHOUSE_COMPRESSOR) {"compressor", mainEntryClickHouseCompressor}, #endif -#if ENABLE_CLICKHOUSE_FORMAT +#if ENABLE_CLICKHOUSE_FORMAT || !defined(ENABLE_CLICKHOUSE_FORMAT) {"format", mainEntryClickHouseFormat}, #endif -#if ENABLE_CLICKHOUSE_COPIER +#if ENABLE_CLICKHOUSE_COPIER || !defined(ENABLE_CLICKHOUSE_COPIER) {"copier", mainEntryClickHouseClusterCopier}, #endif #if ENABLE_CLICKHOUSE_OBFUSCATOR diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index c555ff2b6eb..a634a41f468 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/programs/server/CMakeLists.txt b/dbms/programs/server/CMakeLists.txt index 945f12051a6..74297d29864 100644 --- a/dbms/programs/server/CMakeLists.txt +++ b/dbms/programs/server/CMakeLists.txt @@ -15,7 +15,6 @@ target_include_directories (clickhouse-server-lib PUBLIC ${ClickHouse_SOURCE_DIR if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-server clickhouse-server.cpp) - set_target_properties(clickhouse-server PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) target_link_libraries (clickhouse-server clickhouse-server-lib) install (TARGETS clickhouse-server ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () diff --git a/dbms/src/Client/ConnectionPool.h b/dbms/src/Client/ConnectionPool.h index 12d9ca1d8ee..83d527a9e28 100644 --- a/dbms/src/Client/ConnectionPool.h +++ b/dbms/src/Client/ConnectionPool.h @@ -45,17 +45,24 @@ public: using Base = PoolBase; ConnectionPool(unsigned max_connections_, - const String & host_, UInt16 port_, + const String & host_, + UInt16 port_, const String & default_database_, - const String & user_, const String & password_, + const String & user_, + const String & password_, const ConnectionTimeouts & timeouts, const String & client_name_ = "client", Protocol::Compression compression_ = Protocol::Compression::Enable, Protocol::Secure secure_ = Protocol::Secure::Disable) - : Base(max_connections_, &Logger::get("ConnectionPool (" + host_ + ":" + toString(port_) + ")")), - host(host_), port(port_), default_database(default_database_), - user(user_), password(password_), - client_name(client_name_), compression(compression_), + : Base(max_connections_, + &Logger::get("ConnectionPool (" + host_ + ":" + toString(port_) + ")")), + host(host_), + port(port_), + default_database(default_database_), + user(user_), + password(password_), + client_name(client_name_), + compression(compression_), secure{secure_}, timeouts(timeouts) { diff --git a/dbms/src/Common/SipHash.h b/dbms/src/Common/SipHash.h index a9f007f7aa4..97852c2eb72 100644 --- a/dbms/src/Common/SipHash.h +++ b/dbms/src/Common/SipHash.h @@ -147,8 +147,11 @@ public: reinterpret_cast(out)[1] = v2 ^ v3; } - void get128(UInt64 & lo, UInt64 & hi) + /// template for avoiding 'unsigned long long' vs 'unsigned long' problem on old poco in macos + template + void get128(T & lo, T & hi) { + static_assert(sizeof(T) == 8); finalize(); lo = v0 
^ v1; hi = v2 ^ v3; diff --git a/dbms/src/Common/StackTrace.cpp b/dbms/src/Common/StackTrace.cpp index a1490ef4e59..28ec49fa7e8 100644 --- a/dbms/src/Common/StackTrace.cpp +++ b/dbms/src/Common/StackTrace.cpp @@ -9,10 +9,17 @@ #include #include +/// Arcadia compatibility DEVTOOLS-3976 +#if defined(BACKTRACE_INCLUDE) +#include BACKTRACE_INCLUDE +#endif +#if !defined(BACKTRACE_FUNC) + #define BACKTRACE_FUNC backtrace +#endif StackTrace::StackTrace() { - frames_size = backtrace(frames, STACK_TRACE_MAX_DEPTH); + frames_size = BACKTRACE_FUNC(frames, STACK_TRACE_MAX_DEPTH); } std::string StackTrace::toString() const diff --git a/dbms/src/Common/Stopwatch.h b/dbms/src/Common/Stopwatch.h index 62a881c0a31..334e1574dde 100644 --- a/dbms/src/Common/Stopwatch.h +++ b/dbms/src/Common/Stopwatch.h @@ -3,10 +3,7 @@ #include #include #include - -#ifdef __APPLE__ -#include -#endif +#include namespace StopWatchDetail diff --git a/dbms/src/Common/Throttler.h b/dbms/src/Common/Throttler.h index 22aa102cd1b..abc87ffae50 100644 --- a/dbms/src/Common/Throttler.h +++ b/dbms/src/Common/Throttler.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB diff --git a/dbms/src/Common/randomSeed.cpp b/dbms/src/Common/randomSeed.cpp index 7618c41a020..8402fa4acf8 100644 --- a/dbms/src/Common/randomSeed.cpp +++ b/dbms/src/Common/randomSeed.cpp @@ -5,9 +5,7 @@ #include #include #include -#ifdef __APPLE__ -#include -#endif +#include namespace DB diff --git a/dbms/src/Common/tests/Random.cpp b/dbms/src/Common/tests/Random.cpp index e8aeee0a3ea..d926b21c133 100644 --- a/dbms/src/Common/tests/Random.cpp +++ b/dbms/src/Common/tests/Random.cpp @@ -1,8 +1,6 @@ /// Taken from SMHasher. -#ifdef __APPLE__ -#include -#endif #include "Random.h" +#include Rand g_rand1(1); Rand g_rand2(2); diff --git a/dbms/src/Common/tests/int_hashes_perf.cpp b/dbms/src/Common/tests/int_hashes_perf.cpp index 1174f27a4a4..851d7affb13 100644 --- a/dbms/src/Common/tests/int_hashes_perf.cpp +++ b/dbms/src/Common/tests/int_hashes_perf.cpp @@ -4,19 +4,14 @@ #include #include - #include #include #include #include - #include "AvalancheTest.h" /// Taken from SMHasher. +#include -#ifdef __APPLE__ -#include -#endif - void setAffinity() { #if !defined(__APPLE__) && !defined(__FreeBSD__) diff --git a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp index 59e64a407bc..86c301e04d2 100644 --- a/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp +++ b/dbms/src/Common/tests/integer_hash_tables_and_hashes.cpp @@ -331,8 +331,8 @@ void NO_INLINE testForEachMapAndHash(const Key * data, size_t size) testForEachHash(data, size, nothing); testForEachHash(data, size, nothing); - testForEachHash(data, size, [](auto & map){ map.set_empty_key(-1); }); - testForEachHash(data, size, nothing); + testForEachHash(data, size, [](auto & map){ map.set_empty_key(-1); }); + testForEachHash(data, size, nothing); } diff --git a/dbms/src/Core/NamesAndTypes.cpp b/dbms/src/Core/NamesAndTypes.cpp index a5240ba4a91..cbc4f4b159f 100644 --- a/dbms/src/Core/NamesAndTypes.cpp +++ b/dbms/src/Core/NamesAndTypes.cpp @@ -121,7 +121,7 @@ NamesAndTypesList NamesAndTypesList::filter(const Names & names) const NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const { /// NOTE It's better to make a map in `IStorage` than to create it here every time again. 
- google::dense_hash_map types; + GOOGLE_NAMESPACE::dense_hash_map types; types.set_empty_key(StringRef()); for (const NameAndTypePair & column : *this) diff --git a/dbms/src/Core/tests/string_pool.cpp b/dbms/src/Core/tests/string_pool.cpp index e4bdc440d78..caaa6fd81c5 100644 --- a/dbms/src/Core/tests/string_pool.cpp +++ b/dbms/src/Core/tests/string_pool.cpp @@ -33,8 +33,8 @@ int main(int argc, char ** argv) using Vec = std::vector; using Set = std::unordered_map; using RefsSet = std::unordered_map; - using DenseSet = google::dense_hash_map; - using RefsDenseSet = google::dense_hash_map; + using DenseSet = GOOGLE_NAMESPACE::dense_hash_map; + using RefsDenseSet = GOOGLE_NAMESPACE::dense_hash_map; using RefsHashMap = HashMap; Vec vec; diff --git a/dbms/src/DataStreams/BlockStreamProfileInfo.h b/dbms/src/DataStreams/BlockStreamProfileInfo.h index 578b0ed5b26..f1adb5f9ebc 100644 --- a/dbms/src/DataStreams/BlockStreamProfileInfo.h +++ b/dbms/src/DataStreams/BlockStreamProfileInfo.h @@ -2,13 +2,8 @@ #include #include - #include -#if __APPLE__ -#include -#endif - namespace DB { diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index 2f9907f2247..901ba7d50ba 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -1,6 +1,8 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) include(${ClickHouse_SOURCE_DIR}/cmake/dbms_generate_function.cmake) +set (FUNCTIONS_GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated/) + generate_function_register(Arithmetic FunctionPlus FunctionMinus @@ -79,7 +81,9 @@ generate_function_register(Projection add_headers_and_sources(clickhouse_functions .) add_headers_and_sources(clickhouse_functions ./GatherUtils) add_headers_and_sources(clickhouse_functions ./Conditional) -add_headers_and_sources(clickhouse_functions ${ClickHouse_BINARY_DIR}/dbms/src/Functions) +#add_headers_and_sources(clickhouse_functions ${ClickHouse_BINARY_DIR}/dbms/src/Functions) +add_headers_and_sources(clickhouse_functions ${FUNCTIONS_GENERATED_DIR}) + list(REMOVE_ITEM clickhouse_functions_sources IFunction.cpp FunctionFactory.cpp FunctionHelpers.cpp) list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h FunctionHelpers.h) @@ -88,8 +92,6 @@ add_library(clickhouse_functions ${clickhouse_functions_sources}) target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE libconsistent-hashing ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) -target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/libfarmhash) -target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src) target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR}) if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") diff --git a/dbms/src/Functions/FunctionsHashing.h b/dbms/src/Functions/FunctionsHashing.h index 2e81fffedb7..86d5274b881 100644 --- a/dbms/src/Functions/FunctionsHashing.h +++ b/dbms/src/Functions/FunctionsHashing.h @@ -806,13 +806,14 @@ struct ImplCityHash64 static auto Hash64(const char * s, const size_t len) { return CityHash_v1_0_2::CityHash64(s, len); } }; +// see farmhash.h for definition of NAMESPACE_FOR_HASH_FUNCTIONS struct ImplFarmHash64 { static constexpr auto name = "farmHash64"; - using uint128_t = farmhash::uint128_t; + using uint128_t = NAMESPACE_FOR_HASH_FUNCTIONS::uint128_t; - static auto 
Hash128to64(const uint128_t & x) { return farmhash::Hash128to64(x); } - static auto Hash64(const char * s, const size_t len) { return farmhash::Hash64(s, len); } + static auto Hash128to64(const uint128_t & x) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash128to64(x); } + static auto Hash64(const char * s, const size_t len) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash64(s, len); } }; struct ImplMetroHash64 diff --git a/dbms/src/Functions/FunctionsURL.h b/dbms/src/Functions/FunctionsURL.h index 19e8be2ba86..41290e38aff 100644 --- a/dbms/src/Functions/FunctionsURL.h +++ b/dbms/src/Functions/FunctionsURL.h @@ -8,10 +8,7 @@ #include #include #include - -#ifdef __APPLE__ -#include -#endif +#include namespace DB diff --git a/dbms/src/Functions/registerFunction.cpp.in b/dbms/src/Functions/registerFunction.cpp.in index 3f23a9ddfb0..71bab6292d5 100644 --- a/dbms/src/Functions/registerFunction.cpp.in +++ b/dbms/src/Functions/registerFunction.cpp.in @@ -1,4 +1,4 @@ -#include +#include "register@FUNCTION@.h" #include #include diff --git a/dbms/src/IO/CachedCompressedReadBuffer.h b/dbms/src/IO/CachedCompressedReadBuffer.h index 1364643d975..9be9dd01b1f 100644 --- a/dbms/src/IO/CachedCompressedReadBuffer.h +++ b/dbms/src/IO/CachedCompressedReadBuffer.h @@ -2,10 +2,10 @@ #include #include - #include #include #include +#include namespace DB diff --git a/dbms/src/IO/CompressedReadBufferFromFile.h b/dbms/src/IO/CompressedReadBufferFromFile.h index d07fa060adc..f1332ea4187 100644 --- a/dbms/src/IO/CompressedReadBufferFromFile.h +++ b/dbms/src/IO/CompressedReadBufferFromFile.h @@ -2,9 +2,9 @@ #include #include - #include #include +#include namespace DB diff --git a/dbms/src/IO/ReadBufferFromFileBase.h b/dbms/src/IO/ReadBufferFromFileBase.h index bdadc0ffe68..461a91102d9 100644 --- a/dbms/src/IO/ReadBufferFromFileBase.h +++ b/dbms/src/IO/ReadBufferFromFileBase.h @@ -6,10 +6,7 @@ #include #include #include - -#ifdef __APPLE__ -#include -#endif +#include namespace DB { diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 227e4e482d3..2413db3e204 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -23,7 +23,9 @@ #include #include #include +#if __has_include() #include +#endif namespace ProfileEvents @@ -173,6 +175,9 @@ void Aggregator::compileIfPossible(AggregatedDataVariants::Type type) compiled_if_possible = true; +#if !defined(INTERNAL_COMPILER_HEADERS) + throw Exception("Cannot compile code: Compiler disabled", ErrorCodes::CANNOT_COMPILE_CODE); +#else std::string method_typename; std::string method_typename_two_level; @@ -351,6 +356,7 @@ void Aggregator::compileIfPossible(AggregatedDataVariants::Type type) /// If the result is already ready. 
if (lib) on_ready(lib); +#endif } diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp index ab5f8274a34..daa36651700 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.cpp +++ b/dbms/src/Interpreters/AsynchronousMetrics.cpp @@ -10,7 +10,9 @@ #include #include +#if __has_include() #include +#endif #if USE_TCMALLOC #include diff --git a/dbms/src/Interpreters/Compiler.cpp b/dbms/src/Interpreters/Compiler.cpp index 2217ff379c4..87958ca0fa4 100644 --- a/dbms/src/Interpreters/Compiler.cpp +++ b/dbms/src/Interpreters/Compiler.cpp @@ -10,8 +10,10 @@ #include #include #include -#include +#if __has_include() +#include +#endif namespace ProfileEvents { @@ -25,6 +27,7 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_DLOPEN; + extern const int CANNOT_COMPILE_CODE; } Compiler::Compiler(const std::string & path_, size_t threads) @@ -202,6 +205,9 @@ void Compiler::compile( { ProfileEvents::increment(ProfileEvents::CompileAttempt); +#if !defined(INTERNAL_COMPILER_EXECUTABLE) + throw Exception("Cannot compile code: Compiler disabled", ErrorCodes::CANNOT_COMPILE_CODE); +#else std::string prefix = path + "/" + file_name; std::string cpp_file_path = prefix + ".cpp"; std::string so_file_path = prefix + ".so"; @@ -250,7 +256,7 @@ void Compiler::compile( " -isystem " << compiler_headers_root << "/usr/include" #endif " -I " << compiler_headers << "/dbms/src/" - " -I " << compiler_headers << "/contrib/libcityhash/include/" + " -I " << compiler_headers << "/contrib/cityhash102/include/" " -I " << compiler_headers << "/contrib/libpcg-random/include/" " -I " << compiler_headers << INTERNAL_DOUBLE_CONVERSION_INCLUDE_DIR " -I " << compiler_headers << INTERNAL_Poco_Foundation_INCLUDE_DIR @@ -291,6 +297,8 @@ void Compiler::compile( ProfileEvents::increment(ProfileEvents::CompileSuccess); on_ready(lib); + +#endif } diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp index 1d46d33ac26..297fd217391 100644 --- a/dbms/src/Interpreters/DDLWorker.cpp +++ b/dbms/src/Interpreters/DDLWorker.cpp @@ -1,42 +1,34 @@ #include - #include #include #include #include #include - #include #include #include #include - #include #include - #include #include #include #include - #include #include #include #include - #include #include #include #include #include #include - #include #include #include #include #include - #include #include #include diff --git a/dbms/src/Interpreters/tests/CMakeLists.txt b/dbms/src/Interpreters/tests/CMakeLists.txt index c88ce789c81..026994f4532 100644 --- a/dbms/src/Interpreters/tests/CMakeLists.txt +++ b/dbms/src/Interpreters/tests/CMakeLists.txt @@ -26,8 +26,6 @@ target_link_libraries (hash_map_string_2 dbms) add_executable (hash_map_string_3 hash_map_string_3.cpp) target_link_libraries (hash_map_string_3 dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) -target_include_directories (hash_map_string_3 SYSTEM BEFORE PRIVATE ${ClickHouse_SOURCE_DIR}/contrib/libfarmhash) -target_include_directories (hash_map_string_3 SYSTEM BEFORE PRIVATE ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src) add_executable (hash_map_string_small hash_map_string_small.cpp) target_include_directories (hash_map_string_small SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) diff --git a/dbms/src/Interpreters/tests/hash_map.cpp b/dbms/src/Interpreters/tests/hash_map.cpp index 6bf083a5a03..d63f7421e89 100644 --- a/dbms/src/Interpreters/tests/hash_map.cpp +++ b/dbms/src/Interpreters/tests/hash_map.cpp @@ -264,8 +264,8 @@ int main(int argc, 
char ** argv) { Stopwatch watch; - google::dense_hash_map> map; - google::dense_hash_map>::iterator it; + GOOGLE_NAMESPACE::dense_hash_map> map; + GOOGLE_NAMESPACE::dense_hash_map>::iterator it; map.set_empty_key(-1ULL); for (size_t i = 0; i < n; ++i) { @@ -285,8 +285,8 @@ int main(int argc, char ** argv) { Stopwatch watch; - google::sparse_hash_map> map; - google::sparse_hash_map>::iterator it; + GOOGLE_NAMESPACE::sparse_hash_map> map; + GOOGLE_NAMESPACE::sparse_hash_map>::iterator it; for (size_t i = 0; i < n; ++i) { map.insert(std::make_pair(data[i], std::move(value))); diff --git a/dbms/src/Interpreters/tests/hash_map_string_3.cpp b/dbms/src/Interpreters/tests/hash_map_string_3.cpp index 6771b3d53d9..03c27890dad 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_3.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_3.cpp @@ -319,7 +319,7 @@ struct FarmHash64 { size_t operator() (StringRef x) const { - return farmhash::Hash64(x.data, x.size); + return NAMESPACE_FOR_HASH_FUNCTIONS::Hash64(x.data, x.size); } }; diff --git a/dbms/src/Storages/ITableDeclaration.cpp b/dbms/src/Storages/ITableDeclaration.cpp index 92762046f7f..90386d77495 100644 --- a/dbms/src/Storages/ITableDeclaration.cpp +++ b/dbms/src/Storages/ITableDeclaration.cpp @@ -92,7 +92,7 @@ static std::string listOfColumns(const NamesAndTypesList & available_columns) } -using NamesAndTypesMap = google::dense_hash_map; +using NamesAndTypesMap = GOOGLE_NAMESPACE::dense_hash_map; static NamesAndTypesMap & getColumnsMapImpl(NamesAndTypesMap & res) { return res; } @@ -127,7 +127,7 @@ void ITableDeclaration::check(const Names & column_names) const const auto columns_map = getColumnsMap(available_columns); - using UniqueStrings = google::dense_hash_set; + using UniqueStrings = GOOGLE_NAMESPACE::dense_hash_set; UniqueStrings unique_names; unique_names.set_empty_key(StringRef()); @@ -150,7 +150,7 @@ void ITableDeclaration::check(const NamesAndTypesList & provided_columns) const const NamesAndTypesList & available_columns = getColumns().getAllPhysical(); const auto columns_map = getColumnsMap(available_columns); - using UniqueStrings = google::dense_hash_set; + using UniqueStrings = GOOGLE_NAMESPACE::dense_hash_set; UniqueStrings unique_names; unique_names.set_empty_key(StringRef()); @@ -183,7 +183,7 @@ void ITableDeclaration::check(const NamesAndTypesList & provided_columns, const throw Exception("Empty list of columns queried. There are columns: " + listOfColumns(available_columns), ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED); - using UniqueStrings = google::dense_hash_set; + using UniqueStrings = GOOGLE_NAMESPACE::dense_hash_set; UniqueStrings unique_names; unique_names.set_empty_key(StringRef()); diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.h b/dbms/src/Storages/MergeTree/MergeTreeReader.h index a4a6b4dc0f7..62aaf141f50 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/debian/rules b/debian/rules index 4533dd9ca3d..edece55c89e 100755 --- a/debian/rules +++ b/debian/rules @@ -69,8 +69,9 @@ override_dh_auto_configure: dh_auto_configure -- $(CMAKE_FLAGS) override_dh_auto_build: - # Fix fir ninja. Do not add -O. - cd $(BUILDDIR) && $(MAKE) -j$(THREADS_COUNT) + # Fix for ninja. Do not add -O. + #cd $(BUILDDIR) && $(MAKE) -j$(THREADS_COUNT) + cd $(BUILDDIR) && cmake --build . 
-- -j$(THREADS_COUNT) override_dh_auto_test: #TODO, use ENABLE_TESTS=1 diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt index 0dd5939fc46..31b72c8fee0 100644 --- a/libs/libcommon/CMakeLists.txt +++ b/libs/libcommon/CMakeLists.txt @@ -1,21 +1,11 @@ -if (APPLE) - if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin" AND NOT "${CMAKE_SYSTEM_VERSION}" VERSION_LESS "16.0.0") - set (APPLE_SIERRA_OR_NEWER 1) - else () - set (APPLE_SIERRA_OR_NEWER 0) - endif () -endif () - set (CONFIG_COMMON ${CMAKE_CURRENT_BINARY_DIR}/include/common/config_common.h) configure_file (${CMAKE_CURRENT_SOURCE_DIR}/include/common/config_common.h.in ${CONFIG_COMMON}) -if (APPLE) - add_library (apple_rt - src/apple_rt.cpp - include/common/apple_rt.h - ) - target_include_directories (apple_rt PUBLIC ${COMMON_INCLUDE_DIR}) -endif () +add_library (apple_rt + src/apple_rt.cpp + include/port/clock.h +) +target_include_directories (apple_rt PUBLIC ${COMMON_INCLUDE_DIR}) add_library (common ${SPLIT_SHARED} src/DateLUT.cpp diff --git a/libs/libcommon/include/common/config_common.h.in b/libs/libcommon/include/common/config_common.h.in index 3518a42e523..0cc0950efba 100644 --- a/libs/libcommon/include/common/config_common.h.in +++ b/libs/libcommon/include/common/config_common.h.in @@ -2,7 +2,6 @@ // .h autogenerated by cmake ! -#cmakedefine01 APPLE_SIERRA_OR_NEWER #cmakedefine01 USE_TCMALLOC #cmakedefine01 USE_JEMALLOC #cmakedefine01 USE_READLINE diff --git a/libs/libcommon/include/common/readline_use.h b/libs/libcommon/include/common/readline_use.h index 510957060c8..acecc0ffa08 100644 --- a/libs/libcommon/include/common/readline_use.h +++ b/libs/libcommon/include/common/readline_use.h @@ -1,6 +1,8 @@ #pragma once +#if __has_include() #include +#endif /// Different line editing libraries can be used depending on the environment. #if USE_READLINE diff --git a/libs/libcommon/include/common/apple_rt.h b/libs/libcommon/include/port/clock.h similarity index 56% rename from libs/libcommon/include/common/apple_rt.h rename to libs/libcommon/include/port/clock.h index ee6c546eab3..f4e55925609 100644 --- a/libs/libcommon/include/common/apple_rt.h +++ b/libs/libcommon/include/port/clock.h @@ -6,11 +6,17 @@ * To use, include this file with -include compiler parameter. 
*/ +#include + #ifdef __APPLE__ -#include +#include +#ifndef MAC_OS_X_VERSION_10_12 +#define MAC_OS_X_VERSION_10_12 101200 +#endif +#define APPLE_HAVE_CLOCK_GETTIME MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_12 -#if APPLE_SIERRA_OR_NEWER == 0 +#if !APPLE_HAVE_CLOCK_GETTIME || !defined(CLOCK_MONOTONIC) /** * MacOS X doesn't support different clock sources * @@ -18,14 +24,23 @@ * CLOCK_THREAD_CPUTIME_ID, because there is a way * to implement it using in-kernel stats about threads */ -#define CLOCK_MONOTONIC_COARSE 0 -#define CLOCK_REALTIME 0 +#if !defined(CLOCK_MONOTONIC) #define CLOCK_MONOTONIC 0 +#endif +#if !defined(CLOCK_REALTIME) +#define CLOCK_REALTIME CLOCK_MONOTONIC +#endif +#if !defined(CLOCK_THREAD_CPUTIME_ID) #define CLOCK_THREAD_CPUTIME_ID 3 +#endif typedef int clockid_t; int clock_gettime(int clk_id, struct timespec* t); #else + +#endif + +#if !defined(CLOCK_MONOTONIC_COARSE) #define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC #endif diff --git a/libs/libcommon/include/common/apple_memrchr.h b/libs/libcommon/include/port/memrchr.h similarity index 95% rename from libs/libcommon/include/common/apple_memrchr.h rename to libs/libcommon/include/port/memrchr.h index 17e2c410011..771ee837879 100644 --- a/libs/libcommon/include/common/apple_memrchr.h +++ b/libs/libcommon/include/port/memrchr.h @@ -1,3 +1,10 @@ +#pragma once + +/// Arcadia compatibility DEVTOOLS-3976 +#if defined(MEMRCHR_INCLUDE) +#include MEMRCHR_INCLUDE +#else + /* * Copyright (C) 2008 The Android Open Source Project * All rights reserved. @@ -25,7 +32,6 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -#pragma once #ifdef __APPLE__ #include @@ -51,5 +57,5 @@ inline void *memrchr(const void *s, int c, size_t n) { #if defined (__cplusplus) } #endif - +#endif #endif diff --git a/libs/libcommon/src/apple_rt.cpp b/libs/libcommon/src/apple_rt.cpp index 04e43fa68e9..cf260cce604 100644 --- a/libs/libcommon/src/apple_rt.cpp +++ b/libs/libcommon/src/apple_rt.cpp @@ -5,9 +5,10 @@ * To use, include this file with -include compiler parameter. */ -#include +#include -#if APPLE_SIERRA_OR_NEWER == 0 +#ifdef __APPLE__ +#if !APPLE_HAVE_CLOCK_GETTIME #include #include @@ -48,3 +49,4 @@ int clock_gettime(int clk_id, struct timespec* t) { } #endif +#endif diff --git a/libs/libdaemon/cmake/find_unwind.cmake b/libs/libdaemon/cmake/find_unwind.cmake index 1b408295a2f..876bc7298e8 100644 --- a/libs/libdaemon/cmake/find_unwind.cmake +++ b/libs/libdaemon/cmake/find_unwind.cmake @@ -28,8 +28,8 @@ if (NOT USE_INTERNAL_UNWIND_LIBRARY) unw_init_local2(&cursor, &context, UNW_INIT_SIGNAL_FRAME); return 0; } - " HAVE_UNWIND_INIT_LOCAL_SIGNAL) - if (NOT HAVE_UNWIND_INIT_LOCAL_SIGNAL) + " HAVE_UNW_INIT_LOCAL2) + if (NOT HAVE_UNW_INIT_LOCAL2) set(UNWIND_LIBRARY "") set(UNWIND_INCLUDE_DIR "") endif () diff --git a/utils/check_include.sh b/utils/check_include.sh index 3e3ef44aaf1..2a7addb1876 100755 --- a/utils/check_include.sh +++ b/utils/check_include.sh @@ -13,7 +13,7 @@ inc="-I. 
\ -I./contrib/libfarmhash \ -I./contrib/libmetrohash/src \ -I./contrib/double-conversion \ --I./contrib/libcityhash/include \ +-I./contrib/cityhash102/include \ -I./contrib/zookeeper/src/c/include \ -I./contrib/zookeeper/src/c/generated \ -I./contrib/libtcmalloc/include \ diff --git a/utils/iotest/CMakeLists.txt b/utils/iotest/CMakeLists.txt index c41ba0303ec..f690409a54d 100644 --- a/utils/iotest/CMakeLists.txt +++ b/utils/iotest/CMakeLists.txt @@ -5,7 +5,5 @@ target_link_libraries (iotest clickhouse_common_io) add_executable (iotest_nonblock iotest_nonblock.cpp ${SRCS}) target_link_libraries (iotest_nonblock clickhouse_common_io) -if (NOT APPLE AND NOT ARCH_FREEBSD) - add_executable (iotest_aio iotest_aio.cpp ${SRCS}) - target_link_libraries (iotest_aio clickhouse_common_io) -endif() +add_executable (iotest_aio iotest_aio.cpp ${SRCS}) +target_link_libraries (iotest_aio clickhouse_common_io) diff --git a/utils/iotest/iotest.cpp b/utils/iotest/iotest.cpp index a4849638e48..e2ce099d259 100644 --- a/utils/iotest/iotest.cpp +++ b/utils/iotest/iotest.cpp @@ -16,9 +16,7 @@ #include #include #include -#ifdef __APPLE__ -#include -#endif +#include using DB::throwFromErrno; diff --git a/utils/iotest/iotest_aio.cpp b/utils/iotest/iotest_aio.cpp index 57d7a3bcf11..911bcb4ec14 100644 --- a/utils/iotest/iotest_aio.cpp +++ b/utils/iotest/iotest_aio.cpp @@ -1,3 +1,7 @@ +#if __APPLE__ || __FreeBSD__ +int main(int argc, char ** argv) { return 0; } +#else + #include #include #include @@ -193,3 +197,4 @@ int main(int argc, char ** argv) return 1; } } +#endif diff --git a/utils/iotest/iotest_nonblock.cpp b/utils/iotest/iotest_nonblock.cpp index 8f1ad435bf0..2de28af8513 100644 --- a/utils/iotest/iotest_nonblock.cpp +++ b/utils/iotest/iotest_nonblock.cpp @@ -19,9 +19,7 @@ #include #include #include -#ifdef __APPLE__ -#include -#endif +#include using DB::throwFromErrno; From 30a6c6479b27a350433d145e1572bdbdb944df86 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 20 Jun 2018 23:18:47 +0300 Subject: [PATCH 114/151] Fix systemd's clickhouse-server.service (#2537) --- debian/clickhouse-server.service | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debian/clickhouse-server.service b/debian/clickhouse-server.service index b2e041fce86..1e57ebef0a5 100644 --- a/debian/clickhouse-server.service +++ b/debian/clickhouse-server.service @@ -8,7 +8,8 @@ Group=clickhouse PermissionsStartOnly=true Restart=always RestartSec=30 -ExecStartPre=chown clickhouse:clickhouse -R /etc/clickhouse-server +ExecStartPre=-/usr/bin/chown clickhouse:clickhouse -R /etc/clickhouse-server +ExecStartPre=-/bin/chown clickhouse:clickhouse -R /etc/clickhouse-server ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml LimitCORE=infinity LimitNOFILE=500000 From 2d9d723d3ca97712f75c44d1fcc167b06ebd765b Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 21 Jun 2018 17:54:48 +0300 Subject: [PATCH 115/151] Wait test server startup for 5s --- dbms/tests/clickhouse-test-server | 2 +- debian/pbuilder-hooks/B90test-server | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/clickhouse-test-server b/dbms/tests/clickhouse-test-server index 37c5999fd9d..1cf2fea62d8 100755 --- a/dbms/tests/clickhouse-test-server +++ b/dbms/tests/clickhouse-test-server @@ -75,7 +75,7 @@ $GDB ${BIN_DIR}clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- \ $INTERNAL_COMPILER_PARAMS \ > $LOG_DIR/stdout 2>&1 & CH_PID=$! 
-sleep 3 +sleep ${TEST_SERVER_STARTUP_WAIT:=5} if [ "$GDB" ]; then # Long symbols read diff --git a/debian/pbuilder-hooks/B90test-server b/debian/pbuilder-hooks/B90test-server index c4524a6ef6f..6ed2d593fa5 100755 --- a/debian/pbuilder-hooks/B90test-server +++ b/debian/pbuilder-hooks/B90test-server @@ -66,7 +66,7 @@ if [ "${TEST_CONNECT}" ]; then trap finish EXIT SIGINT SIGQUIT SIGTERM service clickhouse-server start - sleep 3 + sleep {TEST_SERVER_STARTUP_WAIT:=5} # TODO: remove me or make only on error: tail -n100 /var/log/clickhouse-server/*.log /var/log/clickhouse-server/stderr || true From e1ae5233df113bf75986b6a0de960418bc11a9cf Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 21 Jun 2018 18:23:20 +0300 Subject: [PATCH 116/151] Add // Y_IGNORE tags --- contrib/libcpuid/include/cpuid/cpuid_main.c | 2 +- contrib/libpcg-random/include/pcg_extras.hpp | 2 +- dbms/programs/main.cpp | 2 +- dbms/src/Common/OptimizedRegularExpression.h | 2 +- dbms/src/DataTypes/Native.h | 2 +- dbms/src/Formats/CapnProtoRowInputStream.cpp | 6 +-- dbms/src/Functions/FunctionsArithmetic.h | 2 +- dbms/src/Functions/FunctionsLogical.h | 2 +- dbms/src/Functions/FunctionsMath.h | 6 +-- dbms/src/Functions/FunctionsStringSearch.cpp | 2 +- dbms/src/Functions/IFunction.cpp | 2 +- dbms/src/IO/AIO.h | 2 +- dbms/src/Interpreters/AsynchronousMetrics.cpp | 2 +- dbms/src/Interpreters/ExpressionJIT.cpp | 48 +++++++++---------- dbms/src/Storages/StorageKafka.cpp | 6 +-- libs/libcommon/include/common/readline_use.h | 2 +- 16 files changed, 45 insertions(+), 45 deletions(-) diff --git a/contrib/libcpuid/include/cpuid/cpuid_main.c b/contrib/libcpuid/include/cpuid/cpuid_main.c index 737aa706a37..c387ed6c5ca 100644 --- a/contrib/libcpuid/include/cpuid/cpuid_main.c +++ b/contrib/libcpuid/include/cpuid/cpuid_main.c @@ -83,7 +83,7 @@ static int parse_token(const char* expected_token, const char *token, /* get_total_cpus() system specific code: uses OS routines to determine total number of CPUs */ #ifdef __APPLE__ #include -#include +#include // Y_IGNORE #include #include static int get_total_cpus(void) diff --git a/contrib/libpcg-random/include/pcg_extras.hpp b/contrib/libpcg-random/include/pcg_extras.hpp index 929c756b151..08e8dbacd84 100644 --- a/contrib/libpcg-random/include/pcg_extras.hpp +++ b/contrib/libpcg-random/include/pcg_extras.hpp @@ -81,7 +81,7 @@ #define PCG_128BIT_CONSTANT(high,low) \ ((pcg128_t(high) << 64) + low) #else - #include "pcg_uint128.hpp" + #include "pcg_uint128.hpp" // Y_IGNORE namespace pcg_extras { typedef pcg_extras::uint_x4 pcg128_t; } diff --git a/dbms/programs/main.cpp b/dbms/programs/main.cpp index 36ab99a7f8e..aba03a87a83 100644 --- a/dbms/programs/main.cpp +++ b/dbms/programs/main.cpp @@ -14,7 +14,7 @@ #endif #if USE_TCMALLOC -#include +#include // Y_IGNORE #endif #if ENABLE_CLICKHOUSE_SERVER diff --git a/dbms/src/Common/OptimizedRegularExpression.h b/dbms/src/Common/OptimizedRegularExpression.h index 81bfc2653c9..60566c9f029 100644 --- a/dbms/src/Common/OptimizedRegularExpression.h +++ b/dbms/src/Common/OptimizedRegularExpression.h @@ -6,7 +6,7 @@ #include #include #if USE_RE2_ST - #include + #include // Y_IGNORE #else #define re2_st re2 #endif diff --git a/dbms/src/DataTypes/Native.h b/dbms/src/DataTypes/Native.h index e6167b03a73..7dfc6f7a292 100644 --- a/dbms/src/DataTypes/Native.h +++ b/dbms/src/DataTypes/Native.h @@ -16,7 +16,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -#include +#include // Y_IGNORE #pragma GCC diagnostic pop diff --git 
a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index a3177a0041e..72fbd376399 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -4,12 +4,12 @@ #include #include #include -#include +#include // Y_IGNORE #include #include -#include -#include +#include // Y_IGNORE +#include // Y_IGNORE #include #include #include diff --git a/dbms/src/Functions/FunctionsArithmetic.h b/dbms/src/Functions/FunctionsArithmetic.h index 865f1f934e1..adbfdba175f 100644 --- a/dbms/src/Functions/FunctionsArithmetic.h +++ b/dbms/src/Functions/FunctionsArithmetic.h @@ -23,7 +23,7 @@ #if USE_EMBEDDED_COMPILER #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -#include +#include // Y_IGNORE #pragma GCC diagnostic pop #endif diff --git a/dbms/src/Functions/FunctionsLogical.h b/dbms/src/Functions/FunctionsLogical.h index 0f89f54804d..934bc7760a3 100644 --- a/dbms/src/Functions/FunctionsLogical.h +++ b/dbms/src/Functions/FunctionsLogical.h @@ -15,7 +15,7 @@ #if USE_EMBEDDED_COMPILER #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -#include +#include // Y_IGNORE #pragma GCC diagnostic pop #endif diff --git a/dbms/src/Functions/FunctionsMath.h b/dbms/src/Functions/FunctionsMath.h index e2c43859f71..1e3c9b72782 100644 --- a/dbms/src/Functions/FunctionsMath.h +++ b/dbms/src/Functions/FunctionsMath.h @@ -21,9 +21,9 @@ #pragma clang diagnostic ignored "-Wshift-negative-value" #endif - #include - #include - #include + #include // Y_IGNORE + #include // Y_IGNORE + #include // Y_IGNORE #ifdef __clang__ #pragma clang diagnostic pop diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index 6947d81dbc3..95806183a13 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -14,7 +14,7 @@ #include #if USE_RE2_ST - #include + #include // Y_IGNORE #else #define re2_st re2 #endif diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index 3e2ee5a0de4..0913790b72e 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -18,7 +18,7 @@ #if USE_EMBEDDED_COMPILER #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -#include +#include // Y_IGNORE #pragma GCC diagnostic pop #endif diff --git a/dbms/src/IO/AIO.h b/dbms/src/IO/AIO.h index e21a316a770..59089f866cb 100644 --- a/dbms/src/IO/AIO.h +++ b/dbms/src/IO/AIO.h @@ -8,7 +8,7 @@ #define itimerspec linux_itimerspec #define sigset_t linux_sigset_t -#include +#include // Y_IGNORE #undef timespec #undef timeval diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp index daa36651700..611711b317e 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.cpp +++ b/dbms/src/Interpreters/AsynchronousMetrics.cpp @@ -15,7 +15,7 @@ #endif #if USE_TCMALLOC - #include + #include // Y_IGNORE /// Initializing malloc extension in global constructor as required. 
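// Aside (assumption, not stated in the original commit): the `// Y_IGNORE` tags
// added throughout this patch appear to mark include lines that Yandex's Arcadia
// build tooling should skip because those headers are unavailable in that build
// (compare the "Arcadia compatibility DEVTOOLS-3976" comment earlier in this
// series). The bracketed header names were lost when this patch was flattened to
// text; a representative hunk would read roughly as follows, the header name
// being our reconstruction rather than the original:
//
//     #if USE_TCMALLOC
//     #include <gperftools/malloc_extension.h> // Y_IGNORE
//     #endif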
struct MallocExtensionInitializer diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 66c43d69ee3..9e2982b8de8 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -18,30 +18,30 @@ #pragma GCC diagnostic ignored "-Wunused-parameter" #pragma GCC diagnostic ignored "-Wnon-virtual-dtor" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE #pragma GCC diagnostic pop diff --git a/dbms/src/Storages/StorageKafka.cpp b/dbms/src/Storages/StorageKafka.cpp index 335cbb73618..a9666bab22c 100644 --- a/dbms/src/Storages/StorageKafka.cpp +++ b/dbms/src/Storages/StorageKafka.cpp @@ -23,15 +23,15 @@ #include #include #include -#include +#include // Y_IGNORE #include #include #include #if __has_include() // maybe bundled -#include +#include // Y_IGNORE #else // system -#include +#include // Y_IGNORE #endif diff --git a/libs/libcommon/include/common/readline_use.h b/libs/libcommon/include/common/readline_use.h index acecc0ffa08..97622b26839 100644 --- a/libs/libcommon/include/common/readline_use.h +++ b/libs/libcommon/include/common/readline_use.h @@ -10,7 +10,7 @@ #include #elif USE_LIBEDIT #include - #include + #include // Y_IGNORE #else #include #include From a31adcfdea04201a2818e70be7b61027bb080d1a Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 21 Jun 2018 20:35:03 +0300 Subject: [PATCH 117/151] Allow use unbundled cpuid (#2543) * Wait test server startup for 5s * clean * Allow use system libcpuid * cpuid: move include/cpuid/ -> include/libcpuid/ * fix --- CMakeLists.txt | 1 + cmake/find_cpuid.cmake | 20 +++++++++++++ contrib/libcpuid/CMakeLists.txt | 30 +++++++++---------- .../include/{cpuid => libcpuid}/asm-bits.c | 0 .../include/{cpuid => libcpuid}/asm-bits.h | 0 .../include/{cpuid => libcpuid}/config.h | 0 .../include/{cpuid => libcpuid}/cpuid_main.c | 2 +- .../include/{cpuid => libcpuid}/libcpuid.h | 0 .../{cpuid => libcpuid}/libcpuid_constants.h | 0 .../{cpuid => libcpuid}/libcpuid_types.h | 0 .../{cpuid => libcpuid}/libcpuid_util.c | 0 .../{cpuid => libcpuid}/libcpuid_util.h | 0 .../include/{cpuid => libcpuid}/rdtsc.c | 0 .../include/{cpuid => libcpuid}/rdtsc.h | 0 .../include/{cpuid => libcpuid}/recog_amd.c | 0 .../include/{cpuid => libcpuid}/recog_amd.h | 0 .../include/{cpuid => libcpuid}/recog_intel.c | 0 .../include/{cpuid => libcpuid}/recog_intel.h | 0 dbms/CMakeLists.txt | 2 +- .../Common/getNumberOfPhysicalCPUCores.cpp | 2 +- dbms/src/IO/AIO.h | 2 +- 21 files changed, 40 insertions(+), 19 deletions(-) create mode 100644 cmake/find_cpuid.cmake rename contrib/libcpuid/include/{cpuid => libcpuid}/asm-bits.c (100%) rename contrib/libcpuid/include/{cpuid => libcpuid}/asm-bits.h (100%) rename contrib/libcpuid/include/{cpuid => libcpuid}/config.h (100%) rename contrib/libcpuid/include/{cpuid => 
libcpuid}/cpuid_main.c (99%) rename contrib/libcpuid/include/{cpuid => libcpuid}/libcpuid.h (100%) rename contrib/libcpuid/include/{cpuid => libcpuid}/libcpuid_constants.h (100%) rename contrib/libcpuid/include/{cpuid => libcpuid}/libcpuid_types.h (100%) rename contrib/libcpuid/include/{cpuid => libcpuid}/libcpuid_util.c (100%) rename contrib/libcpuid/include/{cpuid => libcpuid}/libcpuid_util.h (100%) rename contrib/libcpuid/include/{cpuid => libcpuid}/rdtsc.c (100%) rename contrib/libcpuid/include/{cpuid => libcpuid}/rdtsc.h (100%) rename contrib/libcpuid/include/{cpuid => libcpuid}/recog_amd.c (100%) rename contrib/libcpuid/include/{cpuid => libcpuid}/recog_amd.h (100%) rename contrib/libcpuid/include/{cpuid => libcpuid}/recog_intel.c (100%) rename contrib/libcpuid/include/{cpuid => libcpuid}/recog_intel.h (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cff6f24a20..d193861d118 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -273,6 +273,7 @@ include (cmake/find_re2.cmake) include (cmake/find_rdkafka.cmake) include (cmake/find_capnp.cmake) include (cmake/find_llvm.cmake) +include (cmake/find_cpuid.cmake) include (cmake/find_contrib_lib.cmake) find_contrib_lib(cityhash) diff --git a/cmake/find_cpuid.cmake b/cmake/find_cpuid.cmake new file mode 100644 index 00000000000..d486e0fb2a3 --- /dev/null +++ b/cmake/find_cpuid.cmake @@ -0,0 +1,20 @@ +option (USE_INTERNAL_CPUID_LIBRARY "Set to FALSE to use system cpuid library instead of bundled" ${NOT_UNBUNDLED}) + +#if (USE_INTERNAL_CPUID_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcpuid/include/cpuid/libcpuid.h") +# message (WARNING "submodule contrib/libcpuid is missing. to fix try run: \n git submodule update --init --recursive") +# set (USE_INTERNAL_CPUID_LIBRARY 0) +#endif () + +if (NOT USE_INTERNAL_CPUID_LIBRARY) + find_library (CPUID_LIBRARY cpuid) + find_path (CPUID_INCLUDE_DIR NAMES libcpuid/libcpuid.h PATHS ${CPUID_INCLUDE_PATHS}) +endif () + +if (CPUID_LIBRARY AND CPUID_INCLUDE_DIR) +else () + set (CPUID_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcpuid/include) + set (USE_INTERNAL_CPUID_LIBRARY 1) + set (CPUID_LIBRARY cpuid) +endif () + +message (STATUS "Using cpuid: ${CPUID_INCLUDE_DIR} : ${CPUID_LIBRARY}") diff --git a/contrib/libcpuid/CMakeLists.txt b/contrib/libcpuid/CMakeLists.txt index 4c257b20de8..c04acf99f36 100644 --- a/contrib/libcpuid/CMakeLists.txt +++ b/contrib/libcpuid/CMakeLists.txt @@ -1,20 +1,20 @@ add_library(cpuid -include/cpuid/asm-bits.c -include/cpuid/cpuid_main.c -include/cpuid/libcpuid_util.c -include/cpuid/rdtsc.c -include/cpuid/recog_amd.c -include/cpuid/recog_intel.c +include/libcpuid/asm-bits.c +include/libcpuid/cpuid_main.c +include/libcpuid/libcpuid_util.c +include/libcpuid/rdtsc.c +include/libcpuid/recog_amd.c +include/libcpuid/recog_intel.c -include/cpuid/asm-bits.h -include/cpuid/config.h -include/cpuid/libcpuid_constants.h -include/cpuid/libcpuid.h -include/cpuid/libcpuid_types.h -include/cpuid/libcpuid_util.h -include/cpuid/rdtsc.h -include/cpuid/recog_amd.h -include/cpuid/recog_intel.h +include/libcpuid/asm-bits.h +include/libcpuid/config.h +include/libcpuid/libcpuid_constants.h +include/libcpuid/libcpuid.h +include/libcpuid/libcpuid_types.h +include/libcpuid/libcpuid_util.h +include/libcpuid/rdtsc.h +include/libcpuid/recog_amd.h +include/libcpuid/recog_intel.h ) target_include_directories (cpuid PUBLIC include) diff --git a/contrib/libcpuid/include/cpuid/asm-bits.c b/contrib/libcpuid/include/libcpuid/asm-bits.c similarity index 100% rename from 
contrib/libcpuid/include/cpuid/asm-bits.c rename to contrib/libcpuid/include/libcpuid/asm-bits.c diff --git a/contrib/libcpuid/include/cpuid/asm-bits.h b/contrib/libcpuid/include/libcpuid/asm-bits.h similarity index 100% rename from contrib/libcpuid/include/cpuid/asm-bits.h rename to contrib/libcpuid/include/libcpuid/asm-bits.h diff --git a/contrib/libcpuid/include/cpuid/config.h b/contrib/libcpuid/include/libcpuid/config.h similarity index 100% rename from contrib/libcpuid/include/cpuid/config.h rename to contrib/libcpuid/include/libcpuid/config.h diff --git a/contrib/libcpuid/include/cpuid/cpuid_main.c b/contrib/libcpuid/include/libcpuid/cpuid_main.c similarity index 99% rename from contrib/libcpuid/include/cpuid/cpuid_main.c rename to contrib/libcpuid/include/libcpuid/cpuid_main.c index c387ed6c5ca..737aa706a37 100644 --- a/contrib/libcpuid/include/cpuid/cpuid_main.c +++ b/contrib/libcpuid/include/libcpuid/cpuid_main.c @@ -83,7 +83,7 @@ static int parse_token(const char* expected_token, const char *token, /* get_total_cpus() system specific code: uses OS routines to determine total number of CPUs */ #ifdef __APPLE__ #include -#include // Y_IGNORE +#include #include #include static int get_total_cpus(void) diff --git a/contrib/libcpuid/include/cpuid/libcpuid.h b/contrib/libcpuid/include/libcpuid/libcpuid.h similarity index 100% rename from contrib/libcpuid/include/cpuid/libcpuid.h rename to contrib/libcpuid/include/libcpuid/libcpuid.h diff --git a/contrib/libcpuid/include/cpuid/libcpuid_constants.h b/contrib/libcpuid/include/libcpuid/libcpuid_constants.h similarity index 100% rename from contrib/libcpuid/include/cpuid/libcpuid_constants.h rename to contrib/libcpuid/include/libcpuid/libcpuid_constants.h diff --git a/contrib/libcpuid/include/cpuid/libcpuid_types.h b/contrib/libcpuid/include/libcpuid/libcpuid_types.h similarity index 100% rename from contrib/libcpuid/include/cpuid/libcpuid_types.h rename to contrib/libcpuid/include/libcpuid/libcpuid_types.h diff --git a/contrib/libcpuid/include/cpuid/libcpuid_util.c b/contrib/libcpuid/include/libcpuid/libcpuid_util.c similarity index 100% rename from contrib/libcpuid/include/cpuid/libcpuid_util.c rename to contrib/libcpuid/include/libcpuid/libcpuid_util.c diff --git a/contrib/libcpuid/include/cpuid/libcpuid_util.h b/contrib/libcpuid/include/libcpuid/libcpuid_util.h similarity index 100% rename from contrib/libcpuid/include/cpuid/libcpuid_util.h rename to contrib/libcpuid/include/libcpuid/libcpuid_util.h diff --git a/contrib/libcpuid/include/cpuid/rdtsc.c b/contrib/libcpuid/include/libcpuid/rdtsc.c similarity index 100% rename from contrib/libcpuid/include/cpuid/rdtsc.c rename to contrib/libcpuid/include/libcpuid/rdtsc.c diff --git a/contrib/libcpuid/include/cpuid/rdtsc.h b/contrib/libcpuid/include/libcpuid/rdtsc.h similarity index 100% rename from contrib/libcpuid/include/cpuid/rdtsc.h rename to contrib/libcpuid/include/libcpuid/rdtsc.h diff --git a/contrib/libcpuid/include/cpuid/recog_amd.c b/contrib/libcpuid/include/libcpuid/recog_amd.c similarity index 100% rename from contrib/libcpuid/include/cpuid/recog_amd.c rename to contrib/libcpuid/include/libcpuid/recog_amd.c diff --git a/contrib/libcpuid/include/cpuid/recog_amd.h b/contrib/libcpuid/include/libcpuid/recog_amd.h similarity index 100% rename from contrib/libcpuid/include/cpuid/recog_amd.h rename to contrib/libcpuid/include/libcpuid/recog_amd.h diff --git a/contrib/libcpuid/include/cpuid/recog_intel.c b/contrib/libcpuid/include/libcpuid/recog_intel.c similarity index 100% rename from 
contrib/libcpuid/include/cpuid/recog_intel.c rename to contrib/libcpuid/include/libcpuid/recog_intel.c diff --git a/contrib/libcpuid/include/cpuid/recog_intel.h b/contrib/libcpuid/include/libcpuid/recog_intel.h similarity index 100% rename from contrib/libcpuid/include/cpuid/recog_intel.h rename to contrib/libcpuid/include/libcpuid/recog_intel.h diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 119072158e7..eaf21b0b6ac 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -128,7 +128,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELW endif () if (NOT ARCH_ARM) - set (LINK_LIBRARIES_ONLY_ON_X86_64 cpuid) + set (LINK_LIBRARIES_ONLY_ON_X86_64 ${CPUID_LIBRARY}) endif() target_link_libraries (clickhouse_common_io diff --git a/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp b/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp index 832f53e0d86..b1f091bd5e6 100644 --- a/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -4,7 +4,7 @@ #if defined(__x86_64__) - #include + #include #include namespace DB { namespace ErrorCodes { extern const int CPUID_ERROR; }} diff --git a/dbms/src/IO/AIO.h b/dbms/src/IO/AIO.h index 59089f866cb..e21a316a770 100644 --- a/dbms/src/IO/AIO.h +++ b/dbms/src/IO/AIO.h @@ -8,7 +8,7 @@ #define itimerspec linux_itimerspec #define sigset_t linux_sigset_t -#include // Y_IGNORE +#include #undef timespec #undef timeval From ea947ce808fcdcf36390d3ad9067b8969c619113 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 21 Jun 2018 22:01:02 +0300 Subject: [PATCH 118/151] fix TEST_SERVER_STARTUP_WAIT --- debian/pbuilder-hooks/B90test-server | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/pbuilder-hooks/B90test-server b/debian/pbuilder-hooks/B90test-server index 6ed2d593fa5..2e6d7b7c894 100755 --- a/debian/pbuilder-hooks/B90test-server +++ b/debian/pbuilder-hooks/B90test-server @@ -66,7 +66,7 @@ if [ "${TEST_CONNECT}" ]; then trap finish EXIT SIGINT SIGQUIT SIGTERM service clickhouse-server start - sleep {TEST_SERVER_STARTUP_WAIT:=5} + sleep ${TEST_SERVER_STARTUP_WAIT:=5} # TODO: remove me or make only on error: tail -n100 /var/log/clickhouse-server/*.log /var/log/clickhouse-server/stderr || true From b2cdfcbacac2b05d742364029da2d4914fee7fe0 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 18 Jun 2018 15:17:46 +0300 Subject: [PATCH 119/151] calculate and update parts_to_do for mutations [#CLICKHOUSE-3747] --- .../Storages/MergeTree/ActiveDataPartSet.cpp | 17 +- .../Storages/MergeTree/ActiveDataPartSet.h | 2 +- .../MergeTree/MergeTreeMutationStatus.h | 3 + .../MergeTree/ReplicatedMergeTreeLogEntry.h | 23 +-- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 187 ++++++++++++++---- .../MergeTree/ReplicatedMergeTreeQueue.h | 23 ++- .../System/StorageSystemMutations.cpp | 2 + 7 files changed, 205 insertions(+), 52 deletions(-) diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp index 3e91cbc177a..e4d7465b360 100644 --- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB @@ -12,16 +13,19 @@ ActiveDataPartSet::ActiveDataPartSet(MergeTreeDataFormatVersion format_version_, } -void ActiveDataPartSet::add(const String & name) +bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) { auto part_info = MergeTreePartInfo::fromPartName(name, format_version); if 
(getContainingPartImpl(part_info) != part_info_to_name.end()) - return; + return false; /// Parts contained in `part` are located contiguously in `part_info_to_name`, overlapping with the place where the part itself would be inserted. auto it = part_info_to_name.lower_bound(part_info); + if (out_replaced_parts) + out_replaced_parts->clear(); + /// Let's go left. while (it != part_info_to_name.begin()) { @@ -31,16 +35,25 @@ void ActiveDataPartSet::add(const String & name) ++it; break; } + + if (out_replaced_parts) + out_replaced_parts->push_back(it->second); part_info_to_name.erase(it++); } + if (out_replaced_parts) + std::reverse(out_replaced_parts->begin(), out_replaced_parts->end()); + /// Let's go to the right. while (it != part_info_to_name.end() && part_info.contains(it->first)) { + if (out_replaced_parts) + out_replaced_parts->push_back(it->second); part_info_to_name.erase(it++); } part_info_to_name.emplace(part_info, name); + return true; } diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h index c5d3fdefb45..df0dbe9b8c6 100644 --- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h +++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h @@ -43,7 +43,7 @@ public: return *this; } - void add(const String & name); + bool add(const String & name, Strings * out_replaced_parts = nullptr); bool remove(const MergeTreePartInfo & part_info) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeMutationStatus.h b/dbms/src/Storages/MergeTree/MergeTreeMutationStatus.h index 6df3bf58d20..a535688592e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMutationStatus.h +++ b/dbms/src/Storages/MergeTree/MergeTreeMutationStatus.h @@ -13,6 +13,9 @@ struct MergeTreeMutationStatus String command; time_t create_time = 0; std::map block_numbers; + + /// A number of parts that should be mutated/merged or otherwise moved to Obsolete state for this mutation to complete. + Int64 parts_to_do = 0; }; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index bc451f12829..956520207c5 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -98,18 +98,19 @@ struct ReplicatedMergeTreeLogEntryData std::shared_ptr replace_range_entry; - /// Returns set of parts that will appear after the entry execution - /// These parts are added to virtual_parts - Strings getNewPartNames() const + /// Returns a set of parts that will appear after executing the entry + parts to block + /// selection of merges. These parts are added to queue.virtual_parts. + Strings getVirtualPartNames() const { - /// Clear column actually does not produce new parts - if (type == CLEAR_COLUMN) - return {}; - - /// It does not add a real part, it just disables merges in that range + /// DROP_RANGE does not add a real part, but we must disable merges in that range if (type == DROP_RANGE) return {new_part_name}; + /// Return {} because selection of merges in the partition where the column is cleared + /// should not be blocked (only execution of merges should be blocked). 
+ if (type == CLEAR_COLUMN) + return {}; + if (type == REPLACE_RANGE) { Strings res = replace_range_entry->new_part_names; @@ -120,11 +121,11 @@ struct ReplicatedMergeTreeLogEntryData return {new_part_name}; } - /// Returns set of parts that should be blocked during the entry execution - /// These parts are added to future_parts + /// Returns set of parts that denote the block number ranges that should be blocked during the entry execution. + /// These parts are added to future_parts. Strings getBlockingPartNames() const { - Strings res = getNewPartNames(); + Strings res = getVirtualPartNames(); if (type == CLEAR_COLUMN) res.emplace_back(new_part_name); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 75d967bd90a..d191e7755a5 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -21,16 +21,21 @@ namespace ErrorCodes ReplicatedMergeTreeQueue::ReplicatedMergeTreeQueue(StorageReplicatedMergeTree & storage_) : storage(storage_) , format_version(storage.data.format_version) + , current_parts(format_version) , virtual_parts(format_version) {} void ReplicatedMergeTreeQueue::addVirtualParts(const MergeTreeData::DataParts & parts) { - std::lock_guard lock(target_state_mutex); + std::lock_guard target_state_lock(target_state_mutex); + std::lock_guard queue_lock(queue_mutex); for (const auto & part : parts) + { + current_parts.add(part->name); virtual_parts.add(part->name); + } } @@ -97,9 +102,24 @@ void ReplicatedMergeTreeQueue::insertUnlocked( std::lock_guard & /* target_state_lock */, std::lock_guard & /* queue_lock */) { - for (const String & virtual_part_name : entry->getNewPartNames()) + for (const String & virtual_part_name : entry->getVirtualPartNames()) + { virtual_parts.add(virtual_part_name); + /// Update mutations status. + /// To complete each mutation with a block number bigger than part_info.getDataVersion() + /// we would need to mutate the part `virtual_part_name`. + + auto part_info = MergeTreePartInfo::fromPartName(virtual_part_name, format_version); + auto mutations_in_partition = mutations_by_partition.find(part_info.partition_id); + if (mutations_in_partition == mutations_by_partition.end()) + continue; + + auto from_it = mutations_in_partition->second.upper_bound(part_info.getDataVersion()); + for (auto it = from_it; it != mutations_in_partition->second.end(); ++it) + ++it->second->parts_to_do; + } + /// Put 'DROP PARTITION' entries at the beginning of the queue not to make superfluous fetches of parts that will be eventually deleted if (entry->type != LogEntry::DROP_RANGE) queue.push_back(entry); @@ -133,33 +153,92 @@ void ReplicatedMergeTreeQueue::insert(zkutil::ZooKeeperPtr zookeeper, LogEntryPt } -void ReplicatedMergeTreeQueue::updateTimesOnRemoval( +void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( const LogEntryPtr & entry, + bool is_successful, std::optional & min_unprocessed_insert_time_changed, std::optional & max_processed_insert_time_changed, std::unique_lock & /* queue_lock */) { - if (entry->type != LogEntry::GET_PART) - return; - - inserts_by_time.erase(entry); - - if (inserts_by_time.empty()) + /// Update insert times. 
+ if (entry->type == LogEntry::GET_PART) { - min_unprocessed_insert_time = 0; - min_unprocessed_insert_time_changed = min_unprocessed_insert_time; - } - else if ((*inserts_by_time.begin())->create_time > min_unprocessed_insert_time) - { - min_unprocessed_insert_time = (*inserts_by_time.begin())->create_time; - min_unprocessed_insert_time_changed = min_unprocessed_insert_time; + inserts_by_time.erase(entry); + + if (inserts_by_time.empty()) + { + min_unprocessed_insert_time = 0; + min_unprocessed_insert_time_changed = min_unprocessed_insert_time; + } + else if ((*inserts_by_time.begin())->create_time > min_unprocessed_insert_time) + { + min_unprocessed_insert_time = (*inserts_by_time.begin())->create_time; + min_unprocessed_insert_time_changed = min_unprocessed_insert_time; + } + + if (entry->create_time > max_processed_insert_time) + { + max_processed_insert_time = entry->create_time; + max_processed_insert_time_changed = max_processed_insert_time; + } } - if (entry->create_time > max_processed_insert_time) + if (is_successful) { - max_processed_insert_time = entry->create_time; - max_processed_insert_time_changed = max_processed_insert_time; + for (const String & virtual_part_name : entry->getVirtualPartNames()) + { + Strings replaced_parts; + current_parts.add(virtual_part_name, &replaced_parts); + + /// Update mutations state. + /// Each part from `replaced_parts` should become Obsolete as a result of executing the entry. + /// So it is one less part to mutate for each mutation with block number greater than part_info.getDataVersion() + + for (const String & replaced_part_name : replaced_parts) + { + auto part_info = MergeTreePartInfo::fromPartName(replaced_part_name, format_version); + auto in_partition = mutations_by_partition.find(part_info.partition_id); + if (in_partition == mutations_by_partition.end()) + continue; + + auto from_it = in_partition->second.upper_bound(part_info.getDataVersion()); + for (auto it = from_it; it != in_partition->second.end(); ++it) + --it->second->parts_to_do; + } + } + + String drop_range_part_name; + if (entry->type == LogEntry::DROP_RANGE) + drop_range_part_name = entry->new_part_name; + else if (entry->type == LogEntry::REPLACE_RANGE) + drop_range_part_name = entry->replace_range_entry->drop_range_part_name; + + if (!drop_range_part_name.empty()) + { + current_parts.remove(drop_range_part_name); + virtual_parts.remove(drop_range_part_name); + } } + else + { + for (const String & virtual_part_name : entry->getVirtualPartNames()) + { + /// Update mutations state. + /// Because execution of the entry is unsuccessful, `virtual_part_name` will never appear + /// so we won't need to mutate it. + + auto part_info = MergeTreePartInfo::fromPartName(virtual_part_name, format_version); + auto in_partition = mutations_by_partition.find(part_info.partition_id); + if (in_partition == mutations_by_partition.end()) + continue; + + auto from_it = in_partition->second.upper_bound(part_info.getDataVersion()); + for (auto it = from_it; it != in_partition->second.end(); ++it) + --it->second->parts_to_do; + } + } + + /// TODO notify mutationsFinalizingTask if mutation.parts_to_do becomes 0. 
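// Illustrative walkthrough of the bookkeeping above (not original code): suppose
// a mutation with block number 100 in partition "all" currently has
// parts_to_do == 2 because parts all_1_1_0 and all_2_2_0 (data versions 1 and 2,
// both < 100) still need mutating. Queuing a merge that produces all_1_2_1
// already did ++parts_to_do for the new virtual part (data version 1 < 100),
// giving 3. If the merge succeeds, the two replaced parts are decremented here,
// leaving parts_to_do == 1: only the merged part all_1_2_1 remains to be
// mutated. If the merge fails, only the increment for the virtual part that
// will now never appear is undone, and parts_to_do returns to 2.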
} @@ -221,14 +300,16 @@ void ReplicatedMergeTreeQueue::removeProcessedEntry(zkutil::ZooKeeperPtr zookeep --it; if (*it == entry) { - queue.erase(it); found = true; + updateStateOnQueueEntryRemoval( + entry, /* is_successful = */ true, + min_unprocessed_insert_time_changed, max_processed_insert_time_changed, lock); + + queue.erase(it); queue_size = queue.size(); break; } } - - updateTimesOnRemoval(entry, min_unprocessed_insert_time_changed, max_processed_insert_time_changed, lock); } if (!found) @@ -259,9 +340,11 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri if ((*it)->new_part_name == part_name) { found = *it; + updateStateOnQueueEntryRemoval( + found, /* is_successful = */ false, + min_unprocessed_insert_time_changed, max_processed_insert_time_changed, queue_lock); queue.erase(it++); queue_size = queue.size(); - updateTimesOnRemoval(found, min_unprocessed_insert_time_changed, max_processed_insert_time_changed, queue_lock); break; } else @@ -441,11 +524,12 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, z /// Compare with the local state, delete obsolete entries and determine which new entries to load. Strings entries_to_load; { - std::lock_guard lock(target_state_mutex); + std::lock_guard target_state_lock(target_state_mutex); + std::lock_guard queue_lock(queue_mutex); for (auto it = mutations_by_znode.begin(); it != mutations_by_znode.end(); ) { - const ReplicatedMergeTreeMutationEntry & entry = *it->second; + const ReplicatedMergeTreeMutationEntry & entry = *it->second.entry; if (!entries_in_zk_set.count(entry.znode_name)) { LOG_DEBUG(log, "Removing obsolete mutation " + entry.znode_name + " from local state."); @@ -456,6 +540,7 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, z if (in_partition.empty()) mutations_by_partition.erase(partition_and_block_num.first); } + it = mutations_by_znode.erase(it); } else @@ -486,15 +571,45 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, z } { - std::lock_guard lock(target_state_mutex); + std::lock_guard target_state_lock(target_state_mutex); + std::lock_guard queue_lock(queue_mutex); for (const ReplicatedMergeTreeMutationEntryPtr & entry : new_mutations) { - mutations_by_znode.emplace(entry->znode_name, entry); + auto & mutation = mutations_by_znode.emplace(entry->znode_name, MutationStatus{entry, 0}) + .first->second; - for (const auto & partition_and_block_num : entry->block_numbers) - mutations_by_partition[partition_and_block_num.first].emplace( - partition_and_block_num.second, entry); + for (const auto & pair : entry->block_numbers) + { + const String & partition_id = pair.first; + Int64 block_num = pair.second; + + mutations_by_partition[partition_id].emplace(block_num, &mutation); + + /// Initialize `mutation.parts_to_do`. First we need to mutate all parts in `current_parts`. + MergeTreePartInfo covering_part_info( + partition_id, 0, block_num, MergeTreePartInfo::MAX_LEVEL, MergeTreePartInfo::MAX_BLOCK_NUMBER); + for (const String & covered_part_name : current_parts.getPartsCoveredBy(covering_part_info)) + { + auto part_info = MergeTreePartInfo::fromPartName(covered_part_name, format_version); + if (part_info.getDataVersion() < block_num) + ++mutation.parts_to_do; + } + } + + /// (continue initializing `mutation.parts_to_do`) And next we would need to mutate all + /// parts with getDataVersion() greater than mutation block number that would appear as + /// a result of executing the queue. 
+ for (const auto & queue_entry : queue) + { + for (const String & produced_part_name : queue_entry->getVirtualPartNames()) + { + auto part_info = MergeTreePartInfo::fromPartName(produced_part_name, format_version); + auto it = entry->block_numbers.find(part_info.partition_id); + if (it != entry->block_numbers.end() && it->second > part_info.getDataVersion()) + ++mutation.parts_to_do; + } + } } } @@ -570,7 +685,9 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange(zkutil::ZooKeeperPt LOG_INFO(log, "Couldn't remove " << replica_path + "/queue/" + (*it)->znode_name << ": " << zkutil::ZooKeeper::error2string(code)); - updateTimesOnRemoval(*it, min_unprocessed_insert_time_changed, max_processed_insert_time_changed, lock); + updateStateOnQueueEntryRemoval( + *it, /* is_successful = */ false, + min_unprocessed_insert_time_changed, max_processed_insert_time_changed, lock); queue.erase(it++); ++removed_entries; } @@ -951,7 +1068,7 @@ MutationCommands ReplicatedMergeTreeQueue::getMutationCommands( MutationCommands commands; for (auto it = begin; it != end; ++it) - commands.insert(commands.end(), it->second->commands.begin(), it->second->commands.end()); + commands.insert(commands.end(), it->second->entry->commands.begin(), it->second->entry->commands.end()); return commands; } @@ -1045,12 +1162,13 @@ void ReplicatedMergeTreeQueue::getInsertTimes(time_t & out_min_unprocessed_inser std::vector ReplicatedMergeTreeQueue::getMutationsStatus() const { - std::lock_guard lock(target_state_mutex); + std::lock_guard lock(queue_mutex); std::vector result; for (const auto & pair : mutations_by_znode) { - const ReplicatedMergeTreeMutationEntry & entry = *pair.second; + const MutationStatus & status = pair.second; + const ReplicatedMergeTreeMutationEntry & entry = *status.entry; for (const MutationCommand & command : entry.commands) { @@ -1062,6 +1180,7 @@ std::vector ReplicatedMergeTreeQueue::getMutationsStatu ss.str(), entry.create_time, entry.block_numbers, + status.parts_to_do, }); } } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index cacdab7c288..d42cd0cac69 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -59,6 +59,11 @@ private: /// Protects the queue, future_parts and other queue state variables. mutable std::mutex queue_mutex; + /// A set of parts that should be on this replica according to the queue entries executed up to this point. + /// Note: it can be different from the actual set of parts because the replica can decide to fetch + /// a bigger part instead of the part mentioned in the log entry. + ActiveDataPartSet current_parts; + /** The queue of what you need to do on this line to catch up. It is taken from ZooKeeper (/replicas/me/queue/). * In ZK records in chronological order. Here it is not necessary. */ @@ -71,7 +76,7 @@ private: time_t last_queue_update = 0; /// parts that will appear as a result of actions performed right now by background threads (these actions are not in the queue). - /// Used to not perform other actions at the same time with these parts. + /// Used to block other actions on parts in the range covered by future_parts. using FuturePartsSet = std::map; FuturePartsSet future_parts; @@ -92,8 +97,17 @@ private: /// mutations_by_partition is an index partition ID -> block ID -> mutation into this list. 
/// Note that mutations are updated in such a way that they are always more recent than /// log_pointer (see pullLogsToQueue()). - std::map mutations_by_znode; - std::unordered_map> mutations_by_partition; + + struct MutationStatus + { + ReplicatedMergeTreeMutationEntryPtr entry; + + /// A number of parts that should be mutated/merged or otherwise moved to Obsolete state for this mutation to complete. + Int64 parts_to_do = 0; + }; + + std::map mutations_by_znode; + std::unordered_map> mutations_by_partition; /// Provides only one simultaneous call to pullLogsToQueue. @@ -160,7 +174,8 @@ private: /// After removing the queue element, update the insertion times in the RAM. Running under queue_mutex. /// Returns information about what times have changed - this information can be passed to updateTimesInZooKeeper. - void updateTimesOnRemoval(const LogEntryPtr & entry, + void updateStateOnQueueEntryRemoval(const LogEntryPtr & entry, + bool is_successful, std::optional & min_unprocessed_insert_time_changed, std::optional & max_processed_insert_time_changed, std::unique_lock & queue_lock); diff --git a/dbms/src/Storages/System/StorageSystemMutations.cpp b/dbms/src/Storages/System/StorageSystemMutations.cpp index 90fce84d709..e064c1fabb9 100644 --- a/dbms/src/Storages/System/StorageSystemMutations.cpp +++ b/dbms/src/Storages/System/StorageSystemMutations.cpp @@ -25,6 +25,7 @@ StorageSystemMutations::StorageSystemMutations(const std::string & name_) std::make_shared()) }, { "block_numbers.number", std::make_shared( std::make_shared()) }, + { "parts_to_do", std::make_shared() }, })); } @@ -110,6 +111,7 @@ BlockInputStreams StorageSystemMutations::read( res_columns[col_num++]->insert(UInt64(status.create_time)); res_columns[col_num++]->insert(block_partition_ids); res_columns[col_num++]->insert(block_numbers); + res_columns[col_num++]->insert(status.parts_to_do); } } From 36982b967d2caedfc7d8df0962ec20177d60dce6 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 20 Jun 2018 14:12:16 +0300 Subject: [PATCH 120/151] use single mutex in ReplicatedMergeTreeQueue for simplicity --- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 85 +++++++++---------- .../MergeTree/ReplicatedMergeTreeQueue.h | 28 +++--- 2 files changed, 49 insertions(+), 64 deletions(-) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index d191e7755a5..2e1d262b047 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -28,8 +28,7 @@ ReplicatedMergeTreeQueue::ReplicatedMergeTreeQueue(StorageReplicatedMergeTree & void ReplicatedMergeTreeQueue::addVirtualParts(const MergeTreeData::DataParts & parts) { - std::lock_guard target_state_lock(target_state_mutex); - std::lock_guard queue_lock(queue_mutex); + std::lock_guard lock(state_mutex); for (const auto & part : parts) { @@ -48,7 +47,7 @@ bool ReplicatedMergeTreeQueue::load(zkutil::ZooKeeperPtr zookeeper) std::optional min_unprocessed_insert_time_changed; { - std::lock_guard target_state_lock(target_state_mutex); + std::lock_guard lock(state_mutex); String log_pointer_str = zookeeper->get(replica_path + "/log_pointer"); log_pointer = log_pointer_str.empty() ? 
0 : parse(log_pointer_str); @@ -63,14 +62,13 @@ bool ReplicatedMergeTreeQueue::load(zkutil::ZooKeeperPtr zookeeper) for (const String & child : children) futures.emplace_back(child, zookeeper->asyncGet(queue_path + "/" + child)); - std::lock_guard queue_lock(queue_mutex); for (auto & future : futures) { zkutil::GetResponse res = future.second.get(); LogEntryPtr entry = LogEntry::parse(res.data, res.stat); entry->znode_name = future.first; - insertUnlocked(entry, min_unprocessed_insert_time_changed, target_state_lock, queue_lock); + insertUnlocked(entry, min_unprocessed_insert_time_changed, lock); updated = true; } @@ -99,8 +97,7 @@ void ReplicatedMergeTreeQueue::initialize( void ReplicatedMergeTreeQueue::insertUnlocked( const LogEntryPtr & entry, std::optional & min_unprocessed_insert_time_changed, - std::lock_guard & /* target_state_lock */, - std::lock_guard & /* queue_lock */) + std::lock_guard & /* state_lock */) { for (const String & virtual_part_name : entry->getVirtualPartNames()) { @@ -144,9 +141,8 @@ void ReplicatedMergeTreeQueue::insert(zkutil::ZooKeeperPtr zookeeper, LogEntryPt std::optional min_unprocessed_insert_time_changed; { - std::lock_guard target_state_lock(target_state_mutex); - std::lock_guard queue_lock(queue_mutex); - insertUnlocked(entry, min_unprocessed_insert_time_changed, target_state_lock, queue_lock); + std::lock_guard lock(state_mutex); + insertUnlocked(entry, min_unprocessed_insert_time_changed, lock); } updateTimesInZooKeeper(zookeeper, min_unprocessed_insert_time_changed, {}); @@ -289,7 +285,7 @@ void ReplicatedMergeTreeQueue::removeProcessedEntry(zkutil::ZooKeeperPtr zookeep size_t queue_size = 0; { - std::unique_lock lock(queue_mutex); + std::unique_lock lock(state_mutex); /// Remove the job from the queue in the RAM. /// You can not just refer to a pre-saved iterator, because someone else might be able to delete the task. 
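// Aside on this commit's motivation (our reading; the author says only "for
// simplicity"): with two mutexes the code had to honour a fixed acquisition
// order (the header comment deleted later in this patch said to lock
// target_state_mutex before queue_mutex), since threads taking them in opposite
// orders can deadlock:
//
//     // thread A:                      // thread B:
//     lock(target_state_mutex);         lock(queue_mutex);
//     lock(queue_mutex);    // blocks   lock(target_state_mutex);  // blocks
//
// Collapsing both into a single state_mutex makes that interleaving impossible
// by construction, at the cost of somewhat coarser locking.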
@@ -330,8 +326,7 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri std::optional max_processed_insert_time_changed; { - std::unique_lock target_state_lock(target_state_mutex); - std::unique_lock queue_lock(queue_mutex); + std::unique_lock lock(state_mutex); virtual_parts.remove(part_name); @@ -342,7 +337,7 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri found = *it; updateStateOnQueueEntryRemoval( found, /* is_successful = */ false, - min_unprocessed_insert_time_changed, max_processed_insert_time_changed, queue_lock); + min_unprocessed_insert_time_changed, max_processed_insert_time_changed, lock); queue.erase(it++); queue_size = queue.size(); break; @@ -366,7 +361,7 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri bool ReplicatedMergeTreeQueue::removeFromVirtualParts(const MergeTreePartInfo & part_info) { - std::unique_lock target_state_lock(target_state_mutex); + std::unique_lock lock(state_mutex); return virtual_parts.remove(part_info); } @@ -459,7 +454,7 @@ void ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, z const auto & entry = *copied_entries.back(); if (entry.type == LogEntry::GET_PART) { - std::lock_guard lock(queue_mutex); + std::lock_guard lock(state_mutex); if (entry.create_time && (!min_unprocessed_insert_time || entry.create_time < min_unprocessed_insert_time)) { min_unprocessed_insert_time = entry.create_time; @@ -481,19 +476,17 @@ void ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, z try { - std::lock_guard target_state_lock(target_state_mutex); + std::lock_guard lock(state_mutex); log_pointer = last_entry_index + 1; - std::lock_guard queue_lock(queue_mutex); - for (size_t i = 0, size = copied_entries.size(); i < size; ++i) { String path_created = dynamic_cast(*responses[i]).path_created; copied_entries[i]->znode_name = path_created.substr(path_created.find_last_of('/') + 1); std::optional unused = false; - insertUnlocked(copied_entries[i], unused, target_state_lock, queue_lock); + insertUnlocked(copied_entries[i], unused, lock); } last_queue_update = time(nullptr); @@ -524,8 +517,7 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, z /// Compare with the local state, delete obsolete entries and determine which new entries to load. Strings entries_to_load; { - std::lock_guard target_state_lock(target_state_mutex); - std::lock_guard queue_lock(queue_mutex); + std::lock_guard lock(state_mutex); for (auto it = mutations_by_znode.begin(); it != mutations_by_znode.end(); ) { @@ -571,8 +563,7 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, z } { - std::lock_guard target_state_lock(target_state_mutex); - std::lock_guard queue_lock(queue_mutex); + std::lock_guard lock(state_mutex); for (const ReplicatedMergeTreeMutationEntryPtr & entry : new_mutations) { @@ -620,7 +611,7 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, z ReplicatedMergeTreeQueue::StringSet ReplicatedMergeTreeQueue::moveSiblingPartsForMergeToEndOfQueue(const String & part_name) { - std::lock_guard lock(queue_mutex); + std::lock_guard lock(state_mutex); /// Let's find the action to merge this part with others. Let's remember others. 
StringSet parts_for_merge; @@ -670,7 +661,7 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange(zkutil::ZooKeeperPt std::optional max_processed_insert_time_changed; /// Remove operations with parts, contained in the range to be deleted, from the queue. - std::unique_lock lock(queue_mutex); + std::unique_lock lock(state_mutex); for (Queue::iterator it = queue.begin(); it != queue.end();) { auto type = (*it)->type; @@ -743,7 +734,7 @@ size_t ReplicatedMergeTreeQueue::getConflictsCountForRange( void ReplicatedMergeTreeQueue::checkThereAreNoConflictsInRange(const MergeTreePartInfo & range, const LogEntry & entry) { String conflicts_description; - std::lock_guard lock(queue_mutex); + std::lock_guard lock(state_mutex); if (0 != getConflictsCountForRange(range, entry, &conflicts_description, lock)) throw Exception(conflicts_description, ErrorCodes::UNFINISHED); @@ -788,7 +779,7 @@ bool ReplicatedMergeTreeQueue::isNotCoveredByFuturePartsImpl(const String & new_ bool ReplicatedMergeTreeQueue::addFuturePartIfNotCoveredByThem(const String & part_name, LogEntry & entry, String & reject_reason) { - std::lock_guard lock(queue_mutex); + std::lock_guard lock(state_mutex); if (isNotCoveredByFuturePartsImpl(part_name, reject_reason, lock)) { @@ -890,7 +881,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( Int64 ReplicatedMergeTreeQueue::getCurrentMutationVersionImpl( - const String & partition_id, Int64 data_version, std::lock_guard & /* target_state_lock */) const + const String & partition_id, Int64 data_version, std::lock_guard & /* state_lock */) const { auto in_partition = mutations_by_partition.find(partition_id); if (in_partition == mutations_by_partition.end()) @@ -907,7 +898,7 @@ Int64 ReplicatedMergeTreeQueue::getCurrentMutationVersionImpl( Int64 ReplicatedMergeTreeQueue::getCurrentMutationVersion(const String & partition_id, Int64 data_version) const { - std::lock_guard lock(target_state_mutex); + std::lock_guard lock(state_mutex); return getCurrentMutationVersionImpl(partition_id, data_version, lock); } @@ -946,7 +937,7 @@ void ReplicatedMergeTreeQueue::CurrentlyExecuting::setActualPartName(ReplicatedM ReplicatedMergeTreeQueue::CurrentlyExecuting::~CurrentlyExecuting() { - std::lock_guard lock(queue.queue_mutex); + std::lock_guard lock(queue.state_mutex); entry->currently_executing = false; entry->execution_complete.notify_all(); @@ -971,7 +962,7 @@ ReplicatedMergeTreeQueue::SelectedEntry ReplicatedMergeTreeQueue::selectEntryToP { LogEntryPtr entry; - std::lock_guard lock(queue_mutex); + std::lock_guard lock(state_mutex); for (auto it = queue.begin(); it != queue.end(); ++it) { @@ -1017,7 +1008,7 @@ bool ReplicatedMergeTreeQueue::processEntry( if (saved_exception) { - std::lock_guard lock(queue_mutex); + std::lock_guard lock(state_mutex); entry->exception = saved_exception; return false; } @@ -1046,7 +1037,7 @@ MutationCommands ReplicatedMergeTreeQueue::getMutationCommands( return MutationCommands{}; } - std::lock_guard lock(target_state_mutex); + std::lock_guard lock(state_mutex); auto in_partition = mutations_by_partition.find(part->info.partition_id); if (in_partition == mutations_by_partition.end()) @@ -1075,14 +1066,14 @@ MutationCommands ReplicatedMergeTreeQueue::getMutationCommands( void ReplicatedMergeTreeQueue::disableMergesInRange(const String & part_name) { - std::lock_guard lock(target_state_mutex); + std::lock_guard lock(state_mutex); virtual_parts.add(part_name); } ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const { - std::lock_guard 
lock(queue_mutex); + std::lock_guard lock(state_mutex); Status res; @@ -1144,7 +1135,7 @@ ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const void ReplicatedMergeTreeQueue::getEntries(LogEntriesData & res) const { res.clear(); - std::lock_guard lock(queue_mutex); + std::lock_guard lock(state_mutex); res.reserve(queue.size()); for (const auto & entry : queue) @@ -1154,7 +1145,7 @@ void ReplicatedMergeTreeQueue::getEntries(LogEntriesData & res) const void ReplicatedMergeTreeQueue::getInsertTimes(time_t & out_min_unprocessed_insert_time, time_t & out_max_processed_insert_time) const { - std::lock_guard lock(queue_mutex); + std::lock_guard lock(state_mutex); out_min_unprocessed_insert_time = min_unprocessed_insert_time; out_max_processed_insert_time = max_processed_insert_time; } @@ -1162,7 +1153,7 @@ void ReplicatedMergeTreeQueue::getInsertTimes(time_t & out_min_unprocessed_inser std::vector ReplicatedMergeTreeQueue::getMutationsStatus() const { - std::lock_guard lock(queue_mutex); + std::lock_guard lock(state_mutex); std::vector result; for (const auto & pair : mutations_by_znode) @@ -1195,7 +1186,7 @@ ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( , prev_virtual_parts(queue.format_version) { { - std::lock_guard lock(queue.target_state_mutex); + std::lock_guard lock(queue.state_mutex); prev_virtual_parts = queue.virtual_parts; } @@ -1356,7 +1347,7 @@ bool ReplicatedMergeTreeMergePredicate::operator()( } } - std::lock_guard target_state_lock(queue.target_state_mutex); + std::lock_guard lock(queue.state_mutex); for (const MergeTreeData::DataPartPtr & part : {left, right}) { @@ -1389,9 +1380,9 @@ bool ReplicatedMergeTreeMergePredicate::operator()( } Int64 left_mutation_ver = queue.getCurrentMutationVersionImpl( - left->info.partition_id, left->info.getDataVersion(), target_state_lock); + left->info.partition_id, left->info.getDataVersion(), lock); Int64 right_mutation_ver = queue.getCurrentMutationVersionImpl( - left->info.partition_id, right->info.getDataVersion(), target_state_lock); + left->info.partition_id, right->info.getDataVersion(), lock); if (left_mutation_ver != right_mutation_ver) { if (out_reason) @@ -1406,7 +1397,7 @@ bool ReplicatedMergeTreeMergePredicate::operator()( size_t ReplicatedMergeTreeMergePredicate::countMergesAndPartMutations() const { - std::lock_guard lock(queue.queue_mutex); + std::lock_guard lock(queue.state_mutex); size_t count = 0; for (const auto & entry : queue.queue) @@ -1420,7 +1411,7 @@ size_t ReplicatedMergeTreeMergePredicate::countMergesAndPartMutations() const size_t ReplicatedMergeTreeMergePredicate::countMutations() const { - std::lock_guard lock(queue.target_state_mutex); + std::lock_guard lock(queue.state_mutex); return queue.mutations_by_znode.size(); } @@ -1438,7 +1429,7 @@ std::optional ReplicatedMergeTreeMergePredicate::getDesiredMutationVersio || part->name == inprogress_quorum_part) return {}; - std::lock_guard lock(queue.target_state_mutex); + std::lock_guard lock(queue.state_mutex); if (queue.virtual_parts.getContainingPart(part->info) != part->name) return {}; @@ -1458,7 +1449,7 @@ std::optional ReplicatedMergeTreeMergePredicate::getDesiredMutationVersio ReplicatedMergeTreeQueue::SubscriberHandler ReplicatedMergeTreeQueue::addSubscriber(ReplicatedMergeTreeQueue::SubscriberCallBack && callback) { - std::lock_guard lock(queue_mutex); + std::lock_guard lock(state_mutex); std::lock_guard lock_subscribers(subscribers_mutex); auto it = subscribers.emplace(subscribers.end(), std::move(callback)); diff 
--git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index d42cd0cac69..c740ca47db4 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -57,7 +57,7 @@ private: Logger * log = nullptr; /// Protects the queue, future_parts and other queue state variables. - mutable std::mutex queue_mutex; + mutable std::mutex state_mutex; /// A set of parts that should be on this replica according to the queue entries executed up to this point. /// Note: it can be different from the actual set of parts because the replica can decide to fetch @@ -80,11 +80,6 @@ private: using FuturePartsSet = std::map; FuturePartsSet future_parts; - - /// Protects virtual_parts, log_pointer, mutations. - /// If you intend to lock both target_state_mutex and queue_mutex, lock target_state_mutex first. - mutable std::mutex target_state_mutex; - /// Index of the first log entry that we didn't see yet. Int64 log_pointer = 0; @@ -150,35 +145,34 @@ private: void insertUnlocked( const LogEntryPtr & entry, std::optional & min_unprocessed_insert_time_changed, - std::lock_guard & target_state_lock, - std::lock_guard & queue_lock); + std::lock_guard & state_lock); void removeProcessedEntry(zkutil::ZooKeeperPtr zookeeper, LogEntryPtr & entry); /** Can I now try this action. If not, you need to leave it in the queue and try another one. - * Called under the queue_mutex. + * Called under the state_mutex. */ bool shouldExecuteLogEntry( const LogEntry & entry, String & out_postpone_reason, MergeTreeDataMergerMutator & merger_mutator, MergeTreeData & data, - std::lock_guard & queue_lock) const; + std::lock_guard & state_lock) const; - Int64 getCurrentMutationVersionImpl(const String & partition_id, Int64 data_version, std::lock_guard & /* target_state_lock */) const; + Int64 getCurrentMutationVersionImpl(const String & partition_id, Int64 data_version, std::lock_guard & /* state_lock */) const; /** Check that part isn't in currently generating parts and isn't covered by them. - * Should be called under queue_mutex. + * Should be called under state_mutex. */ bool isNotCoveredByFuturePartsImpl( const String & new_part_name, String & out_reason, - std::lock_guard & queue_lock) const; + std::lock_guard & state_lock) const; - /// After removing the queue element, update the insertion times in the RAM. Running under queue_mutex. + /// After removing the queue element, update the insertion times in the RAM. Running under state_mutex. /// Returns information about what times have changed - this information can be passed to updateTimesInZooKeeper. void updateStateOnQueueEntryRemoval(const LogEntryPtr & entry, bool is_successful, std::optional & min_unprocessed_insert_time_changed, std::optional & max_processed_insert_time_changed, - std::unique_lock & queue_lock); + std::unique_lock & state_lock); /// Update the insertion times in ZooKeeper. void updateTimesInZooKeeper(zkutil::ZooKeeperPtr zookeeper, @@ -188,7 +182,7 @@ private: /// Returns list of currently executing parts blocking execution a command modifying specified range size_t getConflictsCountForRange( const MergeTreePartInfo & range, const LogEntry & entry, String * out_description, - std::lock_guard & queue_lock) const; + std::lock_guard & state_lock) const; /// Marks the element of the queue as running. class CurrentlyExecuting @@ -202,7 +196,7 @@ private: /// Created only in the selectEntryToProcess function. It is called under mutex. 
CurrentlyExecuting(const ReplicatedMergeTreeQueue::LogEntryPtr & entry_, ReplicatedMergeTreeQueue & queue); - /// In case of fetch, we determine actual part during the execution, so we need to update entry. It is called under queue_mutex. + /// In case of fetch, we determine actual part during the execution, so we need to update entry. It is called under state_mutex. static void setActualPartName(ReplicatedMergeTreeQueue::LogEntry & entry, const String & actual_part_name, ReplicatedMergeTreeQueue & queue); public: From 2fb3d42dfde789769f2725d61d08a726277d53c0 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 20 Jun 2018 14:37:50 +0300 Subject: [PATCH 121/151] temporary fix for a bug when several mutations have the same predicate [#CLICKHOUSE-3747] --- .../ApplyingMutationsBlockInputStream.cpp | 39 +++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/dbms/src/DataStreams/ApplyingMutationsBlockInputStream.cpp b/dbms/src/DataStreams/ApplyingMutationsBlockInputStream.cpp index bfcfcb85418..0de7b5acbaf 100644 --- a/dbms/src/DataStreams/ApplyingMutationsBlockInputStream.cpp +++ b/dbms/src/DataStreams/ApplyingMutationsBlockInputStream.cpp @@ -13,7 +13,19 @@ ApplyingMutationsBlockInputStream::ApplyingMutationsBlockInputStream( { children.push_back(input); - impl = input; + if (commands.empty()) + { + impl = input; + return; + } + + /// Create a total predicate for all mutations and then pass it to a single FilterBlockInputStream + /// because ExpressionAnalyzer won't detect that some columns in the block are already calculated + /// and will try to calculate them twice. This works as long as all mutations are DELETE. + /// TODO: fix ExpressionAnalyzer. + + std::vector predicates; + for (const MutationCommand & cmd : commands) { switch (cmd.type) @@ -25,12 +37,7 @@ ApplyingMutationsBlockInputStream::ApplyingMutationsBlockInputStream( predicate->arguments = std::make_shared(); predicate->arguments->children.push_back(cmd.predicate); predicate->children.push_back(predicate->arguments); - - auto predicate_expr = ExpressionAnalyzer( - predicate, context, nullptr, impl->getHeader().getNamesAndTypesList()).getActions(false); - String col_name = predicate->getColumnName(); - - impl = std::make_shared(impl, predicate_expr, col_name); + predicates.push_back(predicate); break; } default: @@ -38,6 +45,24 @@ ApplyingMutationsBlockInputStream::ApplyingMutationsBlockInputStream( ErrorCodes::LOGICAL_ERROR); } } + + ASTPtr total_predicate; + if (predicates.size() == 1) + total_predicate = predicates[0]; + else + { + auto and_func = std::make_shared(); + and_func->name = "and"; + and_func->arguments = std::make_shared(); + and_func->children.push_back(and_func->arguments); + and_func->arguments->children = predicates; + total_predicate = and_func; + } + + auto predicate_expr = ExpressionAnalyzer( + total_predicate, context, nullptr, input->getHeader().getNamesAndTypesList()).getActions(false); + String col_name = total_predicate->getColumnName(); + impl = std::make_shared(input, predicate_expr, col_name); } Block ApplyingMutationsBlockInputStream::getHeader() const From a6c61848f16b333749b20d88b83f06b9799e0ffa Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Thu, 21 Jun 2018 16:27:36 +0300 Subject: [PATCH 122/151] mutations finalizing task [#CLICKHOUSE-3747] --- .../MergeTree/MergeTreeMutationStatus.h | 3 + .../MergeTree/ReplicatedMergeTreeQueue.cpp | 135 +++++++++++++++++- .../MergeTree/ReplicatedMergeTreeQueue.h | 14 +- .../Storages/StorageReplicatedMergeTree.cpp | 29 
++++ .../src/Storages/StorageReplicatedMergeTree.h | 6 + .../System/StorageSystemMutations.cpp | 2 + 6 files changed, 186 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeMutationStatus.h b/dbms/src/Storages/MergeTree/MergeTreeMutationStatus.h index a535688592e..9f56681a8b5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMutationStatus.h +++ b/dbms/src/Storages/MergeTree/MergeTreeMutationStatus.h @@ -16,6 +16,9 @@ struct MergeTreeMutationStatus /// A number of parts that should be mutated/merged or otherwise moved to Obsolete state for this mutation to complete. Int64 parts_to_do = 0; + + /// If the mutation is done. Note that in case of ReplicatedMergeTree parts_to_do == 0 doesn't imply is_done == true. + bool is_done = false; }; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 2e1d262b047..99cb38785e0 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -72,6 +72,8 @@ bool ReplicatedMergeTreeQueue::load(zkutil::ZooKeeperPtr zookeeper) updated = true; } + + zookeeper->tryGet(replica_path + "/mutation_pointer", mutation_pointer); } updateTimesInZooKeeper(zookeeper, min_unprocessed_insert_time_changed, {}); @@ -179,6 +181,8 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( } } + bool some_mutations_are_probably_done = false; + if (is_successful) { for (const String & virtual_part_name : entry->getVirtualPartNames()) @@ -199,7 +203,11 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( auto from_it = in_partition->second.upper_bound(part_info.getDataVersion()); for (auto it = from_it; it != in_partition->second.end(); ++it) + { --it->second->parts_to_do; + if (it->second->parts_to_do <= 0) + some_mutations_are_probably_done = true; + } } } @@ -230,11 +238,16 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( auto from_it = in_partition->second.upper_bound(part_info.getDataVersion()); for (auto it = from_it; it != in_partition->second.end(); ++it) + { --it->second->parts_to_do; + if (it->second->parts_to_do <= 0) + some_mutations_are_probably_done = true; + } } } - /// TODO notify mutationsFinalizingTask if mutation.parts_to_do becomes 0. 
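
The removed TODO above is resolved by the lines that follow: whenever a queue entry makes a part current or obsolete, parts_to_do is decremented for every mutation whose block number exceeds the part's data version, and the finalizing task is woken once a counter reaches zero. A condensed model of that bookkeeping (simplified types; illustrative only):

#include <map>

struct MutationStatusSketch
{
    long parts_to_do = 0;
};

/// Per-partition index: block number -> mutation, as in mutations_by_partition.
using MutationsByVersion = std::map<long, MutationStatusSketch *>;

/// Returns true if some mutation may have finished, i.e. the finalizing task
/// should be scheduled.
bool onPartReplaced(MutationsByVersion & mutations, long data_version)
{
    bool some_mutations_are_probably_done = false;

    /// Only mutations with a block number strictly greater than the part's
    /// data version still had this part left to mutate.
    for (auto it = mutations.upper_bound(data_version); it != mutations.end(); ++it)
    {
        --it->second->parts_to_do;
        if (it->second->parts_to_do <= 0)
            some_mutations_are_probably_done = true;
    }

    return some_mutations_are_probably_done;
}
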
+ if (some_mutations_are_probably_done) + storage.mutations_finalizing_task->schedule(); } @@ -562,12 +575,13 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, z ReplicatedMergeTreeMutationEntry::parse(futures[i].get().data, entries_to_load[i]))); } + bool some_mutations_are_probably_done = false; { std::lock_guard lock(state_mutex); for (const ReplicatedMergeTreeMutationEntryPtr & entry : new_mutations) { - auto & mutation = mutations_by_znode.emplace(entry->znode_name, MutationStatus{entry, 0}) + auto & mutation = mutations_by_znode.emplace(entry->znode_name, MutationStatus{entry, 0, false}) .first->second; for (const auto & pair : entry->block_numbers) @@ -601,10 +615,16 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, z ++mutation.parts_to_do; } } + + if (mutation.parts_to_do == 0) + some_mutations_are_probably_done = true; } } storage.merge_selecting_task->schedule(); + + if (some_mutations_are_probably_done) + storage.mutations_finalizing_task->schedule(); } } @@ -1064,6 +1084,67 @@ MutationCommands ReplicatedMergeTreeQueue::getMutationCommands( return commands; } + +bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeeper) +{ + std::vector candidates; + { + std::lock_guard lock(state_mutex); + + for (auto & kv : mutations_by_znode) + { + const String & znode = kv.first; + MutationStatus & mutation = kv.second; + + if (mutation.is_done) + continue; + + if (znode <= mutation_pointer) + { + LOG_TRACE(log, "Marking mutation " << znode << " done because it is <= mutation_pointer (" << mutation_pointer << ")"); + mutation.is_done = true; + } + else if (mutation.parts_to_do == 0) + { + LOG_TRACE(log, "Will check if mutation " << mutation.entry->znode_name << " is done"); + candidates.push_back(mutation.entry); + } + } + } + + if (candidates.empty()) + return false; + + auto merge_pred = getMergePredicate(zookeeper); + + std::vector finished; + for (const ReplicatedMergeTreeMutationEntryPtr & candidate : candidates) + { + if (merge_pred.isMutationFinished(*candidate)) + finished.push_back(candidate.get()); + } + + if (!finished.empty()) + zookeeper->set(replica_path + "/mutation_pointer", finished.back()->znode_name); + + { + std::lock_guard lock(state_mutex); + + for (const ReplicatedMergeTreeMutationEntry * entry : finished) + { + auto it = mutations_by_znode.find(entry->znode_name); + if (it != mutations_by_znode.end()) + { + LOG_TRACE(log, "Mutation " << entry->znode_name << " is done"); + it->second.is_done = true; + } + } + } + + return candidates.size() != finished.size(); +} + + void ReplicatedMergeTreeQueue::disableMergesInRange(const String & part_name) { std::lock_guard lock(state_mutex); @@ -1172,6 +1253,7 @@ std::vector ReplicatedMergeTreeQueue::getMutationsStatu entry.create_time, entry.block_numbers, status.parts_to_do, + status.is_done, }); } } @@ -1446,6 +1528,55 @@ std::optional ReplicatedMergeTreeMergePredicate::getDesiredMutationVersio return max_version; } + +bool ReplicatedMergeTreeMergePredicate::isMutationFinished(const ReplicatedMergeTreeMutationEntry & mutation) const +{ + for (const auto & kv : mutation.block_numbers) + { + const String & partition_id = kv.first; + Int64 block_num = kv.second; + + auto partition_it = committing_blocks.find(partition_id); + if (partition_it != committing_blocks.end()) + { + size_t blocks_count = std::distance( + partition_it->second.begin(), partition_it->second.lower_bound(block_num)); + if (blocks_count) + { + LOG_TRACE(queue.log, "Mutation 
" << mutation.znode_name << " is not done yet because " + << "in partition ID " << partition_id << " there are still " + << blocks_count << " uncommitted blocks."); + return false; + } + } + + size_t suddenly_appeared_parts = 0; + { + std::lock_guard lock(queue.state_mutex); + + MergeTreePartInfo covering_part_info( + partition_id, 0, block_num, MergeTreePartInfo::MAX_LEVEL, MergeTreePartInfo::MAX_BLOCK_NUMBER); + for (const String & covered_part_name : queue.virtual_parts.getPartsCoveredBy(covering_part_info)) + { + auto part_info = MergeTreePartInfo::fromPartName(covered_part_name, queue.format_version); + if (part_info.getDataVersion() < block_num) + ++suddenly_appeared_parts; + } + } + + if (suddenly_appeared_parts) + { + LOG_TRACE(queue.log, "Mutation " << mutation.znode_name << " is not done yet because " + << "in partition ID " << partition_id << " " << suddenly_appeared_parts + << " suddenly appeared."); + return false; + } + } + + return true; +} + + ReplicatedMergeTreeQueue::SubscriberHandler ReplicatedMergeTreeQueue::addSubscriber(ReplicatedMergeTreeQueue::SubscriberCallBack && callback) { diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index c740ca47db4..d380fc791a9 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -89,7 +89,7 @@ private: ActiveDataPartSet virtual_parts; /// A set of mutations loaded from ZooKeeper. - /// mutations_by_partition is an index partition ID -> block ID -> mutation into this list. + /// mutations_by_partition is an index partition ID -> block ID -> mutation into this set. /// Note that mutations are updated in such a way that they are always more recent than /// log_pointer (see pullLogsToQueue()). @@ -99,10 +99,16 @@ private: /// A number of parts that should be mutated/merged or otherwise moved to Obsolete state for this mutation to complete. Int64 parts_to_do = 0; + + /// Note that is_done is not equivalent to parts_to_do == 0 + /// (even if parts_to_do == 0 some relevant parts can still commit in the future). + bool is_done = false; }; std::map mutations_by_znode; std::unordered_map> mutations_by_partition; + /// Znode ID of the latest mutation that is done. + String mutation_pointer; /// Provides only one simultaneous call to pullLogsToQueue. @@ -274,6 +280,10 @@ public: MutationCommands getMutationCommands(const MergeTreeData::DataPartPtr & part, Int64 desired_mutation_version) const; + /// Mark finished mutations as done. If the function needs to be called again at some later time + /// (because some mutations are probably done but we are not sure yet), returns true. + bool tryFinalizeMutations(zkutil::ZooKeeperPtr zookeeper); + /// Prohibit merges in the specified range. void disableMergesInRange(const String & part_name); @@ -340,6 +350,8 @@ public: /// Returned mutation version number is always the biggest possible. 
std::optional getDesiredMutationVersion(const MergeTreeData::DataPartPtr & part) const; + bool isMutationFinished(const ReplicatedMergeTreeMutationEntry & mutation) const; + private: const ReplicatedMergeTreeQueue & queue; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index b0d94f66212..9538c9a36ef 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -119,6 +119,7 @@ namespace ActionLocks static const auto QUEUE_UPDATE_ERROR_SLEEP_MS = 1 * 1000; static const auto MERGE_SELECTING_SLEEP_MS = 5 * 1000; +static const auto MUTATIONS_FINALIZING_SLEEP_MS = 1 * 1000; /** There are three places for each part, where it should be * 1. In the RAM, MergeTreeData::data_parts, all_data_parts. @@ -235,6 +236,8 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( /// Will be activated if we win leader election. merge_selecting_task->deactivate(); + mutations_finalizing_task = context.getSchedulePool().createTask(database_name + "." + table_name + " (StorageReplicatedMergeTree::mutationsFinalizingTask)", [this] { mutationsFinalizingTask(); }); + if (context.hasZooKeeper()) current_zookeeper = context.getZooKeeper(); @@ -307,6 +310,7 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() /// Mutations zookeeper->createIfNotExists(zookeeper_path + "/mutations", String()); + zookeeper->createIfNotExists(replica_path + "/mutation_pointer", String()); } @@ -2249,6 +2253,25 @@ void StorageReplicatedMergeTree::mergeSelectingTask() } +void StorageReplicatedMergeTree::mutationsFinalizingTask() +{ + bool needs_reschedule = false; + + try + { + needs_reschedule = queue.tryFinalizeMutations(getZooKeeper()); + } + catch (...) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + needs_reschedule = true; + } + + if (needs_reschedule) + mutations_finalizing_task->scheduleAfter(MUTATIONS_FINALIZING_SLEEP_MS); +} + + bool StorageReplicatedMergeTree::createLogEntryToMergeParts( zkutil::ZooKeeperPtr & zookeeper, const MergeTreeData::DataPartsVector & parts, @@ -4038,7 +4061,13 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, const int32_t rc = zookeeper->tryMulti(requests, responses); if (rc == ZooKeeperImpl::ZooKeeper::ZOK) + { + const String & path_created = + static_cast(responses[1].get())->path_created; + entry.znode_name = path_created.substr(path_created.find_last_of('/') + 1); + LOG_TRACE(log, "Created mutation with id " << entry.znode_name); break; + } else if (rc == ZooKeeperImpl::ZooKeeper::ZBADVERSION) { LOG_TRACE(log, "Version conflict when trying to create a mutation node, retrying..."); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 3a5593a6d5c..eac3e1d9c79 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -284,6 +284,9 @@ private: /// A task that selects parts to merge. BackgroundSchedulePool::TaskHolder merge_selecting_task; + /// A task that marks finished mutations as done. + BackgroundSchedulePool::TaskHolder mutations_finalizing_task; + /// It is acquired for each iteration of the selection of parts to merge or each OPTIMIZE query. std::mutex merge_selecting_mutex; @@ -412,6 +415,9 @@ private: */ void mergeSelectingTask(); + /// Checks if some mutations are done and marks them as done. 
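
mutationsFinalizingTask() (its definition appears in the .cpp hunk above) follows a simple retry discipline: one attempt per wakeup, and a delayed reschedule whenever the attempt was inconclusive or threw. A stripped-down model of one iteration (the callbacks stand in for the queue and BackgroundSchedulePool; names are illustrative):

#include <functional>

void runFinalizingIteration(
    const std::function<bool()> & try_finalize,
    const std::function<void(int)> & schedule_after_ms)
{
    bool needs_reschedule = false;
    try
    {
        needs_reschedule = try_finalize();
    }
    catch (...)
    {
        /// On any error, retry later instead of losing the task.
        needs_reschedule = true;
    }

    if (needs_reschedule)
        schedule_after_ms(1000); /// cf. MUTATIONS_FINALIZING_SLEEP_MS
}
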
+ void mutationsFinalizingTask(); + /** Write the selected parts to merge into the log, * Call when merge_selecting_mutex is locked. * Returns false if any part is not in ZK. diff --git a/dbms/src/Storages/System/StorageSystemMutations.cpp b/dbms/src/Storages/System/StorageSystemMutations.cpp index e064c1fabb9..8caeb3a01e6 100644 --- a/dbms/src/Storages/System/StorageSystemMutations.cpp +++ b/dbms/src/Storages/System/StorageSystemMutations.cpp @@ -26,6 +26,7 @@ StorageSystemMutations::StorageSystemMutations(const std::string & name_) { "block_numbers.number", std::make_shared( std::make_shared()) }, { "parts_to_do", std::make_shared() }, + { "is_done", std::make_shared() }, })); } @@ -112,6 +113,7 @@ BlockInputStreams StorageSystemMutations::read( res_columns[col_num++]->insert(block_partition_ids); res_columns[col_num++]->insert(block_numbers); res_columns[col_num++]->insert(status.parts_to_do); + res_columns[col_num++]->insert(UInt64(status.is_done)); } } From e78dc8142cedc4c8047bf5ccf64b196e85b52965 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Thu, 21 Jun 2018 18:54:01 +0300 Subject: [PATCH 123/151] factor out common code [#CLICKHOUSE-3747] --- .../Storages/MergeTree/ActiveDataPartSet.h | 2 + .../MergeTree/ReplicatedMergeTreeQueue.cpp | 132 ++++++++---------- .../MergeTree/ReplicatedMergeTreeQueue.h | 4 + 3 files changed, 64 insertions(+), 74 deletions(-) diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h index df0dbe9b8c6..d68e18a4f40 100644 --- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h +++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h @@ -66,6 +66,8 @@ public: size_t size() const; + MergeTreeDataFormatVersion getFormatVersion() const { return format_version; } + private: MergeTreeDataFormatVersion format_version; std::map part_info_to_name; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 99cb38785e0..fb73aa64bef 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -104,19 +104,7 @@ void ReplicatedMergeTreeQueue::insertUnlocked( for (const String & virtual_part_name : entry->getVirtualPartNames()) { virtual_parts.add(virtual_part_name); - - /// Update mutations status. - /// To complete each mutation with a block number bigger than part_info.getDataVersion() - /// we would need to mutate the part `virtual_part_name`. 
- - auto part_info = MergeTreePartInfo::fromPartName(virtual_part_name, format_version); - auto mutations_in_partition = mutations_by_partition.find(part_info.partition_id); - if (mutations_in_partition == mutations_by_partition.end()) - continue; - - auto from_it = mutations_in_partition->second.upper_bound(part_info.getDataVersion()); - for (auto it = from_it; it != mutations_in_partition->second.end(); ++it) - ++it->second->parts_to_do; + updateMutationsPartsToDo(virtual_part_name, /* add = */ true); } /// Put 'DROP PARTITION' entries at the beginning of the queue not to make superfluous fetches of parts that will be eventually deleted @@ -181,8 +169,6 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( } } - bool some_mutations_are_probably_done = false; - if (is_successful) { for (const String & virtual_part_name : entry->getVirtualPartNames()) @@ -190,25 +176,10 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( Strings replaced_parts; current_parts.add(virtual_part_name, &replaced_parts); - /// Update mutations state. /// Each part from `replaced_parts` should become Obsolete as a result of executing the entry. /// So it is one less part to mutate for each mutation with block number greater than part_info.getDataVersion() - for (const String & replaced_part_name : replaced_parts) - { - auto part_info = MergeTreePartInfo::fromPartName(replaced_part_name, format_version); - auto in_partition = mutations_by_partition.find(part_info.partition_id); - if (in_partition == mutations_by_partition.end()) - continue; - - auto from_it = in_partition->second.upper_bound(part_info.getDataVersion()); - for (auto it = from_it; it != in_partition->second.end(); ++it) - { - --it->second->parts_to_do; - if (it->second->parts_to_do <= 0) - some_mutations_are_probably_done = true; - } - } + updateMutationsPartsToDo(replaced_part_name, /* add = */ false); } String drop_range_part_name; @@ -227,24 +198,30 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( { for (const String & virtual_part_name : entry->getVirtualPartNames()) { - /// Update mutations state. /// Because execution of the entry is unsuccessful, `virtual_part_name` will never appear /// so we won't need to mutate it. - - auto part_info = MergeTreePartInfo::fromPartName(virtual_part_name, format_version); - auto in_partition = mutations_by_partition.find(part_info.partition_id); - if (in_partition == mutations_by_partition.end()) - continue; - - auto from_it = in_partition->second.upper_bound(part_info.getDataVersion()); - for (auto it = from_it; it != in_partition->second.end(); ++it) - { - --it->second->parts_to_do; - if (it->second->parts_to_do <= 0) - some_mutations_are_probably_done = true; - } + updateMutationsPartsToDo(virtual_part_name, /* add = */ false); } } +} + + +void ReplicatedMergeTreeQueue::updateMutationsPartsToDo(const String & part_name, bool add) +{ + auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); + auto in_partition = mutations_by_partition.find(part_info.partition_id); + if (in_partition == mutations_by_partition.end()) + return; + + bool some_mutations_are_probably_done = false; + + auto from_it = in_partition->second.upper_bound(part_info.getDataVersion()); + for (auto it = from_it; it != in_partition->second.end(); ++it) + { + it->second->parts_to_do += (add ? 
+1 : -1); + if (it->second->parts_to_do <= 0) + some_mutations_are_probably_done = true; + } if (some_mutations_are_probably_done) storage.mutations_finalizing_task->schedule(); @@ -520,6 +497,32 @@ void ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, z } } + +static size_t countPartsToMutate( + const ReplicatedMergeTreeMutationEntry & mutation, const ActiveDataPartSet & parts) +{ + size_t count = 0; + for (const auto & pair : mutation.block_numbers) + { + const String & partition_id = pair.first; + Int64 block_num = pair.second; + + /// Note that we cannot simply count all parts to mutate using getPartsCoveredBy(appropriate part_info) + /// because they are not consecutive in `parts`. + MergeTreePartInfo covering_part_info( + partition_id, 0, block_num, MergeTreePartInfo::MAX_LEVEL, MergeTreePartInfo::MAX_BLOCK_NUMBER); + for (const String & covered_part_name : parts.getPartsCoveredBy(covering_part_info)) + { + auto part_info = MergeTreePartInfo::fromPartName(covered_part_name, parts.getFormatVersion()); + if (part_info.getDataVersion() < block_num) + ++count; + } + } + + return count; +} + + void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, zkutil::WatchCallback watch_callback) { std::lock_guard lock(update_mutations_mutex); @@ -588,23 +591,14 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, z { const String & partition_id = pair.first; Int64 block_num = pair.second; - mutations_by_partition[partition_id].emplace(block_num, &mutation); - - /// Initialize `mutation.parts_to_do`. First we need to mutate all parts in `current_parts`. - MergeTreePartInfo covering_part_info( - partition_id, 0, block_num, MergeTreePartInfo::MAX_LEVEL, MergeTreePartInfo::MAX_BLOCK_NUMBER); - for (const String & covered_part_name : current_parts.getPartsCoveredBy(covering_part_info)) - { - auto part_info = MergeTreePartInfo::fromPartName(covered_part_name, format_version); - if (part_info.getDataVersion() < block_num) - ++mutation.parts_to_do; - } } - /// (continue initializing `mutation.parts_to_do`) And next we would need to mutate all - /// parts with getDataVersion() greater than mutation block number that would appear as - /// a result of executing the queue. + /// Initialize `mutation.parts_to_do`. First we need to mutate all parts in `current_parts`. + mutation.parts_to_do += countPartsToMutate(*entry, current_parts); + + /// And next we would need to mutate all parts with getDataVersion() greater than + /// mutation block number that would appear as a result of executing the queue. 
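
countPartsToMutate() above reduces to a single filter: among the parts that fall into the mutation's partition, only those whose data version precedes the mutation's block number still have to be rewritten. The same filter in isolation (simplified part representation; illustrative only):

#include <cstddef>
#include <string>
#include <vector>

struct PartInfoSketch
{
    std::string partition_id;
    long data_version;
};

size_t countPartsToMutateSketch(
    const std::vector<PartInfoSketch> & parts, const std::string & partition_id, long block_num)
{
    size_t count = 0;
    for (const auto & part : parts)
        if (part.partition_id == partition_id && part.data_version < block_num)
            ++count;
    return count;
}
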
for (const auto & queue_entry : queue) { for (const String & produced_part_name : queue_entry->getVirtualPartNames()) @@ -1549,26 +1543,16 @@ bool ReplicatedMergeTreeMergePredicate::isMutationFinished(const ReplicatedMerge return false; } } + } - size_t suddenly_appeared_parts = 0; - { - std::lock_guard lock(queue.state_mutex); - - MergeTreePartInfo covering_part_info( - partition_id, 0, block_num, MergeTreePartInfo::MAX_LEVEL, MergeTreePartInfo::MAX_BLOCK_NUMBER); - for (const String & covered_part_name : queue.virtual_parts.getPartsCoveredBy(covering_part_info)) - { - auto part_info = MergeTreePartInfo::fromPartName(covered_part_name, queue.format_version); - if (part_info.getDataVersion() < block_num) - ++suddenly_appeared_parts; - } - } + { + std::lock_guard lock(queue.state_mutex); + size_t suddenly_appeared_parts = countPartsToMutate(mutation, queue.virtual_parts); if (suddenly_appeared_parts) { LOG_TRACE(queue.log, "Mutation " << mutation.znode_name << " is not done yet because " - << "in partition ID " << partition_id << " " << suddenly_appeared_parts - << " suddenly appeared."); + << suddenly_appeared_parts << " parts to mutate suddenly appeared."); return false; } } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index d380fc791a9..628bd0a9f52 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -180,6 +180,10 @@ private: std::optional & max_processed_insert_time_changed, std::unique_lock & state_lock); + /// If the new part appears (add == true) or becomes obsolete (add == false), update parts_to_do of all affected mutations. + /// Notifies storage.mutations_finalizing_task if some mutations are probably finished. + void updateMutationsPartsToDo(const String & part_name, bool add); + /// Update the insertion times in ZooKeeper. 
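
The committing-blocks test in isMutationFinished() (introduced in the previous patch and kept here) is an ordered-set query: how many block numbers below the mutation's block are still in flight? With std::set that is lower_bound plus distance, as in this sketch:

#include <cstddef>
#include <iterator>
#include <set>

/// Blocks below `block_num` that are still being committed prevent the
/// mutation from being finalized; zero means this partition is clear.
size_t countUncommittedBlocksBelow(const std::set<long> & committing_blocks, long block_num)
{
    return std::distance(committing_blocks.begin(),
                         committing_blocks.lower_bound(block_num));
}
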
void updateTimesInZooKeeper(zkutil::ZooKeeperPtr zookeeper, std::optional min_unprocessed_insert_time_changed, From b62a6b59ee14d53fbec873239a3543de688a56e7 Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 22 Jun 2018 00:51:14 +0300 Subject: [PATCH 124/151] Update libcpuid to 0.4.0 (#2544) --- .../libcpuid/include/libcpuid/amd_code_t.h | 65 ++ contrib/libcpuid/include/libcpuid/asm-bits.c | 10 +- contrib/libcpuid/include/libcpuid/config.h | 2 +- .../libcpuid/include/libcpuid/cpuid_main.c | 142 ++- .../libcpuid/include/libcpuid/intel_code_t.h | 83 ++ contrib/libcpuid/include/libcpuid/libcpuid.h | 300 +++++- .../include/libcpuid/libcpuid_constants.h | 5 +- .../include/libcpuid/libcpuid_internal.h | 63 ++ .../include/libcpuid/libcpuid_types.h | 31 + .../libcpuid/include/libcpuid/libcpuid_util.c | 29 +- .../libcpuid/include/libcpuid/libcpuid_util.h | 3 +- contrib/libcpuid/include/libcpuid/msrdriver.c | 593 +++++++++++ contrib/libcpuid/include/libcpuid/rdmsr.c | 922 ++++++++++++++++++ contrib/libcpuid/include/libcpuid/rdtsc.c | 55 +- contrib/libcpuid/include/libcpuid/recog_amd.c | 135 +-- contrib/libcpuid/include/libcpuid/recog_amd.h | 2 +- .../libcpuid/include/libcpuid/recog_intel.c | 396 +++++--- .../libcpuid/include/libcpuid/recog_intel.h | 2 +- 18 files changed, 2547 insertions(+), 291 deletions(-) create mode 100644 contrib/libcpuid/include/libcpuid/amd_code_t.h create mode 100644 contrib/libcpuid/include/libcpuid/intel_code_t.h create mode 100644 contrib/libcpuid/include/libcpuid/libcpuid_internal.h create mode 100644 contrib/libcpuid/include/libcpuid/msrdriver.c create mode 100644 contrib/libcpuid/include/libcpuid/rdmsr.c diff --git a/contrib/libcpuid/include/libcpuid/amd_code_t.h b/contrib/libcpuid/include/libcpuid/amd_code_t.h new file mode 100644 index 00000000000..2472a3d61d5 --- /dev/null +++ b/contrib/libcpuid/include/libcpuid/amd_code_t.h @@ -0,0 +1,65 @@ +/* + * Copyright 2016 Veselin Georgiev, + * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This file contains a list of internal codes we use in detection. It is + * of no external use and isn't a complete list of AMD products. 
+ */ + CODE2(OPTERON_GENERIC, 1000), + CODE(OPTERON_800), + CODE(ATHLON_XP), + CODE(ATHLON_XP_M), + CODE(ATHLON_XP_M_LV), + CODE(ATHLON), + CODE(ATHLON_MP), + CODE(MOBILE_ATHLON64), + CODE(ATHLON_FX), + CODE(DURON), + CODE(DURON_MP), + CODE(MOBILE_DURON), + CODE(MOBILE_SEMPRON), + CODE(OPTERON_SINGLE), + CODE(OPTERON_DUALCORE), + CODE(OPTERON_800_DUALCORE), + CODE(MOBILE_TURION), + CODE(ATHLON_64), + CODE(ATHLON_64_FX), + CODE(TURION_64), + CODE(TURION_X2), + CODE(SEMPRON), + CODE(M_SEMPRON), + CODE(SEMPRON_DUALCORE), + CODE(PHENOM), + CODE(PHENOM2), + CODE(ATHLON_64_X2), + CODE(ATHLON_64_X3), + CODE(ATHLON_64_X4), + CODE(FUSION_C), + CODE(FUSION_E), + CODE(FUSION_EA), + CODE(FUSION_Z), + CODE(FUSION_A), + diff --git a/contrib/libcpuid/include/libcpuid/asm-bits.c b/contrib/libcpuid/include/libcpuid/asm-bits.c index ca936f2abf5..b8e32284f57 100644 --- a/contrib/libcpuid/include/libcpuid/asm-bits.c +++ b/contrib/libcpuid/include/libcpuid/asm-bits.c @@ -75,13 +75,13 @@ int cpuid_exists_by_eflags(void) #endif /* PLATFORM_X86 */ } +#ifdef INLINE_ASM_SUPPORTED /* * with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions * are implemented in separate .asm files. Otherwise, use inline assembly */ void exec_cpuid(uint32_t *regs) { -#ifdef INLINE_ASM_SUPPORTED #ifdef COMPILER_GCC # ifdef PLATFORM_X64 __asm __volatile( @@ -166,8 +166,8 @@ void exec_cpuid(uint32_t *regs) # error "Unsupported compiler" # endif /* COMPILER_MICROSOFT */ #endif -#endif /* INLINE_ASSEMBLY_SUPPORTED */ } +#endif /* INLINE_ASSEMBLY_SUPPORTED */ #ifdef INLINE_ASM_SUPPORTED void cpu_rdtsc(uint64_t* result) @@ -214,7 +214,8 @@ void busy_sse_loop(int cycles) " xorps %%xmm6, %%xmm6\n" " xorps %%xmm7, %%xmm7\n" XALIGN - ".bsLoop:\n" + /* ".bsLoop:\n" */ + "1:\n" // 0: " addps %%xmm1, %%xmm0\n" " addps %%xmm2, %%xmm1\n" @@ -505,7 +506,8 @@ void busy_sse_loop(int cycles) " addps %%xmm0, %%xmm7\n" " dec %%eax\n" - " jnz .bsLoop\n" + /* "jnz .bsLoop\n" */ + " jnz 1b\n" ::"a"(cycles) ); #else diff --git a/contrib/libcpuid/include/libcpuid/config.h b/contrib/libcpuid/include/libcpuid/config.h index 29b513c4ac1..2326cfeede5 100644 --- a/contrib/libcpuid/include/libcpuid/config.h +++ b/contrib/libcpuid/include/libcpuid/config.h @@ -1,2 +1,2 @@ /* Version number of package */ -#define VERSION "0.2.1" +#define VERSION "0.4.0" diff --git a/contrib/libcpuid/include/libcpuid/cpuid_main.c b/contrib/libcpuid/include/libcpuid/cpuid_main.c index 737aa706a37..02a7cb7ad50 100644 --- a/contrib/libcpuid/include/libcpuid/cpuid_main.c +++ b/contrib/libcpuid/include/libcpuid/cpuid_main.c @@ -24,11 +24,14 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "libcpuid.h" +#include "libcpuid_internal.h" #include "recog_intel.h" #include "recog_amd.h" #include "asm-bits.h" #include "libcpuid_util.h" +//#ifdef HAVE_CONFIG_H // CLICKHOUSE PATCH #include "config.h" +//#endif // CLICKHOUSE PATCH #include #include #include @@ -51,9 +54,9 @@ static void raw_data_t_constructor(struct cpu_raw_data_t* raw) static void cpu_id_t_constructor(struct cpu_id_t* id) { memset(id, 0, sizeof(struct cpu_id_t)); - id->l1_data_cache = id->l1_instruction_cache = id->l2_cache = id->l3_cache = -1; - id->l1_assoc = id->l2_assoc = id->l3_assoc = -1; - id->l1_cacheline = id->l2_cacheline = id->l3_cacheline = -1; + id->l1_data_cache = id->l1_instruction_cache = id->l2_cache = id->l3_cache = id->l4_cache = -1; + id->l1_assoc = id->l2_assoc = id->l3_assoc = id->l4_assoc = -1; + id->l1_cacheline = id->l2_cacheline = id->l3_cacheline = id->l4_cacheline = -1; id->sse_size = -1; } @@ -181,14 +184,26 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da }; const struct feature_map_t matchtable_ecx1[] = { { 0, CPU_FEATURE_PNI }, + { 1, CPU_FEATURE_PCLMUL }, { 3, CPU_FEATURE_MONITOR }, { 9, CPU_FEATURE_SSSE3 }, { 12, CPU_FEATURE_FMA3 }, { 13, CPU_FEATURE_CX16 }, { 19, CPU_FEATURE_SSE4_1 }, - { 21, CPU_FEATURE_X2APIC }, + { 20, CPU_FEATURE_SSE4_2 }, + { 22, CPU_FEATURE_MOVBE }, { 23, CPU_FEATURE_POPCNT }, + { 25, CPU_FEATURE_AES }, + { 26, CPU_FEATURE_XSAVE }, + { 27, CPU_FEATURE_OSXSAVE }, + { 28, CPU_FEATURE_AVX }, { 29, CPU_FEATURE_F16C }, + { 30, CPU_FEATURE_RDRAND }, + }; + const struct feature_map_t matchtable_ebx7[] = { + { 3, CPU_FEATURE_BMI1 }, + { 5, CPU_FEATURE_AVX2 }, + { 8, CPU_FEATURE_BMI2 }, }; const struct feature_map_t matchtable_edx81[] = { { 11, CPU_FEATURE_SYSCALL }, @@ -205,6 +220,9 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data); match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data); } + if (raw->basic_cpuid[0][0] >= 7) { + match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data); + } if (raw->ext_cpuid[0][0] >= 0x80000001) { match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data); match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data); @@ -229,10 +247,10 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da } } -static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data) +static cpu_vendor_t cpuid_vendor_identify(const uint32_t *raw_vendor, char *vendor_str) { - int i, j, basic, xmodel, xfamily, ext; - char brandstr[64] = {0}; + int i; + cpu_vendor_t vendor = VENDOR_UNKNOWN; const struct { cpu_vendor_t vendor; char match[16]; } matchtable[NUM_CPU_VENDORS] = { /* source: http://www.sandpile.org/ia32/cpuid.htm */ @@ -247,18 +265,27 @@ static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* dat { VENDOR_SIS , "SiS SiS SiS " }, { VENDOR_NSC , "Geode by NSC" }, }; - - memcpy(data->vendor_str + 0, &raw->basic_cpuid[0][1], 4); - memcpy(data->vendor_str + 4, &raw->basic_cpuid[0][3], 4); - memcpy(data->vendor_str + 8, &raw->basic_cpuid[0][2], 4); - data->vendor_str[12] = 0; + + memcpy(vendor_str + 0, &raw_vendor[1], 4); + memcpy(vendor_str + 4, &raw_vendor[3], 4); + memcpy(vendor_str + 8, &raw_vendor[2], 4); + vendor_str[12] = 0; + /* Determine vendor: */ - data->vendor = VENDOR_UNKNOWN; for (i = 0; i 
< NUM_CPU_VENDORS; i++) - if (!strcmp(data->vendor_str, matchtable[i].match)) { - data->vendor = matchtable[i].vendor; + if (!strcmp(vendor_str, matchtable[i].match)) { + vendor = matchtable[i].vendor; break; } + return vendor; +} + +static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data) +{ + int i, j, basic, xmodel, xfamily, ext; + char brandstr[64] = {0}; + data->vendor = cpuid_vendor_identify(raw->basic_cpuid[0], data->vendor_str); + if (data->vendor == VENDOR_UNKNOWN) return set_error(ERR_CPU_UNKN); basic = raw->basic_cpuid[0][0]; @@ -347,7 +374,7 @@ int cpuid_get_raw_data(struct cpu_raw_data_t* data) cpu_exec_cpuid(i, data->basic_cpuid[i]); for (i = 0; i < 32; i++) cpu_exec_cpuid(0x80000000 + i, data->ext_cpuid[i]); - for (i = 0; i < 4; i++) { + for (i = 0; i < MAX_INTELFN4_LEVEL; i++) { memset(data->intel_fn4[i], 0, sizeof(data->intel_fn4[i])); data->intel_fn4[i][0] = 4; data->intel_fn4[i][2] = i; @@ -359,6 +386,18 @@ int cpuid_get_raw_data(struct cpu_raw_data_t* data) data->intel_fn11[i][2] = i; cpu_exec_cpuid_ext(data->intel_fn11[i]); } + for (i = 0; i < MAX_INTELFN12H_LEVEL; i++) { + memset(data->intel_fn12h[i], 0, sizeof(data->intel_fn12h[i])); + data->intel_fn12h[i][0] = 0x12; + data->intel_fn12h[i][2] = i; + cpu_exec_cpuid_ext(data->intel_fn12h[i]); + } + for (i = 0; i < MAX_INTELFN14H_LEVEL; i++) { + memset(data->intel_fn14h[i], 0, sizeof(data->intel_fn14h[i])); + data->intel_fn14h[i][0] = 0x14; + data->intel_fn14h[i][2] = i; + cpu_exec_cpuid_ext(data->intel_fn14h[i]); + } return set_error(ERR_OK); } @@ -390,6 +429,14 @@ int cpuid_serialize_raw_data(struct cpu_raw_data_t* data, const char* filename) fprintf(f, "intel_fn11[%d]=%08x %08x %08x %08x\n", i, data->intel_fn11[i][0], data->intel_fn11[i][1], data->intel_fn11[i][2], data->intel_fn11[i][3]); + for (i = 0; i < MAX_INTELFN12H_LEVEL; i++) + fprintf(f, "intel_fn12h[%d]=%08x %08x %08x %08x\n", i, + data->intel_fn12h[i][0], data->intel_fn12h[i][1], + data->intel_fn12h[i][2], data->intel_fn12h[i][3]); + for (i = 0; i < MAX_INTELFN14H_LEVEL; i++) + fprintf(f, "intel_fn14h[%d]=%08x %08x %08x %08x\n", i, + data->intel_fn14h[i][0], data->intel_fn14h[i][1], + data->intel_fn14h[i][2], data->intel_fn14h[i][3]); if (strcmp(filename, "")) fclose(f); @@ -434,10 +481,12 @@ int cpuid_deserialize_raw_data(struct cpu_raw_data_t* data, const char* filename recognized = 1; } syntax = 1; - syntax = syntax && parse_token("basic_cpuid", token, value, data->basic_cpuid, 32, &recognized); - syntax = syntax && parse_token("ext_cpuid", token, value, data->ext_cpuid, 32, &recognized); - syntax = syntax && parse_token("intel_fn4", token, value, data->intel_fn4, 4, &recognized); - syntax = syntax && parse_token("intel_fn11", token, value, data->intel_fn11, 4, &recognized); + syntax = syntax && parse_token("basic_cpuid", token, value, data->basic_cpuid, MAX_CPUID_LEVEL, &recognized); + syntax = syntax && parse_token("ext_cpuid", token, value, data->ext_cpuid, MAX_EXT_CPUID_LEVEL, &recognized); + syntax = syntax && parse_token("intel_fn4", token, value, data->intel_fn4, MAX_INTELFN4_LEVEL, &recognized); + syntax = syntax && parse_token("intel_fn11", token, value, data->intel_fn11, MAX_INTELFN11_LEVEL, &recognized); + syntax = syntax && parse_token("intel_fn12h", token, value, data->intel_fn12h, MAX_INTELFN12H_LEVEL, &recognized); + syntax = syntax && parse_token("intel_fn14h", token, value, data->intel_fn14h, MAX_INTELFN14H_LEVEL, &recognized); if (!syntax) { warnf("Error: %s:%d: Syntax error\n", filename, cur_line); fclose(f); 
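
cpuid_vendor_identify() above assembles the canonical 12-byte vendor string by concatenating the EBX, EDX and ECX registers of CPUID leaf 0, in that order. A self-contained illustration with a hard-coded raw leaf (the values spell "GenuineIntel" and are for demonstration only):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    /// raw_vendor[0..3] mimic EAX, EBX, ECX, EDX as returned by CPUID leaf 0.
    uint32_t raw_vendor[4] = {0x16, 0x756e6547, 0x6c65746e, 0x49656e69};

    char vendor_str[13];
    memcpy(vendor_str + 0, &raw_vendor[1], 4); /// EBX -> "Genu"
    memcpy(vendor_str + 4, &raw_vendor[3], 4); /// EDX -> "ineI"
    memcpy(vendor_str + 8, &raw_vendor[2], 4); /// ECX -> "ntel"
    vendor_str[12] = 0;

    printf("%s\n", vendor_str); /// prints "GenuineIntel" on little-endian hosts
    return 0;
}
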
@@ -453,7 +502,7 @@ int cpuid_deserialize_raw_data(struct cpu_raw_data_t* data, const char* filename return set_error(ERR_OK); } -int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data) +int cpu_ident_internal(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal) { int r; struct cpu_raw_data_t myraw; @@ -467,10 +516,10 @@ int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data) return set_error(r); switch (data->vendor) { case VENDOR_INTEL: - r = cpuid_identify_intel(raw, data); + r = cpuid_identify_intel(raw, data, internal); break; case VENDOR_AMD: - r = cpuid_identify_amd(raw, data); + r = cpuid_identify_amd(raw, data, internal); break; default: break; @@ -478,6 +527,12 @@ int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data) return set_error(r); } +int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data) +{ + struct internal_id_info_t throwaway; + return cpu_ident_internal(raw, data, &throwaway); +} + const char* cpu_feature_str(cpu_feature_t feature) { const struct { cpu_feature_t feature; const char* name; } @@ -577,6 +632,21 @@ const char* cpu_feature_str(cpu_feature_t feature) { CPU_FEATURE_PFI, "pfi" }, { CPU_FEATURE_PA, "pa" }, { CPU_FEATURE_AVX2, "avx2" }, + { CPU_FEATURE_BMI1, "bmi1" }, + { CPU_FEATURE_BMI2, "bmi2" }, + { CPU_FEATURE_HLE, "hle" }, + { CPU_FEATURE_RTM, "rtm" }, + { CPU_FEATURE_AVX512F, "avx512f" }, + { CPU_FEATURE_AVX512DQ, "avx512dq" }, + { CPU_FEATURE_AVX512PF, "avx512pf" }, + { CPU_FEATURE_AVX512ER, "avx512er" }, + { CPU_FEATURE_AVX512CD, "avx512cd" }, + { CPU_FEATURE_SHA_NI, "sha_ni" }, + { CPU_FEATURE_AVX512BW, "avx512bw" }, + { CPU_FEATURE_AVX512VL, "avx512vl" }, + { CPU_FEATURE_SGX, "sgx" }, + { CPU_FEATURE_RDSEED, "rdseed" }, + { CPU_FEATURE_ADX, "adx" }, }; unsigned i, n = COUNT_OF(matchtable); if (n != NUM_CPU_FEATURES) { @@ -600,6 +670,15 @@ const char* cpuid_error(void) { ERR_BADFMT , "Bad file format"}, { ERR_NOT_IMP , "Not implemented"}, { ERR_CPU_UNKN , "Unsupported processor"}, + { ERR_NO_RDMSR , "RDMSR instruction is not supported"}, + { ERR_NO_DRIVER, "RDMSR driver error (generic)"}, + { ERR_NO_PERMS , "No permissions to install RDMSR driver"}, + { ERR_EXTRACT , "Cannot extract RDMSR driver (read only media?)"}, + { ERR_HANDLE , "Bad handle"}, + { ERR_INVMSR , "Invalid MSR"}, + { ERR_INVCNB , "Invalid core number"}, + { ERR_HANDLE_R , "Error on handle read"}, + { ERR_INVRANGE , "Invalid given range"}, }; unsigned i; for (i = 0; i < COUNT_OF(matchtable); i++) @@ -626,6 +705,23 @@ void cpuid_set_verbosiness_level(int level) _current_verboselevel = level; } +cpu_vendor_t cpuid_get_vendor(void) +{ + static cpu_vendor_t vendor = VENDOR_UNKNOWN; + uint32_t raw_vendor[4]; + char vendor_str[VENDOR_STR_MAX]; + + if(vendor == VENDOR_UNKNOWN) { + if (!cpuid_present()) + set_error(ERR_NO_CPUID); + else { + cpu_exec_cpuid(0, raw_vendor); + vendor = cpuid_vendor_identify(raw_vendor, vendor_str); + } + } + return vendor; +} + void cpuid_get_cpu_list(cpu_vendor_t vendor, struct cpu_list_t* list) { switch (vendor) { diff --git a/contrib/libcpuid/include/libcpuid/intel_code_t.h b/contrib/libcpuid/include/libcpuid/intel_code_t.h new file mode 100644 index 00000000000..c50ec9c5a83 --- /dev/null +++ b/contrib/libcpuid/include/libcpuid/intel_code_t.h @@ -0,0 +1,83 @@ +/* + * Copyright 2016 Veselin Georgiev, + * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the 
following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This file contains a list of internal codes we use in detection. It is + * of no external use and isn't a complete list of intel products. + */ + CODE2(PENTIUM, 2000), + CODE(MOBILE_PENTIUM), + + CODE(XEON), + CODE(XEON_IRWIN), + CODE(XEONMP), + CODE(XEON_POTOMAC), + CODE(XEON_I7), + CODE(XEON_GAINESTOWN), + CODE(XEON_WESTMERE), + + CODE(MOBILE_PENTIUM_M), + CODE(CELERON), + CODE(MOBILE_CELERON), + CODE(NOT_CELERON), + + + CODE(CORE_SOLO), + CODE(MOBILE_CORE_SOLO), + CODE(CORE_DUO), + CODE(MOBILE_CORE_DUO), + + CODE(WOLFDALE), + CODE(MEROM), + CODE(PENRYN), + CODE(QUAD_CORE), + CODE(DUAL_CORE_HT), + CODE(QUAD_CORE_HT), + CODE(MORE_THAN_QUADCORE), + CODE(PENTIUM_D), + + CODE(ATOM_UNKNOWN), + CODE(ATOM_SILVERTHORNE), + CODE(ATOM_DIAMONDVILLE), + CODE(ATOM_PINEVIEW), + CODE(ATOM_CEDARVIEW), + + CODE(CORE_I3), + CODE(CORE_I5), + CODE(CORE_I7), + CODE(CORE_IVY3), /* 22nm Core-iX */ + CODE(CORE_IVY5), + CODE(CORE_IVY7), + CODE(CORE_HASWELL3), /* 22nm Core-iX, Haswell */ + CODE(CORE_HASWELL5), + CODE(CORE_HASWELL7), + CODE(CORE_BROADWELL3), /* 14nm Core-iX, Broadwell */ + CODE(CORE_BROADWELL5), + CODE(CORE_BROADWELL7), + CODE(CORE_SKYLAKE3), /* 14nm Core-iX, Skylake */ + CODE(CORE_SKYLAKE5), + CODE(CORE_SKYLAKE7), + diff --git a/contrib/libcpuid/include/libcpuid/libcpuid.h b/contrib/libcpuid/include/libcpuid/libcpuid.h index b78b0d6f514..866c0e8441d 100644 --- a/contrib/libcpuid/include/libcpuid/libcpuid.h +++ b/contrib/libcpuid/include/libcpuid/libcpuid.h @@ -26,25 +26,37 @@ #ifndef __LIBCPUID_H__ #define __LIBCPUID_H__ /** - * @File libcpuid.h - * @Author Veselin Georgiev - * @Date Oct 2008 - * @Version 0.2.1 + * \file libcpuid.h + * \author Veselin Georgiev + * \date Oct 2008 + * \version 0.4.0 * * Version history: * - * 0.1.0 (2008-10-15): initial adaptation from wxfractgui sources - * 0.1.1 (2009-07-06): Added intel_fn11 fields to cpu_raw_data_t to handle - * new processor topology enumeration required on Core i7 - * 0.1.2 (2009-09-26): Added support for MSR reading through self-extracting - * kernel driver on Win32. - * 0.1.3 (2010-04-20): Added support for greater more accurate CPU clock - * measurements with cpu_clock_by_ic() - * 0.2.0 (2011-10-11): Support for AMD Bulldozer CPUs, 128-bit SSE unit size - * checking. A backwards-incompatible change, since the - * sizeof cpu_id_t is now different. 
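
The new amd_code_t.h / intel_code_t.h headers above are X-macro lists: the including file defines CODE/CODE2 and then pulls the list in, so one list can expand to several artifacts (libcpuid_internal.h, further below, expands it into the amd_code_t and intel_code_t enums). A self-contained miniature of the pattern, with invented entries and the commas moved into the macros:

#include <cstdio>

#define EXAMPLE_CODES \
    CODE2(PENTIUM, 2000) \
    CODE(XEON) \
    CODE(CELERON)

/// First expansion: enum members.
#define CODE(x) x,
#define CODE2(x, y) x = y,
enum example_code_t { EXAMPLE_CODES };
#undef CODE
#undef CODE2

/// Second expansion: printable names for the same list.
#define CODE(x) #x,
#define CODE2(x, y) #x,
static const char * example_code_names[] = { EXAMPLE_CODES };
#undef CODE
#undef CODE2

int main()
{
    printf("%s = %d\n", example_code_names[0], PENTIUM); /// PENTIUM = 2000
    return 0;
}
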
- * 0.2.1 (2012-05-26): Support for Ivy Bridge, and detecting the presence of - * the RdRand instruction. + * * 0.1.0 (2008-10-15): initial adaptation from wxfractgui sources + * * 0.1.1 (2009-07-06): Added intel_fn11 fields to cpu_raw_data_t to handle + * new processor topology enumeration required on Core i7 + * * 0.1.2 (2009-09-26): Added support for MSR reading through self-extracting + * kernel driver on Win32. + * * 0.1.3 (2010-04-20): Added support for greater more accurate CPU clock + * measurements with cpu_clock_by_ic() + * * 0.2.0 (2011-10-11): Support for AMD Bulldozer CPUs, 128-bit SSE unit size + * checking. A backwards-incompatible change, since the + * sizeof cpu_id_t is now different. + * * 0.2.1 (2012-05-26): Support for Ivy Bridge, and detecting the presence of + * the RdRand instruction. + * * 0.2.2 (2015-11-04): Support for newer processors up to Haswell and Vishera. + * Fix clock detection in cpu_clock_by_ic() for Bulldozer. + * More entries supported in cpu_msrinfo(). + * *BSD and Solaris support (unofficial). + * * 0.3.0 (2016-07-09): Support for Skylake; MSR ops in FreeBSD; INFO_VOLTAGE + * for AMD CPUs. Level 4 cache support for Crystalwell + * (a backwards-incompatible change since the sizeof + * cpu_raw_data_t is now different). + * * 0.4.0 (2016-09-30): Better detection of AMD clock multiplier with msrinfo. + * Support for Intel SGX detection + * (a backwards-incompatible change since the sizeof + * cpu_raw_data_t and cpu_id_t is now different). */ /** @mainpage A simple libcpuid introduction @@ -57,6 +69,15 @@ * To fetch the CPUID info needed for CPU identification, use * \ref cpuid_get_raw_data
* To make sense of that data (decode, extract features), use \ref cpu_identify
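
A minimal sketch of those two steps together (error handling trimmed to the essentials; uses only the entry points named above):

#include <cstdio>
#include "libcpuid.h"

int main()
{
    struct cpu_raw_data_t raw;
    struct cpu_id_t data;

    if (!cpuid_present())
        return 1; /// the CPUID instruction itself is unavailable

    if (cpuid_get_raw_data(&raw) < 0 || cpu_identify(&raw, &data) < 0)
    {
        printf("identification failed: %s\n", cpuid_error());
        return 1;
    }

    printf("%s, codename %s, %d logical CPUs\n",
           data.vendor_str, data.cpu_codename, data.num_logical_cpus);
    return 0;
}
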
+ * To detect the CPU speed, use either \ref cpu_clock, \ref cpu_clock_by_os,
+ * \ref cpu_tsc_mark + \ref cpu_tsc_unmark + \ref cpu_clock_by_mark,
+ * \ref cpu_clock_measure or \ref cpu_clock_by_ic.
+ * Read each method's documentation carefully to weigh its pros and cons.
+ *
+ * To read MSRs, use \ref cpu_msr_driver_open to get a handle, and then
+ * \ref cpu_rdmsr to read individual registers. Some MSR decoding is available on recent
+ * CPUs, and can be queried through \ref cpu_msrinfo; the various types of queries
+ * are described in \ref cpu_msrinfo_request_t.
 *

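
And a matching sketch for the MSR path just described (the MSR index is an arbitrary example, 0x10 being IA32_TIME_STAMP_COUNTER; opening the driver typically requires elevated privileges):

#include <cstdio>
#include "libcpuid.h"

int main()
{
    struct msr_driver_t * drv = cpu_msr_driver_open();
    if (!drv)
    {
        printf("cannot open MSR driver: %s\n", cpuid_error());
        return 1;
    }

    uint64_t value = 0;
    if (cpu_rdmsr(drv, 0x10, &value) == 0)
        printf("TSC = %llu\n", (unsigned long long) value);

    cpu_msr_driver_close(drv);
    return 0;
}
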
*/ @@ -116,6 +137,81 @@ struct cpu_raw_data_t { enumeration leaf), this stores the result of CPUID with eax = 11 and ecx = 0, 1, 2... */ uint32_t intel_fn11[MAX_INTELFN11_LEVEL][4]; + + /** when the CPU is intel and supports leaf 12h (SGX enumeration leaf), + * this stores the result of CPUID with eax = 0x12 and + * ecx = 0, 1, 2... */ + uint32_t intel_fn12h[MAX_INTELFN12H_LEVEL][4]; + + /** when the CPU is intel and supports leaf 14h (Intel Processor Trace + * capabilities leaf). + * this stores the result of CPUID with eax = 0x12 and + * ecx = 0, 1, 2... */ + uint32_t intel_fn14h[MAX_INTELFN14H_LEVEL][4]; +}; + +/** + * @brief This contains information about SGX features of the processor + * Example usage: + * @code + * ... + * struct cpu_raw_data_t raw; + * struct cpu_id_t id; + * + * if (cpuid_get_raw_data(&raw) == 0 && cpu_identify(&raw, &id) == 0 && id.sgx.present) { + * printf("SGX is present.\n"); + * printf("SGX1 instructions: %s.\n", id.sgx.flags[INTEL_SGX1] ? "present" : "absent"); + * printf("SGX2 instructions: %s.\n", id.sgx.flags[INTEL_SGX2] ? "present" : "absent"); + * printf("Max 32-bit enclave size: 2^%d bytes.\n", id.sgx.max_enclave_32bit); + * printf("Max 64-bit enclave size: 2^%d bytes.\n", id.sgx.max_enclave_64bit); + * for (int i = 0; i < id.sgx.num_epc_sections; i++) { + * struct cpu_epc_t epc = cpuid_get_epc(i, NULL); + * printf("EPC section #%d: address = %x, size = %d bytes.\n", epc.address, epc.size); + * } + * } else { + * printf("SGX is not present.\n"); + * } + * @endcode + */ +struct cpu_sgx_t { + /** Whether SGX is present (boolean) */ + uint32_t present; + + /** Max enclave size in 32-bit mode. This is a power-of-two value: + * if it is "31", then the max enclave size is 2^31 bytes (2 GiB). + */ + uint8_t max_enclave_32bit; + + /** Max enclave size in 64-bit mode. This is a power-of-two value: + * if it is "36", then the max enclave size is 2^36 bytes (64 GiB). + */ + uint8_t max_enclave_64bit; + + /** + * contains SGX feature flags. See the \ref cpu_sgx_feature_t + * "INTEL_SGX*" macros below. + */ + uint8_t flags[SGX_FLAGS_MAX]; + + /** number of Enclave Page Cache (EPC) sections. Info for each + * section is available through the \ref cpuid_get_epc() function + */ + int num_epc_sections; + + /** bit vector of the supported extended features that can be written + * to the MISC region of the SSA (Save State Area) + */ + uint32_t misc_select; + + /** a bit vector of the attributes that can be set to SECS.ATTRIBUTES + * via ECREATE. Corresponds to bits 0-63 (incl.) of SECS.ATTRIBUTES. + */ + uint64_t secs_attributes; + + /** a bit vector of the bits that can be set in the XSAVE feature + * request mask; Corresponds to bits 64-127 of SECS.ATTRIBUTES. + */ + uint64_t secs_xfrm; }; /** @@ -133,7 +229,8 @@ struct cpu_id_t { /** * contain CPU flags. Used to test for features. See - * the CPU_FEATURE_* macros below. @see Features + * the \ref cpu_feature_t "CPU_FEATURE_*" macros below. + * @see Features */ uint8_t flags[CPU_FLAGS_MAX]; @@ -164,11 +261,17 @@ struct cpu_id_t { /** * The total number of logical processors. + * The same value is availabe through \ref cpuid_get_total_cpus. * * This is num_logical_cpus * {total physical processors in the system} + * (but only on a real system, under a VM this number may be lower). * * If you're writing a multithreaded program and you want to run it on * all CPUs, this is the number of threads you need. + * + * @note in a VM, this will exactly match the number of CPUs set in + * the VM's configuration. 
+ * */ int32_t total_logical_cpus; @@ -194,6 +297,9 @@ struct cpu_id_t { /** L3 cache size in KB. Zero on most systems */ int32_t l3_cache; + + /** L4 cache size in KB. Zero on most systems */ + int32_t l4_cache; /** Cache associativity for the L1 data cache. -1 if undetermined */ int32_t l1_assoc; @@ -203,6 +309,9 @@ struct cpu_id_t { /** Cache associativity for the L3 cache. -1 if undetermined */ int32_t l3_assoc; + + /** Cache associativity for the L4 cache. -1 if undetermined */ + int32_t l4_assoc; /** Cache-line size for L1 data cache. -1 if undetermined */ int32_t l1_cacheline; @@ -213,6 +322,9 @@ struct cpu_id_t { /** Cache-line size for L3 cache. -1 if undetermined */ int32_t l3_cacheline; + /** Cache-line size for L4 cache. -1 if undetermined */ + int32_t l4_cacheline; + /** * The brief and human-friendly CPU codename, which was recognized.
* Examples: @@ -234,9 +346,13 @@ struct cpu_id_t { /** * contain miscellaneous detection information. Used to test about specifics of - * certain detected features. See CPU_HINT_* macros below. @see Hints + * certain detected features. See \ref cpu_hint_t "CPU_HINT_*" macros below. + * @see Hints */ uint8_t detection_hints[CPU_HINTS_MAX]; + + /** contains information about SGX features if the processor, if present */ + struct cpu_sgx_t sgx; }; /** @@ -355,6 +471,21 @@ typedef enum { CPU_FEATURE_PFI, /*!< Processor Feedback Interface support */ CPU_FEATURE_PA, /*!< Processor accumulator */ CPU_FEATURE_AVX2, /*!< AVX2 instructions */ + CPU_FEATURE_BMI1, /*!< BMI1 instructions */ + CPU_FEATURE_BMI2, /*!< BMI2 instructions */ + CPU_FEATURE_HLE, /*!< Hardware Lock Elision prefixes */ + CPU_FEATURE_RTM, /*!< Restricted Transactional Memory instructions */ + CPU_FEATURE_AVX512F, /*!< AVX-512 Foundation */ + CPU_FEATURE_AVX512DQ, /*!< AVX-512 Double/Quad granular insns */ + CPU_FEATURE_AVX512PF, /*!< AVX-512 Prefetch */ + CPU_FEATURE_AVX512ER, /*!< AVX-512 Exponential/Reciprocal */ + CPU_FEATURE_AVX512CD, /*!< AVX-512 Conflict detection */ + CPU_FEATURE_SHA_NI, /*!< SHA-1/SHA-256 instructions */ + CPU_FEATURE_AVX512BW, /*!< AVX-512 Byte/Word granular insns */ + CPU_FEATURE_AVX512VL, /*!< AVX-512 128/256 vector length extensions */ + CPU_FEATURE_SGX, /*!< SGX extensions. Non-autoritative, check cpu_id_t::sgx::present to verify presence */ + CPU_FEATURE_RDSEED, /*!< RDSEED instruction */ + CPU_FEATURE_ADX, /*!< ADX extensions (arbitrary precision) */ /* termination: */ NUM_CPU_FEATURES, } cpu_feature_t; @@ -370,6 +501,36 @@ typedef enum { NUM_CPU_HINTS, } cpu_hint_t; +/** + * @brief SGX features flags + * \see cpu_sgx_t + * + * Usage: + * @code + * ... + * struct cpu_raw_data_t raw; + * struct cpu_id_t id; + * if (cpuid_get_raw_data(&raw) == 0 && cpu_identify(&raw, &id) == 0 && id.sgx.present) { + * if (id.sgx.flags[INTEL_SGX1]) + * // The CPU has SGX1 instructions support... + * ... + * } else { + * // no SGX + * } + * } else { + * // processor cannot be determined. + * } + * @endcode + */ + +typedef enum { + INTEL_SGX1, /*!< SGX1 instructions support */ + INTEL_SGX2, /*!< SGX2 instructions support */ + + /* termination: */ + NUM_SGX_FEATURES, +} cpu_sgx_feature_t; + /** * @brief Describes common library error codes */ @@ -387,7 +548,10 @@ typedef enum { ERR_NO_PERMS = -10, /*!< "No permissions to install RDMSR driver" */ ERR_EXTRACT = -11, /*!< "Cannot extract RDMSR driver (read only media?)" */ ERR_HANDLE = -12, /*!< "Bad handle" */ - ERR_INVMSR = -13, /*!< "Invalid MSR" */ + ERR_INVMSR = -13, /*!< "Invalid MSR" */ + ERR_INVCNB = -14, /*!< "Invalid core number" */ + ERR_HANDLE_R = -15, /*!< "Error on handle read" */ + ERR_INVRANGE = -16, /*!< "Invalid given range" */ } cpu_error_t; /** @@ -400,8 +564,14 @@ struct cpu_mark_t { }; /** - * @brief Returns the total number of CPUs even if CPUID is not present - * @retval Number of CPUs available + * @brief Returns the total number of logical CPU threads (even if CPUID is not present). + * + * Under VM, this number (and total_logical_cpus, since they are fetched with the same code) + * may be nonsensical, i.e. might not equal NumPhysicalCPUs*NumCoresPerCPU*HyperThreading. + * This is because no matter how many logical threads the host machine has, you may limit them + * in the VM to any number you like. **This** is the number returned by cpuid_get_total_cpus(). + * + * @returns Number of logical CPU threads available. 
Equals the \ref cpu_id_t::total_logical_cpus. */ int cpuid_get_total_cpus(void); @@ -713,6 +883,32 @@ int cpu_clock_by_ic(int millis, int runs); */ int cpu_clock(void); + +/** + * @brief The return value of cpuid_get_epc(). + * @details + * Describes an EPC (Enclave Page Cache) layout (physical address and size). + * A CPU may have one or more EPC areas, and information about each is + * fetched via \ref cpuid_get_epc. + */ +struct cpu_epc_t { + uint64_t start_addr; + uint64_t length; +}; + +/** + * @brief Fetches information about an EPC (Enclave Page Cache) area. + * @param index - zero-based index, valid range [0..cpu_id_t.egx.num_epc_sections) + * @param raw - a pointer to fetched raw CPUID data. Needed only for testing, + * you can safely pass NULL here (if you pass a real structure, + * it will be used for fetching the leaf 12h data if index < 2; + * otherwise the real CPUID instruction will be used). + * @returns the requested data. If the CPU doesn't support SGX, or if + * index >= cpu_id_t.egx.num_epc_sections, both fields of the returned + * structure will be zeros. + */ +struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw); + /** * @brief Returns the libcpuid version * @@ -750,6 +946,14 @@ libcpuid_warn_fn_t cpuid_set_warn_function(libcpuid_warn_fn_t warn_fun); void cpuid_set_verbosiness_level(int level); +/** + * @brief Obtains the CPU vendor from CPUID from the current CPU + * @note The result is cached. + * @returns VENDOR_UNKNOWN if failed, otherwise the CPU vendor type. + * @see cpu_vendor_t + */ +cpu_vendor_t cpuid_get_vendor(void); + /** * @brief a structure that holds a list of processor names */ @@ -788,6 +992,7 @@ void cpuid_get_cpu_list(cpu_vendor_t vendor, struct cpu_list_t* list); */ void cpuid_free_cpu_list(struct cpu_list_t* list); +struct msr_driver_t; /** * @brief Starts/opens a driver, needed to read MSRs (Model Specific Registers) * @@ -799,9 +1004,23 @@ void cpuid_free_cpu_list(struct cpu_list_t* list); * The error message can be obtained by calling \ref cpuid_error. * @see cpu_error_t */ -struct msr_driver_t; struct msr_driver_t* cpu_msr_driver_open(void); +/** + * @brief Similar to \ref cpu_msr_driver_open, but accept one parameter + * + * This function works on certain operating systems (GNU/Linux, FreeBSD) + * + * @param core_num specify the core number for MSR. + * The first core number is 0. + * The last core number is \ref cpuid_get_total_cpus - 1. + * + * @returns a handle to the driver on success, and NULL on error. + * The error message can be obtained by calling \ref cpuid_error. + * @see cpu_error_t + */ +struct msr_driver_t* cpu_msr_driver_open_core(unsigned core_num); + /** * @brief Reads a Model-Specific Register (MSR) * @@ -826,26 +1045,51 @@ struct msr_driver_t* cpu_msr_driver_open(void); * The error message can be obtained by calling \ref cpuid_error. * @see cpu_error_t */ -int cpu_rdmsr(struct msr_driver_t* handle, int msr_index, uint64_t* result); +int cpu_rdmsr(struct msr_driver_t* handle, uint32_t msr_index, uint64_t* result); typedef enum { INFO_MPERF, /*!< Maximum performance frequency clock. This is a counter, which increments as a - proportion of the actual processor speed */ + proportion of the actual processor speed. */ INFO_APERF, /*!< Actual performance frequency clock. This accumulates the core clock counts when the core is active. */ + INFO_MIN_MULTIPLIER, /*!< Minimum CPU:FSB ratio for this CPU, + multiplied by 100. */ INFO_CUR_MULTIPLIER, /*!< Current CPU:FSB ratio, multiplied by 100. 
e.g., a CPU:FSB value of 18.5 reads as - 1850. */ - INFO_MAX_MULTIPLIER, /*!< Maxumum CPU:FSB ratio for this CPU, - multiplied by 100 */ - INFO_TEMPERATURE, /*!< The current core temperature in Celsius */ + "1850". */ + INFO_MAX_MULTIPLIER, /*!< Maximum CPU:FSB ratio for this CPU, + multiplied by 100. */ + INFO_TEMPERATURE, /*!< The current core temperature in Celsius. */ INFO_THROTTLING, /*!< 1 if the current logical processor is throttling. 0 if it is running normally. */ + INFO_VOLTAGE, /*!< The current core voltage in Volt, + multiplied by 100. */ + INFO_BCLK, /*!< See \ref INFO_BUS_CLOCK. */ + INFO_BUS_CLOCK, /*!< The main bus clock in MHz, + e.g., FSB/QPI/DMI/HT base clock, + multiplied by 100. */ } cpu_msrinfo_request_t; +/** + * @brief Similar to \ref cpu_rdmsr, but extract a range of bits + * + * @param handle - a handle to the MSR reader driver, as created by + * cpu_msr_driver_open + * @param msr_index - the numeric ID of the MSR you want to read + * @param highbit - the high bit in range, must be inferior to 64 + * @param lowbit - the low bit in range, must be equal or superior to 0 + * @param result - a pointer to a 64-bit integer, where the MSR value is stored + * + * @returns zero if successful, and some negative number on error. + * The error message can be obtained by calling \ref cpuid_error. + * @see cpu_error_t + */ +int cpu_rdmsr_range(struct msr_driver_t* handle, uint32_t msr_index, uint8_t highbit, + uint8_t lowbit, uint64_t* result); + /** * @brief Reads extended CPU information from Model-Specific Registers. * @param handle - a handle to an open MSR driver, @see cpu_msr_driver_open diff --git a/contrib/libcpuid/include/libcpuid/libcpuid_constants.h b/contrib/libcpuid/include/libcpuid/libcpuid_constants.h index 8af4718c906..3ddb6d5e14e 100644 --- a/contrib/libcpuid/include/libcpuid/libcpuid_constants.h +++ b/contrib/libcpuid/include/libcpuid/libcpuid_constants.h @@ -37,8 +37,11 @@ #define CPU_FLAGS_MAX 128 #define MAX_CPUID_LEVEL 32 #define MAX_EXT_CPUID_LEVEL 32 -#define MAX_INTELFN4_LEVEL 4 +#define MAX_INTELFN4_LEVEL 8 #define MAX_INTELFN11_LEVEL 4 +#define MAX_INTELFN12H_LEVEL 4 +#define MAX_INTELFN14H_LEVEL 4 #define CPU_HINTS_MAX 16 +#define SGX_FLAGS_MAX 14 #endif /* __LIBCPUID_CONSTANTS_H__ */ diff --git a/contrib/libcpuid/include/libcpuid/libcpuid_internal.h b/contrib/libcpuid/include/libcpuid/libcpuid_internal.h new file mode 100644 index 00000000000..95528896219 --- /dev/null +++ b/contrib/libcpuid/include/libcpuid/libcpuid_internal.h @@ -0,0 +1,63 @@ +/* + * Copyright 2016 Veselin Georgiev, + * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef __LIBCPUID_INTERNAL_H__
+#define __LIBCPUID_INTERNAL_H__
+/*
+ * This file contains internal undocumented declarations and function prototypes
+ * for the workings of the internal library infrastructure.
+ */
+
+enum _common_codes_t {
+	NA = 0,
+	NO_CODE,
+};
+
+#define CODE(x) x
+#define CODE2(x, y) x = y
+enum _amd_code_t {
+	#include "amd_code_t.h"
+};
+typedef enum _amd_code_t amd_code_t;
+
+enum _intel_code_t {
+	#include "intel_code_t.h"
+};
+typedef enum _intel_code_t intel_code_t;
+#undef CODE
+#undef CODE2
+
+struct internal_id_info_t {
+	union {
+		amd_code_t amd;
+		intel_code_t intel;
+	} code;
+	int score; // detection (matchtable) score
+};
+
+int cpu_ident_internal(struct cpu_raw_data_t* raw, struct cpu_id_t* data,
+                       struct internal_id_info_t* internal);
+
+#endif /* __LIBCPUID_INTERNAL_H__ */
diff --git a/contrib/libcpuid/include/libcpuid/libcpuid_types.h b/contrib/libcpuid/include/libcpuid/libcpuid_types.h
index 6dd18a97724..f1181edf2ee 100644
--- a/contrib/libcpuid/include/libcpuid/libcpuid_types.h
+++ b/contrib/libcpuid/include/libcpuid/libcpuid_types.h
@@ -32,6 +32,37 @@
 #ifndef __LIBCPUID_TYPES_H__
 #define __LIBCPUID_TYPES_H__
 
+//#ifdef HAVE_CONFIG_H // CLICKHOUSE PATCH
+# include "config.h"
+//#endif // CLICKHOUSE PATCH
+
+#if 1 // CLICKHOUSE PATCH
+//#if defined(HAVE_STDINT_H) // CLICKHOUSE PATCH
 # include <stdint.h>
+#else
+/* we have to provide our own: */
+# if !defined(HAVE_INT32_T) && !defined(__int32_t_defined)
+typedef int int32_t;
+# endif
+
+# if !defined(HAVE_UINT32_T) && !defined(__uint32_t_defined)
+typedef unsigned uint32_t;
+# endif
+
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned short uint16_t;
+#if (defined _MSC_VER) && (_MSC_VER <= 1300)
+	/* MSVC 6.0: no long longs ... */
+	typedef signed __int64 int64_t;
+	typedef unsigned __int64 uint64_t;
+#else
+	/* all other sane compilers: */
+	typedef signed long long int64_t;
+	typedef unsigned long long uint64_t;
+#endif
+
+#endif
 
 #endif /* __LIBCPUID_TYPES_H__ */
diff --git a/contrib/libcpuid/include/libcpuid/libcpuid_util.c b/contrib/libcpuid/include/libcpuid/libcpuid_util.c
index 36e83c84b4a..ea6b1b8dfb4 100644
--- a/contrib/libcpuid/include/libcpuid/libcpuid_util.c
+++ b/contrib/libcpuid/include/libcpuid/libcpuid_util.c
@@ -38,7 +38,7 @@ void match_features(const struct feature_map_t* matchtable, int count, uint32_t
 {
 	int i;
 	for (i = 0; i < count; i++)
-		if (reg & (1U << matchtable[i].bit))
+		if (reg & (1u << matchtable[i].bit))
 			data->flags[matchtable[i].feature] = 1;
 }
 
@@ -78,20 +78,20 @@ static int score(const struct match_entry_t* entry, const struct cpu_id_t* data,
                  int brand_code, int model_code)
 {
 	int res = 0;
-	if (entry->family     == data->family    ) res++;
-	if (entry->model      == data->model     ) res++;
-	if (entry->stepping   == data->stepping  ) res++;
-	if (entry->ext_family == data->ext_family) res++;
-	if (entry->ext_model  == data->ext_model ) res++;
-	if (entry->ncores     == data->num_cores ) res++;
-	if (entry->l2cache    == data->l2_cache  ) res++;
-	if (entry->l3cache    == data->l3_cache  ) res++;
-	if (entry->brand_code == brand_code      ) res++;
-	if (entry->model_code == model_code      ) res++;
+	if (entry->family     == data->family    ) res += 2;
+	if (entry->model      == data->model     ) res += 2;
+	if (entry->stepping   == data->stepping  ) res += 2;
+	if (entry->ext_family == data->ext_family) res += 2;
+	if (entry->ext_model  == data->ext_model ) res += 2;
+	if (entry->ncores     == data->num_cores ) res += 2;
+	if (entry->l2cache    == data->l2_cache  ) res += 1;
+	if (entry->l3cache    == data->l3_cache  ) res += 1;
+	if (entry->brand_code == brand_code      ) res += 2;
+	if (entry->model_code == model_code      ) res += 2;
 	return res;
 }
 
-void match_cpu_codename(const struct match_entry_t* matchtable, int count,
+int match_cpu_codename(const struct match_entry_t* matchtable, int count,
                         struct cpu_id_t* data, int brand_code, int model_code)
 {
 	int bestscore = -1;
@@ -112,6 +112,7 @@ void match_cpu_codename(const struct match_entry_t* matchtable, int count,
 		}
 	}
 	strcpy(data->cpu_codename, matchtable[bestindex].name);
+	return bestscore;
 }
 
 void generic_get_cpu_list(const struct match_entry_t* matchtable, int count,
@@ -129,7 +130,11 @@ void generic_get_cpu_list(const struct match_entry_t* matchtable, int count,
 				break;
 			}
 		if (!good) continue;
+#if defined(_MSC_VER)
+		list->names[n++] = _strdup(matchtable[i].name);
+#else
 		list->names[n++] = strdup(matchtable[i].name);
+#endif
 	}
 	list->num_entries = n;
 }
diff --git a/contrib/libcpuid/include/libcpuid/libcpuid_util.h b/contrib/libcpuid/include/libcpuid/libcpuid_util.h
index 34e1efe960a..22becea26f6 100644
--- a/contrib/libcpuid/include/libcpuid/libcpuid_util.h
+++ b/contrib/libcpuid/include/libcpuid/libcpuid_util.h
@@ -42,7 +42,8 @@ struct match_entry_t {
 	char name[32];
 };
 
-void match_cpu_codename(const struct match_entry_t* matchtable, int count,
+// returns the match score:
+int match_cpu_codename(const struct match_entry_t* matchtable, int count,
                         struct cpu_id_t* data, int brand_code, int model_code);
 
 void warnf(const char* format, ...)
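
For orientation, here is a minimal usage sketch of the public entry points added by the libcpuid hunks above (cpuid_get_epc, cpuid_get_vendor, cpu_msr_driver_open_core, the widened cpu_rdmsr, and cpu_rdmsr_range). It is separate from the patch itself and rests on assumptions: a build of libcpuid that includes these changes, a header reachable as "libcpuid.h", and MSR index 0xE8 (IA32_APERF on Intel CPUs), chosen purely for illustration. As the rdmsr.c implementation further below shows, MSR access also needs root and the msr (Linux) or cpuctl (FreeBSD) kernel driver.

/* Sketch only, not part of the patch: exercises the APIs declared above. */
#include <stdio.h>
#include <stdint.h>
#include "libcpuid.h"   /* assumed install path of the public header */

int main(void)
{
	/* Vendor detection runs CPUID once; the library caches the result. */
	if (cpuid_get_vendor() == VENDOR_UNKNOWN)
		fprintf(stderr, "cannot determine CPU vendor\n");

	/* On non-SGX CPUs (or for an out-of-range index) both fields are zero. */
	struct cpu_epc_t epc = cpuid_get_epc(0, NULL);
	if (epc.length != 0)
		printf("EPC section 0: %llu bytes at 0x%llx\n",
		       (unsigned long long) epc.length,
		       (unsigned long long) epc.start_addr);

	/* Open the MSR driver pinned to core 0 (valid cores: 0..total_cpus-1). */
	struct msr_driver_t* drv = cpu_msr_driver_open_core(0);
	if (!drv) {
		fprintf(stderr, "MSR driver: %s\n", cpuid_error());
		return 1;
	}

	uint64_t value;
	if (cpu_rdmsr(drv, 0xE8, &value) == 0)               /* whole 64-bit register */
		printf("IA32_APERF       = %llu\n", (unsigned long long) value);
	if (cpu_rdmsr_range(drv, 0xE8, 31, 0, &value) == 0)  /* bits 31:0 only */
		printf("IA32_APERF[31:0] = %llu\n", (unsigned long long) value);

	cpu_msr_driver_close(drv);
	return 0;
}

Since cpu_rdmsr_range is documented above as cpu_rdmsr plus bit extraction, the sketch reads the same register both ways: the second call should return the low 32 bits of the first.
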
diff --git a/contrib/libcpuid/include/libcpuid/msrdriver.c b/contrib/libcpuid/include/libcpuid/msrdriver.c new file mode 100644 index 00000000000..8f9d7ed0ca8 --- /dev/null +++ b/contrib/libcpuid/include/libcpuid/msrdriver.c @@ -0,0 +1,593 @@ +/* + * Copyright 2009 Veselin Georgiev, + * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @File msrdriver.c + * @Brief Contains the binary images of the x86 and x64 MSR drivers for Windows + * @Date 2009-09-29 + * + * The driver is courtesy of Nick 'Bombera' Gabareff, and its source is actually + * available, see the contrib/ dir. + * + * However, for simplicity, here we just include the images of the compiled .SYS + * files. 
+ * They are extracted to the filesystem on demand and loaded in the kernel + * by the cpu_msr_driver_open() function + */ +#ifdef _WIN32 +#include "asm-bits.h" +//begin { +int cc_x86driver_code_size = 4608; +uint8_t cc_x86driver_code[4608] = { + 0x4d,0x5a,0x90,0x00,0x03,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0xff,0xff,0x00,0x00,0xb8,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0xc8,0x00,0x00,0x00,0x0e,0x1f,0xba,0x0e,0x00,0xb4,0x09,0xcd, + 0x21,0xb8,0x01,0x4c,0xcd,0x21,0x54,0x68,0x69,0x73,0x20,0x70,0x72,0x6f,0x67,0x72,0x61,0x6d, + 0x20,0x63,0x61,0x6e,0x6e,0x6f,0x74,0x20,0x62,0x65,0x20,0x72,0x75,0x6e,0x20,0x69,0x6e,0x20, + 0x44,0x4f,0x53,0x20,0x6d,0x6f,0x64,0x65,0x2e,0x0d,0x0d,0x0a,0x24,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x9f,0x99,0x48,0xdf,0xdb,0xf8,0x26,0x8c,0xdb,0xf8,0x26,0x8c,0xdb,0xf8,0x26,0x8c, + 0xdb,0xf8,0x27,0x8c,0xdd,0xf8,0x26,0x8c,0x21,0xdb,0x3f,0x8c,0xd8,0xf8,0x26,0x8c,0xfc,0x3e, + 0x57,0x8c,0xda,0xf8,0x26,0x8c,0xfc,0x3e,0x5a,0x8c,0xda,0xf8,0x26,0x8c,0xfc,0x3e,0x5e,0x8c, + 0xda,0xf8,0x26,0x8c,0x52,0x69,0x63,0x68,0xdb,0xf8,0x26,0x8c,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x50,0x45,0x00,0x00,0x4c,0x01,0x07,0x00,0x12,0x9b,0x9b,0x4a,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0xe0,0x00,0x02,0x21,0x0b,0x01,0x08,0x00,0x00,0x06,0x00,0x00,0x00,0x0a, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00, + 0x00,0x00,0x01,0x00,0x00,0x10,0x00,0x00,0x00,0x02,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x04,0x00,0x00, + 0xa9,0xd1,0x00,0x00,0x01,0x00,0x00,0x04,0x00,0x00,0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x00, + 0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x50,0x00,0x00,0x28,0x00,0x00,0x00,0x00,0x60,0x00,0x00,0xc0,0x03, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x70,0x00,0x00,0x30,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x20,0x00,0x00,0x1c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2e,0x74, + 0x65,0x78,0x74,0x00,0x00,0x00,0xa3,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x02,0x00,0x00, + 0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00, + 0x00,0x68,0x2e,0x72,0x64,0x61,0x74,0x61,0x00,0x00,0x62,0x00,0x00,0x00,0x00,0x20,0x00,0x00, + 0x00,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x40,0x00,0x00,0x48,0x2e,0x64,0x61,0x74,0x61,0x00,0x00,0x00,0x04,0x00,0x00,0x00, + 0x00,0x30,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x00,0x00,0xc8,0x50,0x41,0x47,0x45,0x30,0x44,0x45,0x46, + 0x8c,0x00,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00,0x60,0x49,0x4e,0x49,0x54, + 0x00,0x00,0x00,0x00,0xd4,0x00,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x0a, + 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00,0xe2, + 0x2e,0x72,0x73,0x72,0x63,0x00,0x00,0x00,0xc0,0x03,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x04, + 0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x40,0x00,0x00,0x42,0x2e,0x72,0x65,0x6c,0x6f,0x63,0x00,0x00,0x68,0x00,0x00,0x00,0x00,0x70, + 0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x40,0x00,0x00,0x42,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x8b,0x4c, + 0x24,0x08,0x83,0x61,0x18,0x00,0x83,0x61,0x1c,0x00,0x32,0xd2,0xff,0x15,0x08,0x20,0x01,0x00, + 0x33,0xc0,0xc2,0x08,0x00,0x56,0x8b,0x74,0x24,0x0c,0x8b,0x46,0x60,0x81,0x78,0x0c,0x0c,0xe0, + 0x22,0x00,0x57,0x75,0x3c,0x83,0x78,0x04,0x08,0x72,0x36,0x83,0x78,0x08,0x04,0x75,0x07,0x8b, + 0x46,0x0c,0x8b,0x08,0xeb,0x05,0xb9,0x9c,0x01,0x00,0x00,0x8b,0x7e,0x0c,0x0f,0x32,0x89,0x07, + 0x89,0x57,0x04,0xc7,0x46,0x1c,0x08,0x00,0x00,0x00,0x33,0xff,0x32,0xd2,0x8b,0xce,0xff,0x15, + 0x08,0x20,0x01,0x00,0x8b,0xc7,0x5f,0x5e,0xc2,0x08,0x00,0x83,0x66,0x1c,0x00,0xbf,0x01,0x00, + 0x00,0xc0,0x89,0x7e,0x18,0xeb,0xe1,0x55,0x8b,0xec,0x51,0x51,0x8b,0x45,0x08,0xff,0x70,0x04, + 0xff,0x15,0x04,0x20,0x01,0x00,0x68,0x3c,0x20,0x01,0x00,0x8d,0x45,0xf8,0x50,0xff,0x15,0x00, + 0x20,0x01,0x00,0x8d,0x45,0xf8,0x50,0xff,0x15,0x14,0x20,0x01,0x00,0xc9,0xc2,0x04,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x5c,0x50,0x00,0x00,0x74,0x50,0x00,0x00,0x86,0x50,0x00,0x00, + 0x9c,0x50,0x00,0x00,0xb4,0x50,0x00,0x00,0x44,0x50,0x00,0x00,0x00,0x00,0x00,0x00,0x5c,0x00, + 0x44,0x00,0x65,0x00,0x76,0x00,0x69,0x00,0x63,0x00,0x65,0x00,0x5c,0x00,0x54,0x00,0x6d,0x00, + 0x70,0x00,0x52,0x00,0x64,0x00,0x72,0x00,0x00,0x00,0x00,0x00,0x5c,0x00,0x44,0x00,0x6f,0x00, + 0x73,0x00,0x44,0x00,0x65,0x00,0x76,0x00,0x69,0x00,0x63,0x00,0x65,0x00,0x73,0x00,0x5c,0x00, + 0x54,0x00,0x6d,0x00,0x70,0x00,0x52,0x00,0x64,0x00,0x72,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x55,0x8b,0xec,0x83, + 0xec,0x14,0x56,0x8b,0x35,0x00,0x20,0x01,0x00,0x57,0x68,0x1c,0x20,0x01,0x00,0x8d,0x45,0xf4, + 0x50,0xff,0xd6,0x8b,0x7d,0x08,0x8d,0x45,0xfc,0x50,0x6a,0x00,0x6a,0x00,0x6a,0x22,0x8d,0x45, + 0xf4,0x50,0x6a,0x04,0x57,0xff,0x15,0x10,0x20,0x01,0x00,0x85,0xc0,0x75,0x4f,0x68,0x3c,0x20, + 0x01,0x00,0x8d,0x45,0xec,0x50,0xff,0xd6,0x8d,0x45,0xf4,0x50,0x8d,0x45,0xec,0x50,0xff,0x15, + 0x0c,0x20,0x01,0x00,0x8b,0xf0,0x85,0xf6,0x74,0x0d,0xff,0x75,0xfc,0xff,0x15,0x04,0x20,0x01, + 0x00,0x8b,0xc6,0xeb,0x23,0x8b,0x45,0xfc,0xa3,0x00,0x30,0x01,0x00,0xb8,0x00,0x10,0x01,0x00, + 0x89,0x47,0x38,0x89,0x47,0x40,0xc7,0x47,0x34,0x75,0x10,0x01,0x00,0xc7,0x47,0x70,0x19,0x10, + 0x01,0x00,0x33,0xc0,0x5f,0x5e,0xc9,0xc2,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x28,0x50,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xc6,0x50, + 0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x5c,0x50,0x00,0x00,0x74,0x50,0x00,0x00,0x86,0x50, + 0x00,0x00,0x9c,0x50,0x00,0x00,0xb4,0x50,0x00,0x00,0x44,0x50,0x00,0x00,0x00,0x00,0x00,0x00, + 0x4b,0x01,0x49,0x6f,0x44,0x65,0x6c,0x65,0x74,0x65,0x53,0x79,0x6d,0x62,0x6f,0x6c,0x69,0x63, + 0x4c,0x69,0x6e,0x6b,0x00,0x00,0x0b,0x04,0x52,0x74,0x6c,0x49,0x6e,0x69,0x74,0x55,0x6e,0x69, + 0x63,0x6f,0x64,0x65,0x53,0x74,0x72,0x69,0x6e,0x67,0x00,0x00,0x49,0x01,0x49,0x6f,0x44,0x65, + 
0x6c,0x65,0x74,0x65,0x44,0x65,0x76,0x69,0x63,0x65,0x00,0x00,0xda,0x01,0x49,0x6f,0x66,0x43, + 0x6f,0x6d,0x70,0x6c,0x65,0x74,0x65,0x52,0x65,0x71,0x75,0x65,0x73,0x74,0x00,0x00,0x41,0x01, + 0x49,0x6f,0x43,0x72,0x65,0x61,0x74,0x65,0x53,0x79,0x6d,0x62,0x6f,0x6c,0x69,0x63,0x4c,0x69, + 0x6e,0x6b,0x00,0x00,0x38,0x01,0x49,0x6f,0x43,0x72,0x65,0x61,0x74,0x65,0x44,0x65,0x76,0x69, + 0x63,0x65,0x00,0x00,0x6e,0x74,0x6f,0x73,0x6b,0x72,0x6e,0x6c,0x2e,0x65,0x78,0x65,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x10,0x00,0x00,0x00,0x18,0x00,0x00,0x80, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x01,0x00, + 0x00,0x00,0x30,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x01,0x00,0x09,0x04,0x00,0x00,0x48,0x00,0x00,0x00,0x60,0x60,0x00,0x00,0x5c,0x03, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x5c,0x03,0x34,0x00,0x00,0x00,0x56,0x00,0x53,0x00,0x5f,0x00,0x56,0x00,0x45,0x00,0x52,0x00, + 0x53,0x00,0x49,0x00,0x4f,0x00,0x4e,0x00,0x5f,0x00,0x49,0x00,0x4e,0x00,0x46,0x00,0x4f,0x00, + 0x00,0x00,0x00,0x00,0xbd,0x04,0xef,0xfe,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x01,0x00, + 0x00,0x00,0x00,0x00,0x01,0x00,0x01,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x04,0x00,0x04,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0xba,0x02,0x00,0x00,0x01,0x00,0x53,0x00,0x74,0x00,0x72,0x00,0x69,0x00,0x6e,0x00, + 0x67,0x00,0x46,0x00,0x69,0x00,0x6c,0x00,0x65,0x00,0x49,0x00,0x6e,0x00,0x66,0x00,0x6f,0x00, + 0x00,0x00,0x96,0x02,0x00,0x00,0x01,0x00,0x30,0x00,0x34,0x00,0x30,0x00,0x39,0x00,0x30,0x00, + 0x34,0x00,0x62,0x00,0x30,0x00,0x00,0x00,0x58,0x00,0x20,0x00,0x01,0x00,0x43,0x00,0x6f,0x00, + 0x6d,0x00,0x6d,0x00,0x65,0x00,0x6e,0x00,0x74,0x00,0x73,0x00,0x00,0x00,0x4d,0x00,0x53,0x00, + 0x52,0x00,0x20,0x00,0x72,0x00,0x65,0x00,0x61,0x00,0x64,0x00,0x65,0x00,0x72,0x00,0x20,0x00, + 
0x33,0x00,0x32,0x00,0x2d,0x00,0x62,0x00,0x69,0x00,0x74,0x00,0x20,0x00,0x6b,0x00,0x65,0x00, + 0x72,0x00,0x6e,0x00,0x65,0x00,0x6c,0x00,0x20,0x00,0x64,0x00,0x72,0x00,0x69,0x00,0x76,0x00, + 0x65,0x00,0x72,0x00,0x00,0x00,0x42,0x00,0x11,0x00,0x01,0x00,0x43,0x00,0x6f,0x00,0x6d,0x00, + 0x70,0x00,0x61,0x00,0x6e,0x00,0x79,0x00,0x4e,0x00,0x61,0x00,0x6d,0x00,0x65,0x00,0x00,0x00, + 0x00,0x00,0x49,0x00,0x72,0x00,0x6f,0x00,0x6e,0x00,0x20,0x00,0x53,0x00,0x74,0x00,0x65,0x00, + 0x65,0x00,0x64,0x00,0x73,0x00,0x20,0x00,0x49,0x00,0x6e,0x00,0x63,0x00,0x2e,0x00,0x00,0x00, + 0x00,0x00,0x60,0x00,0x1c,0x00,0x01,0x00,0x46,0x00,0x69,0x00,0x6c,0x00,0x65,0x00,0x44,0x00, + 0x65,0x00,0x73,0x00,0x63,0x00,0x72,0x00,0x69,0x00,0x70,0x00,0x74,0x00,0x69,0x00,0x6f,0x00, + 0x6e,0x00,0x00,0x00,0x00,0x00,0x54,0x00,0x6d,0x00,0x70,0x00,0x52,0x00,0x64,0x00,0x72,0x00, + 0x20,0x00,0x33,0x00,0x32,0x00,0x2d,0x00,0x62,0x00,0x69,0x00,0x74,0x00,0x20,0x00,0x4b,0x00, + 0x65,0x00,0x72,0x00,0x6e,0x00,0x65,0x00,0x6c,0x00,0x20,0x00,0x4d,0x00,0x6f,0x00,0x64,0x00, + 0x75,0x00,0x6c,0x00,0x65,0x00,0x00,0x00,0x36,0x00,0x0b,0x00,0x01,0x00,0x46,0x00,0x69,0x00, + 0x6c,0x00,0x65,0x00,0x56,0x00,0x65,0x00,0x72,0x00,0x73,0x00,0x69,0x00,0x6f,0x00,0x6e,0x00, + 0x00,0x00,0x00,0x00,0x31,0x00,0x2c,0x00,0x20,0x00,0x30,0x00,0x2c,0x00,0x20,0x00,0x30,0x00, + 0x2c,0x00,0x20,0x00,0x31,0x00,0x00,0x00,0x00,0x00,0x2e,0x00,0x07,0x00,0x01,0x00,0x49,0x00, + 0x6e,0x00,0x74,0x00,0x65,0x00,0x72,0x00,0x6e,0x00,0x61,0x00,0x6c,0x00,0x4e,0x00,0x61,0x00, + 0x6d,0x00,0x65,0x00,0x00,0x00,0x54,0x00,0x6d,0x00,0x70,0x00,0x52,0x00,0x64,0x00,0x72,0x00, + 0x00,0x00,0x00,0x00,0x4a,0x00,0x13,0x00,0x01,0x00,0x4c,0x00,0x65,0x00,0x67,0x00,0x61,0x00, + 0x6c,0x00,0x43,0x00,0x6f,0x00,0x70,0x00,0x79,0x00,0x72,0x00,0x69,0x00,0x67,0x00,0x68,0x00, + 0x74,0x00,0x00,0x00,0x4e,0x00,0x69,0x00,0x63,0x00,0x6b,0x00,0x20,0x00,0x47,0x00,0x61,0x00, + 0x62,0x00,0x61,0x00,0x72,0x00,0x65,0x00,0x76,0x00,0x20,0x00,0x27,0x00,0x32,0x00,0x30,0x00, + 0x30,0x00,0x39,0x00,0x00,0x00,0x00,0x00,0x3e,0x00,0x0b,0x00,0x01,0x00,0x4f,0x00,0x72,0x00, + 0x69,0x00,0x67,0x00,0x69,0x00,0x6e,0x00,0x61,0x00,0x6c,0x00,0x46,0x00,0x69,0x00,0x6c,0x00, + 0x65,0x00,0x6e,0x00,0x61,0x00,0x6d,0x00,0x65,0x00,0x00,0x00,0x54,0x00,0x6d,0x00,0x70,0x00, + 0x52,0x00,0x64,0x00,0x72,0x00,0x2e,0x00,0x73,0x00,0x79,0x00,0x73,0x00,0x00,0x00,0x00,0x00, + 0x54,0x00,0x1a,0x00,0x01,0x00,0x50,0x00,0x72,0x00,0x6f,0x00,0x64,0x00,0x75,0x00,0x63,0x00, + 0x74,0x00,0x4e,0x00,0x61,0x00,0x6d,0x00,0x65,0x00,0x00,0x00,0x00,0x00,0x43,0x00,0x6f,0x00, + 0x72,0x00,0x65,0x00,0x20,0x00,0x32,0x00,0x20,0x00,0x54,0x00,0x65,0x00,0x6d,0x00,0x70,0x00, + 0x65,0x00,0x72,0x00,0x61,0x00,0x74,0x00,0x75,0x00,0x72,0x00,0x65,0x00,0x20,0x00,0x52,0x00, + 0x65,0x00,0x61,0x00,0x64,0x00,0x65,0x00,0x72,0x00,0x00,0x00,0x3a,0x00,0x0b,0x00,0x01,0x00, + 0x50,0x00,0x72,0x00,0x6f,0x00,0x64,0x00,0x75,0x00,0x63,0x00,0x74,0x00,0x56,0x00,0x65,0x00, + 0x72,0x00,0x73,0x00,0x69,0x00,0x6f,0x00,0x6e,0x00,0x00,0x00,0x31,0x00,0x2c,0x00,0x20,0x00, + 0x30,0x00,0x2c,0x00,0x20,0x00,0x30,0x00,0x2c,0x00,0x20,0x00,0x31,0x00,0x00,0x00,0x00,0x00, + 0x44,0x00,0x00,0x00,0x01,0x00,0x56,0x00,0x61,0x00,0x72,0x00,0x46,0x00,0x69,0x00,0x6c,0x00, + 0x65,0x00,0x49,0x00,0x6e,0x00,0x66,0x00,0x6f,0x00,0x00,0x00,0x00,0x00,0x24,0x00,0x04,0x00, + 0x00,0x00,0x54,0x00,0x72,0x00,0x61,0x00,0x6e,0x00,0x73,0x00,0x6c,0x00,0x61,0x00,0x74,0x00, + 0x69,0x00,0x6f,0x00,0x6e,0x00,0x00,0x00,0x00,0x00,0x09,0x04,0xb0,0x04,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x14,0x00,0x00,0x00, + 0x10,0x30,0x5c,0x30,0x82,0x30,0x87,0x30,0x91,0x30,0x9b,0x30,0x00,0x40,0x00,0x00,0x1c,0x00, + 0x00,0x00,0x09,0x30,0x0f,0x30,0x2f,0x30,0x38,0x30,0x4c,0x30,0x5b,0x30,0x67,0x30,0x6c,0x30, + 0x79,0x30,0x80,0x30,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +}; +int cc_x64driver_code_size = 5120; +uint8_t cc_x64driver_code[5120] = { + 0x4d,0x5a,0x90,0x00,0x03,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0xff,0xff,0x00,0x00,0xb8,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0xd0,0x00,0x00,0x00,0x0e,0x1f,0xba,0x0e,0x00,0xb4,0x09,0xcd, + 0x21,0xb8,0x01,0x4c,0xcd,0x21,0x54,0x68,0x69,0x73,0x20,0x70,0x72,0x6f,0x67,0x72,0x61,0x6d, + 0x20,0x63,0x61,0x6e,0x6e,0x6f,0x74,0x20,0x62,0x65,0x20,0x72,0x75,0x6e,0x20,0x69,0x6e,0x20, + 
0x44,0x4f,0x53,0x20,0x6d,0x6f,0x64,0x65,0x2e,0x0d,0x0d,0x0a,0x24,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0xb7,0x04,0xa8,0xc2,0xf3,0x65,0xc6,0x91,0xf3,0x65,0xc6,0x91,0xf3,0x65,0xc6,0x91, + 0xf3,0x65,0xc7,0x91,0xf4,0x65,0xc6,0x91,0x85,0xf8,0xbd,0x91,0xf0,0x65,0xc6,0x91,0x85,0xf8, + 0xab,0x91,0xf0,0x65,0xc6,0x91,0x30,0x6a,0x98,0x91,0xf2,0x65,0xc6,0x91,0x85,0xf8,0xbe,0x91, + 0xf2,0x65,0xc6,0x91,0x52,0x69,0x63,0x68,0xf3,0x65,0xc6,0x91,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x50,0x45,0x00,0x00,0x64,0x86,0x07,0x00, + 0x41,0xc8,0x6d,0x49,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x00,0x22,0x00,0x0b,0x02, + 0x08,0x00,0x00,0x06,0x00,0x00,0x00,0x0a,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x60,0x00,0x00, + 0x00,0x10,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x02, + 0x00,0x00,0x05,0x00,0x02,0x00,0x05,0x00,0x02,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x00,0x00, + 0x00,0x80,0x00,0x00,0x00,0x04,0x00,0x00,0x79,0x44,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00, + 0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00, + 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x6c,0x60,0x00,0x00,0x28,0x00,0x00,0x00, + 0x00,0x70,0x00,0x00,0xc0,0x03,0x00,0x00,0x00,0x40,0x00,0x00,0x48,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x20,0x00,0x00, + 0x1c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00,0x38,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x2e,0x74,0x65,0x78,0x74,0x00,0x00,0x00,0x26,0x01,0x00,0x00,0x00,0x10, + 0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x20,0x00,0x00,0x68,0x2e,0x72,0x64,0x61,0x74,0x61,0x00,0x00,0xf0,0x00, + 0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x00,0x00,0x48,0x2e,0x64,0x61,0x74,0x61,0x00, + 0x00,0x00,0x18,0x01,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x08,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x00,0x00,0xc8,0x2e,0x70, + 0x64,0x61,0x74,0x61,0x00,0x00,0x48,0x00,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x02,0x00,0x00, + 0x00,0x0a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x00, + 0x00,0x48,0x50,0x41,0x47,0x45,0x30,0x44,0x45,0x46,0x4e,0x01,0x00,0x00,0x00,0x50,0x00,0x00, + 0x00,0x02,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x20,0x00,0x00,0x60,0x49,0x4e,0x49,0x54,0x00,0x00,0x00,0x00,0x60,0x01,0x00,0x00, + 0x00,0x60,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00,0xe2,0x2e,0x72,0x73,0x72,0x63,0x00,0x00,0x00, + 0xc0,0x03,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x00,0x00,0x42,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x48,0x83,0xec,0x28, + 0x33,0xc9,0x48,0x8b,0xc2,0x89,0x4a,0x30,0x48,0x89,0x4a,0x38,0x33,0xd2,0x48,0x8b,0xc8,0xff, + 0x15,0xfd,0x0f,0x00,0x00,0x33,0xc0,0x48,0x83,0xc4,0x28,0xc3,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc, + 0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0x48,0x83,0xec,0x28,0x48,0x8b,0x82,0xb8,0x00,0x00, + 0x00,0x4c,0x8b,0xca,0x81,0x78,0x18,0x0c,0xe0,0x22,0x00,0x75,0x43,0x83,0x78,0x08,0x08,0x72, + 0x3d,0x83,0x78,0x10,0x04,0x75,0x08,0x48,0x8b,0x42,0x18,0x8b,0x08,0xeb,0x05,0xb9,0x9c,0x01, + 0x00,0x00,0x4c,0x8b,0x42,0x18,0x0f,0x32,0x48,0xc1,0xe2,0x20,0x49,0x8b,0xc9,0x48,0x0b,0xc2, + 0x33,0xd2,0x49,0x89,0x00,0x49,0xc7,0x41,0x38,0x08,0x00,0x00,0x00,0xff,0x15,0x95,0x0f,0x00, + 0x00,0x33,0xc0,0x48,0x83,0xc4,0x28,0xc3,0xc7,0x42,0x30,0x01,0x00,0x00,0xc0,0x48,0xc7,0x42, + 0x38,0x00,0x00,0x00,0x00,0x49,0x8b,0xc9,0x33,0xd2,0xff,0x15,0x74,0x0f,0x00,0x00,0xb8,0x01, + 0x00,0x00,0xc0,0x48,0x83,0xc4,0x28,0xc3,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc, + 0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0x48,0x83,0xec,0x38,0x48,0x8b,0x49,0x08,0xff,0x15, + 0x32,0x0f,0x00,0x00,0x48,0x8d,0x15,0x1b,0x00,0x00,0x00,0x48,0x8d,0x4c,0x24,0x20,0xff,0x15, + 0x18,0x0f,0x00,0x00,0x48,0x8d,0x4c,0x24,0x20,0xff,0x15,0x05,0x0f,0x00,0x00,0x48,0x83,0xc4, + 0x38,0xc3,0x5c,0x00,0x44,0x00,0x6f,0x00,0x73,0x00,0x44,0x00,0x65,0x00,0x76,0x00,0x69,0x00, + 0x63,0x00,0x65,0x00,0x73,0x00,0x5c,0x00,0x54,0x00,0x6d,0x00,0x70,0x00,0x52,0x00,0x64,0x00, + 0x72,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0xe6,0x60,0x00,0x00,0x00,0x00,0x00,0x00,0xfe,0x60,0x00,0x00, + 0x00,0x00,0x00,0x00,0x16,0x61,0x00,0x00,0x00,0x00,0x00,0x00,0x28,0x61,0x00,0x00,0x00,0x00, + 0x00,0x00,0x40,0x61,0x00,0x00,0x00,0x00,0x00,0x00,0xd0,0x60,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x41,0xc8,0x6d,0x49,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x49,0x00,0x00,0x00, + 0x5c,0x20,0x00,0x00,0x5c,0x06,0x00,0x00,0x52,0x53,0x44,0x53,0xd9,0x5e,0xab,0x47,0xc4,0xf2, + 0x4f,0x40,0xaa,0xe9,0x90,0x47,0x67,0x30,0xa5,0xfa,0x03,0x00,0x00,0x00,0x44,0x3a,0x5c,0x74, + 0x6d,0x70,0x5c,0x4b,0x65,0x72,0x6e,0x65,0x6c,0x5c,0x6f,0x62,0x6a,0x66,0x72,0x65,0x5f,0x77, + 0x6e,0x65,0x74,0x5f,0x41,0x4d,0x44,0x36,0x34,0x5c,0x61,0x6d,0x64,0x36,0x34,0x5c,0x54,0x6d, + 0x70,0x52,0x64,0x72,0x2e,0x70,0x64,0x62,0x00,0x00,0x00,0x00,0x01,0x04,0x01,0x00,0x04,0x42, + 0x00,0x00,0x01,0x04,0x01,0x00,0x04,0x42,0x00,0x00,0x01,0x04,0x01,0x00,0x04,0x62,0x00,0x00, + 0x21,0x00,0x00,0x00,0x10,0x50,0x00,0x00,0x74,0x50,0x00,0x00,0xe4,0x20,0x00,0x00,0x21,0x08, + 0x02,0x00,0x08,0x74,0x13,0x00,0x10,0x50,0x00,0x00,0x74,0x50,0x00,0x00,0xe4,0x20,0x00,0x00, + 0x01,0x0c,0x03,0x00,0x0c,0x34,0x12,0x00,0x04,0xe2,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0xcd,0x5d,0x20,0xd2,0x66,0xd4,0xff,0xff,0x32,0xa2,0xdf,0x2d,0x99,0x2b,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x10,0x10,0x00,0x00,0x32,0x10,0x00,0x00,0xa8,0x20,0x00,0x00,0x40,0x10, + 0x00,0x00,0xbe,0x10,0x00,0x00,0xb0,0x20,0x00,0x00,0xd0,0x10,0x00,0x00,0x00,0x11,0x00,0x00, + 0xb8,0x20,0x00,0x00,0x10,0x50,0x00,0x00,0x74,0x50,0x00,0x00,0xe4,0x20,0x00,0x00,0x74,0x50, + 0x00,0x00,0xe8,0x50,0x00,0x00,0xd0,0x20,0x00,0x00,0xe8,0x50,0x00,0x00,0xf5,0x50,0x00,0x00, + 0xc0,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x48,0x83,0xec,0x78,0x48,0x89,0x9c,0x24, + 0x90,0x00,0x00,0x00,0x48,0x8b,0xd9,0x48,0x8d,0x15,0x0a,0x01,0x00,0x00,0x48,0x8d,0x4c,0x24, + 0x48,0xff,0x15,0xd7,0xcf,0xff,0xff,0x41,0xb9,0x22,0x00,0x00,0x00,0x4c,0x8d,0x5c,0x24,0x40, + 0x4c,0x89,0x5c,0x24,0x30,0x4c,0x8d,0x44,0x24,0x48,0x41,0x8d,0x51,0xe6,0x48,0x8b,0xcb,0xc6, + 0x44,0x24,0x28,0x00,0xc7,0x44,0x24,0x20,0x00,0x00,0x00,0x00,0xff,0x15,0xc0,0xcf,0xff,0xff, + 0x85,0xc0,0x0f,0x85,0x80,0x00,0x00,0x00,0x48,0x8d,0x15,0x91,0x00,0x00,0x00,0x48,0x8d,0x4c, + 0x24,0x58,0x48,0x89,0xbc,0x24,0x98,0x00,0x00,0x00,0xff,0x15,0x86,0xcf,0xff,0xff,0x48,0x8d, + 0x54,0x24,0x48,0x48,0x8d,0x4c,0x24,0x58,0xff,0x15,0x86,0xcf,0xff,0xff,0x85,0xc0,0x8b,0xf8, + 0x74,0x0f,0x48,0x8b,0x4c,0x24,0x40,0xff,0x15,0x6d,0xcf,0xff,0xff,0x8b,0xc7,0xeb,0x39,0x48, + 0x8b,0x44,0x24,0x40,0x48,0x89,0x05,0x5d,0xe0,0xff,0xff,0x48,0x8d,0x05,0x16,0xc0,0xff,0xff, + 0x48,0x89,0x43,0x68,0x48,0x8d,0x05,0x4b,0xbf,0xff,0xff,0x48,0x89,0x43,0x70,0x48,0x89,0x83, + 0x80,0x00,0x00,0x00,0x48,0x8d,0x05,0x69,0xbf,0xff,0xff,0x48,0x89,0x83,0xe0,0x00,0x00,0x00, + 0x33,0xc0,0x48,0x8b,0xbc,0x24,0x98,0x00,0x00,0x00,0x48,0x8b,0x9c,0x24,0x90,0x00,0x00,0x00, + 0x48,0x83,0xc4,0x78,0xc3,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0x5c,0x00, + 0x44,0x00,0x6f,0x00,0x73,0x00,0x44,0x00,0x65,0x00,0x76,0x00,0x69,0x00,0x63,0x00,0x65,0x00, + 0x73,0x00,0x5c,0x00,0x54,0x00,0x6d,0x00,0x70,0x00,0x52,0x00,0x64,0x00,0x72,0x00,0x00,0x00, + 0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0x5c,0x00,0x44,0x00,0x65,0x00,0x76,0x00, + 0x69,0x00,0x63,0x00,0x65,0x00,0x5c,0x00,0x54,0x00,0x6d,0x00,0x70,0x00,0x52,0x00,0x64,0x00, + 0x72,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x48,0x8b,0x05,0xf1,0xd0,0xff,0xff,0x49,0xb9,0x32,0xa2,0xdf,0x2d,0x99,0x2b,0x00,0x00,0x48, + 0x85,0xc0,0x74,0x05,0x49,0x3b,0xc1,0x75,0x2f,0x4c,0x8d,0x05,0xd6,0xd0,0xff,0xff,0x48,0xb8, + 0x20,0x03,0x00,0x00,0x80,0xf7,0xff,0xff,0x48,0x8b,0x00,0x49,0x33,0xc0,0x49,0xb8,0xff,0xff, + 0xff,0xff,0xff,0xff,0x00,0x00,0x49,0x23,0xc0,0x49,0x0f,0x44,0xc1,0x48,0x89,0x05,0xae,0xd0, + 0xff,0xff,0x48,0xf7,0xd0,0x48,0x89,0x05,0x9c,0xd0,0xff,0xff,0xe9,0xa7,0xef,0xff,0xff,0xcc, + 0xcc,0xcc,0x98,0x60,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x52,0x61,0x00,0x00, + 0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xe6,0x60,0x00,0x00,0x00,0x00,0x00,0x00, + 0xfe,0x60,0x00,0x00,0x00,0x00,0x00,0x00,0x16,0x61,0x00,0x00,0x00,0x00,0x00,0x00,0x28,0x61, + 0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x61,0x00,0x00,0x00,0x00,0x00,0x00,0xd0,0x60,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xf6,0x01,0x49,0x6f,0x66,0x43, + 0x6f,0x6d,0x70,0x6c,0x65,0x74,0x65,0x52,0x65,0x71,0x75,0x65,0x73,0x74,0x00,0x00,0x61,0x01, + 0x49,0x6f,0x44,0x65,0x6c,0x65,0x74,0x65,0x53,0x79,0x6d,0x62,0x6f,0x6c,0x69,0x63,0x4c,0x69, + 0x6e,0x6b,0x00,0x00,0x3e,0x04,0x52,0x74,0x6c,0x49,0x6e,0x69,0x74,0x55,0x6e,0x69,0x63,0x6f, + 0x64,0x65,0x53,0x74,0x72,0x69,0x6e,0x67,0x00,0x00,0x5f,0x01,0x49,0x6f,0x44,0x65,0x6c,0x65, + 0x74,0x65,0x44,0x65,0x76,0x69,0x63,0x65,0x00,0x00,0x55,0x01,0x49,0x6f,0x43,0x72,0x65,0x61, + 0x74,0x65,0x53,0x79,0x6d,0x62,0x6f,0x6c,0x69,0x63,0x4c,0x69,0x6e,0x6b,0x00,0x00,0x4c,0x01, + 0x49,0x6f,0x43,0x72,0x65,0x61,0x74,0x65,0x44,0x65,0x76,0x69,0x63,0x65,0x00,0x00,0x6e,0x74, + 0x6f,0x73,0x6b,0x72,0x6e,0x6c,0x2e,0x65,0x78,0x65,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x10,0x00,0x00,0x00,0x18,0x00,0x00,0x80,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x01,0x00,0x00,0x00, + 0x30,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x01,0x00,0x09,0x04,0x00,0x00,0x48,0x00,0x00,0x00,0x60,0x70,0x00,0x00,0x60,0x03,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x60,0x03, + 0x34,0x00,0x00,0x00,0x56,0x00,0x53,0x00,0x5f,0x00,0x56,0x00,0x45,0x00,0x52,0x00,0x53,0x00, + 
0x49,0x00,0x4f,0x00,0x4e,0x00,0x5f,0x00,0x49,0x00,0x4e,0x00,0x46,0x00,0x4f,0x00,0x00,0x00, + 0x00,0x00,0xbd,0x04,0xef,0xfe,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x01,0x00,0x00,0x00, + 0x00,0x00,0x01,0x00,0x01,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00, + 0x04,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0xbe,0x02,0x00,0x00,0x01,0x00,0x53,0x00,0x74,0x00,0x72,0x00,0x69,0x00,0x6e,0x00,0x67,0x00, + 0x46,0x00,0x69,0x00,0x6c,0x00,0x65,0x00,0x49,0x00,0x6e,0x00,0x66,0x00,0x6f,0x00,0x00,0x00, + 0x9a,0x02,0x00,0x00,0x01,0x00,0x30,0x00,0x34,0x00,0x30,0x00,0x39,0x00,0x30,0x00,0x34,0x00, + 0x62,0x00,0x30,0x00,0x00,0x00,0x58,0x00,0x20,0x00,0x01,0x00,0x43,0x00,0x6f,0x00,0x6d,0x00, + 0x6d,0x00,0x65,0x00,0x6e,0x00,0x74,0x00,0x73,0x00,0x00,0x00,0x4d,0x00,0x53,0x00,0x52,0x00, + 0x20,0x00,0x72,0x00,0x65,0x00,0x61,0x00,0x64,0x00,0x65,0x00,0x72,0x00,0x20,0x00,0x36,0x00, + 0x34,0x00,0x2d,0x00,0x62,0x00,0x69,0x00,0x74,0x00,0x20,0x00,0x6b,0x00,0x65,0x00,0x72,0x00, + 0x6e,0x00,0x65,0x00,0x6c,0x00,0x20,0x00,0x64,0x00,0x72,0x00,0x69,0x00,0x76,0x00,0x65,0x00, + 0x72,0x00,0x00,0x00,0x42,0x00,0x11,0x00,0x01,0x00,0x43,0x00,0x6f,0x00,0x6d,0x00,0x70,0x00, + 0x61,0x00,0x6e,0x00,0x79,0x00,0x4e,0x00,0x61,0x00,0x6d,0x00,0x65,0x00,0x00,0x00,0x00,0x00, + 0x49,0x00,0x72,0x00,0x6f,0x00,0x6e,0x00,0x20,0x00,0x53,0x00,0x74,0x00,0x65,0x00,0x65,0x00, + 0x64,0x00,0x73,0x00,0x20,0x00,0x49,0x00,0x6e,0x00,0x63,0x00,0x2e,0x00,0x00,0x00,0x00,0x00, + 0x60,0x00,0x1c,0x00,0x01,0x00,0x46,0x00,0x69,0x00,0x6c,0x00,0x65,0x00,0x44,0x00,0x65,0x00, + 0x73,0x00,0x63,0x00,0x72,0x00,0x69,0x00,0x70,0x00,0x74,0x00,0x69,0x00,0x6f,0x00,0x6e,0x00, + 0x00,0x00,0x00,0x00,0x54,0x00,0x6d,0x00,0x70,0x00,0x52,0x00,0x64,0x00,0x72,0x00,0x20,0x00, + 0x36,0x00,0x34,0x00,0x2d,0x00,0x62,0x00,0x69,0x00,0x74,0x00,0x20,0x00,0x4b,0x00,0x65,0x00, + 0x72,0x00,0x6e,0x00,0x65,0x00,0x6c,0x00,0x20,0x00,0x4d,0x00,0x6f,0x00,0x64,0x00,0x75,0x00, + 0x6c,0x00,0x65,0x00,0x00,0x00,0x36,0x00,0x0b,0x00,0x01,0x00,0x46,0x00,0x69,0x00,0x6c,0x00, + 0x65,0x00,0x56,0x00,0x65,0x00,0x72,0x00,0x73,0x00,0x69,0x00,0x6f,0x00,0x6e,0x00,0x00,0x00, + 0x00,0x00,0x31,0x00,0x2c,0x00,0x20,0x00,0x30,0x00,0x2c,0x00,0x20,0x00,0x30,0x00,0x2c,0x00, + 0x20,0x00,0x31,0x00,0x00,0x00,0x00,0x00,0x2e,0x00,0x07,0x00,0x01,0x00,0x49,0x00,0x6e,0x00, + 0x74,0x00,0x65,0x00,0x72,0x00,0x6e,0x00,0x61,0x00,0x6c,0x00,0x4e,0x00,0x61,0x00,0x6d,0x00, + 0x65,0x00,0x00,0x00,0x54,0x00,0x6d,0x00,0x70,0x00,0x52,0x00,0x64,0x00,0x72,0x00,0x00,0x00, + 0x00,0x00,0x4a,0x00,0x13,0x00,0x01,0x00,0x4c,0x00,0x65,0x00,0x67,0x00,0x61,0x00,0x6c,0x00, + 0x43,0x00,0x6f,0x00,0x70,0x00,0x79,0x00,0x72,0x00,0x69,0x00,0x67,0x00,0x68,0x00,0x74,0x00, + 0x00,0x00,0x4e,0x00,0x69,0x00,0x63,0x00,0x6b,0x00,0x20,0x00,0x47,0x00,0x61,0x00,0x62,0x00, + 0x61,0x00,0x72,0x00,0x65,0x00,0x76,0x00,0x20,0x00,0x27,0x00,0x32,0x00,0x30,0x00,0x30,0x00, + 0x39,0x00,0x00,0x00,0x00,0x00,0x42,0x00,0x0d,0x00,0x01,0x00,0x4f,0x00,0x72,0x00,0x69,0x00, + 0x67,0x00,0x69,0x00,0x6e,0x00,0x61,0x00,0x6c,0x00,0x46,0x00,0x69,0x00,0x6c,0x00,0x65,0x00, + 0x6e,0x00,0x61,0x00,0x6d,0x00,0x65,0x00,0x00,0x00,0x54,0x00,0x6d,0x00,0x70,0x00,0x52,0x00, + 0x64,0x00,0x72,0x00,0x36,0x00,0x34,0x00,0x2e,0x00,0x73,0x00,0x79,0x00,0x73,0x00,0x00,0x00, + 0x00,0x00,0x54,0x00,0x1a,0x00,0x01,0x00,0x50,0x00,0x72,0x00,0x6f,0x00,0x64,0x00,0x75,0x00, + 0x63,0x00,0x74,0x00,0x4e,0x00,0x61,0x00,0x6d,0x00,0x65,0x00,0x00,0x00,0x00,0x00,0x43,0x00, + 0x6f,0x00,0x72,0x00,0x65,0x00,0x20,0x00,0x32,0x00,0x20,0x00,0x54,0x00,0x65,0x00,0x6d,0x00, + 
0x70,0x00,0x65,0x00,0x72,0x00,0x61,0x00,0x74,0x00,0x75,0x00,0x72,0x00,0x65,0x00,0x20,0x00, + 0x52,0x00,0x65,0x00,0x61,0x00,0x64,0x00,0x65,0x00,0x72,0x00,0x00,0x00,0x3a,0x00,0x0b,0x00, + 0x01,0x00,0x50,0x00,0x72,0x00,0x6f,0x00,0x64,0x00,0x75,0x00,0x63,0x00,0x74,0x00,0x56,0x00, + 0x65,0x00,0x72,0x00,0x73,0x00,0x69,0x00,0x6f,0x00,0x6e,0x00,0x00,0x00,0x31,0x00,0x2c,0x00, + 0x20,0x00,0x30,0x00,0x2c,0x00,0x20,0x00,0x30,0x00,0x2c,0x00,0x20,0x00,0x31,0x00,0x00,0x00, + 0x00,0x00,0x44,0x00,0x00,0x00,0x01,0x00,0x56,0x00,0x61,0x00,0x72,0x00,0x46,0x00,0x69,0x00, + 0x6c,0x00,0x65,0x00,0x49,0x00,0x6e,0x00,0x66,0x00,0x6f,0x00,0x00,0x00,0x00,0x00,0x24,0x00, + 0x04,0x00,0x00,0x00,0x54,0x00,0x72,0x00,0x61,0x00,0x6e,0x00,0x73,0x00,0x6c,0x00,0x61,0x00, + 0x74,0x00,0x69,0x00,0x6f,0x00,0x6e,0x00,0x00,0x00,0x00,0x00,0x09,0x04,0xb0,0x04,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +}; +//} end +#endif // _WIN32 + +int msrdriver_dummy; // a dummy to avoid a linker warning on OS X. diff --git a/contrib/libcpuid/include/libcpuid/rdmsr.c b/contrib/libcpuid/include/libcpuid/rdmsr.c new file mode 100644 index 00000000000..a27e939bba0 --- /dev/null +++ b/contrib/libcpuid/include/libcpuid/rdmsr.c @@ -0,0 +1,922 @@ +/* + * Copyright 2009 Veselin Georgiev, + * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#define _XOPEN_SOURCE 500 +#include +#include +#include "libcpuid.h" +#include "asm-bits.h" +#include "libcpuid_util.h" +#include "libcpuid_internal.h" +#include "rdtsc.h" + +#if defined (__linux__) || defined (__gnu_linux__) +/* Assuming linux with /dev/cpu/x/msr: */ +#include +#include +#include +#include +#include +#include +struct msr_driver_t { int fd; }; +static int rdmsr_supported(void); +static int load_driver(char *msr_path) +{ + const int file_exists = !access(msr_path, F_OK); + const int file_readable = !access(msr_path, R_OK); + + if (file_exists && file_readable) + return 1; + else if (file_exists && !file_readable) + return 0; + else if (getuid() != 0) + return 0; + else + return !system("modprobe msr 2> /dev/null"); +} + +struct msr_driver_t* cpu_msr_driver_open(void) +{ + return cpu_msr_driver_open_core(0); +} + +struct msr_driver_t* cpu_msr_driver_open_core(unsigned core_num) +{ + char msr[32]; + struct msr_driver_t* handle; + if (core_num >= cpuid_get_total_cpus()) { + set_error(ERR_INVCNB); + return NULL; + } + if (!rdmsr_supported()) { + set_error(ERR_NO_RDMSR); + return NULL; + } + sprintf(msr, "/dev/cpu/%u/msr", core_num); + if(!load_driver(msr)) { + set_error(ERR_NO_DRIVER); + return NULL; + } + int fd = open(msr, O_RDONLY); + if (fd < 0) { + if (errno == EIO) { + set_error(ERR_NO_RDMSR); + return NULL; + } + set_error(ERR_NO_DRIVER); + return NULL; + } + handle = (struct msr_driver_t*) malloc(sizeof(struct msr_driver_t)); + handle->fd = fd; + return handle; +} + +int cpu_rdmsr(struct msr_driver_t* driver, uint32_t msr_index, uint64_t* result) +{ + ssize_t ret; + + if (!driver || driver->fd < 0) + return set_error(ERR_HANDLE); + ret = pread(driver->fd, result, 8, msr_index); + if (ret != 8) + return set_error(ERR_INVMSR); + return 0; +} + +int cpu_msr_driver_close(struct msr_driver_t* drv) +{ + if (drv && drv->fd >= 0) { + close(drv->fd); + free(drv); + } + return 0; +} + +/* #endif defined (__linux__) || defined (__gnu_linux__) */ + +#elif defined (__FreeBSD__) || defined (__DragonFly__) +/* Assuming FreeBSD with /dev/cpuctlX */ +#include +#include +#include +#include +#include + +struct msr_driver_t { int fd; }; +static int rdmsr_supported(void); +static int load_driver(char *msr_path) +{ + const int file_exists = !access(msr_path, F_OK); + const int file_readable = !access(msr_path, R_OK); + + if (file_exists && file_readable) + return 1; + else if (file_exists && !file_readable) + return 0; + else if (getuid() != 0) + return 0; + else + return !system("kldload -n cpuctl 2> /dev/null"); +} + +struct msr_driver_t* cpu_msr_driver_open(void) +{ + return cpu_msr_driver_open_core(0); +} + +struct msr_driver_t* cpu_msr_driver_open_core(unsigned core_num) +{ + char msr[32]; + struct msr_driver_t* handle; + if (core_num >= cpuid_get_total_cpus()) { + set_error(ERR_INVCNB); + return NULL; + } + if (!rdmsr_supported()) { + set_error(ERR_NO_RDMSR); + return NULL; + } + sprintf(msr, "/dev/cpuctl%u", core_num); + if(!load_driver(msr)) { + set_error(ERR_NO_DRIVER); + return NULL; + } + int fd = open(msr, O_RDONLY); + if (fd < 0) { + if (errno == EIO) { + set_error(ERR_NO_RDMSR); + return NULL; + } + set_error(ERR_NO_DRIVER); + return NULL; + } + handle = (struct msr_driver_t*) malloc(sizeof(struct msr_driver_t)); + handle->fd = fd; + return handle; +} + +int cpu_rdmsr(struct msr_driver_t* driver, uint32_t msr_index, uint64_t* result) +{ + cpuctl_msr_args_t args; + args.msr = msr_index; + + if (!driver || driver->fd < 0) + return set_error(ERR_HANDLE); + + 
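	/* CPUCTL_RDMSR (see cpuctl(4)): the kernel reads MSR args.msr and fills args.data with its 64-bit value. */ +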
if(ioctl(driver->fd, CPUCTL_RDMSR, &args)) + return set_error(ERR_INVMSR); + + *result = args.data; + return 0; +} + +int cpu_msr_driver_close(struct msr_driver_t* drv) +{ + if (drv && drv->fd >= 0) { + close(drv->fd); + free(drv); + } + return 0; +} + +/* #endif defined (__FreeBSD__) || defined (__DragonFly__) */ + +#elif defined (_WIN32) +#include +#include +#include + +extern uint8_t cc_x86driver_code[]; +extern int cc_x86driver_code_size; +extern uint8_t cc_x64driver_code[]; +extern int cc_x64driver_code_size; + +struct msr_driver_t { + char driver_path[MAX_PATH + 1]; + SC_HANDLE scManager; + volatile SC_HANDLE scDriver; + HANDLE hhDriver; + OVERLAPPED ovl; + int errorcode; +}; + +static int rdmsr_supported(void); +static int extract_driver(struct msr_driver_t* driver); +static int load_driver(struct msr_driver_t* driver); + +struct msr_driver_t* cpu_msr_driver_open(void) +{ + struct msr_driver_t* drv; + int status; + if (!rdmsr_supported()) { + set_error(ERR_NO_RDMSR); + return NULL; + } + + drv = (struct msr_driver_t*) malloc(sizeof(struct msr_driver_t)); + if (!drv) { + set_error(ERR_NO_MEM); + return NULL; + } + memset(drv, 0, sizeof(struct msr_driver_t)); + + if (!extract_driver(drv)) { + free(drv); + set_error(ERR_EXTRACT); + return NULL; + } + + status = load_driver(drv); + if (!DeleteFile(drv->driver_path)) + debugf(1, "Deleting temporary driver file failed.\n"); + if (!status) { + set_error(drv->errorcode ? drv->errorcode : ERR_NO_DRIVER); + free(drv); + return NULL; + } + return drv; +} + +struct msr_driver_t* cpu_msr_driver_open_core(unsigned core_num) +{ + warnf("cpu_msr_driver_open_core(): parameter ignored (function is the same as cpu_msr_driver_open)\n"); + return cpu_msr_driver_open(); +} + +typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL); +static BOOL is_running_x64(void) +{ + BOOL bIsWow64 = FALSE; + + LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle(__TEXT("kernel32")), "IsWow64Process"); + if(NULL != fnIsWow64Process) + fnIsWow64Process(GetCurrentProcess(), &bIsWow64); + return bIsWow64; +} + + +static int extract_driver(struct msr_driver_t* driver) +{ + FILE *f; + if (!GetTempPath(sizeof(driver->driver_path), driver->driver_path)) return 0; + strcat(driver->driver_path, "TmpRdr.sys"); + + f = fopen(driver->driver_path, "wb"); + if (!f) return 0; + if (is_running_x64()) + fwrite(cc_x64driver_code, 1, cc_x64driver_code_size, f); + else + fwrite(cc_x86driver_code, 1, cc_x86driver_code_size, f); + fclose(f); + return 1; +} + +static BOOL wait_for_service_state(SC_HANDLE hService, DWORD dwDesiredState, SERVICE_STATUS *lpsrvStatus){ + BOOL fOK = FALSE; + DWORD dwWaitHint; + + if(hService != NULL){ + while(TRUE){ + fOK = QueryServiceStatus(hService, lpsrvStatus); + if(!fOK) + break; + if(lpsrvStatus->dwCurrentState == dwDesiredState) + break; + + dwWaitHint = lpsrvStatus->dwWaitHint / 10; // Poll 1/10 of the wait hint + if (dwWaitHint < 1000) + dwWaitHint = 1000; // At most once per second + if (dwWaitHint > 10000) + dwWaitHint = 10000; // At least every 10 seconds + Sleep(dwWaitHint); + } + } + + return fOK; +} + +static int load_driver(struct msr_driver_t* drv) +{ + LPTSTR lpszInfo = __TEXT("RDMSR Executor Driver"); + USHORT uLen = 0; + SERVICE_STATUS srvStatus = {0}; + BOOL fRunning = FALSE; + DWORD dwLastError; + LPTSTR lpszDriverServiceName = __TEXT("TmpRdr"); + TCHAR lpszDriverName[] = __TEXT("\\\\.\\Global\\TmpRdr"); + + if((LPVOID)(drv->scManager = OpenSCManager(NULL, NULL, SC_MANAGER_ALL_ACCESS)) != NULL) { + 
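		/* With the service control manager open, register the extracted TmpRdr.sys as a demand-start kernel-driver service. */ +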
drv->scDriver = CreateService(drv->scManager, lpszDriverServiceName, lpszInfo, SERVICE_ALL_ACCESS, + SERVICE_KERNEL_DRIVER, SERVICE_DEMAND_START, SERVICE_ERROR_NORMAL, + drv->driver_path, NULL, NULL, NULL, NULL, NULL); + if(drv->scDriver == NULL){ + switch(dwLastError = GetLastError()){ + case ERROR_SERVICE_EXISTS: + case ERROR_SERVICE_MARKED_FOR_DELETE:{ + LPQUERY_SERVICE_CONFIG lpqsc; + DWORD dwBytesNeeded; + + drv->scDriver = OpenService(drv->scManager, lpszDriverServiceName, SERVICE_ALL_ACCESS); + if(drv->scDriver == NULL){ + debugf(1, "Error opening service: %d\n", GetLastError()); + break; + } + + QueryServiceConfig(drv->scDriver, NULL, 0, &dwBytesNeeded); + if((dwLastError = GetLastError()) == ERROR_INSUFFICIENT_BUFFER){ + lpqsc = calloc(1, dwBytesNeeded); + if(!QueryServiceConfig(drv->scDriver, lpqsc, dwBytesNeeded, &dwBytesNeeded)){ + free(lpqsc); + debugf(1, "Error query service config(adjusted buffer): %d\n", GetLastError()); + goto clean_up; + } + else{ + free(lpqsc); + } + } + else{ + debugf(1, "Error query service config: %d\n", dwLastError); + goto clean_up; + } + + break; + } + case ERROR_ACCESS_DENIED: + drv->errorcode = ERR_NO_PERMS; + break; + default: + debugf(1, "Create driver service failed: %d\n", dwLastError); + break; + } + } + if(drv->scDriver != NULL){ + if(StartService(drv->scDriver, 0, NULL)){ + if(!wait_for_service_state(drv->scDriver, SERVICE_RUNNING, &srvStatus)){ + debugf(1, "Driver load failed.\n"); + DeleteService(drv->scDriver); + CloseServiceHandle(drv->scManager); + drv->scDriver = NULL; + goto clean_up; + } else { + fRunning = TRUE; + } + } else{ + if((dwLastError = GetLastError()) == ERROR_SERVICE_ALREADY_RUNNING) + fRunning = TRUE; + else{ + debugf(1, "Driver start failed.\n"); + DeleteService(drv->scDriver); + CloseServiceHandle(drv->scManager); + drv->scDriver = NULL; + goto clean_up; + } + + } + if(fRunning) + debugf(1, "Driver already running.\n"); + else + debugf(1, "Driver loaded.\n"); + CloseServiceHandle(drv->scManager); + drv->hhDriver = CreateFile(lpszDriverName, GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, 0, OPEN_EXISTING, FILE_FLAG_OVERLAPPED, 0); + drv->ovl.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); + return 1; + } + } else { + debugf(1, "Open SCM failed: %d\n", GetLastError()); + } + +clean_up: + if(drv->scManager != NULL){ + CloseServiceHandle(drv->scManager); + drv->scManager = 0; // pointless + } + if(drv->scDriver != NULL){ + if(!DeleteService(drv->scDriver)) + debugf(1, "Delete driver service failed: %d\n", GetLastError()); + CloseServiceHandle(drv->scDriver); + drv->scDriver = 0; + } + + return 0; +} + +#define FILE_DEVICE_UNKNOWN 0x00000022 +#define IOCTL_UNKNOWN_BASE FILE_DEVICE_UNKNOWN +#define IOCTL_PROCVIEW_RDMSR CTL_CODE(IOCTL_UNKNOWN_BASE, 0x0803, METHOD_BUFFERED, FILE_READ_ACCESS | FILE_WRITE_ACCESS) + +int cpu_rdmsr(struct msr_driver_t* driver, uint32_t msr_index, uint64_t* result) +{ + DWORD dwBytesReturned; + __int64 msrdata; + SERVICE_STATUS srvStatus = {0}; + + if (!driver) + return set_error(ERR_HANDLE); + DeviceIoControl(driver->hhDriver, IOCTL_PROCVIEW_RDMSR, &msr_index, sizeof(int), &msrdata, sizeof(__int64), &dwBytesReturned, &driver->ovl); + GetOverlappedResult(driver->hhDriver, &driver->ovl, &dwBytesReturned, TRUE); + *result = msrdata; + return 0; +} + +int cpu_msr_driver_close(struct msr_driver_t* drv) +{ + SERVICE_STATUS srvStatus = {0}; + if (drv == NULL) return 0; + if(drv->scDriver != NULL){ + if (drv->hhDriver) CancelIo(drv->hhDriver); + if(drv->ovl.hEvent != NULL) + 
CloseHandle(drv->ovl.hEvent); + if (drv->hhDriver) CloseHandle(drv->hhDriver); + drv->hhDriver = NULL; + drv->ovl.hEvent = NULL; + if (ControlService(drv->scDriver, SERVICE_CONTROL_STOP, &srvStatus)){ + if (wait_for_service_state(drv->scDriver, SERVICE_STOPPED, &srvStatus)){ + DeleteService(drv->scDriver); + } + } + } + return 0; +} + +/* #endif defined (_WIN32) */ + +#else /* Unsupported OS */ +/* On other OSes (e.g., Darwin), we still do not support RDMSR, so supply a dummy struct + and functions */ + +#define RDMSR_UNSUPPORTED_OS + +struct msr_driver_t { int dummy; }; +struct msr_driver_t* cpu_msr_driver_open(void) +{ + set_error(ERR_NOT_IMP); + return NULL; +} + +struct msr_driver_t* cpu_msr_driver_open_core(unsigned core_num) +{ + set_error(ERR_NOT_IMP); + return NULL; +} + +int cpu_rdmsr(struct msr_driver_t* driver, uint32_t msr_index, uint64_t* result) +{ + return set_error(ERR_NOT_IMP); +} + +int cpu_msr_driver_close(struct msr_driver_t* driver) +{ + return set_error(ERR_NOT_IMP); +} + +int cpu_rdmsr_range(struct msr_driver_t* handle, uint32_t msr_index, uint8_t highbit, + uint8_t lowbit, uint64_t* result) +{ + return set_error(ERR_NOT_IMP); +} + +int cpu_msrinfo(struct msr_driver_t* driver, cpu_msrinfo_request_t which) +{ + return set_error(ERR_NOT_IMP); +} + +#endif /* Unsupported OS */ + +#ifndef RDMSR_UNSUPPORTED_OS + +/* Useful links for hackers: +- AMD MSRs: + AMD BIOS and Kernel Developer’s Guide (BKDG) + * AMD Family 10h Processors + http://support.amd.com/TechDocs/31116.pdf + * AMD Family 11h Processors + http://support.amd.com/TechDocs/41256.pdf + * AMD Family 12h Processors + http://support.amd.com/TechDocs/41131.pdf + * AMD Family 14h Processors + http://support.amd.com/TechDocs/43170_14h_Mod_00h-0Fh_BKDG.pdf + * AMD Family 15h Processors + http://support.amd.com/TechDocs/42301_15h_Mod_00h-0Fh_BKDG.pdf + http://support.amd.com/TechDocs/42300_15h_Mod_10h-1Fh_BKDG.pdf + http://support.amd.com/TechDocs/49125_15h_Models_30h-3Fh_BKDG.pdf + http://support.amd.com/TechDocs/50742_15h_Models_60h-6Fh_BKDG.pdf + * AMD Family 16h Processors + http://support.amd.com/TechDocs/48751_16h_bkdg.pdf + http://support.amd.com/TechDocs/52740_16h_Models_30h-3Fh_BKDG.pdf + +- Intel MSRs: + Intel® 64 and IA-32 Architectures Software Developer’s Manual + * Volume 3 (3A, 3B, 3C & 3D): System Programming Guide + http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-system-programming-manual-325384.pdf +*/ + +/* AMD MSR addresses */ +#define MSR_PSTATE_L 0xC0010061 +#define MSR_PSTATE_S 0xC0010063 +#define MSR_PSTATE_0 0xC0010064 +#define MSR_PSTATE_7 0xC001006B + +/* Intel MSR addresses */ +#define IA32_MPERF 0xE7 +#define IA32_APERF 0xE8 +#define IA32_PERF_STATUS 0x198 +#define IA32_THERM_STATUS 0x19C +#define MSR_EBL_CR_POWERON 0x2A +#define MSR_TURBO_RATIO_LIMIT 0x1AD +#define MSR_TEMPERATURE_TARGET 0x1A2 +#define MSR_PERF_STATUS 0x198 +#define MSR_PLATFORM_INFO 0xCE + + +static int rdmsr_supported(void) +{ + struct cpu_id_t* id = get_cached_cpuid(); + return id->flags[CPU_FEATURE_MSR]; +} + +static int perfmsr_measure(struct msr_driver_t* handle, int msr) +{ + int err; + uint64_t a, b; + uint64_t x, y; + err = cpu_rdmsr(handle, msr, &x); + if (err) return CPU_INVALID_VALUE; + sys_precise_clock(&a); + busy_loop_delay(10); + cpu_rdmsr(handle, msr, &y); + sys_precise_clock(&b); + if (a >= b || x > y) return CPU_INVALID_VALUE; + return (int) ((y - x) / (b - a)); +} +
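(Editor's aside, not part of the patch: a minimal sketch of how the MSR API above fits together, assuming libcpuid is linked and the process has enough privilege to open the MSR driver. The raw addresses 0xE7/0xE8 are used because the IA32_MPERF/IA32_APERF macros above are private to rdmsr.c; cpuid_error() is libcpuid's error-string accessor.)

#include <stdio.h>
#include <libcpuid.h>

static void aperf_mperf_example(void)
{
	uint64_t mperf, aperf;
	struct msr_driver_t* drv = cpu_msr_driver_open();  /* typically requires root/admin */
	if (!drv) {
		printf("MSR driver unavailable: %s\n", cpuid_error());
		return;
	}
	/* IA32_MPERF (0xE7) ticks at the base frequency and IA32_APERF (0xE8) at the
	   actual frequency, so their ratio approximates the average speedup over the
	   base clock since the counters were last reset. */
	if (!cpu_rdmsr(drv, 0xE7, &mperf) && !cpu_rdmsr(drv, 0xE8, &aperf) && mperf != 0)
		printf("average APERF/MPERF ratio: %.3f\n", (double) aperf / (double) mperf);
	cpu_msr_driver_close(drv);
}

+static 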
int get_amd_multipliers(struct msr_driver_t* handle, struct cpu_id_t *id, + struct internal_id_info_t *internal, + uint32_t pstate, uint64_t *multiplier) +{ + int err; + static int clock = 0; + uint64_t CpuFid, CpuDid, CpuDidLSD; + double divisor; + + if (pstate < MSR_PSTATE_0 || MSR_PSTATE_7 < pstate) + return 1; + + switch (id->ext_family) { + case 0x11: + /* BKDG 11h, page 236 + MSRC001_00[6B:64][8:6] is CpuDid + MSRC001_00[6B:64][5:0] is CpuFid + CPU COF is ((100 MHz * (CpuFid + 08h)) / (2^CpuDid)) */ + err = cpu_rdmsr_range(handle, pstate, 8, 6, &CpuDid); + err += cpu_rdmsr_range(handle, pstate, 5, 0, &CpuFid); + *multiplier = (uint64_t) ((CpuFid + 0x8) / (1ull << CpuDid)); + break; + case 0x12: + /* BKDG 12h, page 469 + MSRC001_00[6B:64][8:4] is CpuFid + MSRC001_00[6B:64][3:0] is CpuDid + CPU COF is (100MHz * (CpuFid + 10h) / (divisor specified by CpuDid)) */ + err = cpu_rdmsr_range(handle, pstate, 8, 4, &CpuFid); + err += cpu_rdmsr_range(handle, pstate, 3, 0, &CpuDid); + if (CpuDid == 0x0) + divisor = 1; + else if (CpuDid == 0x1) + divisor = 1.5; + else if (CpuDid == 0x2) + divisor = 2; + else if (CpuDid == 0x3) + divisor = 3; + else if (CpuDid == 0x4) + divisor = 4; + else if (CpuDid == 0x5) + divisor = 6; + else if (CpuDid == 0x6) + divisor = 8; + else if (CpuDid == 0x7) + divisor = 12; + else if (CpuDid == 0x8) + divisor = 16; + else + divisor = 0; + + if (divisor > 0) + *multiplier = (uint64_t) ((CpuFid + 0x10) / divisor); + else + err++; + break; + case 0x14: + /* BKDG 14h, page 430 + MSRC001_00[6B:64][8:4] is CpuDidMSD + MSRC001_00[6B:64][3:0] is CpuDidLSD + PLL COF is (100 MHz * (D18F3xD4[MainPllOpFreqId] + 10h)) + Divisor is (CpuDidMSD + (CpuDidLSD * 0.25) + 1) + CPU COF is (main PLL frequency specified by D18F3xD4[MainPllOpFreqId]) / (core clock divisor specified by CpuDidMSD and CpuDidLSD) */ + err = cpu_rdmsr_range(handle, pstate, 8, 4, &CpuDid); + err += cpu_rdmsr_range(handle, pstate, 3, 0, &CpuDidLSD); + if (clock == 0) + clock = cpu_clock_measure(100, 1) + 5; // Fake round + *multiplier = (uint64_t) ((clock / 100 + 0x10) / (CpuDid + CpuDidLSD * 0.25 + 1)); + break; + case 0x10: + /* BKDG 10h, page 429 + MSRC001_00[6B:64][8:6] is CpuDid + MSRC001_00[6B:64][5:0] is CpuFid + CPU COF is (100 MHz * (CpuFid + 10h) / (2^CpuDid)) */ + case 0x15: + /* BKDG 15h, page 570/580/635/692 (00h-0Fh/10h-1Fh/30h-3Fh/60h-6Fh) + MSRC001_00[6B:64][8:6] is CpuDid + MSRC001_00[6B:64][5:0] is CpuFid + CoreCOF is (100 * (MSRC001_00[6B:64][CpuFid] + 10h) / (2^MSRC001_00[6B:64][CpuDid])) */ + case 0x16: + /* BKDG 16h, page 549/611 (00h-0Fh/30h-3Fh) + MSRC001_00[6B:64][8:6] is CpuDid + MSRC001_00[6B:64][5:0] is CpuFid + CoreCOF is (100 * (MSRC001_00[6B:64][CpuFid] + 10h) / (2^MSRC001_00[6B:64][CpuDid])) */ + err = cpu_rdmsr_range(handle, pstate, 8, 6, &CpuDid); + err += cpu_rdmsr_range(handle, pstate, 5, 0, &CpuFid); + *multiplier = (uint64_t) ((CpuFid + 0x10) / (1ull << CpuDid)); + break; + default: + err = 1; + break; + } + + return err; +} + +static double get_info_min_multiplier(struct msr_driver_t* handle, struct cpu_id_t *id, + struct internal_id_info_t *internal) +{ + int err; + uint64_t reg; + + if(id->vendor == VENDOR_INTEL) { + /* Refer links above + Table 35-12. MSRs in Next Generation Intel Atom Processors Based on the Goldmont Microarchitecture + Table 35-13. MSRs in Processors Based on Intel® Microarchitecture Code Name Nehalem + Table 35-18. MSRs Supported by Intel® Processors based on Intel® microarchitecture code name Sandy Bridge (Contd.) + Table 35-23. 
Additional MSRs Supported by 3rd Generation Intel® Core™ Processors (based on Intel® microarchitecture code name Ivy Bridge) + Table 35-24. MSRs Supported by Intel® Xeon® Processors E5 v2 Product Family (based on Ivy Bridge-E microarchitecture) + Table 35-27. Additional MSRs Supported by Processors based on the Haswell or Haswell-E microarchitectures + Table 35-34. Additional MSRs Common to Intel® Xeon® Processor D and Intel Xeon Processors E5 v4 Family Based on the Broadwell Microarchitecture + Table 35-40. Selected MSRs Supported by Next Generation Intel® Xeon Phi™ Processors with DisplayFamily_DisplayModel Signature 06_57H + MSR_PLATFORM_INFO[47:40] is Maximum Efficiency Ratio + Maximum Efficiency Ratio is the minimum ratio that the processor can operate at */ + err = cpu_rdmsr_range(handle, MSR_PLATFORM_INFO, 47, 40, &reg); + if (!err) return (double) reg; + } + else if(id->vendor == VENDOR_AMD) { + /* Refer links above + MSRC001_0061[6:4] is PstateMaxVal + PstateMaxVal is the lowest-performance non-boosted P-state */ + err = cpu_rdmsr_range(handle, MSR_PSTATE_L, 6, 4, &reg); + err += get_amd_multipliers(handle, id, internal, MSR_PSTATE_0 + (uint32_t) reg, &reg); + if (!err) return (double) reg; + } + + return (double) CPU_INVALID_VALUE / 100; +} + +static double get_info_cur_multiplier(struct msr_driver_t* handle, struct cpu_id_t *id, + struct internal_id_info_t *internal) +{ + int err; + uint64_t reg; + + if(id->vendor == VENDOR_INTEL && internal->code.intel == PENTIUM) { + err = cpu_rdmsr(handle, MSR_EBL_CR_POWERON, &reg); + if (!err) return (double) ((reg>>22) & 0x1f); + } + else if(id->vendor == VENDOR_INTEL && internal->code.intel != PENTIUM) { + /* Refer links above + Table 35-2. IA-32 Architectural MSRs (Contd.) + IA32_PERF_STATUS[15:0] is Current performance State Value + [7:0] is 0x0, [15:8] looks like current ratio */ + err = cpu_rdmsr_range(handle, IA32_PERF_STATUS, 15, 8, &reg); + if (!err) return (double) reg; + } + else if(id->vendor == VENDOR_AMD) { + /* Refer links above + MSRC001_0063[2:0] is CurPstate */ + err = cpu_rdmsr_range(handle, MSR_PSTATE_S, 2, 0, &reg); + err += get_amd_multipliers(handle, id, internal, MSR_PSTATE_0 + (uint32_t) reg, &reg); + if (!err) return (double) reg; + } + + return (double) CPU_INVALID_VALUE / 100; +} + +static double get_info_max_multiplier(struct msr_driver_t* handle, struct cpu_id_t *id, + struct internal_id_info_t *internal) +{ + int err; + uint64_t reg; + + if(id->vendor == VENDOR_INTEL && internal->code.intel == PENTIUM) { + err = cpu_rdmsr(handle, IA32_PERF_STATUS, &reg); + if (!err) return (double) ((reg >> 40) & 0x1f); + } + else if(id->vendor == VENDOR_INTEL && internal->code.intel != PENTIUM) { + /* Refer links above + Table 35-10. Specific MSRs Supported by Intel® Atom™ Processor C2000 Series with CPUID Signature 06_4DH + Table 35-12. MSRs in Next Generation Intel Atom Processors Based on the Goldmont Microarchitecture (Contd.) + Table 35-13. MSRs in Processors Based on Intel® Microarchitecture Code Name Nehalem (Contd.) + Table 35-14. Additional MSRs in Intel® Xeon® Processor 5500 and 3400 Series + Table 35-16. Additional MSRs Supported by Intel Processors (Based on Intel® Microarchitecture Code Name Westmere) + Table 35-19. MSRs Supported by 2nd Generation Intel® Core™ Processors (Intel® microarchitecture code name Sandy Bridge) + Table 35-21. Selected MSRs Supported by Intel® Xeon® Processors E5 Family (based on Sandy Bridge microarchitecture) + Table 35-28. 
MSRs Supported by 4th Generation Intel® Core™ Processors (Haswell microarchitecture) (Contd.) + Table 35-30. Additional MSRs Supported by Intel® Xeon® Processor E5 v3 Family + Table 35-33. Additional MSRs Supported by Intel® Core™ M Processors and 5th Generation Intel® Core™ Processors + Table 35-34. Additional MSRs Common to Intel® Xeon® Processor D and Intel Xeon Processors E5 v4 Family Based on the Broadwell Microarchitecture + Table 35-37. Additional MSRs Supported by 6th Generation Intel® Core™ Processors Based on Skylake Microarchitecture + Table 35-40. Selected MSRs Supported by Next Generation Intel® Xeon Phi™ Processors with DisplayFamily_DisplayModel Signature 06_57H + MSR_TURBO_RATIO_LIMIT[7:0] is Maximum Ratio Limit for 1C */ + err = cpu_rdmsr_range(handle, MSR_TURBO_RATIO_LIMIT, 7, 0, &reg); + if (!err) return (double) reg; + } + else if(id->vendor == VENDOR_AMD) { + /* Refer links above + MSRC001_0064 is Pb0 + Pb0 is the highest-performance boosted P-state */ + err = get_amd_multipliers(handle, id, internal, MSR_PSTATE_0, &reg); + if (!err) return (double) reg; + } + + return (double) CPU_INVALID_VALUE / 100; +} + +static int get_info_temperature(struct msr_driver_t* handle, struct cpu_id_t *id, + struct internal_id_info_t *internal) +{ + int err; + uint64_t DigitalReadout, ReadingValid, TemperatureTarget; + + if(id->vendor == VENDOR_INTEL) { + /* Refer links above + Table 35-2. IA-32 Architectural MSRs + IA32_THERM_STATUS[22:16] is Digital Readout + IA32_THERM_STATUS[31] is Reading Valid + + Table 35-6. MSRs Common to the Silvermont Microarchitecture and Newer Microarchitectures for Intel® Atom + Table 35-13. MSRs in Processors Based on Intel® Microarchitecture Code Name Nehalem (Contd.) + Table 35-18. MSRs Supported by Intel® Processors based on Intel® microarchitecture code name Sandy Bridge (Contd.) + Table 35-24. MSRs Supported by Intel® Xeon® Processors E5 v2 Product Family (based on Ivy Bridge-E microarchitecture) (Contd.) + Table 35-34. Additional MSRs Common to Intel® Xeon® Processor D and Intel Xeon Processors E5 v4 Family Based on the Broadwell Microarchitecture + Table 35-40. Selected MSRs Supported by Next Generation Intel® Xeon Phi™ Processors with DisplayFamily_DisplayModel Signature 06_57H + MSR_TEMPERATURE_TARGET[23:16] is Temperature Target */ + err = cpu_rdmsr_range(handle, IA32_THERM_STATUS, 22, 16, &DigitalReadout); + err += cpu_rdmsr_range(handle, IA32_THERM_STATUS, 31, 31, &ReadingValid); + err += cpu_rdmsr_range(handle, MSR_TEMPERATURE_TARGET, 23, 16, &TemperatureTarget); + if(!err && ReadingValid) return (int) (TemperatureTarget - DigitalReadout); + } + + return CPU_INVALID_VALUE; +} + +static double get_info_voltage(struct msr_driver_t* handle, struct cpu_id_t *id, + struct internal_id_info_t *internal) +{ + int err; + uint64_t reg, CpuVid; + + if(id->vendor == VENDOR_INTEL) { + /* Refer links above + Table 35-18. MSRs Supported by Intel® Processors based on Intel® microarchitecture code name Sandy Bridge (Contd.) + MSR_PERF_STATUS[47:32] is Core Voltage + P-state core voltage can be computed by MSR_PERF_STATUS[37:32] * (float) 1/(2^13).
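	   (the code below reads the full [47:32] field and multiplies by 2^-13, i.e. the value is in units of 1/8192 V)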
*/ + err = cpu_rdmsr_range(handle, MSR_PERF_STATUS, 47, 32, &reg); + if (!err) return (double) reg / (1 << 13); + } + else if(id->vendor == VENDOR_AMD) { + /* Refer links above + MSRC001_00[6B:64][15:9] is CpuVid + MSRC001_0063[2:0] is P-state Status + 2.4.1.6.3 Serial VID (SVI) Encodings: voltage = 1.550V - 0.0125V * SviVid[6:0] */ + err = cpu_rdmsr_range(handle, MSR_PSTATE_S, 2, 0, &reg); + err += cpu_rdmsr_range(handle, MSR_PSTATE_0 + (uint32_t) reg, 15, 9, &CpuVid); + if (!err && MSR_PSTATE_0 + (uint32_t) reg <= MSR_PSTATE_7) return 1.550 - 0.0125 * CpuVid; + } + + return (double) CPU_INVALID_VALUE / 100; +} + +static double get_info_bus_clock(struct msr_driver_t* handle, struct cpu_id_t *id, + struct internal_id_info_t *internal) +{ + int err; + static int clock = 0; + uint64_t reg; + + if(clock == 0) + clock = cpu_clock_measure(100, 1); + + if(id->vendor == VENDOR_INTEL) { + /* Refer links above + Table 35-12. MSRs in Next Generation Intel Atom Processors Based on the Goldmont Microarchitecture + Table 35-13. MSRs in Processors Based on Intel® Microarchitecture Code Name Nehalem + Table 35-18. MSRs Supported by Intel® Processors based on Intel® microarchitecture code name Sandy Bridge (Contd.) + Table 35-23. Additional MSRs Supported by 3rd Generation Intel® Core™ Processors (based on Intel® microarchitecture code name Ivy Bridge) + Table 35-24. MSRs Supported by Intel® Xeon® Processors E5 v2 Product Family (based on Ivy Bridge-E microarchitecture) + Table 35-27. Additional MSRs Supported by Processors based on the Haswell or Haswell-E microarchitectures + Table 35-40. Selected MSRs Supported by Next Generation Intel® Xeon Phi™ Processors with DisplayFamily_DisplayModel Signature 06_57H + MSR_PLATFORM_INFO[15:8] is Maximum Non-Turbo Ratio */ + err = cpu_rdmsr_range(handle, MSR_PLATFORM_INFO, 15, 8, &reg); + if (!err) return (double) clock / reg; + } + else if(id->vendor == VENDOR_AMD) { + /* Refer links above + MSRC001_0061[2:0] is CurPstateLimit + CurPstateLimit is the highest-performance non-boosted P-state */ + err = cpu_rdmsr_range(handle, MSR_PSTATE_L, 2, 0, &reg); + err += get_amd_multipliers(handle, id, internal, MSR_PSTATE_0 + (uint32_t) reg, &reg); + if (!err) return (double) clock / reg; + } + + return (double) CPU_INVALID_VALUE / 100; +} + +int cpu_rdmsr_range(struct msr_driver_t* handle, uint32_t msr_index, uint8_t highbit, + uint8_t lowbit, uint64_t* result) +{ + int err; + const uint8_t bits = highbit - lowbit + 1; + + if(highbit > 63 || lowbit > highbit) + return set_error(ERR_INVRANGE); + + err = cpu_rdmsr(handle, msr_index, result); + + if(!err && bits < 64) { + /* Show only part of register */ + *result >>= lowbit; + *result &= (1ULL << bits) - 1; + } + + return err; +} + +int cpu_msrinfo(struct msr_driver_t* handle, cpu_msrinfo_request_t which) +{ + struct cpu_raw_data_t raw; + static struct cpu_id_t id; + static struct internal_id_info_t internal; + internal.score = -1; + + if (handle == NULL) + return set_error(ERR_HANDLE); + + if (internal.score == -1) { + cpuid_get_raw_data(&raw); + cpu_ident_internal(&raw, &id, &internal); + } + + switch (which) { + case INFO_MPERF: + return perfmsr_measure(handle, IA32_MPERF); + case INFO_APERF: + return perfmsr_measure(handle, IA32_APERF); + case INFO_MIN_MULTIPLIER: + return (int) (get_info_min_multiplier(handle, &id, &internal) * 100); + case INFO_CUR_MULTIPLIER: + return (int) (get_info_cur_multiplier(handle, &id, &internal) * 100); + case INFO_MAX_MULTIPLIER: + return (int) (get_info_max_multiplier(handle, &id, &internal) * 100); + 
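	/* Multiplier, voltage and bus clock are returned scaled by 100 to fit the int return type; temperature is plain degrees Celsius. */ +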
case INFO_TEMPERATURE: + return get_info_temperature(handle, &id, &internal); + case INFO_THROTTLING: + return CPU_INVALID_VALUE; + case INFO_VOLTAGE: + return (int) (get_info_voltage(handle, &id, &internal) * 100); + case INFO_BCLK: + case INFO_BUS_CLOCK: + return (int) (get_info_bus_clock(handle, &id, &internal) * 100); + default: + return CPU_INVALID_VALUE; + } +} + +#endif // RDMSR_UNSUPPORTED_OS diff --git a/contrib/libcpuid/include/libcpuid/rdtsc.c b/contrib/libcpuid/include/libcpuid/rdtsc.c index 5930681561a..df4543946f5 100644 --- a/contrib/libcpuid/include/libcpuid/rdtsc.c +++ b/contrib/libcpuid/include/libcpuid/rdtsc.c @@ -226,6 +226,45 @@ int cpu_clock_measure(int millis, int quad_check) return (results[bi] + results[bj] + _zero) / 2; } + +static void adjust_march_ic_multiplier(const struct cpu_id_t* id, int* numerator, int* denom) +{ + /* + * for cpu_clock_by_ic: we need to know how many clocks a typical ADDPS instruction + * takes, when issued in rapid succession without dependencies. The whole idea of + * cpu_clock_by_ic was that this is easy to determine, at least it was back in 2010. Now + * it's getting progressively more hairy, but here are the current measurements: + * + * 1. For CPUs with 64-bit SSE units, ADDPS issue rate is 0.5 IPC (one insn in 2 clocks) + * 2. For CPUs with 128-bit SSE units, issue rate is exactly 1.0 IPC + * 3. For Bulldozer and later, it is 1.4 IPC (we multiply by 5/7) + * 4. For Skylake and later, it is 1.6 IPC (we multiply by 5/8) + */ + // + if (id->sse_size < 128) { + debugf(1, "SSE execution path is 64-bit\n"); + // on a CPU with half SSE unit length, SSE instructions execute at 0.5 IPC; + // the resulting value must be multiplied by 2: + *numerator = 2; + } else { + debugf(1, "SSE execution path is 128-bit\n"); + } + // + // Bulldozer or later: assume 1.4 IPC + if (id->vendor == VENDOR_AMD && id->ext_family >= 21) { + debugf(1, "cpu_clock_by_ic: Bulldozer (or later) detected, dividing result by 1.4\n"); + *numerator = 5; + *denom = 7; // multiply by 5/7, to divide by 1.4 + } + // + // Skylake or later: assume 1.6 IPC + if (id->vendor == VENDOR_INTEL && id->ext_model >= 94) { + debugf(1, "cpu_clock_by_ic: Skylake (or later) detected, dividing result by 1.6\n"); + *numerator = 5; + *denom = 8; // to divide by 1.6, multiply by 5/8 + } +} + int cpu_clock_by_ic(int millis, int runs) { int max_value = 0, cur_value, i, ri, cycles_inner, cycles_outer, c; @@ -237,21 +276,7 @@ int cpu_clock_by_ic(int millis, int runs) // if there aren't SSE instructions - we can't run the test at all if (!id || !id->flags[CPU_FEATURE_SSE]) return -1; // - if (id->sse_size < 128) { - debugf(1, "SSE execution path is 64-bit\n"); - // on a CPU with half SSE unit length, SSE instructions execute at 0.5 IPC; - // the resulting value must be multiplied by 2: - multiplier_numerator = 2; - } else { - debugf(1, "SSE execution path is 128-bit\n"); - } - // - // on a Bulldozer or later CPU, SSE instructions execute at 1.4 IPC, handle that as well: - if (id->vendor == VENDOR_AMD && id->ext_family >= 21) { - debugf(1, "cpu_clock_by_ic: Bulldozer (or later) detected, dividing result by 1.4\n"); - multiplier_numerator = 5; - multiplier_denom = 7; // multiply by 5/7, to divide by 1.4 - } + adjust_march_ic_multiplier(id, &multiplier_numerator, &multiplier_denom); // tl = millis * 125; // (*1000 / 8) cycles_inner = 128; diff --git a/contrib/libcpuid/include/libcpuid/recog_amd.c b/contrib/libcpuid/include/libcpuid/recog_amd.c index c5390b9fd24..2e6c8a9ead8 100644
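(Editor's aside, not part of the patch: a quick numeric check of the 5/7 and 5/8 corrections added above, using made-up figures. cpu_clock_by_ic effectively measures ADDPS instructions retired per microsecond; on a core that sustains more than 1.0 IPC this raw figure overstates MHz, so it is scaled by numerator/denom.)

#include <stdio.h>

int main(void)
{
	/* hypothetical: 5600 ADDPS/us measured on a 3.5 GHz Skylake core (1.6 IPC) */
	int insns_per_usec = 5600;
	int numerator = 5, denom = 8;  /* Skylake correction: multiply by 5/8, i.e. divide by 1.6 */
	printf("estimated clock: %d MHz\n", insns_per_usec * numerator / denom);  /* prints 3500 */
	return 0;
}

--- 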
a/contrib/libcpuid/include/libcpuid/recog_amd.c +++ b/contrib/libcpuid/include/libcpuid/recog_amd.c @@ -28,47 +28,16 @@ #include #include #include "libcpuid.h" -#include "recog_amd.h" #include "libcpuid_util.h" +#include "libcpuid_internal.h" +#include "recog_amd.h" -enum _amd_code_t { - NA, - NO_CODE, - OPTERON_GENERIC, - OPTERON_800, - ATHLON_XP, - ATHLON_XP_M, - ATHLON_XP_M_LV, - ATHLON, - ATHLON_MP, - MOBILE_ATHLON64, - ATHLON_FX, - DURON, - DURON_MP, - MOBILE_DURON, - MOBILE_SEMPRON, - OPTERON_SINGLE, - OPTERON_DUALCORE, - OPTERON_800_DUALCORE, - MOBILE_TURION, - ATHLON_64, - ATHLON_64_FX, - TURION_64, - TURION_X2, - SEMPRON, - M_SEMPRON, - SEMPRON_DUALCORE, - PHENOM, - PHENOM2, - ATHLON_64_X2, - ATHLON_64_X3, - ATHLON_64_X4, - FUSION_C, - FUSION_E, - FUSION_EA, - FUSION_Z, +const struct amd_code_str { amd_code_t code; char *str; } amd_code_str[] = { + #define CODE(x) { x, #x } + #define CODE2(x, y) CODE(x) + #include "amd_code_t.h" + #undef CODE }; -typedef enum _amd_code_t amd_code_t; const struct match_entry_t cpudb_amd[] = { { -1, -1, -1, -1, -1, 1, -1, -1, NO_CODE , 0, "Unknown AMD CPU" }, @@ -146,6 +115,7 @@ const struct match_entry_t cpudb_amd[] = { { 15, -1, -1, 15, -1, 1, 1024, -1, ATHLON_64 , 0, "Athlon 64 (1024K)" }, { 15, -1, -1, 15, -1, 1, -1, -1, ATHLON_FX , 0, "Athlon FX" }, { 15, -1, -1, 15, -1, 1, -1, -1, ATHLON_64_FX , 0, "Athlon 64 FX" }, + { 15, 3, -1, 15, 35, 2, -1, -1, ATHLON_64_FX , 0, "Athlon 64 FX X2 (Toledo)" }, { 15, -1, -1, 15, -1, 2, 512, -1, ATHLON_64_X2 , 0, "Athlon 64 X2 (512K)" }, { 15, -1, -1, 15, -1, 2, 1024, -1, ATHLON_64_X2 , 0, "Athlon 64 X2 (1024K)" }, { 15, -1, -1, 15, -1, 1, 512, -1, TURION_64 , 0, "Turion 64 (512K)" }, @@ -237,31 +207,65 @@ const struct match_entry_t cpudb_amd[] = { { 15, 4, -1, 16, 10, 4, 512, -1, PHENOM2 , 0, "Phenom II X4 (Zosma)" }, { 15, 4, -1, 16, 10, 6, 512, -1, PHENOM2 , 0, "Phenom II X6 (Thuban)" }, - { 15, 4, -1, 16, -1, 2, 1024, -1, ATHLON_64_X2 , 0, "Athlon II X2 (Regor)" }, - { 15, 4, -1, 16, -1, 2, 512, -1, ATHLON_64_X2 , 0, "Athlon II X2 (Regor)" }, + { 15, 6, -1, 16, 6, 2, 512, -1, ATHLON , 0, "Athlon II (Champlain)" }, + { 15, 6, -1, 16, 6, 2, 512, -1, ATHLON_64_X2 , 0, "Athlon II X2 (Regor)" }, + { 15, 6, -1, 16, 6, 2, 1024, -1, ATHLON_64_X2 , 0, "Athlon II X2 (Regor)" }, { 15, 5, -1, 16, 5, 3, 512, -1, ATHLON_64_X3 , 0, "Athlon II X3 (Rana)" }, { 15, 5, -1, 16, 5, 4, 512, -1, ATHLON_64_X4 , 0, "Athlon II X4 (Propus)" }, - /* 2011 CPUs with AMD fusion: */ - { 15, -1, -1, 20, 1, 1, 512, -1, FUSION_C , 0, "Brazos Ontario" }, - { 15, -1, -1, 20, 1, 2, 512, -1, FUSION_C , 0, "Brazos Ontario (Dual-core)" }, - { 15, -1, -1, 20, 1, 1, 512, -1, FUSION_E , 0, "Brazos Zacate" }, - { 15, -1, -1, 20, 1, 2, 512, -1, FUSION_E , 0, "Brazos Zacate (Dual-core)" }, - { 15, -1, -1, 20, 1, 1, 512, -1, FUSION_Z , 0, "Brazos Desna" }, - { 15, -1, -1, 18, 1, 2, 512, -1, FUSION_EA , 0, "Llano X2" }, - { 15, -1, -1, 18, 1, 2, 1024, -1, FUSION_EA , 0, "Llano X2" }, - { 15, -1, -1, 18, 1, 3, 1024, -1, FUSION_EA , 0, "Llano X3" }, - { 15, -1, -1, 18, 1, 4, 1024, -1, FUSION_EA , 0, "Llano X4" }, + + /* 2011 CPUs: K10 architecture: Llano */ + { 15, 1, -1, 18, 1, 2, 512, -1, FUSION_EA , 0, "Llano X2" }, + { 15, 1, -1, 18, 1, 2, 1024, -1, FUSION_EA , 0, "Llano X2" }, + { 15, 1, -1, 18, 1, 3, 1024, -1, FUSION_EA , 0, "Llano X3" }, + { 15, 1, -1, 18, 1, 4, 1024, -1, FUSION_EA , 0, "Llano X4" }, + /* 2011 CPUs: Bobcat architecture: Ontario, Zacate, Desna, Hondo */ + { 15, 2, -1, 20, -1, 1, 512, -1, FUSION_C , 0, "Brazos Ontario" }, + { 15, 2, 
-1, 20, -1, 2, 512, -1, FUSION_C , 0, "Brazos Ontario (Dual-core)" }, + { 15, 1, -1, 20, -1, 1, 512, -1, FUSION_E , 0, "Brazos Zacate" }, + { 15, 1, -1, 20, -1, 2, 512, -1, FUSION_E , 0, "Brazos Zacate (Dual-core)" }, + { 15, 2, -1, 20, -1, 2, 512, -1, FUSION_Z , 0, "Brazos Desna (Dual-core)" }, + /* 2012 CPUs: Piledriver architecture: Trinity and Richland */ + { 15, 0, -1, 21, 10, 2, 1024, -1, FUSION_A , 0, "Trinity X2" }, + { 15, 0, -1, 21, 16, 2, 1024, -1, FUSION_A , 0, "Trinity X2" }, + { 15, 0, -1, 21, 10, 4, 1024, -1, FUSION_A , 0, "Trinity X4" }, + { 15, 0, -1, 21, 16, 4, 1024, -1, FUSION_A , 0, "Trinity X4" }, + { 15, 3, -1, 21, 13, 2, 1024, -1, FUSION_A , 0, "Richland X2" }, + { 15, 3, -1, 21, 13, 4, 1024, -1, FUSION_A , 0, "Richland X4" }, + /* 2013 CPUs: Jaguar architecture: Kabini and Temash */ + { 15, 0, -1, 22, 0, 2, 1024, -1, FUSION_A , 0, "Kabini X2" }, + { 15, 0, -1, 22, 0, 4, 1024, -1, FUSION_A , 0, "Kabini X4" }, + /* 2014 CPUs: Steamroller architecture: Kaveri */ + { 15, 0, -1, 21, 30, 2, 1024, -1, FUSION_A , 0, "Kaveri X2" }, + { 15, 0, -1, 21, 30, 4, 1024, -1, FUSION_A , 0, "Kaveri X4" }, + /* 2014 CPUs: Puma architecture: Beema and Mullins */ + { 15, 0, -1, 22, 30, 2, 1024, -1, FUSION_E , 0, "Mullins X2" }, + { 15, 0, -1, 22, 30, 4, 1024, -1, FUSION_A , 0, "Mullins X4" }, + /* 2015 CPUs: Excavator architecture: Carrizo */ + { 15, 1, -1, 21, 60, 2, 1024, -1, FUSION_A , 0, "Carrizo X2" }, + { 15, 1, -1, 21, 60, 4, 1024, -1, FUSION_A , 0, "Carrizo X4" }, + /* 2015 CPUs: Steamroller architecture: Godavari */ + //TODO + /* 2016 CPUs: Excavator architecture: Bristol Ridge */ + //TODO /* Newer Opterons: */ - { 15, 9, -1, 16, 9, 8, -1, -1, OPTERON_GENERIC , 0, "Magny-Cours Opteron" }, + { 15, 9, -1, 22, 9, 8, -1, -1, OPTERON_GENERIC , 0, "Magny-Cours Opteron" }, /* Bulldozer CPUs: */ + { 15, -1, -1, 21, 0, 4, 2048, -1, NO_CODE , 0, "Bulldozer X2" }, { 15, -1, -1, 21, 1, 4, 2048, -1, NO_CODE , 0, "Bulldozer X2" }, { 15, -1, -1, 21, 1, 6, 2048, -1, NO_CODE , 0, "Bulldozer X3" }, { 15, -1, -1, 21, 1, 8, 2048, -1, NO_CODE , 0, "Bulldozer X4" }, + /* Piledriver CPUs: */ { 15, -1, -1, 21, 2, 4, 2048, -1, NO_CODE , 0, "Vishera X2" }, { 15, -1, -1, 21, 2, 6, 2048, -1, NO_CODE , 0, "Vishera X3" }, { 15, -1, -1, 21, 2, 8, 2048, -1, NO_CODE , 0, "Vishera X4" }, + /* Steamroller CPUs: */ + //TODO + /* Excavator CPUs: */ + //TODO + /* Zen CPUs: */ + //TODO }; @@ -287,6 +291,7 @@ static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data) { 12, CPU_FEATURE_SKINIT }, { 13, CPU_FEATURE_WDT }, { 16, CPU_FEATURE_FMA4 }, + { 21, CPU_FEATURE_TBM }, }; const struct feature_map_t matchtable_edx87[] = { { 0, CPU_FEATURE_TS }, @@ -307,7 +312,7 @@ static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data) match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data); match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data); } - if (raw->ext_cpuid[0][0] >= 0x80000001) + if (raw->ext_cpuid[0][0] >= 0x80000007) match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data); if (raw->ext_cpuid[0][0] >= 0x8000001a) { /* We have the extended info about SSE unit size */ @@ -320,7 +325,7 @@ static void decode_amd_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* d { int l3_result; const int assoc_table[16] = { - 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 92, 128, 255 + 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 255 }; unsigned n = raw->ext_cpuid[0][0]; @@ -442,24 
+447,36 @@ static amd_code_t decode_amd_codename_part1(const char *bs) if (match_pattern(bs, "Z-##")) return FUSION_Z; if (match_pattern(bs, "E#-####") || match_pattern(bs, "A#-####")) return FUSION_EA; - return NO_CODE; + return (amd_code_t) NO_CODE; } -static void decode_amd_codename(struct cpu_raw_data_t* raw, struct cpu_id_t* data) +static void decode_amd_codename(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal) { amd_code_t code = decode_amd_codename_part1(data->brand_str); - + int i = 0; + char* code_str = NULL; + for (i = 0; i < COUNT_OF(amd_code_str); i++) { + if (code == amd_code_str[i].code) { + code_str = amd_code_str[i].str; + break; + } + } if (code == ATHLON_64_X2 && data->l2_cache < 512) code = SEMPRON_DUALCORE; - match_cpu_codename(cpudb_amd, COUNT_OF(cpudb_amd), data, code, 0); + if (code_str) + debugf(2, "Detected AMD brand code: %d (%s)\n", code, code_str); + else + debugf(2, "Detected AMD brand code: %d\n", code); + internal->code.amd = code; + internal->score = match_cpu_codename(cpudb_amd, COUNT_OF(cpudb_amd), data, code, 0); } -int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data) +int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal) { load_amd_features(raw, data); decode_amd_cache_info(raw, data); decode_amd_number_of_cores(raw, data); - decode_amd_codename(raw, data); + decode_amd_codename(raw, data, internal); return 0; } diff --git a/contrib/libcpuid/include/libcpuid/recog_amd.h b/contrib/libcpuid/include/libcpuid/recog_amd.h index e0f3b61f727..34e89598397 100644 --- a/contrib/libcpuid/include/libcpuid/recog_amd.h +++ b/contrib/libcpuid/include/libcpuid/recog_amd.h @@ -26,7 +26,7 @@ #ifndef __RECOG_AMD_H__ #define __RECOG_AMD_H__ -int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data); +int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal); void cpuid_get_list_amd(struct cpu_list_t* list); #endif /* __RECOG_AMD_H__ */ diff --git a/contrib/libcpuid/include/libcpuid/recog_intel.c b/contrib/libcpuid/include/libcpuid/recog_intel.c index 625b2777f51..2ffc41c8a15 100644 --- a/contrib/libcpuid/include/libcpuid/recog_intel.c +++ b/contrib/libcpuid/include/libcpuid/recog_intel.c @@ -26,61 +26,16 @@ #include #include #include "libcpuid.h" -#include "recog_intel.h" #include "libcpuid_util.h" +#include "libcpuid_internal.h" +#include "recog_intel.h" - -enum _intel_code_t { - NA, - NO_CODE, - PENTIUM = 10, - MOBILE_PENTIUM, - - XEON = 20, - XEON_IRWIN, - XEONMP, - XEON_POTOMAC, - XEON_I7, - XEON_GAINESTOWN, - XEON_WESTMERE, - - MOBILE_PENTIUM_M = 30, - CELERON, - MOBILE_CELERON, - NOT_CELERON, - - - CORE_SOLO = 40, - MOBILE_CORE_SOLO, - CORE_DUO, - MOBILE_CORE_DUO, - - WOLFDALE = 50, - MEROM, - PENRYN, - QUAD_CORE, - DUAL_CORE_HT, - QUAD_CORE_HT, - MORE_THAN_QUADCORE, - PENTIUM_D, - - ATOM = 60, - ATOM_SILVERTHORNE, - ATOM_DIAMONDVILLE, - ATOM_PINEVIEW, - ATOM_CEDARVIEW, - - CORE_I3 = 70, - CORE_I5, - CORE_I7, - CORE_IVY3, /* 22nm Core-iX */ - CORE_IVY5, - CORE_IVY7, - CORE_HASWELL3, /* 22nm Core-iX, Haswell */ - CORE_HASWELL5, - CORE_HASWELL7, +const struct intel_bcode_str { intel_code_t code; char *str; } intel_bcode_str[] = { + #define CODE(x) { x, #x } + #define CODE2(x, y) CODE(x) + #include "intel_code_t.h" + #undef CODE }; -typedef enum _intel_code_t intel_code_t; enum _intel_model_t { UNKNOWN = -1, @@ -134,12 +89,12 @@ const struct match_entry_t cpudb_intel[] = { { 6, 5, -1, -1, 
-1, 1, -1, -1, MOBILE_PENTIUM , 0, "Mobile Pentium II (Tonga)"}, + { 6, 6, -1, -1, -1, 1, -1, -1, NO_CODE , 0, "Pentium II (Dixon)" }, + - { 6, 3, -1, -1, -1, 1, -1, -1, XEON , 0, "P-II Xeon" }, - { 6, 5, -1, -1, -1, 1, -1, -1, XEON , 0, "P-II Xeon" }, - { 6, 6, -1, -1, -1, 1, -1, -1, XEON , 0, "P-II Xeon" }, + { 6, 3, -1, -1, -1, 1, -1, -1, XEON , 0, "P-II Xeon (Klamath)" }, + { 6, 5, -1, -1, -1, 1, -1, -1, XEON , 0, "P-II Xeon (Drake)" }, + { 6, 6, -1, -1, -1, 1, -1, -1, XEON , 0, "P-II Xeon (Dixon)" }, - { 6, 5, -1, -1, -1, 1, -1, -1, CELERON , 0, "P-II Celeron (no L2)" }, - { 6, 6, -1, -1, -1, 1, -1, -1, CELERON , 0, "P-II Celeron (128K)" }, + { 6, 5, -1, -1, -1, 1, -1, -1, CELERON , 0, "P-II Celeron (Covington)" }, + { 6, 6, -1, -1, -1, 1, -1, -1, CELERON , 0, "P-II Celeron (Mendocino)" }, /* -------------------------------------------------- */ @@ -148,15 +103,15 @@ const struct match_entry_t cpudb_intel[] = { { 6, 10, -1, -1, -1, 1, -1, -1, NO_CODE , 0, "Pentium III (Coppermine)"}, { 6, 11, -1, -1, -1, 1, -1, -1, NO_CODE , 0, "Pentium III (Tualatin)" }, - { 6, 7, -1, -1, -1, 1, -1, -1, XEON , 0, "P-III Xeon" }, - { 6, 8, -1, -1, -1, 1, -1, -1, XEON , 0, "P-III Xeon" }, - { 6, 10, -1, -1, -1, 1, -1, -1, XEON , 0, "P-III Xeon" }, - { 6, 11, -1, -1, -1, 1, -1, -1, XEON , 0, "P-III Xeon" }, + { 6, 7, -1, -1, -1, 1, -1, -1, XEON , 0, "P-III Xeon (Tanner)" }, + { 6, 8, -1, -1, -1, 1, -1, -1, XEON , 0, "P-III Xeon (Cascades)" }, + { 6, 10, -1, -1, -1, 1, -1, -1, XEON , 0, "P-III Xeon (Cascades)" }, + { 6, 11, -1, -1, -1, 1, -1, -1, XEON , 0, "P-III Xeon (Tualatin)" }, - { 6, 7, -1, -1, -1, 1, -1, -1, CELERON , 0, "P-III Celeron" }, - { 6, 8, -1, -1, -1, 1, -1, -1, CELERON , 0, "P-III Celeron" }, - { 6, 10, -1, -1, -1, 1, -1, -1, CELERON , 0, "P-III Celeron" }, - { 6, 11, -1, -1, -1, 1, -1, -1, CELERON , 0, "P-III Celeron" }, + { 6, 7, -1, -1, -1, 1, -1, -1, CELERON , 0, "P-III Celeron (Katmai)" }, + { 6, 8, -1, -1, -1, 1, -1, -1, CELERON , 0, "P-III Celeron (Coppermine)" }, + { 6, 10, -1, -1, -1, 1, -1, -1, CELERON , 0, "P-III Celeron (Coppermine)" }, + { 6, 11, -1, -1, -1, 1, -1, -1, CELERON , 0, "P-III Celeron (Tualatin)" }, /* Netburst based (Pentium 4 and later) classic P4s */ @@ -190,17 +145,17 @@ const struct match_entry_t cpudb_intel[] = { { 15, 6, -1, 15, -1, 1, -1, -1, XEON , 0, "Xeon (Dempsey)" }, /* Pentium Ds */ - { 15, 4, 4, 15, -1, 1, -1, -1, NO_CODE , 0, "Pentium D" }, - { 15, 4, -1, 15, -1, 1, -1, -1, PENTIUM_D , 0, "Pentium D" }, - { 15, 4, 7, 15, -1, 1, -1, -1, NO_CODE , 0, "Pentium D" }, - { 15, 6, -1, 15, -1, 1, -1, -1, PENTIUM_D , 0, "Pentium D" }, + { 15, 4, 4, 15, -1, 1, -1, -1, NO_CODE , 0, "Pentium D (Smithfield)" }, + { 15, 4, -1, 15, -1, 1, -1, -1, PENTIUM_D , 0, "Pentium D (Smithfield)" }, + { 15, 4, 7, 15, -1, 1, -1, -1, NO_CODE , 0, "Pentium D (Smithfield)" }, + { 15, 6, -1, 15, -1, 1, -1, -1, PENTIUM_D , 0, "Pentium D (Presler)" }, /* Celeron and Celeron Ds */ - { 15, 1, -1, 15, -1, 1, -1, -1, CELERON , 0, "P-4 Celeron (128K)" }, - { 15, 2, -1, 15, -1, 1, -1, -1, CELERON , 0, "P-4 Celeron (128K)" }, - { 15, 3, -1, 15, -1, 1, -1, -1, CELERON , 0, "Celeron D" }, - { 15, 4, -1, 15, -1, 1, -1, -1, CELERON , 0, "Celeron D" }, - { 15, 6, -1, 15, -1, 1, -1, -1, CELERON , 0, "Celeron D" }, + { 15, 1, -1, 15, -1, 1, -1, -1, CELERON , 0, "P-4 Celeron (Willamette)" }, + { 15, 2, -1, 15, -1, 1, -1, -1, CELERON , 0, "P-4 Celeron (Northwood)" }, + { 15, 3, -1, 15, -1, 1, -1, -1, CELERON , 0, "P-4 Celeron D (Prescott)" }, + { 15, 4, -1, 15, -1, 1, -1, -1, CELERON , 0, "P-4 
Celeron D (Prescott)" }, + { 15, 6, -1, 15, -1, 1, -1, -1, CELERON , 0, "P-4 Celeron D (Cedar Mill)" }, /* -------------------------------------------------- */ /* Intel Core microarchitecture - P6-based */ @@ -214,7 +169,7 @@ const struct match_entry_t cpudb_intel[] = { { 6, 13, -1, -1, -1, 1, -1, -1, MOBILE_PENTIUM_M , 0, "Pentium M (Dothan)" }, { 6, 13, -1, -1, -1, 1, -1, -1, CELERON , 0, "Celeron M" }, - { 6, 12, -1, -1, -1, -1, -1, -1, ATOM , 0, "Unknown Atom" }, + { 6, 12, -1, -1, -1, -1, -1, -1, ATOM_UNKNOWN , 0, "Unknown Atom" }, { 6, 12, -1, -1, -1, -1, -1, -1, ATOM_DIAMONDVILLE , 0, "Atom (Diamondville)" }, { 6, 12, -1, -1, -1, -1, -1, -1, ATOM_SILVERTHORNE , 0, "Atom (Silverthorne)" }, { 6, 12, -1, -1, -1, -1, -1, -1, ATOM_CEDARVIEW , 0, "Atom (Cedarview)" }, @@ -260,57 +215,13 @@ const struct match_entry_t cpudb_intel[] = { { 6, 7, -1, -1, 23, 2, 3072, -1, WOLFDALE , 0, "Wolfdale (Core 2 Duo) 3M" }, { 6, 7, -1, -1, 23, 2, 6144, -1, WOLFDALE , 0, "Wolfdale (Core 2 Duo) 6M" }, { 6, 7, -1, -1, 23, 1, -1, -1, MOBILE_CORE_DUO , 0, "Penryn (Core 2 Duo)" }, + { 6, 7, -1, -1, 23, 2, 1024, -1, PENRYN , 0, "Penryn (Core 2 Duo)" }, { 6, 7, -1, -1, 23, 2, 3072, -1, PENRYN , 0, "Penryn (Core 2 Duo) 3M" }, { 6, 7, -1, -1, 23, 2, 6144, -1, PENRYN , 0, "Penryn (Core 2 Duo) 6M" }, { 6, 7, -1, -1, 23, 4, 2048, -1, QUAD_CORE , 0, "Yorkfield (Core 2 Quad) 2M"}, { 6, 7, -1, -1, 23, 4, 3072, -1, QUAD_CORE , 0, "Yorkfield (Core 2 Quad) 3M"}, { 6, 7, -1, -1, 23, 4, 6144, -1, QUAD_CORE , 0, "Yorkfield (Core 2 Quad) 6M"}, - { 6, 5, -1, -1, 37, 2, -1, -1, NO_CODE , 0, "Unknown Core i3/i5 CPU" }, - { 6, 5, -1, -1, 37, 2, -1, 4096, CORE_I7 , 0, "Arrandale (Core i7)" }, - { 6, 5, -1, -1, 37, 2, -1, 3072, CORE_I5 , 0, "Arrandale (Core i5)" }, - { 6, 5, -1, -1, 37, 2, -1, 4096, CORE_I5 , 0, "Clarkdale (Core i5)" }, - { 6, 5, -1, -1, 37, 4, -1, 8192, CORE_I5 , 0, "Lynnfield (Core i5)" }, - { 6, 5, -1, -1, 37, 2, -1, 3072, CORE_I3 , 0, "Arrandale (Core i3)" }, - { 6, 5, -1, -1, 37, 2, -1, 4096, CORE_I3 , 0, "Clarkdale (Core i3)" }, - - { 6, 10, -1, -1, 42, -1, -1, -1, NO_CODE , 0, "Unknown Sandy Bridge" }, - { 6, 10, -1, -1, 42, -1, -1, -1, CORE_I7 , 0, "Sandy Bridge i7" }, - { 6, 10, -1, -1, 42, 4, -1, -1, CORE_I7 , 0, "Sandy Bridge (Core i7)" }, - { 6, 10, -1, -1, 42, 4, -1, -1, CORE_I5 , 0, "Sandy Bridge (Core i5)" }, - { 6, 10, -1, -1, 42, 2, -1, -1, CORE_I3 , 0, "Sandy Bridge (Core i3)" }, - { 6, 10, -1, -1, 42, 1, -1, -1, CELERON , 0, "Celeron (Sandy Bridge)" }, - { 6, 10, -1, -1, 42, 2, -1, -1, CELERON , 0, "Celeron (Sandy Bridge)" }, - { 6, 10, -1, -1, 42, 2, -1, -1, PENTIUM , 0, "Pentium (Sandy Bridge)" }, - - { 6, 10, -1, -1, 26, 1, -1, -1, CORE_I7 , 0, "Intel Core i7" }, - { 6, 10, -1, -1, 26, 4, -1, -1, CORE_I7 , 0, "Bloomfield (Core i7)" }, - { 6, 10, -1, -1, 30, 4, -1, -1, CORE_I7 , 0, "Lynnfield (Core i7)" }, - { 6, 10, -1, -1, 26, 4, -1, -1, XEON_I7 , 0, "Xeon (Bloomfield)" }, - - { 6, 10, -1, -1, 26, 4, -1, -1, XEON_GAINESTOWN , 0, "Xeon (Gainestown)" }, - { 6, 10, -1, -1, 26, 4, -1, 4096, XEON_GAINESTOWN , 0, "Xeon (Gainestown) 4M" }, - { 6, 10, -1, -1, 26, 4, -1, 8192, XEON_GAINESTOWN , 0, "Xeon (Gainestown) 8M" }, - - { 6, 12, -1, -1, 44, -1, -1, -1, XEON_WESTMERE , 0, "Xeon (Westmere-based)" }, - { 6, 12, -1, -1, 44, 4, -1, 12288, CORE_I7 , 0, "Gulftown (Core i7)" }, - { 6, 12, -1, -1, 44, -1, -1, 12288, XEON_WESTMERE , 0, "Xeon (Gulftown)" }, - - { 6, 13, -1, -1, 45, -1, -1, -1, XEON , 0, "Xeon (Sandy Bridge)" }, - - { 6, 13, -1, -1, 45, -1, -1, -1, CORE_I7 , 0, "Sandy Bridge-E (Core i7)" 
}, - { 6, 13, -1, -1, 45, -1, -1, -1, CORE_I5 , 0, "Sandy Bridge-E (Core i5)" }, - { 6, 13, -1, -1, 45, -1, -1, -1, CORE_I3 , 0, "Sandy Bridge-E (Core i3)" }, - - { 6, 10, -1, -1, 58, 4, -1, -1, CORE_IVY7 , 0, "Ivy Bridge (Core i7)" }, - { 6, 10, -1, -1, 58, 4, -1, -1, CORE_IVY5 , 0, "Ivy Bridge (Core i5)" }, - { 6, 10, -1, -1, 58, 2, -1, -1, CORE_IVY3 , 0, "Ivy Bridge (Core i3)" }, - - { 6, 12, -1, -1, 60, 4, -1, -1, CORE_HASWELL7 , 0, "Haswell (Core i7)" }, - { 6, 12, -1, -1, 60, 4, -1, -1, CORE_HASWELL5 , 0, "Haswell (Core i5)" }, - { 6, 12, -1, -1, 60, 2, -1, -1, CORE_HASWELL3 , 0, "Haswell (Core i3)" }, - - /* Core microarchitecture-based Xeons: */ { 6, 14, -1, -1, 14, 1, -1, -1, XEON , 0, "Xeon LV" }, { 6, 15, -1, -1, 15, 2, 4096, -1, XEON , _5100, "Xeon (Woodcrest)" }, @@ -324,6 +235,85 @@ const struct match_entry_t cpudb_intel[] = { { 6, 7, -1, -1, 23, 4, 3072, -1, XEON , X3300, "Xeon (Yorkfield/3M)" }, { 6, 7, -1, -1, 23, 4, 6144, -1, XEON , X3300, "Xeon (Yorkfield/6M)" }, + /* Nehalem CPUs (45nm): */ + { 6, 10, -1, -1, 26, 4, -1, -1, XEON_GAINESTOWN , 0, "Gainestown (Xeon)" }, + { 6, 10, -1, -1, 26, 4, -1, 4096, XEON_GAINESTOWN , 0, "Gainestown 4M (Xeon)" }, + { 6, 10, -1, -1, 26, 4, -1, 8192, XEON_GAINESTOWN , 0, "Gainestown 8M (Xeon)" }, + { 6, 10, -1, -1, 26, 4, -1, -1, XEON_I7 , 0, "Bloomfield (Xeon)" }, + { 6, 10, -1, -1, 26, 4, -1, -1, CORE_I7 , 0, "Bloomfield (Core i7)" }, + { 6, 10, -1, -1, 30, 4, -1, -1, CORE_I7 , 0, "Lynnfield (Core i7)" }, + { 6, 5, -1, -1, 37, 4, -1, 8192, CORE_I5 , 0, "Lynnfield (Core i5)" }, + + /* Westmere CPUs (32nm): */ + { 6, 5, -1, -1, 37, 2, -1, -1, NO_CODE , 0, "Unknown Core i3/i5" }, + { 6, 12, -1, -1, 44, -1, -1, -1, XEON_WESTMERE , 0, "Westmere (Xeon)" }, + { 6, 12, -1, -1, 44, -1, -1, 12288, XEON_WESTMERE , 0, "Gulftown (Xeon)" }, + { 6, 12, -1, -1, 44, 4, -1, 12288, CORE_I7 , 0, "Gulftown (Core i7)" }, + { 6, 5, -1, -1, 37, 2, -1, 4096, CORE_I5 , 0, "Clarkdale (Core i5)" }, + { 6, 5, -1, -1, 37, 2, -1, 4096, CORE_I3 , 0, "Clarkdale (Core i3)" }, + { 6, 5, -1, -1, 37, 2, -1, -1, PENTIUM , 0, "Arrandale" }, + { 6, 5, -1, -1, 37, 2, -1, 4096, CORE_I7 , 0, "Arrandale (Core i7)" }, + { 6, 5, -1, -1, 37, 2, -1, 3072, CORE_I5 , 0, "Arrandale (Core i5)" }, + { 6, 5, -1, -1, 37, 2, -1, 3072, CORE_I3 , 0, "Arrandale (Core i3)" }, + + /* Sandy Bridge CPUs (32nm): */ + { 6, 10, -1, -1, 42, -1, -1, -1, NO_CODE , 0, "Unknown Sandy Bridge" }, + { 6, 10, -1, -1, 42, -1, -1, -1, XEON , 0, "Sandy Bridge (Xeon)" }, + { 6, 10, -1, -1, 42, -1, -1, -1, CORE_I7 , 0, "Sandy Bridge (Core i7)" }, + { 6, 10, -1, -1, 42, 4, -1, -1, CORE_I7 , 0, "Sandy Bridge (Core i7)" }, + { 6, 10, -1, -1, 42, 4, -1, -1, CORE_I5 , 0, "Sandy Bridge (Core i5)" }, + { 6, 10, -1, -1, 42, 2, -1, -1, CORE_I3 , 0, "Sandy Bridge (Core i3)" }, + { 6, 10, -1, -1, 42, 2, -1, -1, PENTIUM , 0, "Sandy Bridge (Pentium)" }, + { 6, 10, -1, -1, 42, 1, -1, -1, CELERON , 0, "Sandy Bridge (Celeron)" }, + { 6, 10, -1, -1, 42, 2, -1, -1, CELERON , 0, "Sandy Bridge (Celeron)" }, + { 6, 13, -1, -1, 45, -1, -1, -1, NO_CODE , 0, "Sandy Bridge-E" }, + { 6, 13, -1, -1, 45, -1, -1, -1, XEON , 0, "Sandy Bridge-E (Xeon)" }, + + /* Ivy Bridge CPUs (22nm): */ + { 6, 10, -1, -1, 58, -1, -1, -1, XEON , 0, "Ivy Bridge (Xeon)" }, + { 6, 10, -1, -1, 58, 4, -1, -1, CORE_IVY7 , 0, "Ivy Bridge (Core i7)" }, + { 6, 10, -1, -1, 58, 4, -1, -1, CORE_IVY5 , 0, "Ivy Bridge (Core i5)" }, + { 6, 10, -1, -1, 58, 2, -1, -1, CORE_IVY3 , 0, "Ivy Bridge (Core i3)" }, + { 6, 10, -1, -1, 58, 2, -1, -1, PENTIUM , 0, "Ivy Bridge 
(Pentium)" }, + { 6, 10, -1, -1, 58, 1, -1, -1, CELERON , 0, "Ivy Bridge (Celeron)" }, + { 6, 10, -1, -1, 58, 2, -1, -1, CELERON , 0, "Ivy Bridge (Celeron)" }, + { 6, 14, -1, -1, 62, -1, -1, -1, NO_CODE , 0, "Ivy Bridge-E" }, + + /* Haswell CPUs (22nm): */ + { 6, 12, -1, -1, 60, -1, -1, -1, XEON , 0, "Haswell (Xeon)" }, + { 6, 12, -1, -1, 60, 4, -1, -1, CORE_HASWELL7 , 0, "Haswell (Core i7)" }, + { 6, 5, -1, -1, 69, 4, -1, -1, CORE_HASWELL7 , 0, "Haswell (Core i7)" }, + { 6, 12, -1, -1, 60, 4, -1, -1, CORE_HASWELL5 , 0, "Haswell (Core i5)" }, + { 6, 5, -1, -1, 69, 4, -1, -1, CORE_HASWELL5 , 0, "Haswell (Core i5)" }, + { 6, 12, -1, -1, 60, 2, -1, -1, CORE_HASWELL3 , 0, "Haswell (Core i3)" }, + { 6, 5, -1, -1, 69, 2, -1, -1, CORE_HASWELL3 , 0, "Haswell (Core i3)" }, + { 6, 12, -1, -1, 60, 2, -1, -1, PENTIUM , 0, "Haswell (Pentium)" }, + { 6, 12, -1, -1, 60, 2, -1, -1, CELERON , 0, "Haswell (Celeron)" }, + { 6, 12, -1, -1, 60, 1, -1, -1, CELERON , 0, "Haswell (Celeron)" }, + { 6, 15, -1, -1, 63, -1, -1, -1, NO_CODE , 0, "Haswell-E" }, + + /* Broadwell CPUs (14nm): */ + { 6, 7, -1, -1, 71, 4, -1, -1, CORE_BROADWELL7 , 0, "Broadwell (Core i7)" }, + { 6, 7, -1, -1, 71, 4, -1, -1, CORE_BROADWELL5 , 0, "Broadwell (Core i5)" }, + { 6, 13, -1, -1, 61, 4, -1, -1, CORE_BROADWELL7 , 0, "Broadwell-U (Core i7)" }, + { 6, 13, -1, -1, 61, 2, -1, -1, CORE_BROADWELL7 , 0, "Broadwell-U (Core i7)" }, + { 6, 13, -1, -1, 61, 2, -1, -1, CORE_BROADWELL5 , 0, "Broadwell-U (Core i5)" }, + { 6, 13, -1, -1, 61, 2, -1, -1, CORE_BROADWELL3 , 0, "Broadwell-U (Core i3)" }, + { 6, 13, -1, -1, 61, 2, -1, -1, PENTIUM , 0, "Broadwell-U (Pentium)" }, + { 6, 13, -1, -1, 61, 2, -1, -1, CELERON , 0, "Broadwell-U (Celeron)" }, + { 6, 13, -1, -1, 61, 2, -1, -1, NA , 0, "Broadwell-U (Core M)" }, + { 6, 15, -1, -1, 79, 2, -1, -1, CORE_BROADWELL3 , 0, "Broadwell-E (Core i3)" }, + { 6, 15, -1, -1, 79, 2, -1, -1, CORE_BROADWELL5 , 0, "Broadwell-E (Core i5)" }, + { 6, 15, -1, -1, 79, 4, -1, -1, CORE_BROADWELL5 , 0, "Broadwell-E (Core i5)" }, + { 6, 15, -1, -1, 79, 2, -1, -1, CORE_BROADWELL7 , 0, "Broadwell-E (Core i7)" }, + { 6, 15, -1, -1, 79, 4, -1, -1, CORE_BROADWELL7 , 0, "Broadwell-E (Core i7)" }, + + /* Skylake CPUs (14nm): */ + { 6, 14, -1, -1, 94, 4, -1, -1, CORE_BROADWELL7 , 0, "Skylake (Core i7)" }, + { 6, 14, -1, -1, 94, 4, -1, -1, CORE_BROADWELL5 , 0, "Skylake (Core i5)" }, + { 6, 14, -1, -1, 94, 4, -1, -1, CORE_BROADWELL3 , 0, "Skylake (Core i3)" }, + { 6, 14, -1, -1, 94, 4, -1, -1, PENTIUM , 0, "Skylake (Pentium)" }, + /* Itaniums */ { 7, -1, -1, -1, -1, 1, -1, -1, NO_CODE , 0, "Itanium" }, { 15, -1, -1, 16, -1, 1, -1, -1, NO_CODE , 0, "Itanium 2" }, @@ -343,7 +333,6 @@ static void load_intel_features(struct cpu_raw_data_t* raw, struct cpu_id_t* dat { 31, CPU_FEATURE_PBE }, }; const struct feature_map_t matchtable_ecx1[] = { - { 1, CPU_FEATURE_PCLMUL }, { 2, CPU_FEATURE_DTS64 }, { 4, CPU_FEATURE_DS_CPL }, { 5, CPU_FEATURE_VMX }, @@ -354,37 +343,45 @@ static void load_intel_features(struct cpu_raw_data_t* raw, struct cpu_id_t* dat { 14, CPU_FEATURE_XTPR }, { 15, CPU_FEATURE_PDCM }, { 18, CPU_FEATURE_DCA }, - { 20, CPU_FEATURE_SSE4_2 }, - { 22, CPU_FEATURE_MOVBE }, - { 25, CPU_FEATURE_AES }, - { 26, CPU_FEATURE_XSAVE }, - { 27, CPU_FEATURE_OSXSAVE }, - { 28, CPU_FEATURE_AVX }, - { 30, CPU_FEATURE_RDRAND }, - }; - const struct feature_map_t matchtable_ebx7[] = { - { 5, CPU_FEATURE_AVX2 }, + { 21, CPU_FEATURE_X2APIC }, }; const struct feature_map_t matchtable_edx81[] = { { 20, CPU_FEATURE_XD }, }; + const struct feature_map_t 
matchtable_ebx7[] = { + { 2, CPU_FEATURE_SGX }, + { 4, CPU_FEATURE_HLE }, + { 11, CPU_FEATURE_RTM }, + { 16, CPU_FEATURE_AVX512F }, + { 17, CPU_FEATURE_AVX512DQ }, + { 18, CPU_FEATURE_RDSEED }, + { 19, CPU_FEATURE_ADX }, + { 26, CPU_FEATURE_AVX512PF }, + { 27, CPU_FEATURE_AVX512ER }, + { 28, CPU_FEATURE_AVX512CD }, + { 29, CPU_FEATURE_SHA_NI }, + { 30, CPU_FEATURE_AVX512BW }, + { 31, CPU_FEATURE_AVX512VL }, + }; if (raw->basic_cpuid[0][0] >= 1) { match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data); match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data); } - if (raw->basic_cpuid[0][0] >= 7) { - match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data); - } if (raw->ext_cpuid[0][0] >= 1) { match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data); } + // detect TSX/AVX512: + if (raw->basic_cpuid[0][0] >= 7) { + match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data); + } } enum _cache_type_t { L1I, L1D, L2, - L3 + L3, + L4 }; typedef enum _cache_type_t cache_type_t; @@ -409,6 +406,12 @@ static void check_case(uint8_t on, cache_type_t cache, int size, int assoc, int data->l3_cache = size; data->l3_assoc = assoc; data->l3_cacheline = linesize; + break; + case L4: + data->l4_cache = size; + data->l4_assoc = assoc; + data->l4_cacheline = linesize; + break; default: break; } @@ -529,6 +532,8 @@ static void decode_intel_deterministic_cache_info(struct cpu_raw_data_t* raw, type = L2; else if (level == 3 && typenumber == 3) type = L3; + else if (level == 4 && typenumber == 3) + type = L4; else { warnf("deterministic_cache: unknown level/typenumber combo (%d/%d), cannot\n", level, typenumber); warnf("deterministic_cache: recognize cache type\n"); @@ -561,8 +566,12 @@ static int decode_intel_extended_topology(struct cpu_raw_data_t* raw, } } if (num_smt == -1 || num_core == -1) return 0; - data->num_cores = num_core / num_smt; data->num_logical_cpus = num_core; + data->num_cores = num_core / num_smt; + // make sure num_cores is at least 1. In VMs, the CPUID instruction + // is rigged and may give nonsensical results, but we should at least + // avoid outputs like data->num_cores == 0. + if (data->num_cores <= 0) data->num_cores = 1; return 1; } @@ -587,7 +596,9 @@ static void decode_intel_number_of_cores(struct cpu_raw_data_t* raw, data->num_logical_cpus = logical_cpus; } else { data->num_cores = 1; - data->num_logical_cpus = (logical_cpus >= 2 ? logical_cpus : 2); + data->num_logical_cpus = (logical_cpus >= 1 ? 
logical_cpus : 1); + if (data->num_logical_cpus == 1) + data->flags[CPU_FEATURE_HT] = 0; } } else { data->num_cores = data->num_logical_cpus = 1; @@ -596,7 +607,7 @@ static void decode_intel_number_of_cores(struct cpu_raw_data_t* raw, static intel_code_t get_brand_code(struct cpu_id_t* data) { - intel_code_t code = NO_CODE; + intel_code_t code = (intel_code_t) NO_CODE; int i, need_matchtable = 1, core_ix_base = 0; const char* bs = data->brand_str; const char* s; @@ -607,15 +618,16 @@ static intel_code_t get_brand_code(struct cpu_id_t* data) { CELERON, "Celeron" }, { MOBILE_PENTIUM_M, "Pentium(R) M" }, { CORE_SOLO, "Pentium(R) Dual CPU" }, + { CORE_SOLO, "Pentium(R) Dual-Core" }, { PENTIUM_D, "Pentium(R) D" }, { PENTIUM, "Pentium" }, { CORE_SOLO, "Genuine Intel(R) CPU" }, { CORE_SOLO, "Intel(R) Core(TM)" }, { ATOM_DIAMONDVILLE, "Atom(TM) CPU [N ][23]## " }, { ATOM_SILVERTHORNE, "Atom(TM) CPU Z" }, - { ATOM_PINEVIEW, "Atom(TM) CPU D" }, - { ATOM_CEDARVIEW, "Atom(TM) CPU N####" }, - { ATOM, "Atom(TM) CPU" }, + { ATOM_PINEVIEW, "Atom(TM) CPU [ND][45]## " }, + { ATOM_CEDARVIEW, "Atom(TM) CPU [ND]#### " }, + { ATOM_UNKNOWN, "Atom(TM) CPU" }, }; if (strstr(bs, "Mobile")) { @@ -637,6 +649,9 @@ static intel_code_t get_brand_code(struct cpu_id_t* data) /* if it has FMA, then it is at least Haswell */ if (data->flags[CPU_FEATURE_FMA3]) core_ix_base = CORE_HASWELL3; + /* if it has RTM, then it is at least a Broadwell-E or Skylake */ + if (data->flags[CPU_FEATURE_RDSEED]) + core_ix_base = CORE_BROADWELL3; switch (bs[i + 9]) { case '3': code = core_ix_base + 0; break; @@ -659,7 +674,8 @@ static intel_code_t get_brand_code(struct cpu_id_t* data) code = XEON_GAINESTOWN; else if (match_pattern(bs, "[ELXW]56##")) code = XEON_WESTMERE; - else if (data->l3_cache > 0) + else if (data->l3_cache > 0 && data->family == 16) + /* restrict by family, since later Xeons also have L3 ... 
*/ code = XEON_IRWIN; } if (code == XEONMP && data->l3_cache > 0) @@ -770,8 +786,75 @@ static intel_model_t get_model_code(struct cpu_id_t* data) #undef HAVE } -int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data) +static void decode_intel_sgx_features(const struct cpu_raw_data_t* raw, struct cpu_id_t* data) { + struct cpu_epc_t epc; + int i; + + if (raw->basic_cpuid[0][0] < 0x12) return; // no 12h leaf + if (raw->basic_cpuid[0x12][0] == 0) return; // no sub-leafs available, probably it's disabled by BIOS + + // decode sub-leaf 0: + if (raw->basic_cpuid[0x12][0] & 1) data->sgx.flags[INTEL_SGX1] = 1; + if (raw->basic_cpuid[0x12][0] & 2) data->sgx.flags[INTEL_SGX2] = 1; + if (data->sgx.flags[INTEL_SGX1] || data->sgx.flags[INTEL_SGX2]) + data->sgx.present = 1; + data->sgx.misc_select = raw->basic_cpuid[0x12][1]; + data->sgx.max_enclave_32bit = (raw->basic_cpuid[0x12][3] ) & 0xff; + data->sgx.max_enclave_64bit = (raw->basic_cpuid[0x12][3] >> 8) & 0xff; + + // decode sub-leaf 1: + data->sgx.secs_attributes = raw->intel_fn12h[1][0] | (((uint64_t) raw->intel_fn12h[1][1]) << 32); + data->sgx.secs_xfrm = raw->intel_fn12h[1][2] | (((uint64_t) raw->intel_fn12h[1][3]) << 32); + + // decode higher-order subleafs, whenever present: + data->sgx.num_epc_sections = -1; + for (i = 0; i < 1000000; i++) { + epc = cpuid_get_epc(i, raw); + if (epc.length == 0) { + debugf(2, "SGX: epc section request for %d returned null, no more EPC sections.\n", i); + data->sgx.num_epc_sections = i; + break; + } + } + if (data->sgx.num_epc_sections == -1) { + debugf(1, "SGX: warning: seems to be infinitude of EPC sections.\n"); + data->sgx.num_epc_sections = 1000000; + } +} + +struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw) +{ + uint32_t regs[4]; + struct cpu_epc_t retval = {0, 0}; + if (raw && index < MAX_INTELFN12H_LEVEL - 2) { + // this was queried already, use the data: + memcpy(regs, raw->intel_fn12h[2 + index], sizeof(regs)); + } else { + // query this ourselves: + regs[0] = 0x12; + regs[2] = 2 + index; + regs[1] = regs[3] = 0; + cpu_exec_cpuid_ext(regs); + } + + // decode values: + if ((regs[0] & 0xf) == 0x1) { + retval.start_addr |= (regs[0] & 0xfffff000); // bits [12, 32) -> bits [12, 32) + retval.start_addr |= ((uint64_t) (regs[1] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52) + retval.length |= (regs[2] & 0xfffff000); // bits [12, 32) -> bits [12, 32) + retval.length |= ((uint64_t) (regs[3] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52) + } + return retval; +} + +int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal) +{ + intel_code_t brand_code; + intel_model_t model_code; + int i; + char* brand_code_str = NULL; + load_intel_features(raw, data); if (raw->basic_cpuid[0][0] >= 4) { /* Deterministic way is preferred, being more generic */ @@ -780,8 +863,31 @@ int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data) decode_intel_oldstyle_cache_info(raw, data); } decode_intel_number_of_cores(raw, data); - match_cpu_codename(cpudb_intel, COUNT_OF(cpudb_intel), data, - get_brand_code(data), get_model_code(data)); + + brand_code = get_brand_code(data); + model_code = get_model_code(data); + for (i = 0; i < COUNT_OF(intel_bcode_str); i++) { + if (brand_code == intel_bcode_str[i].code) { + brand_code_str = intel_bcode_str[i].str; + break; + } + } + if (brand_code_str) + debugf(2, "Detected Intel brand code: %d (%s)\n", brand_code, brand_code_str); + else + debugf(2, "Detected Intel 
brand code: %d\n", brand_code); + debugf(2, "Detected Intel model code: %d\n", model_code); + + internal->code.intel = brand_code; + + if (data->flags[CPU_FEATURE_SGX]) { + debugf(2, "SGX seems to be present, decoding...\n"); + // if SGX is indicated by the CPU, verify its presence: + decode_intel_sgx_features(raw, data); + } + + internal->score = match_cpu_codename(cpudb_intel, COUNT_OF(cpudb_intel), data, + brand_code, model_code); return 0; } diff --git a/contrib/libcpuid/include/libcpuid/recog_intel.h b/contrib/libcpuid/include/libcpuid/recog_intel.h index 67279373271..b99c783bf43 100644 --- a/contrib/libcpuid/include/libcpuid/recog_intel.h +++ b/contrib/libcpuid/include/libcpuid/recog_intel.h @@ -26,7 +26,7 @@ #ifndef __RECOG_INTEL_H__ #define __RECOG_INTEL_H__ -int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data); +int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal); void cpuid_get_list_intel(struct cpu_list_t* list); #endif /*__RECOG_INTEL_H__*/ From 6859ed8f36bed51f888db381217240345758c58a Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 22 Jun 2018 01:02:18 +0300 Subject: [PATCH 125/151] release: configurable pbuilder autoupdate time --- release | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/release b/release index 5623df2a712..c6c1133669b 100755 --- a/release +++ b/release @@ -19,6 +19,7 @@ source "./release_lib.sh" DEB_CC=${DEB_CC:=gcc-7} DEB_CXX=${DEB_CXX:=g++-7} +PBUILDER_AUTOUPDATE=${PBUILDER_AUTOUPDATE=4320} CONTROL=debian/control DEBUILD_NOSIGN_OPTIONS="-us -uc" @@ -118,10 +119,12 @@ else sudo --preserve-env bash -x pbuilder create --configfile $CURDIR/debian/.pbuilderrc $PBUILDER_OPT fi - # Update every 3 days (60*24*3 minutes) - if [[ -n "$PBUILDER_UPDATE" ]] || test `find "$BASETGZ" -mmin +4320` ; then - echo Updating base system $BASETGZ - sudo --preserve-env pbuilder update --configfile $CURDIR/debian/.pbuilderrc $PBUILDER_OPT + if [ "$PBUILDER_AUTOUPDATE" -gt 0 ]; then + # Update every 3 days (60*24*3 minutes) + if [[ -n "$PBUILDER_UPDATE" ]] || test `find "$BASETGZ" -mmin +$PBUILDER_AUTOUPDATE` ; then + echo Updating base system $BASETGZ + sudo --preserve-env pbuilder update --configfile $CURDIR/debian/.pbuilderrc $PBUILDER_OPT + fi fi pdebuild --configfile $CURDIR/debian/.pbuilderrc -- $PBUILDER_OPT From f4e72792a562356af88ff2daac6864c63b04c0f4 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 20 Jun 2018 16:09:09 +0800 Subject: [PATCH 126/151] ISSUES-2553 add offset parameter to numbers --- .../Storages/System/StorageSystemNumbers.cpp | 7 ++-- .../Storages/System/StorageSystemNumbers.h | 3 +- .../TableFunctions/TableFunctionNumbers.cpp | 35 ++++++++++--------- .../src/TableFunctions/TableFunctionNumbers.h | 3 ++ ...00647_select_numbers_with_offset.reference | 1 + .../00647_select_numbers_with_offset.sql | 2 ++ 6 files changed, 29 insertions(+), 22 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00647_select_numbers_with_offset.reference create mode 100644 dbms/tests/queries/0_stateless/00647_select_numbers_with_offset.sql diff --git a/dbms/src/Storages/System/StorageSystemNumbers.cpp b/dbms/src/Storages/System/StorageSystemNumbers.cpp index 49a3c7ca513..367a2639870 100644 --- a/dbms/src/Storages/System/StorageSystemNumbers.cpp +++ b/dbms/src/Storages/System/StorageSystemNumbers.cpp @@ -5,7 +5,6 @@ #include #include - namespace DB { @@ -44,8 +43,8 @@ private: }; -StorageSystemNumbers::StorageSystemNumbers(const std::string & name_, 
bool multithreaded_, size_t limit_) - : name(name_), multithreaded(multithreaded_), limit(limit_) +StorageSystemNumbers::StorageSystemNumbers(const std::string & name_, bool multithreaded_, size_t limit_, size_t offset_) + : name(name_), multithreaded(multithreaded_), limit(limit_), offset(offset_) { setColumns(ColumnsDescription({{"number", std::make_shared()}})); } @@ -74,7 +73,7 @@ BlockInputStreams StorageSystemNumbers::read( BlockInputStreams res(num_streams); for (size_t i = 0; i < num_streams; ++i) { - res[i] = std::make_shared(max_block_size, i * max_block_size, num_streams * max_block_size); + res[i] = std::make_shared(max_block_size, offset + i * max_block_size, num_streams * max_block_size); if (limit) /// This formula is how to split 'limit' elements to 'num_streams' chunks almost uniformly. res[i] = std::make_shared(res[i], limit * (i + 1) / num_streams - limit * i / num_streams, 0); diff --git a/dbms/src/Storages/System/StorageSystemNumbers.h b/dbms/src/Storages/System/StorageSystemNumbers.h index e0769fb3968..30c68cbd853 100644 --- a/dbms/src/Storages/System/StorageSystemNumbers.h +++ b/dbms/src/Storages/System/StorageSystemNumbers.h @@ -37,10 +37,11 @@ private: const std::string name; bool multithreaded; size_t limit; + size_t offset; protected: /// limit: 0 means unlimited. - StorageSystemNumbers(const std::string & name_, bool multithreaded_, size_t limit_ = 0); + StorageSystemNumbers(const std::string & name_, bool multithreaded_, size_t limit_ = 0, size_t offset_ = 0); }; } diff --git a/dbms/src/TableFunctions/TableFunctionNumbers.cpp b/dbms/src/TableFunctions/TableFunctionNumbers.cpp index 60136dccfab..1970a757b2d 100644 --- a/dbms/src/TableFunctions/TableFunctionNumbers.cpp +++ b/dbms/src/TableFunctions/TableFunctionNumbers.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -20,31 +19,33 @@ namespace ErrorCodes StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_function, const Context & context) const { - ASTs & args_func = typeid_cast(*ast_function).children; + if (const ASTFunction * function = typeid_cast(ast_function.get())) + { + auto arguments = function->arguments->children; - if (args_func.size() != 1) - throw Exception("Table function 'numbers' requires exactly one argument: amount of numbers.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (arguments.size() != 1 && arguments.size() != 2) + throw Exception("Table function 'numbers' requires 'length' or 'offset, length'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - ASTs & args = typeid_cast(*args_func.at(0)).children; - if (args.size() != 1) - throw Exception("Table function 'numbers' requires exactly one argument: amount of numbers.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + UInt64 offset = arguments.size() == 2 ? evaluateArgument(context, arguments[0]) : 0; + UInt64 length = arguments.size() == 2 ? 
evaluateArgument(context, arguments[1]) : evaluateArgument(context, arguments[0]);
 
-    args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context);
-
-    UInt64 limit = static_cast(*args[0]).value.safeGet();
-
-    auto res = StorageSystemNumbers::create(getName(), false, limit);
-    res->startup();
-    return res;
+        auto res = StorageSystemNumbers::create(getName(), false, length, offset);
+        res->startup();
+        return res;
+    }
+    throw Exception("Table function 'numbers' requires 'length' or 'offset, length'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 }
 
-
 void registerTableFunctionNumbers(TableFunctionFactory & factory)
 {
     factory.registerFunction();
 }
+
+UInt64 TableFunctionNumbers::evaluateArgument(const Context & context, ASTPtr & argument) const
+{
+    return static_cast(*evaluateConstantExpressionOrIdentifierAsLiteral(argument, context)).value.safeGet();
+}
+
 }
diff --git a/dbms/src/TableFunctions/TableFunctionNumbers.h b/dbms/src/TableFunctions/TableFunctionNumbers.h
index 6a3ab7f9090..ed060a6450a 100644
--- a/dbms/src/TableFunctions/TableFunctionNumbers.h
+++ b/dbms/src/TableFunctions/TableFunctionNumbers.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include
+#include
 
 
 namespace DB
@@ -17,6 +18,8 @@ public:
     std::string getName() const override { return name; }
 private:
     StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override;
+
+    UInt64 evaluateArgument(const Context & context, ASTPtr & argument) const;
 };
 
 
diff --git a/dbms/tests/queries/0_stateless/00647_select_numbers_with_offset.reference b/dbms/tests/queries/0_stateless/00647_select_numbers_with_offset.reference
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00647_select_numbers_with_offset.reference
@@ -0,0 +1 @@
+1
diff --git a/dbms/tests/queries/0_stateless/00647_select_numbers_with_offset.sql b/dbms/tests/queries/0_stateless/00647_select_numbers_with_offset.sql
new file mode 100644
index 00000000000..3cea011a45d
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00647_select_numbers_with_offset.sql
@@ -0,0 +1,2 @@
+SET max_rows_to_read = 1;
+SELECT * FROM numbers(1, 1);

From 971fddb2996e26b8458f484f1a2ed27d7983992a Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin
Date: Fri, 22 Jun 2018 13:43:35 +0300
Subject: [PATCH 127/151] add comments [#CLICKHOUSE-3747]

---
 dbms/src/Storages/MergeTree/ActiveDataPartSet.h        | 2 ++
 dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h
index d68e18a4f40..0ddd6beb31d 100644
--- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h
+++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h
@@ -43,6 +43,8 @@ public:
         return *this;
     }
 
+    /// Returns true if the part was actually added. If out_replaced_parts != nullptr, it will contain
+    /// parts that were replaced from the set by the newly added part.
     bool add(const String & name, Strings * out_replaced_parts = nullptr);
 
     bool remove(const MergeTreePartInfo & part_info)
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h
index 628bd0a9f52..2e642ad148c 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h
@@ -59,7 +59,8 @@ private:
     /// Protects the queue, future_parts and other queue state variables.
mutable std::mutex state_mutex; - /// A set of parts that should be on this replica according to the queue entries executed up to this point. + /// A set of parts that should be on this replica according to the queue entries that have been done + /// up to this point. The invariant holds: `virtual_parts` = `current_parts` + `queue`. /// Note: it can be different from the actual set of parts because the replica can decide to fetch /// a bigger part instead of the part mentioned in the log entry. ActiveDataPartSet current_parts; From b247635acf9ff56f59467cba990fb63103a62cc3 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Fri, 22 Jun 2018 13:46:07 +0300 Subject: [PATCH 128/151] disable unreliable test [#CLICKHOUSE-3758] --- .../{00638_remote_ssrf.sh => 00638_remote_ssrf.sh.disabled} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dbms/tests/queries/0_stateless/{00638_remote_ssrf.sh => 00638_remote_ssrf.sh.disabled} (100%) diff --git a/dbms/tests/queries/0_stateless/00638_remote_ssrf.sh b/dbms/tests/queries/0_stateless/00638_remote_ssrf.sh.disabled similarity index 100% rename from dbms/tests/queries/0_stateless/00638_remote_ssrf.sh rename to dbms/tests/queries/0_stateless/00638_remote_ssrf.sh.disabled From 7ce74d173055952f6d0ddae5443e20cd9795afc3 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Fri, 22 Jun 2018 18:29:51 +0300 Subject: [PATCH 129/151] ZooKeeper client: before the fix we waited 2x longer than necessary before the timeout --- dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 251235654b1..6bae7fdeb5f 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -834,7 +834,7 @@ void ZooKeeper::receiveThread() if (earliest_operation) throw Exception("Operation timeout (no response) for path: " + earliest_operation->request->getPath(), ZOPERATIONTIMEOUT); waited += max_wait; - if (waited > session_timeout.totalMicroseconds()) + if (waited >= session_timeout.totalMicroseconds()) throw Exception("Nothing is received in session timeout", ZOPERATIONTIMEOUT); } From 7ce4ebf1e1a5fefd3161b6f615eec0730d75ec34 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Fri, 22 Jun 2018 18:53:59 +0300 Subject: [PATCH 130/151] fix test (disable random component in CleanupThread sleep period) --- .../test_extreme_deduplication/configs/conf.d/merge_tree.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/tests/integration/test_extreme_deduplication/configs/conf.d/merge_tree.xml b/dbms/tests/integration/test_extreme_deduplication/configs/conf.d/merge_tree.xml index 4e89a5e0ad0..f6941123312 100644 --- a/dbms/tests/integration/test_extreme_deduplication/configs/conf.d/merge_tree.xml +++ b/dbms/tests/integration/test_extreme_deduplication/configs/conf.d/merge_tree.xml @@ -3,6 +3,7 @@ 999999999 1 1 + 0 1 From a21a9577f42b694b83c7d4777f7aff70ee327195 Mon Sep 17 00:00:00 2001 From: robot-metrika-test Date: Fri, 22 Jun 2018 21:10:11 +0300 Subject: [PATCH 131/151] Auto version update to [54387] --- dbms/cmake/version.cmake | 6 +++--- debian/changelog | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 9fc319fc3fa..b89e32968bb 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,7 +1,7 @@ # This strings autochanged from release_lib.sh: -set(VERSION_DESCRIBE v1.1.54386-testing) 
-set(VERSION_REVISION 54386)
-set(VERSION_GITHASH 9b285c91b001c35ab0b3b5a7352b51e095013b7d)
+set(VERSION_DESCRIBE v1.1.54387-testing)
+set(VERSION_REVISION 54387)
+set(VERSION_GITHASH 7ce4ebf1e1a5fefd3161b6f615eec0730d75ec34)
 # end of autochange
 
 set (VERSION_MAJOR 1)
diff --git a/debian/changelog b/debian/changelog
index a63626e5e4b..867c80c4dbb 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,5 +1,5 @@
-clickhouse (1.1.54386) unstable; urgency=low
+clickhouse (1.1.54387) unstable; urgency=low
 
   * Modified source code
 
- -- Fri, 01 Jun 2018 23:16:46 +0300
+ -- Fri, 22 Jun 2018 21:10:11 +0300

From 91c9906c889cd338bb4df2ccc570c8f189ae3c00 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 25 Jun 2018 13:48:11 +0300
Subject: [PATCH 132/151] Fixed reading from ReplacingMergeTree with an empty
 row set after prewhere. #2525

---
 dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp
index 8fcfdfe2d58..db8efcd162f 100644
--- a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp
+++ b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp
@@ -109,7 +109,8 @@ void ReplacingSortedBlockInputStream::merge(MutableColumns & merged_columns, std
     }
 
     /// We will write the data for the last primary key.
-    insertRow(merged_columns, merged_rows);
+    if (!selected_row.empty())
+        insertRow(merged_columns, merged_rows);
 
     finished = true;
 }

From 7c95d153745f988a39143fbacc9e90299f86aa77 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 25 Jun 2018 13:57:28 +0300
Subject: [PATCH 133/151] Added test. #2525

---
 .../00648_replacing_emtpy_set_from_prewhere.reference       | 0
 .../0_stateless/00648_replacing_emtpy_set_from_prewhere.sql | 5 +++++
 2 files changed, 5 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00648_replacing_emtpy_set_from_prewhere.reference
 create mode 100644 dbms/tests/queries/0_stateless/00648_replacing_emtpy_set_from_prewhere.sql

diff --git a/dbms/tests/queries/0_stateless/00648_replacing_emtpy_set_from_prewhere.reference b/dbms/tests/queries/0_stateless/00648_replacing_emtpy_set_from_prewhere.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/dbms/tests/queries/0_stateless/00648_replacing_emtpy_set_from_prewhere.sql b/dbms/tests/queries/0_stateless/00648_replacing_emtpy_set_from_prewhere.sql
new file mode 100644
index 00000000000..aba76207d1d
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00648_replacing_emtpy_set_from_prewhere.sql
@@ -0,0 +1,5 @@
+DROP TABLE IF EXISTS test.final_test;
+CREATE TABLE test.final_test (id String, version Date) ENGINE = ReplacingMergeTree(version, id, 8192);
+INSERT INTO test.final_test (id, version) VALUES ('2018-01-01', '2018-01-01');
+SELECT * FROM test.final_test FINAL PREWHERE id == '2018-01-02';
+DROP TABLE test.final_test;

From ea59cb0b057fdeaa8178ce26b21417e93c6f5f48 Mon Sep 17 00:00:00 2001
From: KochetovNicolai
Date: Mon, 25 Jun 2018 16:24:42 +0300
Subject: [PATCH 134/151] Update CHANGELOG.draft.md

---
 CHANGELOG.draft.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.draft.md b/CHANGELOG.draft.md
index ccef5afadf6..f59be82471f 100644
--- a/CHANGELOG.draft.md
+++ b/CHANGELOG.draft.md
@@ -1,11 +1,17 @@
-en:
+# en:
 
 ## Improvements:
 * `clickhouse-client`: option --ask-password for interactively ask for credentials #1044
 
+## Bug fixes:
+* Fixed a crash that occurred while reading from ReplacingMergeTree with prewhere condition which filters all rows. #2525
 
-ru:
+
+# ru:
 
 ## Улучшения:
 * `clickhouse-client`: опция --ask-password для интерактивного ввода пароля #1044
+
+## Исправление ошибок:
+* Исправлено падение при чтении из ReplacingMergeTree с условием в prewhere, фильтрующим все строки. #2525

From 6e273978c573fabf3e7511a5aba47655d55614c0 Mon Sep 17 00:00:00 2001
From: Max Vetrov
Date: Sun, 24 Jun 2018 23:48:16 +0200
Subject: [PATCH 135/151] Update mutator.cpp

Don't check the variable res twice.
---
 utils/compressor/mutator.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/utils/compressor/mutator.cpp b/utils/compressor/mutator.cpp
index 94c99807b9b..89a5d1d45c8 100644
--- a/utils/compressor/mutator.cpp
+++ b/utils/compressor/mutator.cpp
@@ -106,9 +106,11 @@ static void mutate(pcg64 & generator, void * src, size_t length)
             && isAlphaASCII(pos[2]))
         {
             auto res = rand(generator, 0, 3);
-            if (res == 2)
+            if (res == 2)
+            {
                 std::swap(pos[0], pos[1]);
-            if (res == 3)
+            }
+            else if (res == 3)
                 std::swap(pos[1], pos[2]);
 
             pos += 3;
@@ -124,7 +126,7 @@ static void mutate(pcg64 & generator, void * src, size_t length)
                 std::swap(pos[1], pos[2]);
                 std::swap(pos[0], pos[1]);
             }
-            if (res == 3)
+            else if (res == 3)
             {
                 std::swap(pos[3], pos[2]);
                 std::swap(pos[4], pos[3]);

From 49d524244f9d56a438b8a9806476052a889acdf0 Mon Sep 17 00:00:00 2001
From: zhang2014
Date: Sat, 23 Jun 2018 00:31:53 +0800
Subject: [PATCH 136/151] ISSUES-2533 add numbers table function document

---
 docs/en/table_functions/numbers.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/en/table_functions/numbers.md b/docs/en/table_functions/numbers.md
index 9b98d8747b6..4486fece3d1 100644
--- a/docs/en/table_functions/numbers.md
+++ b/docs/en/table_functions/numbers.md
@@ -1,13 +1,15 @@
 # numbers
 
 `numbers(N)` – Returns a table with the single 'number' column (UInt64) that contains integers from 0 to N-1.
+`numbers(N, M)` - Returns a table with the single 'number' column (UInt64) that contains integers from N to (N + M - 1).
 
-Similar to the `system.numbers` table, it can be used for testing and generating successive values.
+Similar to the `system.numbers` table, it can be used for testing and generating successive values; `numbers(N, M)` is more efficient than `system.numbers`.
 
-The following two queries are equivalent:
+The following queries are equivalent:
 
 ```sql
 SELECT * FROM numbers(10);
+SELECT * FROM numbers(0, 10);
 SELECT * FROM system.numbers LIMIT 10;
 ```

From 0eed697623afd251c1908073c4a2850a9bfc9cdc Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 26 Jun 2018 14:28:24 +0300
Subject: [PATCH 137/151] Fixed typo.
--- ...eference => 00648_replacing_empty_set_from_prewhere.reference} | 0 ...m_prewhere.sql => 00648_replacing_empty_set_from_prewhere.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename dbms/tests/queries/0_stateless/{00648_replacing_emtpy_set_from_prewhere.reference => 00648_replacing_empty_set_from_prewhere.reference} (100%) rename dbms/tests/queries/0_stateless/{00648_replacing_emtpy_set_from_prewhere.sql => 00648_replacing_empty_set_from_prewhere.sql} (100%) diff --git a/dbms/tests/queries/0_stateless/00648_replacing_emtpy_set_from_prewhere.reference b/dbms/tests/queries/0_stateless/00648_replacing_empty_set_from_prewhere.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00648_replacing_emtpy_set_from_prewhere.reference rename to dbms/tests/queries/0_stateless/00648_replacing_empty_set_from_prewhere.reference diff --git a/dbms/tests/queries/0_stateless/00648_replacing_emtpy_set_from_prewhere.sql b/dbms/tests/queries/0_stateless/00648_replacing_empty_set_from_prewhere.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00648_replacing_emtpy_set_from_prewhere.sql rename to dbms/tests/queries/0_stateless/00648_replacing_empty_set_from_prewhere.sql From 2ed5a7c2dd693a4f3d7c4b378cd8283ee57516f9 Mon Sep 17 00:00:00 2001 From: "Dmitry S..ky / skype: dvska-at-skype" Date: Tue, 26 Jun 2018 12:04:38 +0300 Subject: [PATCH 138/151] typo fixed --- docs/ru/table_functions/numbers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/table_functions/numbers.md b/docs/ru/table_functions/numbers.md index bd5d566f78e..62da54b8d2b 100644 --- a/docs/ru/table_functions/numbers.md +++ b/docs/ru/table_functions/numbers.md @@ -11,6 +11,6 @@ SELECT * FROM system.numbers LIMIT 10; ``` Примеры: ```sql --- генарация последовательности всех дат от 2010-01-01 до 2010-12-31 +-- генерация последовательности всех дат от 2010-01-01 до 2010-12-31 select toDate('2010-01-01') + number as d FROM numbers(365); ``` From 225539bac21529316581efdb9e66656c23d40cef Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 26 Jun 2018 08:26:45 +0300 Subject: [PATCH 139/151] Temporary fix for #2558 --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 56b4c993981..adaea133aee 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,7 @@ ## Technical info Developer guide for writing code for ClickHouse is published on official website alongside the usage and operations documentation: -https://clickhouse.yandex/docs/en/development/index.html +https://clickhouse.yandex/docs/en/development/architecture/ ## Legal info From 2b3282f18616de9ef3fe934681edcdb6e8fce24e Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Tue, 26 Jun 2018 16:50:36 +0300 Subject: [PATCH 140/151] update changelog up to v1.1.54387 tag --- CHANGELOG_RU.md | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index 09087467692..24c16c54535 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,12 +1,10 @@ -# ClickHouse release 1.1.5438x, 2018-06-xx +# ClickHouse release 1.1.54387, 2018-06-26 ## Новые возможности: -* Добавлена возможность вычислять аргументы функции `and` только там, где они нужны ([Анастасия Царькова](https://github.com/yandex/ClickHouse/pull/2272)) -* Добавлена возможность JIT-компиляции в нативный код некоторых выражений ([pyos](https://github.com/yandex/ClickHouse/pull/2277)). 
* Добавлена агрегатная функция `windowFunnel` ([sundy-li](https://github.com/yandex/ClickHouse/pull/2352)). * Добавлена возможность записи в таблицу с движком MySQL и соответствующую табличную функцию ([sundy-li](https://github.com/yandex/ClickHouse/pull/2294)). * Добавлена поддержка запроса `ALTER TABLE t DELETE WHERE` для реплицированных таблиц и таблица `system.mutations`. -* Добавлена поддержка запроса `ALTER TABLE t [REPLACE|ATTACH] PARTITION` для реплицированных таблиц. +* Добавлена поддержка запроса `ALTER TABLE t [REPLACE|ATTACH] PARTITION` для *MergeTree-таблиц. * Добавлена возможность интерактивного ввода пароля в `clickhouse-client`. * Добавлена возможность отправки логов сервера в syslog ([Александр Крашенинников](https://github.com/yandex/ClickHouse/pull/2459)). * Добавлено несколько новых `SYSTEM`-запросов для реплицированных таблиц (`RESTART REPLICAS`, `SYNC REPLICA`, `[STOP|START] [MERGES|FETCHES|REPLICATED SENDS|REPLICATION QUEUES]`). @@ -15,6 +13,13 @@ * Добавлена поддержка логирования в словарях с источником shared library ([Александр Сапин](https://github.com/yandex/ClickHouse/pull/2472)). * Добавлена поддержка произвольного разделителя в формате CSV ([Иван Жуков](https://github.com/yandex/ClickHouse/pull/2263)) * Добавлена настройка `date_time_input_format`. Если переключить эту настройку в значение `'best_effort'`, значения DateTime будут читаться в широком диапазоне форматов. +* Добавлена утилита `clickhouse-obfuscator` для обфускации данных. Пример использования: публикация данных, используемых в тестах производительности. +* Добавлена табличная функция `url()` и движок таблиц `URL` ([Александр Сапин](https://github.com/yandex/ClickHouse/pull/2501)). +* В табличной функции `numbers()` добавлена возможность указывать offset ([Winter Zhang](https://github.com/yandex/ClickHouse/pull/2535)). + +## Экспериментальные возможности: +* Добавлена возможность вычислять аргументы функции `and` только там, где они нужны ([Анастасия Царькова](https://github.com/yandex/ClickHouse/pull/2272)) +* Добавлена возможность JIT-компиляции в нативный код некоторых выражений ([pyos](https://github.com/yandex/ClickHouse/pull/2277)). ## Исправление ошибок: * Исправлена ошибка при чтении столбца-массива из Nested-структуры ([#2066](https://github.com/yandex/ClickHouse/issues/2066)). @@ -33,6 +38,8 @@ * Клиентская библиотека ZooKeeper теперь использует таймаут сессии, полученный от сервера. * Исправлен синтаксический разбор и форматирование оператора `CAST`. * Исправлен race condition при записи данных из движка `Kafka` в материализованные представления ([Yangkuan Liu](https://github.com/yandex/ClickHouse/pull/2448)). +* Исправлен выход из `clickhouse-client` в multiline-режиме ([#2510](https://github.com/yandex/ClickHouse/issues/2510)). +* Исправлена ошибка в клиентской библиотеке ZooKeeper, из-за которой ожидание ответа от сервера могло длиться дольше таймаута. ## Улучшения: * Сервер с реплицированными таблицами теперь может стартовать, даже если не сконфигурирован ZooKeeper. @@ -43,6 +50,8 @@ * Разрешены выражения вида `tuple IN (SELECT tuple)`, если типы кортежей совпадают. * Ускорен анализ запроса с большим числом JOIN-ов и подзапросов. * Исправлено несоответствие в значениях счётчиков событий `Query`, `SelectQuery`, `InsertQuery`. +* Добавлен chown директорий конфигов в конфигурационном файле systemd ([Михаил Ширяев](https://github.com/yandex/ClickHouse/pull/2421)). +* Улучшена производительность разжатия LZ4. 
## Изменения сборки: * Используемая версия библиотеки librdkafka обновлена до v0.11.4. @@ -50,9 +59,12 @@ * Добавлена возможность сборки компилятором gcc8. * Добавлена возможность сборки llvm из submodule. * Cmake теперь по умолчанию генерирует файлы для ninja (как при использовании `-G Ninja`). +* Добавлена возможность использования библиотеки libtinfo вместо libtermcap ([Георгий Кондратьев](https://github.com/yandex/ClickHouse/pull/2519)). +* Исправлен конфликт заголовочных файлов в Fedora Rawhide ([#2520](https://github.com/yandex/ClickHouse/issues/2520)). +* Добавлена возможность использования библиотеки libcpuid из системы, используемая версия библиотеки обновлена до 0.4.0. ## Обратно несовместимые изменения: -* Убран escaping в форматах `Vertical` и `Pretty*`. +* Убран escaping в форматах `Vertical` и `Pretty*`, удалён формат `VerticalRaw`. # ClickHouse release 1.1.54385, 2018-06-01 From 497c635260ff973f350c3712e0fc48870bef83d7 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Tue, 26 Jun 2018 20:21:05 +0300 Subject: [PATCH 141/151] Update CHANGELOG_RU.md --- CHANGELOG_RU.md | 58 ++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index 24c16c54535..e68c48b071d 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,67 +1,67 @@ # ClickHouse release 1.1.54387, 2018-06-26 ## Новые возможности: -* Добавлена агрегатная функция `windowFunnel` ([sundy-li](https://github.com/yandex/ClickHouse/pull/2352)). -* Добавлена возможность записи в таблицу с движком MySQL и соответствующую табличную функцию ([sundy-li](https://github.com/yandex/ClickHouse/pull/2294)). * Добавлена поддержка запроса `ALTER TABLE t DELETE WHERE` для реплицированных таблиц и таблица `system.mutations`. * Добавлена поддержка запроса `ALTER TABLE t [REPLACE|ATTACH] PARTITION` для *MergeTree-таблиц. +* Добавлена поддержка запроса `TRUNCATE TABLE` ([Winter Zhang](https://github.com/yandex/ClickHouse/pull/2260)) +* Добавлено несколько новых `SYSTEM`-запросов для реплицированных таблиц (`RESTART REPLICAS`, `SYNC REPLICA`, `[STOP|START] [MERGES|FETCHES|REPLICATED SENDS|REPLICATION QUEUES]`). +* Добавлена возможность записи в таблицу с движком MySQL и соответствующую табличную функцию ([sundy-li](https://github.com/yandex/ClickHouse/pull/2294)). +* Добавлена табличная функция `url()` и движок таблиц `URL` ([Александр Сапин](https://github.com/yandex/ClickHouse/pull/2501)). +* Добавлена агрегатная функция `windowFunnel` ([sundy-li](https://github.com/yandex/ClickHouse/pull/2352)). +* Добавлены функции `startsWith` и `endsWith` для строк ([Вадим Плахтинский](https://github.com/yandex/ClickHouse/pull/2429)). +* В табличной функции `numbers()` добавлена возможность указывать offset ([Winter Zhang](https://github.com/yandex/ClickHouse/pull/2535)). * Добавлена возможность интерактивного ввода пароля в `clickhouse-client`. * Добавлена возможность отправки логов сервера в syslog ([Александр Крашенинников](https://github.com/yandex/ClickHouse/pull/2459)). -* Добавлено несколько новых `SYSTEM`-запросов для реплицированных таблиц (`RESTART REPLICAS`, `SYNC REPLICA`, `[STOP|START] [MERGES|FETCHES|REPLICATED SENDS|REPLICATION QUEUES]`). -* Добавлены функции `startsWith` и `endsWith` для строк ([Вадим Плахтинский](https://github.com/yandex/ClickHouse/pull/2429)). 
-* Добавлена поддержка запроса `TRUNCATE TABLE` ([Winter Zhang](https://github.com/yandex/ClickHouse/pull/2260)) * Добавлена поддержка логирования в словарях с источником shared library ([Александр Сапин](https://github.com/yandex/ClickHouse/pull/2472)). * Добавлена поддержка произвольного разделителя в формате CSV ([Иван Жуков](https://github.com/yandex/ClickHouse/pull/2263)) * Добавлена настройка `date_time_input_format`. Если переключить эту настройку в значение `'best_effort'`, значения DateTime будут читаться в широком диапазоне форматов. * Добавлена утилита `clickhouse-obfuscator` для обфускации данных. Пример использования: публикация данных, используемых в тестах производительности. -* Добавлена табличная функция `url()` и движок таблиц `URL` ([Александр Сапин](https://github.com/yandex/ClickHouse/pull/2501)). -* В табличной функции `numbers()` добавлена возможность указывать offset ([Winter Zhang](https://github.com/yandex/ClickHouse/pull/2535)). ## Экспериментальные возможности: * Добавлена возможность вычислять аргументы функции `and` только там, где они нужны ([Анастасия Царькова](https://github.com/yandex/ClickHouse/pull/2272)) * Добавлена возможность JIT-компиляции в нативный код некоторых выражений ([pyos](https://github.com/yandex/ClickHouse/pull/2277)). ## Исправление ошибок: +* Исправлено появление дублей в запросе с `DISTINCT` и `ORDER BY`. +* Запросы с `ARRAY JOIN` и `arrayFilter` раньше возвращали некорректный результат. * Исправлена ошибка при чтении столбца-массива из Nested-структуры ([#2066](https://github.com/yandex/ClickHouse/issues/2066)). -* Исправлено соответствие типов в табличной функции ODBC ([sundy-li](https://github.com/yandex/ClickHouse/pull/2268)). -* Исправлено применение настроек из параметров командной строки в программе clickhouse-local. * Исправлена ошибка при анализе запросов с секцией HAVING вида `HAVING tuple IN (...)`. * Исправлена ошибка при анализе запросов с рекурсивными алиасами. -* Запросы с `ARRAY JOIN` и `arrayFilter` возвращали некорректный результат. -* Исправлено некорректное сравнение типов `DateTime` с таймзоной и без неё ([Александр Бочаров](https://github.com/yandex/ClickHouse/pull/2400)). -* После `CLEAR COLUMN IN PARTITION` в соответствующей партиции теперь возможны слияния ([#2315](https://github.com/yandex/ClickHouse/issues/2315)). -* Исправлено появление дублей в запросе с `DISTINCT` и `ORDER BY`. -* Исправлена вставка в материализованное представление в случае, если движок таблицы представления - Distributed ([Babacar Diassé](https://github.com/yandex/ClickHouse/pull/2411)). -* Исправлено отсечение ненужных кусков при запросе с условием на столбцы ключа партиционирования ([#2342](https://github.com/yandex/ClickHouse/issues/2342)). * Настройки профиля пользователя не применялись при использовании сессий в HTTP-интерфейсе. -* Исправлена SSRF в табличной функции remote(). +* Исправлено применение настроек из параметров командной строки в программе clickhouse-local. * Клиентская библиотека ZooKeeper теперь использует таймаут сессии, полученный от сервера. -* Исправлен синтаксический разбор и форматирование оператора `CAST`. -* Исправлен race condition при записи данных из движка `Kafka` в материализованные представления ([Yangkuan Liu](https://github.com/yandex/ClickHouse/pull/2448)). -* Исправлен выход из `clickhouse-client` в multiline-режиме ([#2510](https://github.com/yandex/ClickHouse/issues/2510)). * Исправлена ошибка в клиентской библиотеке ZooKeeper, из-за которой ожидание ответа от сервера могло длиться дольше таймаута. 
+* Исправлено отсечение ненужных кусков при запросе с условием на столбцы ключа партиционирования ([#2342](https://github.com/yandex/ClickHouse/issues/2342)). +* После `CLEAR COLUMN IN PARTITION` в соответствующей партиции теперь возможны слияния ([#2315](https://github.com/yandex/ClickHouse/issues/2315)). +* Исправлено соответствие типов в табличной функции ODBC ([sundy-li](https://github.com/yandex/ClickHouse/pull/2268)). +* Исправлено некорректное сравнение типов `DateTime` с таймзоной и без неё ([Александр Бочаров](https://github.com/yandex/ClickHouse/pull/2400)). +* Исправлен синтаксический разбор и форматирование оператора `CAST`. +* Исправлена вставка в материализованное представление в случае, если движок таблицы представления - Distributed ([Babacar Diassé](https://github.com/yandex/ClickHouse/pull/2411)). +* Исправлен race condition при записи данных из движка `Kafka` в материализованные представления ([Yangkuan Liu](https://github.com/yandex/ClickHouse/pull/2448)). +* Исправлена SSRF в табличной функции remote(). +* Исправлен выход из `clickhouse-client` в multiline-режиме ([#2510](https://github.com/yandex/ClickHouse/issues/2510)). ## Улучшения: +* Фоновые задачи в реплицированных таблицах теперь выполняются не в отдельных потоках, а в пуле потоков ([Silviu Caragea](https://github.com/yandex/ClickHouse/pull/1722)) +* Улучшена производительность разжатия LZ4. +* Ускорен анализ запроса с большим числом JOIN-ов и подзапросов. +* DNS-кэш теперь автоматически обновляется при большом числе сетевых ошибок. +* Вставка в таблицу теперь не происходит, если вставка в одно из её материализованных представлений невозможна из-за того, что в нём много кусков. +* Исправлено несоответствие в значениях счётчиков событий `Query`, `SelectQuery`, `InsertQuery`. +* Разрешены выражения вида `tuple IN (SELECT tuple)`, если типы кортежей совпадают. * Сервер с реплицированными таблицами теперь может стартовать, даже если не сконфигурирован ZooKeeper. * При расчёте количества доступных ядер CPU теперь учитываются ограничения cgroups ([Atri Sharma](https://github.com/yandex/ClickHouse/pull/2325)). -* DNS-кэш теперь автоматически обновляется при большом числе сетевых ошибок. -* Фоновые задачи в реплицированных таблицах теперь выполняются не в отдельных потоках, а в пуле потоков ([Silviu Caragea](https://github.com/yandex/ClickHouse/pull/1722)) -* Вставка в таблицу теперь не происходит, если вставка в одно из её материализованных представлений невозможна из-за того, что в нём много кусков. -* Разрешены выражения вида `tuple IN (SELECT tuple)`, если типы кортежей совпадают. -* Ускорен анализ запроса с большим числом JOIN-ов и подзапросов. -* Исправлено несоответствие в значениях счётчиков событий `Query`, `SelectQuery`, `InsertQuery`. * Добавлен chown директорий конфигов в конфигурационном файле systemd ([Михаил Ширяев](https://github.com/yandex/ClickHouse/pull/2421)). -* Улучшена производительность разжатия LZ4. ## Изменения сборки: -* Используемая версия библиотеки librdkafka обновлена до v0.11.4. -* Исправлена сборка с использованием библиотеки vectorclass ([Babacar Diassé](https://github.com/yandex/ClickHouse/pull/2274)). * Добавлена возможность сборки компилятором gcc8. * Добавлена возможность сборки llvm из submodule. +* Используемая версия библиотеки librdkafka обновлена до v0.11.4. +* Добавлена возможность использования библиотеки libcpuid из системы, используемая версия библиотеки обновлена до 0.4.0. 
+* Исправлена сборка с использованием библиотеки vectorclass ([Babacar Diassé](https://github.com/yandex/ClickHouse/pull/2274)). * Cmake теперь по умолчанию генерирует файлы для ninja (как при использовании `-G Ninja`). * Добавлена возможность использования библиотеки libtinfo вместо libtermcap ([Георгий Кондратьев](https://github.com/yandex/ClickHouse/pull/2519)). * Исправлен конфликт заголовочных файлов в Fedora Rawhide ([#2520](https://github.com/yandex/ClickHouse/issues/2520)). -* Добавлена возможность использования библиотеки libcpuid из системы, используемая версия библиотеки обновлена до 0.4.0. ## Обратно несовместимые изменения: * Убран escaping в форматах `Vertical` и `Pretty*`, удалён формат `VerticalRaw`. From 2447755700f40af317cb80ba8800b94d6350d148 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 27 Jun 2018 13:30:13 +0300 Subject: [PATCH 142/151] BackgroundSchedulePool: remove task from delayed even it it is currently executing If the task was left in the delayed_tasks set, it could lead to a nasty bug: delayExecutionThreadFunction() would schedule the task, then it would immediately go on a next iteration and schedule the same task again. Thus this task would be continually executing, and other delayed tasks would have no chance to run. --- dbms/src/Common/BackgroundSchedulePool.cpp | 12 ++++++------ dbms/src/Common/BackgroundSchedulePool.h | 3 +++ .../MergeTree/ReplicatedMergeTreeCleanupThread.cpp | 1 - 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/dbms/src/Common/BackgroundSchedulePool.cpp b/dbms/src/Common/BackgroundSchedulePool.cpp index 80caf14e748..21e3211ea1f 100644 --- a/dbms/src/Common/BackgroundSchedulePool.cpp +++ b/dbms/src/Common/BackgroundSchedulePool.cpp @@ -42,13 +42,14 @@ bool BackgroundSchedulePool::TaskInfo::schedule() scheduled = true; - if (!executing) - { - if (delayed) - pool.cancelDelayedTask(shared_from_this(), lock); + if (delayed) + pool.cancelDelayedTask(shared_from_this(), lock); + /// If the task is not executing at the moment, enqueue it for immediate execution. + /// But if it is currently executing, do nothing because it will be enqueued + /// at the end of the execute() method. + if (!executing) pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); - } return true; } @@ -123,7 +124,6 @@ void BackgroundSchedulePool::TaskInfo::execute() if (scheduled) pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); } - } zkutil::WatchCallback BackgroundSchedulePool::TaskInfo::getWatchCallback() diff --git a/dbms/src/Common/BackgroundSchedulePool.h b/dbms/src/Common/BackgroundSchedulePool.h index 2fda381b111..0ebcc207b2c 100644 --- a/dbms/src/Common/BackgroundSchedulePool.h +++ b/dbms/src/Common/BackgroundSchedulePool.h @@ -70,6 +70,9 @@ public: std::mutex exec_mutex; std::mutex schedule_mutex; + /// Invariants: + /// * If deactivated is true then scheduled, delayed and executing are all false. + /// * scheduled and delayed cannot be true at the same time. 
bool deactivated = false; bool scheduled = false; bool delayed = false; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 05d5c1b9477..df8de692488 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -46,7 +46,6 @@ void ReplicatedMergeTreeCleanupThread::run() } task->scheduleAfter(CLEANUP_SLEEP_MS); - } From e03d523f191e97907e2252199c71e89291a79e7f Mon Sep 17 00:00:00 2001 From: robot-metrika-test Date: Wed, 27 Jun 2018 16:10:59 +0300 Subject: [PATCH 143/151] Auto version update to [54388] --- dbms/cmake/version.cmake | 6 +++--- debian/changelog | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index b89e32968bb..29abf098b26 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,7 +1,7 @@ # This strings autochanged from release_lib.sh: -set(VERSION_DESCRIBE v1.1.54387-testing) -set(VERSION_REVISION 54387) -set(VERSION_GITHASH 7ce4ebf1e1a5fefd3161b6f615eec0730d75ec34) +set(VERSION_DESCRIBE v1.1.54388-testing) +set(VERSION_REVISION 54388) +set(VERSION_GITHASH 2447755700f40af317cb80ba8800b94d6350d148) # end of autochange set (VERSION_MAJOR 1) diff --git a/debian/changelog b/debian/changelog index 867c80c4dbb..dc05db4aa45 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (1.1.54387) unstable; urgency=low +clickhouse (1.1.54388) unstable; urgency=low * Modified source code - -- Fri, 22 Jun 2018 21:10:11 +0300 + -- Wed, 27 Jun 2018 16:10:59 +0300 From e0afb1fab147ee2b091591522fb3b00b881d6157 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 27 Jun 2018 18:55:35 +0300 Subject: [PATCH 144/151] update changelog for v1.1.54388 --- CHANGELOG.draft.md | 7 ------- CHANGELOG_RU.md | 3 ++- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.draft.md b/CHANGELOG.draft.md index 614915a5523..811a7c2bb4e 100644 --- a/CHANGELOG.draft.md +++ b/CHANGELOG.draft.md @@ -1,11 +1,4 @@ # en: -## Bug fixes: -* Fixed a crash that occurred while reading from ReplacingMergeTree with prewhere condition which filters all rows. #2525 - - # ru: - -## Исправление ошибок: -* Исправлено падение при чтении из ReplacingMergeTree с условием в prewhere, фильтрующим все строки. #2525 diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index e68c48b071d..2fe43529a5a 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,4 +1,4 @@ -# ClickHouse release 1.1.54387, 2018-06-26 +# ClickHouse release 1.1.54388, 2018-06-28 ## Новые возможности: * Добавлена поддержка запроса `ALTER TABLE t DELETE WHERE` для реплицированных таблиц и таблица `system.mutations`. @@ -27,6 +27,7 @@ * Исправлена ошибка при чтении столбца-массива из Nested-структуры ([#2066](https://github.com/yandex/ClickHouse/issues/2066)). * Исправлена ошибка при анализе запросов с секцией HAVING вида `HAVING tuple IN (...)`. * Исправлена ошибка при анализе запросов с рекурсивными алиасами. +* Исправлена ошибка при чтении из ReplacingMergeTree с условием в PREWHERE, фильтрующим все строки ([#2525](https://github.com/yandex/ClickHouse/issues/2525)). * Настройки профиля пользователя не применялись при использовании сессий в HTTP-интерфейсе. * Исправлено применение настроек из параметров командной строки в программе clickhouse-local. * Клиентская библиотека ZooKeeper теперь использует таймаут сессии, полученный от сервера. 
From c7a305eca379d719d38404b41374ce9238538df9 Mon Sep 17 00:00:00 2001
From: Mikhail Surin
Date: Mon, 25 Jun 2018 10:29:53 +0300
Subject: [PATCH 145/151] fix float transform

---
 dbms/src/Common/RadixSort.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Common/RadixSort.h b/dbms/src/Common/RadixSort.h
index dd75477cf30..0a5861b30eb 100644
--- a/dbms/src/Common/RadixSort.h
+++ b/dbms/src/Common/RadixSort.h
@@ -54,7 +54,7 @@ struct RadixSortFloatTransform
 
     static KeyBits forward(KeyBits x)
    {
-        return x ^ (-(x >> (sizeof(KeyBits) * 8 - 1) | (KeyBits(1) << (sizeof(KeyBits) * 8 - 1))));
+        return x ^ ((-(x >> (sizeof(KeyBits) * 8 - 1))) | (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)));
     }
 
     static KeyBits backward(KeyBits x)

From b8c4a0ba8e6f82d7243d023e3dbc2d7a24c4e2cf Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin
Date: Thu, 28 Jun 2018 16:52:00 +0300
Subject: [PATCH 146/151] add docs draft for mutations [#CLICKHOUSE-3687]

---
 docs/ru/query_language/queries.md | 42 +++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/docs/ru/query_language/queries.md b/docs/ru/query_language/queries.md
index 58bc73bc44a..661c5864311 100644
--- a/docs/ru/query_language/queries.md
+++ b/docs/ru/query_language/queries.md
@@ -436,6 +436,48 @@ ALTER TABLE [db.]table FETCH PARTITION 'name' FROM 'path-in-zookeeper'
 
 
 
+### Мутации
+
+Мутации - разновидность запроса ALTER, позволяющая изменять или удалять данные в таблице. В отличие от стандартных запросов `DELETE` и `UPDATE`, рассчитанных на точечное изменение данных, область применения мутаций - достаточно тяжёлые изменения, затрагивающие много строк в таблице.
+
+Функциональность находится в состоянии beta и доступна начиная с версии 1.1.54388. Реализована поддержка Replicated*MergeTree таблиц (в скором времени будет добавлена поддержка и для нереплицированных MergeTree).
+
+Конвертировать существующие таблицы для работы с мутациями не нужно. Но после применения первой мутации формат данных таблицы становится несовместимым с предыдущими версиями и откатиться на предыдущую версию уже не получится.
+
+На данный момент доступна команда `ALTER DELETE`:
+
+```sql
+ALTER TABLE [db.]table DELETE WHERE expr
+```
+
+Выражение `expr` должно иметь тип UInt8. Запрос удаляет строки таблицы, для которых это выражение принимает ненулевое значение.
+
+В одном запросе можно указать несколько команд через запятую.
+
+Для *MergeTree-таблиц мутации выполняются, перезаписывая данные по кускам (parts). При этом атомарности нет - куски заменяются на помутированные по мере выполнения и запрос `SELECT`, заданный во время выполнения мутации, увидит данные как из измененных кусков, так и из кусков, которые еще не были изменены.
+
+Мутации линейно упорядочены между собой и накладываются на каждый кусок в порядке добавления. Мутации также упорядочены со вставками - гарантируется, что данные, вставленные в таблицу до начала выполнения запроса мутации, будут изменены, а данные, вставленные после окончания запроса мутации, изменены не будут. При этом мутации никак не блокируют вставки.
+
+Для реплицированных таблиц запрос завершается немедленно после добавления информации о мутации в ZooKeeper. Сама мутация выполняется асинхронно, следить за ходом её выполнения можно по таблице `system.mutations`. Добавленные мутации будут выполняться до конца даже в случае перезапуска серверов ClickHouse. Откатить мутацию после её добавления нельзя.
+
+#### Системная таблица system.mutations
+
+Таблица содержит информацию о ходе выполнения мутаций MergeTree-таблиц.
Каждой команде мутации соответствует одна строка. В таблице есть следующие столбцы: + +**database**, **table** - имя БД и таблицы, к которой была применена мутация. + +**mutation_id** - ID запроса. Для реплицированных таблиц эти ID соответствуют именам записей в директории `/mutations/` в ZooKeeper. + +**command** - Команда мутации (часть запроса после `ALTER TABLE [db.]table`). + +**create_time** - Время создания мутации. + +**block_numbers.partition_id**, **block_numbers.number** - Nested-столбец, для каждой партиции содержащий номер блока, полученный этой мутацией (в каждой партиции будут изменены только куски, содержащие блоки с номерами, меньшими номера, полученного мутацией в этой партиции). + +**parts_to_do** - Количество кусков таблицы, которые ещё предстоит изменить. + +**is_done** - Завершена ли мутация. Замечание: даже если `parts_to_do = 0`, возможна ситуация, когда мутация ещё не завершена из-за долго выполняющейся вставки, которая добавляет данные, которые нужно будет мутировать. + ## SHOW DATABASES ```sql From ebde528506e52a7e22c05b89e2b245e6e7681c1f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 28 Jun 2018 17:07:53 +0300 Subject: [PATCH 147/151] Create default macros in global context. [#CLICKHOUSE-3793] --- dbms/src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 49a984e948c..2ae105ff7f4 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -191,7 +191,7 @@ struct ContextShared Context::ConfigReloadCallback config_reload_callback; ContextShared(std::shared_ptr runtime_components_factory_) - : runtime_components_factory(std::move(runtime_components_factory_)) + : runtime_components_factory(std::move(runtime_components_factory_)), macros(std::make_unique()) { /// TODO: make it singleton (?) static std::atomic num_calls{0}; From d2e399f6884851b619cf757306358fd2f8b41060 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 28 Jun 2018 18:13:29 +0300 Subject: [PATCH 148/151] Update index.md --- docs/en/operations/settings/index.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index 0c5ca5d5171..2321519f9d9 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -7,9 +7,9 @@ Settings are configured in layers, so each subsequent layer redefines the previo Ways to configure settings, in order of priority: -- Settings in the server config file. +- Settings in the server config file `users.xml`. - Settings from user profiles. +- Settings from user profiles. - Session settings. @@ -21,4 +21,3 @@ Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`). Settings that can only be made in the server config file are not covered in this section. 
- From b06801a3d37354d95cfe97cd7a3cef9428e812ac Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 28 Jun 2018 18:15:37 +0300 Subject: [PATCH 149/151] Update index.md --- docs/en/operations/settings/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index 2321519f9d9..0a72ebac128 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -9,7 +9,7 @@ Ways to configure settings, in order of priority: - Settings in the server config file `users.xml`. -- Settings from user profiles. + Set it in user profile in `` element. - Session settings. From 493b67d051d13ff0ea0d0b7cb05b3ae56200f890 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 28 Jun 2018 18:59:39 +0300 Subject: [PATCH 150/151] Removed useless file [#CLICKHOUSE-2] --- dbms/tests/queries/0_stateless/99999_prepare.reference | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 dbms/tests/queries/0_stateless/99999_prepare.reference diff --git a/dbms/tests/queries/0_stateless/99999_prepare.reference b/dbms/tests/queries/0_stateless/99999_prepare.reference deleted file mode 100644 index e69de29bb2d..00000000000 From 6f21706f65ae8397086455622e3387c717376ad5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 28 Jun 2018 19:05:41 +0300 Subject: [PATCH 151/151] Added test #2553 --- .../0_stateless/00649_quantile_tdigest_negative.reference | 1 + .../queries/0_stateless/00649_quantile_tdigest_negative.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00649_quantile_tdigest_negative.reference create mode 100644 dbms/tests/queries/0_stateless/00649_quantile_tdigest_negative.sql diff --git a/dbms/tests/queries/0_stateless/00649_quantile_tdigest_negative.reference b/dbms/tests/queries/0_stateless/00649_quantile_tdigest_negative.reference new file mode 100644 index 00000000000..3fbedf693b5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00649_quantile_tdigest_negative.reference @@ -0,0 +1 @@ +-2 diff --git a/dbms/tests/queries/0_stateless/00649_quantile_tdigest_negative.sql b/dbms/tests/queries/0_stateless/00649_quantile_tdigest_negative.sql new file mode 100644 index 00000000000..7e5e78a2b5d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00649_quantile_tdigest_negative.sql @@ -0,0 +1 @@ +SELECT quantileTDigest(0.5)(arrayJoin([-1, -2, -3]));
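
A footnote on [PATCH 145/151] above: the change to `RadixSortFloatTransform::forward` looks like a pure re-parenthesization, but it fixes an operator-precedence bug. In the old expression the unary minus was applied to the already OR-ed mask, so for negative inputs the mask degraded to `0x7FFF...` instead of `0xFFFF...`: the sign bit survived the XOR and negative floats were mapped into the same unsigned key range as positive ones. Below is a minimal standalone sketch (not part of any patch; the `KeyBits` alias, the local copy of `forward` and the sample values are assumptions made for illustration, mirroring the 32-bit case of `RadixSort.h`) that checks the property the transform must guarantee: a numerically larger float always yields a numerically larger unsigned key.

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

/// 32-bit stand-in for the KeyBits type from RadixSort.h.
using KeyBits = uint32_t;

/// The corrected transform: positives get only the sign bit flipped,
/// negatives get all bits flipped, so the unsigned order of the keys
/// matches the numeric order of the original floats.
static KeyBits forward(KeyBits x)
{
    return x ^ ((-(x >> (sizeof(KeyBits) * 8 - 1))) | (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)));
}

int main()
{
    /// Ascending floats crossing the negative/positive boundary.
    std::vector<float> values{-1e9f, -3.5f, -1.25f, 0.0f, 1.25f, 2e9f};

    for (size_t i = 1; i < values.size(); ++i)
    {
        KeyBits prev;
        KeyBits curr;
        std::memcpy(&prev, &values[i - 1], sizeof(prev));
        std::memcpy(&curr, &values[i], sizeof(curr));

        /// Monotonicity check: must hold for every adjacent pair.
        assert(forward(prev) < forward(curr));
    }
    return 0;
}
```

With the pre-fix expression the assertion fails exactly at the `-1.25f` to `0.0f` step, the negative-to-positive crossing, which is where a radix sort built on this transform would interleave negative and positive values.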