Remove header files

2024-11-17 13:13:36 +00:00 · 2021-05-23 04:12:30 +03:00 · 2021-05-23 04:12:30 +03:00 · 3057bbe831
commit 3057bbe831
parent deb68b15da
18 changed files with 465 additions and 528 deletions
--- a/src/Compression/CompressionCodecDelta.cpp
+++ b/src/Compression/CompressionCodecDelta.cpp
@ -1,4 +1,4 @@
-#include <Compression/CompressionCodecDelta.h>
+#include <Compression/ICompressionCodec.h>
 #include <Compression/CompressionInfo.h>
 #include <Compression/CompressionFactory.h>
 #include <common/unaligned.h>
@ -11,6 +11,29 @@
 namespace DB
 {

+class CompressionCodecDelta : public ICompressionCodec
+{
+public:
+    CompressionCodecDelta(UInt8 delta_bytes_size_);
+
+    uint8_t getMethodByte() const override;
+
+    void updateHash(SipHash & hash) const override;
+
+protected:
+    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
+    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
+
+    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override { return uncompressed_size + 2; }
+
+    bool isCompression() const override { return false; }
+    bool isGenericCompression() const override { return false; }
+
+private:
+    UInt8 delta_bytes_size;
+};
+
+
 namespace ErrorCodes
 {
    extern const int CANNOT_COMPRESS;
--- a/src/Compression/CompressionCodecDelta.h
+++ b/src/Compression/CompressionCodecDelta.h
@ -1,32 +0,0 @@
-#pragma once
-
-#include <Compression/ICompressionCodec.h>
-
-namespace DB
-{
-
-class CompressionCodecDelta : public ICompressionCodec
-{
-public:
-    CompressionCodecDelta(UInt8 delta_bytes_size_);
-
-    uint8_t getMethodByte() const override;
-
-    void updateHash(SipHash & hash) const override;
-
-protected:
-
-    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
-
-    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
-
-    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override { return uncompressed_size + 2; }
-
-    bool isCompression() const override { return false; }
-    bool isGenericCompression() const override { return false; }
-
-private:
-    UInt8 delta_bytes_size;
-};
-
-}
--- a/src/Compression/CompressionCodecDensity.cpp
+++ b/src/Compression/CompressionCodecDensity.cpp
@ -1,11 +1,41 @@
+#include <Compression/ICompressionCodec.h>
 #include <Compression/CompressionFactory.h>
 #include <Compression/CompressionInfo.h>
-#include <Compression/CompressionCodecDensity.h>
 #include <Parsers/ASTLiteral.h>

+#include <src/density_api.h>
+

 namespace DB
 {
+
+class CompressionCodecDensity : public ICompressionCodec
+{
+public:
+    static constexpr auto DENSITY_DEFAULT_ALGO = DENSITY_ALGORITHM_CHAMELEON; // by default aim on speed
+
+    CompressionCodecDensity(DENSITY_ALGORITHM algo_);
+
+    uint8_t getMethodByte() const override;
+
+    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
+
+    void updateHash(SipHash & hash) const override;
+
+protected:
+    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
+
+    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
+
+    bool isCompression() const override { return true; }
+    bool isGenericCompression() const override { return true; }
+    bool isExperimental() const override { return true; }
+
+private:
+    const DENSITY_ALGORITHM algo;
+};
+
+
 namespace ErrorCodes
 {
    extern const int CANNOT_COMPRESS;
--- a/src/Compression/CompressionCodecDensity.h
+++ b/src/Compression/CompressionCodecDensity.h
@ -1,35 +0,0 @@
-#pragma once
-
-#include <Compression/ICompressionCodec.h>
-#include <src/density_api.h>
-
-
-namespace DB
-{
-class CompressionCodecDensity : public ICompressionCodec
-{
-public:
-    static constexpr auto DENSITY_DEFAULT_ALGO = DENSITY_ALGORITHM_CHAMELEON; // by default aim on speed
-
-    CompressionCodecDensity(DENSITY_ALGORITHM algo_);
-
-    uint8_t getMethodByte() const override;
-
-    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
-
-    void updateHash(SipHash & hash) const override;
-
-protected:
-    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
-
-    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
-
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return true; }
-    bool isExperimental() const override { return true; }
-
-private:
-    const DENSITY_ALGORITHM algo;
-};
-
-}
--- a/src/Compression/CompressionCodecDoubleDelta.cpp
+++ b/src/Compression/CompressionCodecDoubleDelta.cpp
@ -1,4 +1,4 @@
-#include <Compression/CompressionCodecDoubleDelta.h>
+#include <Compression/ICompressionCodec.h>
 #include <Compression/CompressionInfo.h>
 #include <Compression/CompressionFactory.h>
 #include <common/unaligned.h>
@ -15,9 +15,121 @@
 #include <type_traits>
 #include <limits>

+
 namespace DB
 {

+/** DoubleDelta column codec implementation.
+ *
+ * Based on Gorilla paper: http://www.vldb.org/pvldb/vol8/p1816-teller.pdf, which was extended
+ * to support 64-bit types. The drawback is 1 extra bit for 32-bit wide deltas: 5-bit prefix
+ * instead of 4-bit prefix.
+ *
+ * This codec is best used against monotonic integer sequences with constant (or almost constant)
+ * stride, like event timestamp for some monitoring application.
+ *
+ * Given input sequence a: [a0, a1, ... an]:
+ *
+ * First, write number of items (sizeof(int32)*8 bits):                n
+ * Then write first item as is (sizeof(a[0])*8 bits):                  a[0]
+ * Second item is written as delta (sizeof(a[0])*8 bits):              a[1] - a[0]
+ * Loop over remaining items and calculate double delta:
+ *   double_delta = a[i] - 2 * a[i - 1] + a[i - 2]
+ *   Write it in compact binary form with `BitWriter`
+ *   if double_delta == 0:
+ *      write 1 bit:                                                   0
+ *   else if -63 < double_delta < 64:
+ *      write 2 bit prefix:                                            10
+ *      write sign bit (1 if negative):                                x
+ *      write 7-1 bits of abs(double_delta) - 1:                       xxxxxx
+ *   else if -255 < double_delta < 256:
+ *      write 3 bit prefix:                                            110
+ *      write sign bit (1 if negative):                                x
+ *      write 9-1 bits of abs(double_delta) - 1:                       xxxxxxxx
+ *   else if -2047 < double_delta < 2048:
+ *      write 4 bit prefix:                                            1110
+ *      write sign bit (1 if negative):                                x
+ *      write 12-1 bits of abs(double_delta) - 1:                      xxxxxxxxxxx
+ *   else if double_delta fits into 32-bit int:
+ *      write 5 bit prefix:                                            11110
+ *      write sign bit (1 if negative):                                x
+ *      write 32-1 bits of abs(double_delta) - 1:                      xxxxxxxxxxx...
+ *   else
+ *      write 5 bit prefix:                                            11111
+ *      write sign bit (1 if negative):                                x
+ *      write 64-1 bits of abs(double_delta) - 1:                      xxxxxxxxxxx...
+ *
+ * @example sequence of UInt8 values [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] is encoded as (codec header is omitted):
+ *
+ * .- 4-byte little-endian sequence length (10 == 0xa)
+ * |               .- 1 byte (sizeof(UInt8) a[0]                                            : 0x01
+ * |               |   .- 1 byte of delta: a[1] - a[0] = 2 - 1 = 1                          : 0x01
+ * |               |   |   .- 8 zero bits since double delta for remaining 8 elements was 0 : 0x00
+ * v_______________v___v___v___
+ * \x0a\x00\x00\x00\x01\x01\x00
+ *
+ * @example sequence of Int16 values [-10, 10, -20, 20, -40, 40] is encoded as:
+ *
+ * .- 4-byte little endian sequence length = 6                                 : 0x00000006
+ * |                .- 2 bytes (sizeof(Int16) a[0] as UInt16 = -10             : 0xfff6
+ * |                |       .- 2 bytes of delta: a[1] - a[0] = 10 - (-10) = 20 : 0x0014
+ * |                |       |       .- 4 encoded double deltas (see below)
+ * v_______________ v______ v______ v______________________
+ * \x06\x00\x00\x00\xf6\xff\x14\x00\xb8\xe2\x2e\xb1\xe4\x58
+ *
+ * 4 binary encoded double deltas (\xb8\xe2\x2e\xb1\xe4\x58):
+ * double_delta (DD) = -20 - 2 * 10 + (-10) = -50
+ * .- 2-bit prefix                                                         : 0b10
+ * | .- sign-bit                                                           : 0b1
+ * | |.- abs(DD) - 1 = 49                                                  : 0b110001
+ * | ||
+ * | ||      DD = 20 - 2 * (-20) + 10 = 70
+ * | ||      .- 3-bit prefix                                               : 0b110
+ * | ||      |  .- sign bit                                                : 0b0
+ * | ||      |  |.- abs(DD) - 1 = 69                                       : 0b1000101
+ * | ||      |  ||
+ * | ||      |  ||        DD = -40 - 2 * 20 + (-20) = -100
+ * | ||      |  ||        .- 3-bit prefix                                  : 0b110
+ * | ||      |  ||        |    .- sign-bit                                 : 0b1
+ * | ||      |  ||        |    |.- abs(DD) - 1 = 99                        : 0b1100011
+ * | ||      |  ||        |    ||
+ * | ||      |  ||        |    ||       DD = 40 - 2 * (-40) + 20 = 140
+ * | ||      |  ||        |    ||       .- 3-bit prefix                    : 0b110
+ * | ||      |  ||        |    ||       |  .- sign bit                     : 0b0
+ * | ||      |  ||        |    ||       |  |.- abs(DD) - 1 = 139           : 0b10001011
+ * | ||      |  ||        |    ||       |  ||
+ * V_vv______V__vv________V____vv_______V__vv________,- padding bits
+ * 10111000 11100010 00101110 10110001 11100100 01011000
+ *
+ * Please also see unit tests for:
+ *   * Examples on what output `BitWriter` produces on predefined input.
+ *   * Compatibility tests solidifying encoded binary output on set of predefined sequences.
+ */
+class CompressionCodecDoubleDelta : public ICompressionCodec
+{
+public:
+    CompressionCodecDoubleDelta(UInt8 data_bytes_size_);
+
+    uint8_t getMethodByte() const override;
+
+    void updateHash(SipHash & hash) const override;
+
+protected:
+
+    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
+
+    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
+
+    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
+
+    bool isCompression() const override { return true; }
+    bool isGenericCompression() const override { return false; }
+
+private:
+    UInt8 data_bytes_size;
+};
+
+
 namespace ErrorCodes
 {
    extern const int CANNOT_COMPRESS;
--- a/src/Compression/CompressionCodecDoubleDelta.h
+++ b/src/Compression/CompressionCodecDoubleDelta.h
@ -1,118 +0,0 @@
-#pragma once
-
-#include <Compression/ICompressionCodec.h>
-
-namespace DB
-{
-
-/** DoubleDelta column codec implementation.
- *
- * Based on Gorilla paper: http://www.vldb.org/pvldb/vol8/p1816-teller.pdf, which was extended
- * to support 64bit types. The drawback is 1 extra bit for 32-byte wide deltas: 5-bit prefix
- * instead of 4-bit prefix.
- *
- * This codec is best used against monotonic integer sequences with constant (or almost constant)
- * stride, like event timestamp for some monitoring application.
- *
- * Given input sequence a: [a0, a1, ... an]:
- *
- * First, write number of items (sizeof(int32)*8 bits):                n
- * Then write first item as is (sizeof(a[0])*8 bits):                  a[0]
- * Second item is written as delta (sizeof(a[0])*8 bits):              a[1] - a[0]
- * Loop over remaining items and calculate double delta:
- *   double_delta = a[i] - 2 * a[i - 1] + a[i - 2]
- *   Write it in compact binary form with `BitWriter`
- *   if double_delta == 0:
- *      write 1bit:                                                    0
- *   else if -63 < double_delta < 64:
- *      write 2 bit prefix:                                            10
- *      write sign bit (1 if signed):                                  x
- *      write 7-1 bits of abs(double_delta - 1):                       xxxxxx
- *   else if -255 < double_delta < 256:
- *      write 3 bit prefix:                                            110
- *      write sign bit (1 if signed):                                  x
- *      write 9-1 bits of abs(double_delta - 1):                       xxxxxxxx
- *   else if -2047 < double_delta < 2048:
- *      write 4 bit prefix:                                            1110
- *      write sign bit (1 if signed):                                  x
- *      write 12-1 bits of abs(double_delta - 1):                      xxxxxxxxxxx
- *   else if double_delta fits into 32-bit int:
- *      write 5 bit prefix:                                            11110
- *      write sign bit (1 if signed):                                  x
- *      write 32-1 bits of abs(double_delta - 1):                      xxxxxxxxxxx...
- *   else
- *      write 5 bit prefix:                                            11111
- *      write sign bit (1 if signed):                                  x
- *      write 64-1 bits of abs(double_delta - 1):                      xxxxxxxxxxx...
- *
- * @example sequence of UInt8 values [1, 2, 3, 4, 5, 6, 7, 8, 9 10] is encoded as (codec header is omitted):
- *
- * .- 4-byte little-endian sequence length (10 == 0xa)
- * |               .- 1 byte (sizeof(UInt8) a[0]                                            : 0x01
- * |               |   .- 1 byte of delta: a[1] - a[0] = 2 - 1 = 1                          : 0x01
- * |               |   |   .- 8 zero bits since double delta for remaining 8 elements was 0 : 0x00
- * v_______________v___v___v___
- * \x0a\x00\x00\x00\x01\x01\x00
- *
- * @example sequence of Int16 values [-10, 10, -20, 20, -40, 40] is encoded as:
- *
- * .- 4-byte little endian sequence length = 6                                 : 0x00000006
- * |                .- 2 bytes (sizeof(Int16) a[0] as UInt16 = -10             : 0xfff6
- * |                |       .- 2 bytes of delta: a[1] - a[0] = 10 - (-10) = 20 : 0x0014
- * |                |       |       .- 4 encoded double deltas (see below)
- * v_______________ v______ v______ v______________________
- * \x06\x00\x00\x00\xf6\xff\x14\x00\xb8\xe2\x2e\xb1\xe4\x58
- *
- * 4 binary encoded double deltas (\xb8\xe2\x2e\xb1\xe4\x58):
- * double_delta (DD) = -20 - 2 * 10 + (-10) = -50
- * .- 2-bit prefix                                                         : 0b10
- * | .- sign-bit                                                           : 0b1
- * | |.- abs(DD - 1) = 49                                                  : 0b110001
- * | ||
- * | ||      DD = 20 - 2 * (-20) + 10 = 70
- * | ||      .- 3-bit prefix                                               : 0b110
- * | ||      |  .- sign bit                                                : 0b0
- * | ||      |  |.- abs(DD - 1) = 69                                       : 0b1000101
- * | ||      |  ||
- * | ||      |  ||        DD = -40 - 2 * 20 + (-20) = -100
- * | ||      |  ||        .- 3-bit prefix                                  : 0b110
- * | ||      |  ||        |    .- sign-bit                                 : 0b0
- * | ||      |  ||        |    |.- abs(DD - 1) = 99                        : 0b1100011
- * | ||      |  ||        |    ||
- * | ||      |  ||        |    ||       DD = 40 - 2 * (-40) + 20 = 140
- * | ||      |  ||        |    ||       .- 3-bit prefix                    : 0b110
- * | ||      |  ||        |    ||       |  .- sign bit                     : 0b0
- * | ||      |  ||        |    ||       |  |.- abs(DD - 1) = 139           : 0b10001011
- * | ||      |  ||        |    ||       |  ||
- * V_vv______V__vv________V____vv_______V__vv________,- padding bits
- * 10111000 11100010 00101110 10110001 11100100 01011000
- *
- * Please also see unit tests for:
- *   * Examples on what output `BitWriter` produces on predefined input.
- *   * Compatibility tests solidifying encoded binary output on set of predefined sequences.
- */
-class CompressionCodecDoubleDelta : public ICompressionCodec
-{
-public:
-    CompressionCodecDoubleDelta(UInt8 data_bytes_size_);
-
-    uint8_t getMethodByte() const override;
-
-    void updateHash(SipHash & hash) const override;
-
-protected:
-
-    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
-
-    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
-
-    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
-
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return false; }
-
-private:
-    UInt8 data_bytes_size;
-};
-
-}
--- a/src/Compression/CompressionCodecGorilla.cpp
+++ b/src/Compression/CompressionCodecGorilla.cpp
@ -1,4 +1,4 @@
-#include <Compression/CompressionCodecGorilla.h>
+#include <Compression/ICompressionCodec.h>
 #include <Compression/CompressionInfo.h>
 #include <Compression/CompressionFactory.h>
 #include <common/unaligned.h>
@ -14,9 +14,118 @@

 #include <bitset>

+
 namespace DB
 {

+/** Gorilla column codec implementation.
+ *
+ * Based on Gorilla paper: http://www.vldb.org/pvldb/vol8/p1816-teller.pdf
+ *
+ * This codec is best used against monotonic floating sequences, like CPU usage percentage
+ * or any other gauge.
+ *
+ * Given input sequence a: [a0, a1, ... an]
+ *
+ * First, write number of items (sizeof(int32)*8 bits):                n
+ * Then write first item as is (sizeof(a[0])*8 bits):                  a[0]
+ * Loop over remaining items and calculate xor_diff:
+ *   xor_diff = a[i] ^ a[i - 1] (e.g. 00000011'10110100)
+ *   Write it in compact binary form with `BitWriter`
+ *   if xor_diff == 0:
+ *       write 1 bit:                                                  0
+ *   else:
+ *       calculate leading zero bits (lzb)
+ *       and trailing zero bits (tzb) of xor_diff,
+ *       compare to lzb and tzb of previous xor_diff
+ *       (X = sizeof(a[i]) * 8, e.g. X = 16, lzb = 6, tzb = 2)
+ *       if lzb >= prev_lzb && tzb >= prev_tzb:
+ *           (e.g. prev_lzb=4, prev_tzb=1)
+ *           write 2 bit prefix:                                       0b10
+ *           write xor_diff >> prev_tzb (X - prev_lzb - prev_tzb bits):0b00111011010
+ *           (where X = sizeof(a[i]) * 8, e.g. 16)
+ *       else:
+ *           write 2 bit prefix:                                       0b11
+ *           write 5 bits of lzb:                                      0b00110
+ *           write 6 bits of (X - lzb - tzb)=(16-6-2)=8:               0b001000
+ *           write (X - lzb - tzb) non-zero bits of xor_diff:          0b11101101
+ *           prev_lzb = lzb
+ *           prev_tzb = tzb
+ *
+ * @example sequence of Float32 values [0.1, 0.1, 0.11, 0.2, 0.1] is encoded as:
+ *
+ * .- 4-byte little endian sequence length: 5                                 : 0x00000005
+ * |                .- 4 byte (sizeof(Float32) a[0] as UInt32 = 0.1           : 0xcdcccc3d
+ * |                |               .- 4 encoded xor diffs (see below)
+ * v_______________ v______________ v__________________________________________________
+ * \x05\x00\x00\x00\xcd\xcc\xcc\x3d\x6a\x5a\xd8\xb6\x3c\xcd\x75\xb1\x6c\x77\x00\x00\x00
+ *
+ * 4 binary encoded xor diffs (\x6a\x5a\xd8\xb6\x3c\xcd\x75\xb1\x6c\x77\x00\x00\x00):
+ *
+ * ...........................................
+ * a[i-1]   = 00111101110011001100110011001101
+ * a[i]     = 00111101110011001100110011001101
+ * xor_diff = 00000000000000000000000000000000
+ * .- 1-bit prefix                                                           : 0b0
+ * |
+ * | ...........................................
+ * | a[i-1]   = 00111101110011001100110011001101
+ * | a[i]     = 00111101111000010100011110101110
+ * | xor_diff = 00000000001011011000101101100011
+ * | lzb = 10
+ * | tzb = 0
+ * |.- 2-bit prefix                                                          : 0b11
+ * || .- lzb (10)                                                            : 0b01010
+ * || |     .- data length (32-10-0): 22                                     : 0b010110
+ * || |     |     .- data                                                    : 0b1011011000101101100011
+ * || |     |     |
+ * || |     |     |                        ...........................................
+ * || |     |     |                        a[i-1]   = 00111101111000010100011110101110
+ * || |     |     |                        a[i]     = 00111110010011001100110011001101
+ * || |     |     |                        xor_diff = 00000011101011011000101101100011
+ * || |     |     |                        .- 2-bit prefix                            : 0b11
+ * || |     |     |                        | .- lzb = 6                               : 0b00110
+ * || |     |     |                        | |     .- data length = (32 - 6) = 26     : 0b011010
+ * || |     |     |                        | |     |      .- data                     : 0b11101011011000101101100011
+ * || |     |     |                        | |     |      |
+ * || |     |     |                        | |     |      |                            ...........................................
+ * || |     |     |                        | |     |      |                            a[i-1]   = 00111110010011001100110011001101
+ * || |     |     |                        | |     |      |                            a[i]     = 00111101110011001100110011001101
+ * || |     |     |                        | |     |      |                            xor_diff = 00000011100000000000000000000000
+ * || |     |     |                        | |     |      |                            .- 2-bit prefix                            : 0b10
+ * || |     |     |                        | |     |      |                            | .- data                                  : 0b11100000000000000000000000
+ * VV_v____ v_____v________________________V_v_____v______v____________________________V_v_____________________________
+ * 01101010 01011010 11011000 10110110 00111100 11001101 01110101 10110001 01101100 01110111 00000000 00000000 00000000
+ *
+ * Please also see unit tests for:
+ *   * Examples on what output `BitWriter` produces on predefined input.
+ *   * Compatibility tests solidifying encoded binary output on set of predefined sequences.
+ */
+class CompressionCodecGorilla : public ICompressionCodec
+{
+public:
+    CompressionCodecGorilla(UInt8 data_bytes_size_);
+
+    uint8_t getMethodByte() const override;
+
+    void updateHash(SipHash & hash) const override;
+
+protected:
+
+    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
+
+    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
+
+    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
+
+    bool isCompression() const override { return true; }
+    bool isGenericCompression() const override { return false; }
+
+private:
+    UInt8 data_bytes_size;
+};
+
+
 namespace ErrorCodes
 {
    extern const int CANNOT_COMPRESS;
--- a/src/Compression/CompressionCodecGorilla.h
+++ b/src/Compression/CompressionCodecGorilla.h
@ -1,115 +0,0 @@
-#pragma once
-
-#include <Compression/ICompressionCodec.h>
-
-namespace DB
-{
-
-/** Gorilla column codec implementation.
- *
- * Based on Gorilla paper: http://www.vldb.org/pvldb/vol8/p1816-teller.pdf
- *
- * This codec is best used against monotonic floating sequences, like CPU usage percentage
- * or any other gauge.
- *
- * Given input sequence a: [a0, a1, ... an]
- *
- * First, write number of items (sizeof(int32)*8 bits):                n
- * Then write first item as is (sizeof(a[0])*8 bits):                  a[0]
- * Loop over remaining items and calculate xor_diff:
- *   xor_diff = a[i] ^ a[i - 1] (e.g. 00000011'10110100)
- *   Write it in compact binary form with `BitWriter`
- *   if xor_diff == 0:
- *       write 1 bit:                                                  0
- *   else:
- *       calculate leading zero bits (lzb)
- *       and trailing zero bits (tzb) of xor_diff,
- *       compare to lzb and tzb of previous xor_diff
- *       (X = sizeof(a[i]) * 8, e.g. X = 16, lzb = 6, tzb = 2)
- *       if lzb >= prev_lzb && tzb >= prev_tzb:
- *           (e.g. prev_lzb=4, prev_tzb=1)
- *           write 2 bit prefix:                                       0b10
- *           write xor_diff >> prev_tzb (X - prev_lzb - prev_tzb bits):0b00111011010
- *           (where X = sizeof(a[i]) * 8, e.g. 16)
- *       else:
- *           write 2 bit prefix:                                       0b11
- *           write 5 bits of lzb:                                      0b00110
- *           write 6 bits of (X - lzb - tzb)=(16-6-2)=8:               0b001000
- *           write (X - lzb - tzb) non-zero bits of xor_diff:          0b11101101
- *           prev_lzb = lzb
- *           prev_tzb = tzb
- *
- * @example sequence of Float32 values [0.1, 0.1, 0.11, 0.2, 0.1] is encoded as:
- *
- * .- 4-byte little endian sequence length: 5                                 : 0x00000005
- * |                .- 4 byte (sizeof(Float32) a[0] as UInt32 : -10           : 0xcdcccc3d
- * |                |               .- 4 encoded xor diffs (see below)
- * v_______________ v______________ v__________________________________________________
- * \x05\x00\x00\x00\xcd\xcc\xcc\x3d\x6a\x5a\xd8\xb6\x3c\xcd\x75\xb1\x6c\x77\x00\x00\x00
- *
- * 4 binary encoded xor diffs (\x6a\x5a\xd8\xb6\x3c\xcd\x75\xb1\x6c\x77\x00\x00\x00):
- *
- * ...........................................
- * a[i-1]   = 00111101110011001100110011001101
- * a[i]     = 00111101110011001100110011001101
- * xor_diff = 00000000000000000000000000000000
- * .- 1-bit prefix                                                           : 0b0
- * |
- * | ...........................................
- * | a[i-1]   = 00111101110011001100110011001101
- * ! a[i]     = 00111101111000010100011110101110
- * | xor_diff = 00000000001011011000101101100011
- * | lzb = 10
- * | tzb = 0
- * |.- 2-bit prefix                                                          : 0b11
- * || .- lzb (10)                                                            : 0b1010
- * || |     .- data length (32-10-0): 22                                     : 0b010110
- * || |     |     .- data                                                    : 0b1011011000101101100011
- * || |     |     |
- * || |     |     |                        ...........................................
- * || |     |     |                        a[i-1]   = 00111101111000010100011110101110
- * || |     |     |                        a[i]     = 00111110010011001100110011001101
- * || |     |     |                        xor_diff = 00000011101011011000101101100011
- * || |     |     |                        .- 2-bit prefix                            : 0b11
- * || |     |     |                        | .- lzb = 6                               : 0b00110
- * || |     |     |                        | |     .- data length = (32 - 6) = 26     : 0b011010
- * || |     |     |                        | |     |      .- data                     : 0b11101011011000101101100011
- * || |     |     |                        | |     |      |
- * || |     |     |                        | |     |      |                            ...........................................
- * || |     |     |                        | |     |      |                            a[i-1]   = 00111110010011001100110011001101
- * || |     |     |                        | |     |      |                            a[i]     = 00111101110011001100110011001101
- * || |     |     |                        | |     |      |                            xor_diff = 00000011100000000000000000000000
- * || |     |     |                        | |     |      |                            .- 2-bit prefix                            : 0b10
- * || |     |     |                        | |     |      |                            | .- data                                  : 0b11100000000000000000000000
- * VV_v____ v_____v________________________V_v_____v______v____________________________V_v_____________________________
- * 01101010 01011010 11011000 10110110 00111100 11001101 01110101 10110001 01101100 01110111 00000000 00000000 00000000
- *
- * Please also see unit tests for:
- *   * Examples on what output `BitWriter` produces on predefined input.
- *   * Compatibility tests solidifying encoded binary output on set of predefined sequences.
- */
-class CompressionCodecGorilla : public ICompressionCodec
-{
-public:
-    CompressionCodecGorilla(UInt8 data_bytes_size_);
-
-    uint8_t getMethodByte() const override;
-
-    void updateHash(SipHash & hash) const override;
-
-protected:
-
-    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
-
-    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
-
-    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
-
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return false; }
-
-private:
-    UInt8 data_bytes_size;
-};
-
-}
--- a/src/Compression/CompressionCodecLZ4.cpp
+++ b/src/Compression/CompressionCodecLZ4.cpp
@ -1,7 +1,7 @@
-#include "CompressionCodecLZ4.h"
-
 #include <lz4.h>
 #include <lz4hc.h>
+
+#include <Compression/ICompressionCodec.h>
 #include <Compression/CompressionInfo.h>
 #include <Compression/CompressionFactory.h>
 #include <Compression/LZ4_decompress_faster.h>
@ -9,7 +9,12 @@
 #include <Parsers/ASTLiteral.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTIdentifier.h>
+#include <IO/WriteBuffer.h>
 #include <IO/WriteHelpers.h>
+#include <IO/BufferWithOwnMemory.h>
+
+#include <Compression/LZ4_decompress_faster.h>
+

 #pragma GCC diagnostic ignored "-Wold-style-cast"

@ -17,11 +22,51 @@
 namespace DB
 {

+class CompressionCodecLZ4 : public ICompressionCodec
+{
+public:
+    CompressionCodecLZ4();
+
+    uint8_t getMethodByte() const override;
+
+    UInt32 getAdditionalSizeAtTheEndOfBuffer() const override { return LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER; }
+
+    void updateHash(SipHash & hash) const override;
+
+protected:
+    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
+
+    bool isCompression() const override { return true; }
+    bool isGenericCompression() const override { return true; }
+
+private:
+    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
+
+    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
+
+    mutable LZ4::PerformanceStatistics lz4_stat;
+    ASTPtr codec_desc;
+};
+
+
+class CompressionCodecLZ4HC : public CompressionCodecLZ4
+{
+public:
+    CompressionCodecLZ4HC(int level_);
+
+protected:
+    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
+
+private:
+    const int level;
+};
+
+
 namespace ErrorCodes
 {
-extern const int CANNOT_COMPRESS;
-extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
-extern const int ILLEGAL_CODEC_PARAMETER;
+    extern const int CANNOT_COMPRESS;
+    extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
+    extern const int ILLEGAL_CODEC_PARAMETER;
 }

 CompressionCodecLZ4::CompressionCodecLZ4()
--- a/src/Compression/CompressionCodecLZ4.h
+++ b/src/Compression/CompressionCodecLZ4.h
@ -1,52 +0,0 @@
-#pragma once
-
-#include <IO/WriteBuffer.h>
-#include <Compression/ICompressionCodec.h>
-#include <IO/BufferWithOwnMemory.h>
-#include <Parsers/StringRange.h>
-#include <Compression/LZ4_decompress_faster.h>
-#include <Parsers/IAST_fwd.h>
-
-namespace DB
-{
-
-class CompressionCodecLZ4 : public ICompressionCodec
-{
-public:
-    CompressionCodecLZ4();
-
-    uint8_t getMethodByte() const override;
-
-    UInt32 getAdditionalSizeAtTheEndOfBuffer() const override { return LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER; }
-
-    void updateHash(SipHash & hash) const override;
-
-protected:
-    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
-
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return true; }
-
-private:
-    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
-
-    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
-
-    mutable LZ4::PerformanceStatistics lz4_stat;
-    ASTPtr codec_desc;
-};
-
-
-class CompressionCodecLZ4HC : public CompressionCodecLZ4
-{
-public:
-    CompressionCodecLZ4HC(int level_);
-
-protected:
-    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
-
-private:
-    const int level;
-};
-
-}
--- a/src/Compression/CompressionCodecLZSSE.cpp
+++ b/src/Compression/CompressionCodecLZSSE.cpp
@ -1,4 +1,4 @@
-#include <Compression/CompressionCodecLZSSE.h>
+#include <Compression/ICompressionCodec.h>
 #include <Compression/CompressionFactory.h>
 #include <Compression/CompressionInfo.h>
 #include <Parsers/ASTLiteral.h>
@ -10,6 +10,29 @@

 namespace DB
 {
+
+class CompressionCodecLZSSE : public ICompressionCodec  /// LZSSE codec, parameterized by a type and a level.
+{
+public:
+    CompressionCodecLZSSE(UInt32 type_, UInt32 level_);
+
+    uint8_t getMethodByte() const override;
+    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
+    void updateHash(SipHash & hash) const override;
+
+protected:
+    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
+    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
+    bool isCompression() const override { return true; }
+    bool isGenericCompression() const override { return true; }
+    bool isExperimental() const override { return true; }  /// This codec reports itself as experimental.
+
+private:
+    const UInt32 type;  /// NOTE(review): presumably selects the LZSSE flavour and is set from `type_` - confirm in the definition.
+    const UInt32 level;  /// NOTE(review): presumably set from `level_` - confirm in the definition.
+};
+
+
 namespace ErrorCodes
 {
    extern const int CANNOT_COMPRESS;
--- a/src/Compression/CompressionCodecLZSSE.h
+++ b/src/Compression/CompressionCodecLZSSE.h
@ -1,30 +0,0 @@
-#pragma once
-
-#include <Compression/ICompressionCodec.h>
-
-
-namespace DB
-{
-
-class CompressionCodecLZSSE : public ICompressionCodec
-{
-public:
-    CompressionCodecLZSSE(UInt32 type_, UInt32 level_);
-
-    uint8_t getMethodByte() const override;
-    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
-    void updateHash(SipHash & hash) const override;
-
-protected:
-    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
-    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return true; }
-    bool isExperimental() const override { return true; }
-
-private:
-    const UInt32 type;
-    const UInt32 level;
-};
-
-}
--- a/src/Compression/CompressionCodecLizard.cpp
+++ b/src/Compression/CompressionCodecLizard.cpp
@ -1,4 +1,4 @@
-#include <Compression/CompressionCodecLizard.h>
+#include <Compression/ICompressionCodec.h>
 #include <Compression/CompressionFactory.h>
 #include <Compression/CompressionInfo.h>
 #include <Parsers/ASTLiteral.h>
@ -8,6 +8,32 @@

 namespace DB
 {
+class CompressionCodecLizard : public ICompressionCodec  /// Lizard codec, parameterized by a level.
+{
+public:
+    static constexpr auto LIZARD_DEFAULT_LEVEL = 1;  /// NOTE(review): presumably used when no level is given in the codec description - confirm at the registration site.
+
+    CompressionCodecLizard(int level_);  /// NOTE(review): single-argument constructor is implicit; consider marking it `explicit`.
+
+    uint8_t getMethodByte() const override;
+
+    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
+
+    void updateHash(SipHash & hash) const override;
+
+protected:
+    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
+    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
+
+    bool isCompression() const override { return true; }
+    bool isGenericCompression() const override { return true; }
+    bool isExperimental() const override { return true; }  /// This codec reports itself as experimental.
+
+private:
+    const int level;  /// NOTE(review): presumably set from `level_` - confirm in the definition.
+};
+
+
 namespace ErrorCodes
 {
    extern const int CANNOT_COMPRESS;
--- a/src/Compression/CompressionCodecLizard.h
+++ b/src/Compression/CompressionCodecLizard.h
@ -1,33 +0,0 @@
-#pragma once
-
-#include <Compression/ICompressionCodec.h>
-
-
-namespace DB
-{
-class CompressionCodecLizard : public ICompressionCodec
-{
-public:
-    static constexpr auto LIZARD_DEFAULT_LEVEL = 1;
-
-    CompressionCodecLizard(int level_);
-
-    uint8_t getMethodByte() const override;
-
-    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
-
-    void updateHash(SipHash & hash) const override;
-
-protected:
-    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
-    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
-
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return true; }
-    bool isExperimental() const override { return true; }
-
-private:
-    const int level;
-};
-
-}
--- a/src/Compression/CompressionCodecT64.cpp
+++ b/src/Compression/CompressionCodecT64.cpp
@ -1,6 +1,6 @@
 #include <cstring>

-#include <Compression/CompressionCodecT64.h>
+#include <Compression/ICompressionCodec.h>
 #include <Compression/CompressionFactory.h>
 #include <common/unaligned.h>
 #include <Parsers/IAST.h>
@ -8,18 +8,63 @@
 #include <Parsers/ASTIdentifier.h>
 #include <Parsers/ASTFunction.h>
 #include <IO/WriteHelpers.h>
+#include <Core/Types.h>


 namespace DB
 {

+/// Takes 64 integer values, makes a 64x64 bit matrix, transposes it and crops unused bits (most significant zeroes).
+/// For example, if we have UInt8 values containing only 0 and 1, 64xUInt8 would be compressed into 1xUInt64.
+/// It detects unused bits by calculating the min and max values of the data part and saving them in the header during compression.
+/// There is a special case for signed integer data that crosses zero: one more bit is stored to detect the sign of a value.
+class CompressionCodecT64 : public ICompressionCodec
+{
+public:
+    static constexpr UInt32 HEADER_SIZE = 1 + 2 * sizeof(UInt64);  /// NOTE(review): presumably one leading byte plus the min and max values stored as UInt64 - confirm in the implementation.
+    static constexpr UInt32 MAX_COMPRESSED_BLOCK_SIZE = sizeof(UInt64) * 64;  /// Size in bytes of 64 UInt64 values.
+
+    /// There are 2 compression variants:
+    /// Byte - transpose the bit matrix by bytes (only the last, not full, byte is transposed by bits). It's the default.
+    /// Bit - full bit-transpose of the bit matrix. It uses more resources and leads to better compression with ZSTD (but worse with LZ4).
+    enum class Variant
+    {
+        Byte,
+        Bit
+    };
+
+    CompressionCodecT64(TypeIndex type_idx_, Variant variant_);
+
+    uint8_t getMethodByte() const override;
+
+    void updateHash(SipHash & hash) const override;
+
+protected:
+    UInt32 doCompressData(const char * src, UInt32 src_size, char * dst) const override;
+    void doDecompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size) const override;
+
+    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override
+    {
+        /// uncompressed_size - (uncompressed_size % (sizeof(T) * 64)) + sizeof(UInt64) * sizeof(T) + header_size
+        return uncompressed_size + MAX_COMPRESSED_BLOCK_SIZE + HEADER_SIZE;  /// NOTE(review): the formula above looks like a tighter estimate; the returned value appears to be a simpler, looser bound - confirm.
+    }
+
+    bool isCompression() const override { return true; }
+    bool isGenericCompression() const override { return false; }  /// Unlike LZ4/ZSTD, this codec does not report itself as generic compression.
+
+private:
+    TypeIndex type_idx;  /// NOTE(review): presumably the column data type the codec was created for - confirm in the definition.
+    Variant variant;
+};
+
+
 namespace ErrorCodes
 {
-extern const int CANNOT_COMPRESS;
-extern const int CANNOT_DECOMPRESS;
-extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
-extern const int ILLEGAL_CODEC_PARAMETER;
-extern const int LOGICAL_ERROR;
+    extern const int CANNOT_COMPRESS;
+    extern const int CANNOT_DECOMPRESS;
+    extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
+    extern const int ILLEGAL_CODEC_PARAMETER;
+    extern const int LOGICAL_ERROR;
 }

 namespace
--- a/src/Compression/CompressionCodecT64.h
+++ b/src/Compression/CompressionCodecT64.h
@ -1,53 +0,0 @@
-#pragma once
-
-#include <Core/Types.h>
-#include <Compression/ICompressionCodec.h>
-
-
-namespace DB
-{
-
-/// Get 64 integer values, makes 64x64 bit matrix, transpose it and crop unused bits (most significant zeroes).
-/// In example, if we have UInt8 with only 0 and 1 inside 64xUInt8 would be compressed into 1xUInt64.
-/// It detects unused bits by calculating min and max values of data part, saving them in header in compression phase.
-/// There's a special case with signed integers parts with crossing zero data. Here it stores one more bit to detect sign of value.
-class CompressionCodecT64 : public ICompressionCodec
-{
-public:
-    static constexpr UInt32 HEADER_SIZE = 1 + 2 * sizeof(UInt64);
-    static constexpr UInt32 MAX_COMPRESSED_BLOCK_SIZE = sizeof(UInt64) * 64;
-
-    /// There're 2 compression variants:
-    /// Byte - transpose bit matrix by bytes (only the last not full byte is transposed by bits). It's default.
-    /// Bits - full bit-transpose of the bit matrix. It uses more resources and leads to better compression with ZSTD (but worse with LZ4).
-    enum class Variant
-    {
-        Byte,
-        Bit
-    };
-
-    CompressionCodecT64(TypeIndex type_idx_, Variant variant_);
-
-    uint8_t getMethodByte() const override;
-
-    void updateHash(SipHash & hash) const override;
-
-protected:
-    UInt32 doCompressData(const char * src, UInt32 src_size, char * dst) const override;
-    void doDecompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size) const override;
-
-    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override
-    {
-        /// uncompressed_size - (uncompressed_size % (sizeof(T) * 64)) + sizeof(UInt64) * sizeof(T) + header_size
-        return uncompressed_size + MAX_COMPRESSED_BLOCK_SIZE + HEADER_SIZE;
-    }
-
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return false; }
-
-private:
-    TypeIndex type_idx;
-    Variant variant;
-};
-
-}
--- a/src/Compression/CompressionCodecZSTD.cpp
+++ b/src/Compression/CompressionCodecZSTD.cpp
@ -1,4 +1,4 @@
-#include <Compression/CompressionCodecZSTD.h>
+#include <Compression/ICompressionCodec.h>
 #include <Compression/CompressionInfo.h>
 #include <Compression/CompressionFactory.h>
 #include <zstd.h>
@ -7,11 +7,45 @@
 #include <Parsers/ASTFunction.h>
 #include <Common/typeid_cast.h>
 #include <IO/WriteHelpers.h>
+#include <IO/WriteBuffer.h>
+#include <IO/BufferWithOwnMemory.h>


 namespace DB
 {

+class CompressionCodecZSTD : public ICompressionCodec  /// ZSTD codec, parameterized by a level and, optionally, a window log.
+{
+public:
+    static constexpr auto ZSTD_DEFAULT_LEVEL = 1;
+    static constexpr auto ZSTD_DEFAULT_LOG_WINDOW = 24;  /// NOTE(review): presumably the window log used when long-range mode is requested without an explicit value - confirm at the registration site.
+
+    CompressionCodecZSTD(int level_);  /// NOTE(review): single-argument constructor is implicit; consider marking it `explicit`.
+
+    CompressionCodecZSTD(int level_, int window_log);
+
+    uint8_t getMethodByte() const override;
+
+    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
+
+    void updateHash(SipHash & hash) const override;
+
+protected:
+
+    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
+
+    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
+
+    bool isCompression() const override { return true; }
+    bool isGenericCompression() const override { return true; }
+
+private:
+    const int level;
+    const bool enable_long_range;  /// NOTE(review): presumably true only when the two-argument constructor is used - confirm in the definitions.
+    const int window_log;
+};
+
+
 namespace ErrorCodes
 {
    extern const int CANNOT_COMPRESS;
--- a/src/Compression/CompressionCodecZSTD.h
+++ b/src/Compression/CompressionCodecZSTD.h
@ -1,42 +0,0 @@
-#pragma once
-
-#include <IO/WriteBuffer.h>
-#include <Compression/ICompressionCodec.h>
-#include <IO/BufferWithOwnMemory.h>
-#include <Parsers/StringRange.h>
-
-namespace DB
-{
-
-class CompressionCodecZSTD : public ICompressionCodec
-{
-public:
-    static constexpr auto ZSTD_DEFAULT_LEVEL = 1;
-    static constexpr auto ZSTD_DEFAULT_LOG_WINDOW = 24;
-
-    CompressionCodecZSTD(int level_);
-
-    CompressionCodecZSTD(int level_, int window_log);
-
-    uint8_t getMethodByte() const override;
-
-    UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
-
-    void updateHash(SipHash & hash) const override;
-
-protected:
-
-    UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
-
-    void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
-
-    bool isCompression() const override { return true; }
-    bool isGenericCompression() const override { return true; }
-
-private:
-    const int level;
-    const bool enable_long_range;
-    const int window_log;
-};
-
-}