Add compression codec delta and fix naming for parametric codecs

This commit is contained in:
alesapin 2019-01-14 14:54:40 +03:00
parent 0a334b2dde
commit c8257f6d0b
9 changed files with 44 additions and 26 deletions

View File

@ -8,6 +8,7 @@
#include "CompressionCodecLZ4.h"
#include <Parsers/IAST.h>
#include <Parsers/ASTLiteral.h>
#include <IO/WriteHelpers.h>
#ifdef __clang__
#pragma clang diagnostic ignored "-Wold-style-cast"
@ -61,7 +62,7 @@ void registerCodecLZ4(CompressionCodecFactory & factory)
String CompressionCodecLZ4HC::getCodecDesc() const
{
return "LZ4HC";
return "LZ4HC(" + toString(level) + ")";
}
UInt32 CompressionCodecLZ4HC::doCompressData(const char * source, UInt32 source_size, char * dest) const

View File

@ -41,7 +41,7 @@ protected:
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
private:
int level;
const int level;
};
}

View File

@ -28,7 +28,7 @@ UInt8 CompressionCodecZSTD::getMethodByte() const
String CompressionCodecZSTD::getCodecDesc() const
{
return "ZSTD";
return "ZSTD(" + toString(level) + ")";
}
UInt32 CompressionCodecZSTD::getCompressedDataSize(UInt32 uncompressed_size) const

View File

@ -27,7 +27,7 @@ protected:
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
private:
int level;
const int level;
};
}

View File

@ -10,7 +10,6 @@
#include <Parsers/queryToString.h>
#include <Compression/CompressionCodecMultiple.h>
#include <Compression/CompressionCodecLZ4.h>
#include <Compression/CompressionCodecNone.h>
#include <IO/WriteHelpers.h>
@ -135,7 +134,7 @@ void registerCodecNone(CompressionCodecFactory & factory);
void registerCodecZSTD(CompressionCodecFactory & factory);
void registerCodecMultiple(CompressionCodecFactory & factory);
void registerCodecLZ4HC(CompressionCodecFactory & factory);
//void registerCodecDelta(CompressionCodecFactory & factory);
void registerCodecDelta(CompressionCodecFactory & factory);
CompressionCodecFactory::CompressionCodecFactory()
{
@ -145,7 +144,7 @@ CompressionCodecFactory::CompressionCodecFactory()
registerCodecZSTD(*this);
registerCodecMultiple(*this);
registerCodecLZ4HC(*this);
// registerCodecDelta(*this);
registerCodecDelta(*this);
}
}

View File

@ -5,6 +5,7 @@
#include <optional>
#include <unordered_map>
#include <ext/singleton.h>
#include <DataTypes/IDataType.h>
#include <Common/IFactoryWithAliases.h>
#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionInfo.h>

View File

@ -39,6 +39,7 @@ enum class CompressionMethodByte : uint8_t
LZ4 = 0x82,
ZSTD = 0x90,
Multiple = 0x91,
Delta = 0x92,
};
}

View File

@ -1,6 +1,6 @@
1 hello 2018-12-14 1.1 aaa
2 world 2018-12-15 2.2 bbb
3 ! 2018-12-16 3.3 ccc
1 hello 2018-12-14 1.1 aaa 5
2 world 2018-12-15 2.2 bbb 6
3 ! 2018-12-16 3.3 ccc 7
2
1 world 2018-10-05 1.1
2 hello 2018-10-01 2.2
@ -9,7 +9,7 @@
10003
274972506.6
9175437371954010821
CREATE TABLE test.compression_codec_multiple_more_types ( id Decimal(38, 13) CODEC(ZSTD, LZ4, ZSTD, ZSTD, LZ4HC), data FixedString(12) CODEC(ZSTD, ZSTD, NONE, NONE, NONE, LZ4HC), `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC, NONE, NONE, NONE, ZSTD), `ddd.Name` Array(String) CODEC(LZ4, LZ4HC, NONE, NONE, NONE, ZSTD)) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192
CREATE TABLE test.compression_codec_multiple_more_types ( id Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)), data FixedString(12) CODEC(ZSTD(1), ZSTD(1), Delta(1), Delta(1), Delta(1), NONE, NONE, NONE, LZ4HC(0)), `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)), `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192
1.5555555555555 hello world! [77] ['John']
7.1000000000000 xxxxxxxxxxxx [127] ['Henry']
!

View File

@ -2,17 +2,27 @@ SET send_logs_level = 'none';
DROP TABLE IF EXISTS test.compression_codec;
CREATE TABLE test.compression_codec(id UInt64 CODEC(LZ4), data String CODEC(ZSTD), ddd Date CODEC(NONE), somenum Float64 CODEC(ZSTD(2)), somestr FixedString(3) CODEC(LZ4HC(7))) ENGINE = MergeTree() ORDER BY tuple();
CREATE TABLE test.compression_codec(
id UInt64 CODEC(LZ4),
data String CODEC(ZSTD),
ddd Date CODEC(NONE),
somenum Float64 CODEC(ZSTD(2)),
somestr FixedString(3) CODEC(LZ4HC(7)),
othernum Int64 CODEC(Delta)
) ENGINE = MergeTree() ORDER BY tuple();
INSERT INTO test.compression_codec VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa');
INSERT INTO test.compression_codec VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb');
INSERT INTO test.compression_codec VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc');
INSERT INTO test.compression_codec VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5);
INSERT INTO test.compression_codec VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6);
INSERT INTO test.compression_codec VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7);
SELECT * FROM test.compression_codec ORDER BY id;
OPTIMIZE TABLE test.compression_codec FINAL;
INSERT INTO test.compression_codec VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd');
INSERT INTO test.compression_codec VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8);
DETACH TABLE test.compression_codec;
ATTACH TABLE test.compression_codec;
SELECT count(*) FROM test.compression_codec WHERE id = 2 GROUP BY id;
@ -23,28 +33,34 @@ DROP TABLE IF EXISTS test.params_when_no_params;
DROP TABLE IF EXISTS test.too_many_params;
DROP TABLE IF EXISTS test.codec_multiple_direct_specification_1;
DROP TABLE IF EXISTS test.codec_multiple_direct_specification_2;
DROP TABLE IF EXISTS test.delta_bad_params1;
DROP TABLE IF EXISTS test.delta_bad_params2;
CREATE TABLE test.bad_codec(id UInt64 CODEC(adssadads)) ENGINE = MergeTree() order by tuple(); -- { serverError 432 }
CREATE TABLE test.too_many_params(id UInt64 CODEC(ZSTD(2,3,4,5))) ENGINE = MergeTree() order by tuple(); -- { serverError 431 }
CREATE TABLE test.params_when_no_params(id UInt64 CODEC(LZ4(1))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 378 }
CREATE TABLE test.codec_multiple_direct_specification_1(id UInt64 CODEC(MULTIPLE(LZ4, ZSTD))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 432 }
CREATE TABLE test.codec_multiple_direct_specification_2(id UInt64 CODEC(multiple(LZ4, ZSTD))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 432 }
CREATE TABLE test.delta_bad_params1(id UInt64 CODEC(Delta(3))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 433 }
CREATE TABLE test.delta_bad_params2(id UInt64 CODEC(Delta(16))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 433 }
DROP TABLE IF EXISTS test.bad_codec;
DROP TABLE IF EXISTS test.params_when_no_params;
DROP TABLE IF EXISTS test.too_many_params;
DROP TABLE IF EXISTS test.codec_multiple_direct_specification_1;
DROP TABLE IF EXISTS test.codec_multiple_direct_specification_2;
DROP TABLE IF EXISTS test.delta_bad_params1;
DROP TABLE IF EXISTS test.delta_bad_params2;
DROP TABLE IF EXISTS test.compression_codec_multiple;
SET network_compression_method = 'lz4hc';
CREATE TABLE test.compression_codec_multiple (
id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC),
data String CODEC(ZSTD(2), NONE, LZ4HC, LZ4, LZ4),
ddd Date CODEC(NONE, NONE, NONE, LZ4, ZSTD, LZ4HC, LZ4HC),
somenum Float64 CODEC(LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD)
id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, Delta(4)),
data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC, LZ4, LZ4, Delta(8)),
ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD, LZ4HC, LZ4HC),
somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD)
) ENGINE = MergeTree() ORDER BY tuple();
INSERT INTO test.compression_codec_multiple VALUES (1, 'world', toDate('2018-10-05'), 1.1), (2, 'hello', toDate('2018-10-01'), 2.2), (3, 'buy', toDate('2018-10-11'), 3.3);
@ -68,9 +84,9 @@ SELECT sum(cityHash64(*)) FROM test.compression_codec_multiple;
DROP TABLE IF EXISTS test.compression_codec_multiple_more_types;
CREATE TABLE test.compression_codec_multiple_more_types (
id Decimal128(13) CODEC(ZSTD, LZ4, ZSTD, ZSTD, LZ4HC),
data FixedString(12) CODEC(ZSTD, ZSTD, NONE, NONE, NONE, LZ4HC),
ddd Nested (age UInt8, Name String) CODEC(LZ4, LZ4HC, NONE, NONE, NONE, ZSTD)
id Decimal128(13) CODEC(ZSTD, LZ4, ZSTD, ZSTD, Delta(2), Delta(4), Delta(1), LZ4HC),
data FixedString(12) CODEC(ZSTD, ZSTD, Delta, Delta, Delta, NONE, NONE, NONE, LZ4HC),
ddd Nested (age UInt8, Name String) CODEC(LZ4, LZ4HC, NONE, NONE, NONE, ZSTD, Delta(8))
) ENGINE = MergeTree() ORDER BY tuple();
SHOW CREATE TABLE test.compression_codec_multiple_more_types;
@ -86,9 +102,9 @@ SET network_compression_method = 'zstd';
SET network_zstd_compression_level = 5;
CREATE TABLE test.compression_codec_multiple_with_key (
somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12)),
id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC),
data String CODEC(ZSTD(2), LZ4HC, NONE, LZ4, LZ4)
somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12), Delta, Delta),
id UInt64 CODEC(LZ4, ZSTD, Delta, NONE, LZ4HC, Delta),
data String CODEC(ZSTD(2), Delta, LZ4HC, NONE, LZ4, LZ4)
) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2;