ClickHouse/tests/queries/0_stateless/01272_suspicious_codecs.sql

137 lines
4.6 KiB
MySQL
Raw Normal View History

2021-09-12 12:35:27 +00:00
-- Tags: no-parallel
2020-05-04 14:46:11 +00:00
DROP TABLE IF EXISTS codecs;
-- test what should work
CREATE TABLE codecs
(
a UInt8 CODEC(LZ4),
b UInt16 CODEC(ZSTD),
c Float32 CODEC(Gorilla),
d UInt8 CODEC(Delta, LZ4),
e Float64 CODEC(Gorilla, ZSTD),
Deprecate Gorilla compression of non-float columns Reasons: 1. The original Gorilla paper proposed a compression schema for pairs of time stamps and double-precision FP values. ClickHouse's Gorilla codec only implements compression of the latter and it does not impose any data type restrictions. - Data types != Float* or (U)Int* (e.g. Decimal, Point etc.) are definitely not supposed to be used with Gorilla. - (U)Int* types are debatable. The paper only considers integers-stored-as-FP-values, a practical use case for which Gorilla works well. Standalone integers are not considered which makes them at least suspicious. 2. Achieve consistency with FPC, another specialized floating-point timeseries codec, which rejects non-float data. 3. On practical datasets, ZSTD is often "good enough" (**) so it should be okay to disincentive non-ZSTD codecs a little bit. If needed, Delta and DoubleDelta codecs are viable alternative for slowly changing (time-series-like) integer sequences. Since on-prem and hosted users may still have Gorilla-compressed non-float data, this combination is only deprecated for now. No warning or error will be emitted. Users are encouraged to migrate Gorilla-compressed non-float data to an alternative codec. It is planned to treat Gorilla-compressed non-float columns as "suspicious" six months after this commit (i.e. in v23.6). Even then, it will still be possible to set "allow_suspicious_codecs = true" and read and write Gorilla-compressed non-float data. (*) Sec. 4.1.2, "Gorilla restricts the value element in its tuple to a double floating point type.", https://doi.org/10.14778/2824032.2824078 (**) https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema
2023-01-20 14:30:53 +00:00
f UInt32 CODEC(Delta, Delta, T64),
2020-05-04 14:46:11 +00:00
g DateTime CODEC(DoubleDelta),
h DateTime64 CODEC(DoubleDelta, LZ4),
i String CODEC(NONE)
2020-05-04 14:46:11 +00:00
) ENGINE = MergeTree ORDER BY tuple();
DROP TABLE codecs;
-- test what should not work
Deprecate Gorilla compression of non-float columns Reasons: 1. The original Gorilla paper proposed a compression schema for pairs of time stamps and double-precision FP values. ClickHouse's Gorilla codec only implements compression of the latter and it does not impose any data type restrictions. - Data types != Float* or (U)Int* (e.g. Decimal, Point etc.) are definitely not supposed to be used with Gorilla. - (U)Int* types are debatable. The paper only considers integers-stored-as-FP-values, a practical use case for which Gorilla works well. Standalone integers are not considered which makes them at least suspicious. 2. Achieve consistency with FPC, another specialized floating-point timeseries codec, which rejects non-float data. 3. On practical datasets, ZSTD is often "good enough" (**) so it should be okay to disincentive non-ZSTD codecs a little bit. If needed, Delta and DoubleDelta codecs are viable alternative for slowly changing (time-series-like) integer sequences. Since on-prem and hosted users may still have Gorilla-compressed non-float data, this combination is only deprecated for now. No warning or error will be emitted. Users are encouraged to migrate Gorilla-compressed non-float data to an alternative codec. It is planned to treat Gorilla-compressed non-float columns as "suspicious" six months after this commit (i.e. in v23.6). Even then, it will still be possible to set "allow_suspicious_codecs = true" and read and write Gorilla-compressed non-float data. (*) Sec. 4.1.2, "Gorilla restricts the value element in its tuple to a double floating point type.", https://doi.org/10.14778/2824032.2824078 (**) https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema
2023-01-20 14:30:53 +00:00
CREATE TABLE codecs (a UInt8 CODEC(NONE, NONE)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE codecs (a UInt8 CODEC(NONE, LZ4)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE codecs (a UInt8 CODEC(LZ4, NONE)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE codecs (a UInt8 CODEC(LZ4, LZ4)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE codecs (a UInt8 CODEC(LZ4, ZSTD)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE codecs (a UInt8 CODEC(Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE codecs (a UInt8 CODEC(Delta, Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE codecs (a UInt8 CODEC(LZ4, Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
2023-01-24 10:35:23 +00:00
CREATE TABLE codecs (a UInt8 CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE codecs (a FixedString(2) CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE codecs (a Decimal(15,5) CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
2020-05-04 14:46:11 +00:00
-- test that sanity check is not performed in ATTACH query
DROP TABLE IF EXISTS codecs1;
DROP TABLE IF EXISTS codecs2;
DROP TABLE IF EXISTS codecs3;
DROP TABLE IF EXISTS codecs4;
DROP TABLE IF EXISTS codecs5;
DROP TABLE IF EXISTS codecs6;
DROP TABLE IF EXISTS codecs7;
DROP TABLE IF EXISTS codecs8;
2023-01-24 10:35:23 +00:00
DROP TABLE IF EXISTS codecs9;
DROP TABLE IF EXISTS codecs10;
DROP TABLE IF EXISTS codecs11;
2020-05-04 14:46:11 +00:00
SET allow_suspicious_codecs = 1;
CREATE TABLE codecs1 (a UInt8 CODEC(NONE, NONE)) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE codecs2 (a UInt8 CODEC(NONE, LZ4)) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE codecs3 (a UInt8 CODEC(LZ4, NONE)) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE codecs4 (a UInt8 CODEC(LZ4, LZ4)) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE codecs5 (a UInt8 CODEC(LZ4, ZSTD)) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE codecs6 (a UInt8 CODEC(Delta)) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE codecs7 (a UInt8 CODEC(Delta, Delta)) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE codecs8 (a UInt8 CODEC(LZ4, Delta)) ENGINE = MergeTree ORDER BY tuple();
2023-01-24 10:35:23 +00:00
CREATE TABLE codecs9 (a UInt8 CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE codecs10 (a FixedString(2) CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE codecs11 (a Decimal(15,5) CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple();
2020-05-04 14:46:11 +00:00
SET allow_suspicious_codecs = 0;
SHOW CREATE TABLE codecs1;
SHOW CREATE TABLE codecs2;
SHOW CREATE TABLE codecs3;
SHOW CREATE TABLE codecs4;
SHOW CREATE TABLE codecs5;
SHOW CREATE TABLE codecs6;
SHOW CREATE TABLE codecs7;
SHOW CREATE TABLE codecs8;
2023-01-24 10:35:23 +00:00
SHOW CREATE TABLE codecs9;
SHOW CREATE TABLE codecs10;
SHOW CREATE TABLE codecs11;
2020-05-04 14:46:11 +00:00
DETACH TABLE codecs1;
DETACH TABLE codecs2;
DETACH TABLE codecs3;
DETACH TABLE codecs4;
DETACH TABLE codecs5;
DETACH TABLE codecs6;
DETACH TABLE codecs7;
DETACH TABLE codecs8;
2023-01-24 10:35:23 +00:00
DETACH TABLE codecs9;
DETACH TABLE codecs10;
DETACH TABLE codecs11;
2020-05-04 14:46:11 +00:00
ATTACH TABLE codecs1;
ATTACH TABLE codecs2;
ATTACH TABLE codecs3;
ATTACH TABLE codecs4;
ATTACH TABLE codecs5;
ATTACH TABLE codecs6;
ATTACH TABLE codecs7;
ATTACH TABLE codecs8;
2023-01-24 10:35:23 +00:00
ATTACH TABLE codecs9;
ATTACH TABLE codecs10;
ATTACH TABLE codecs11;
2020-05-04 14:46:11 +00:00
SHOW CREATE TABLE codecs1;
SHOW CREATE TABLE codecs2;
SHOW CREATE TABLE codecs3;
SHOW CREATE TABLE codecs4;
SHOW CREATE TABLE codecs5;
SHOW CREATE TABLE codecs6;
SHOW CREATE TABLE codecs7;
SHOW CREATE TABLE codecs8;
2023-01-24 10:35:23 +00:00
SHOW CREATE TABLE codecs9;
SHOW CREATE TABLE codecs10;
SHOW CREATE TABLE codecs11;
2020-05-04 14:46:11 +00:00
SELECT * FROM codecs1;
SELECT * FROM codecs2;
SELECT * FROM codecs3;
SELECT * FROM codecs4;
SELECT * FROM codecs5;
SELECT * FROM codecs6;
SELECT * FROM codecs7;
SELECT * FROM codecs8;
2023-01-24 10:35:23 +00:00
SELECT * FROM codecs9;
SELECT * FROM codecs10;
SELECT * FROM codecs11;
2020-05-04 14:46:11 +00:00
DROP TABLE codecs1;
DROP TABLE codecs2;
DROP TABLE codecs3;
DROP TABLE codecs4;
DROP TABLE codecs5;
DROP TABLE codecs6;
DROP TABLE codecs7;
DROP TABLE codecs8;
2023-01-24 10:35:23 +00:00
DROP TABLE codecs9;
DROP TABLE codecs10;
DROP TABLE codecs11;