ClickHouse/tests/queries/0_stateless/00804_test_delta_codec_compression.sql
Smita Kulkarni 040f2b62d9 Enable compress marks and primary key
Implementation:
* Set compress_marks and compress_primary_key to true by default.
* Updated getIndexExtensionFromFilesystem to check only for the compressed file extension and otherwise return the default. When both were present, this function returned the uncompressed file extension, which led to an error because the file had been removed.
Testing:
* Updated tests where bytes size or compressed data is checked to check with compress_marks and compress_primary_key set to false.
2022-11-16 14:23:58 +01:00

118 lines
4.4 KiB
SQL

-- Tags: no-parallel

-- Session settings for this file: forward only 'fatal' level server logs and
-- allow joined subqueries to be written without an alias.
SET send_logs_level = 'fatal';
SET joined_subquery_requires_alias = 0;

-- Case 1: sequential UInt64 values.
-- The same 5,000,000 rows from system.numbers are stored twice: once with
-- Delta + ZSTD(3) and once with ZSTD(3) alone. The test then prints how many
-- whole times larger the plain table is on disk, and checks that both tables
-- hold the same data.
-- Both tables disable compress_marks and compress_primary_key; the comparison
-- below is based on bytes_on_disk.

DROP TABLE IF EXISTS delta_codec_synthetic;
CREATE TABLE delta_codec_synthetic
(
    id UInt64 CODEC(Delta, ZSTD(3))
)
ENGINE = MergeTree()
ORDER BY tuple()
SETTINGS
    min_bytes_for_wide_part = 0,
    compress_marks = false,
    compress_primary_key = false;

DROP TABLE IF EXISTS default_codec_synthetic;
CREATE TABLE default_codec_synthetic
(
    id UInt64 CODEC(ZSTD(3))
)
ENGINE = MergeTree()
ORDER BY tuple()
SETTINGS
    min_bytes_for_wide_part = 0,
    compress_marks = false,
    compress_primary_key = false;

INSERT INTO delta_codec_synthetic
SELECT number
FROM system.numbers
LIMIT 5000000;

INSERT INTO default_codec_synthetic
SELECT number
FROM system.numbers
LIMIT 5000000;

-- Run OPTIMIZE ... FINAL on both tables before their part sizes are read.
OPTIMIZE TABLE delta_codec_synthetic FINAL;
OPTIMIZE TABLE default_codec_synthetic FINAL;

-- Size ratio: total bytes of active parts, plain codec divided by Delta codec,
-- rounded down. The constant `key` column exists only to join the two
-- single-row aggregates.
SELECT
    floor(big_size / small_size) AS ratio
FROM
(
    SELECT
        1 AS key,
        sum(bytes_on_disk) AS small_size
    FROM system.parts
    WHERE database = currentDatabase()
      AND table = 'delta_codec_synthetic'
      AND active
) AS delta_parts
INNER JOIN
(
    SELECT
        1 AS key,
        sum(bytes_on_disk) AS big_size
    FROM system.parts
    WHERE database = currentDatabase()
      AND table = 'default_codec_synthetic'
      AND active
) AS default_parts
USING (key);

-- Content check: the sum of per-row cityHash64 values must match between the
-- two tables.
SELECT
    small_hash = big_hash
FROM
(
    SELECT
        1 AS key,
        sum(cityHash64(id)) AS small_hash
    FROM delta_codec_synthetic
) AS delta_data
INNER JOIN
(
    SELECT
        1 AS key,
        sum(cityHash64(id)) AS big_hash
    FROM default_codec_synthetic
) AS default_data
USING (key);

DROP TABLE IF EXISTS delta_codec_synthetic;
DROP TABLE IF EXISTS default_codec_synthetic;
-- Case 2: Float64 column, Delta + LZ4HC versus LZ4HC alone.
-- The data are the values from numbers(1547510400, 500000) that are divisible
-- by 3, 5, 7 or 11. The plain-codec table is filled by copying the Delta
-- table, so both hold the same rows.
-- Both tables disable compress_marks and compress_primary_key; the comparison
-- below is based on bytes_on_disk.

DROP TABLE IF EXISTS delta_codec_float;
CREATE TABLE delta_codec_float
(
    id Float64 CODEC(Delta, LZ4HC)
)
ENGINE = MergeTree()
ORDER BY tuple()
SETTINGS
    min_bytes_for_wide_part = 0,
    compress_marks = false,
    compress_primary_key = false;

DROP TABLE IF EXISTS default_codec_float;
CREATE TABLE default_codec_float
(
    id Float64 CODEC(LZ4HC)
)
ENGINE = MergeTree()
ORDER BY tuple()
SETTINGS
    min_bytes_for_wide_part = 0,
    compress_marks = false,
    compress_primary_key = false;

INSERT INTO delta_codec_float
SELECT number
FROM numbers(1547510400, 500000)
WHERE number % 3 = 0
   OR number % 5 = 0
   OR number % 7 = 0
   OR number % 11 = 0;

INSERT INTO default_codec_float
SELECT id
FROM delta_codec_float;

-- Run OPTIMIZE ... FINAL on both tables before their part sizes are read.
OPTIMIZE TABLE delta_codec_float FINAL;
OPTIMIZE TABLE default_codec_float FINAL;

-- Size ratio: total bytes of active parts, plain codec divided by Delta codec,
-- rounded down. The constant `key` column exists only to join the two
-- single-row aggregates.
SELECT
    floor(big_size / small_size) AS ratio
FROM
(
    SELECT
        1 AS key,
        sum(bytes_on_disk) AS small_size
    FROM system.parts
    WHERE database = currentDatabase()
      AND table = 'delta_codec_float'
      AND active
) AS delta_parts
INNER JOIN
(
    SELECT
        1 AS key,
        sum(bytes_on_disk) AS big_size
    FROM system.parts
    WHERE database = currentDatabase()
      AND table = 'default_codec_float'
      AND active
) AS default_parts
USING (key);

-- Content check: the sum of per-row cityHash64 values must match between the
-- two tables.
SELECT
    small_hash = big_hash
FROM
(
    SELECT
        1 AS key,
        sum(cityHash64(id)) AS small_hash
    FROM delta_codec_float
) AS delta_data
INNER JOIN
(
    SELECT
        1 AS key,
        sum(cityHash64(id)) AS big_hash
    FROM default_codec_float
) AS default_data
USING (key);

DROP TABLE IF EXISTS delta_codec_float;
DROP TABLE IF EXISTS default_codec_float;
-- Case 3: Delta + LZ4 versus LZ4 alone.
-- NOTE(review): despite the `_string` table names, the column is declared as
-- Float64, while the inserted expression is a string built with
-- concat(toString(number), toString(number % 100)). The INSERT therefore
-- appears to rely on the server converting that string to Float64. Confirm
-- this is intended before changing either the type or the expression, since
-- the printed size ratio depends on the stored values.
-- Both tables disable compress_marks and compress_primary_key; the comparison
-- below is based on bytes_on_disk.

DROP TABLE IF EXISTS delta_codec_string;
CREATE TABLE delta_codec_string
(
    id Float64 CODEC(Delta, LZ4)
)
ENGINE = MergeTree()
ORDER BY tuple()
SETTINGS
    min_bytes_for_wide_part = 0,
    compress_marks = false,
    compress_primary_key = false;

DROP TABLE IF EXISTS default_codec_string;
CREATE TABLE default_codec_string
(
    id Float64 CODEC(LZ4)
)
ENGINE = MergeTree()
ORDER BY tuple()
SETTINGS
    min_bytes_for_wide_part = 0,
    compress_marks = false,
    compress_primary_key = false;

INSERT INTO delta_codec_string
SELECT concat(toString(number), toString(number % 100))
FROM numbers(1547510400, 500000);

-- The plain-codec table is a copy of the Delta table, so both hold the same rows.
INSERT INTO default_codec_string
SELECT id
FROM delta_codec_string;

-- Run OPTIMIZE ... FINAL on both tables before their part sizes are read.
OPTIMIZE TABLE delta_codec_string FINAL;
OPTIMIZE TABLE default_codec_string FINAL;

-- Size ratio: total bytes of active parts, plain codec divided by Delta codec,
-- rounded down. The constant `key` column exists only to join the two
-- single-row aggregates.
SELECT
    floor(big_size / small_size) AS ratio
FROM
(
    SELECT
        1 AS key,
        sum(bytes_on_disk) AS small_size
    FROM system.parts
    WHERE database = currentDatabase()
      AND table = 'delta_codec_string'
      AND active
) AS delta_parts
INNER JOIN
(
    SELECT
        1 AS key,
        sum(bytes_on_disk) AS big_size
    FROM system.parts
    WHERE database = currentDatabase()
      AND table = 'default_codec_string'
      AND active
) AS default_parts
USING (key);

-- Content check: the sum of per-row cityHash64 values must match between the
-- two tables.
SELECT
    small_hash = big_hash
FROM
(
    SELECT
        1 AS key,
        sum(cityHash64(id)) AS small_hash
    FROM delta_codec_string
) AS delta_data
INNER JOIN
(
    SELECT
        1 AS key,
        sum(cityHash64(id)) AS big_hash
    FROM default_codec_string
) AS default_data
USING (key);

DROP TABLE IF EXISTS delta_codec_string;
DROP TABLE IF EXISTS default_codec_string;