2020-08-28 17:40:45 +00:00
|
|
|
import random
|
2020-09-16 04:26:10 +00:00
|
|
|
import string
|
2021-11-11 08:12:54 +00:00
|
|
|
import logging
|
2020-09-16 04:26:10 +00:00
|
|
|
import pytest
|
2021-11-11 08:12:54 +00:00
|
|
|
import time
|
2020-08-28 17:40:45 +00:00
|
|
|
from helpers.cluster import ClickHouseCluster
|
|
|
|
|
|
|
|
cluster = ClickHouseCluster(__file__)

# node1/node2 are a replica pair and share the same config set.
_REPLICA_CONFIGS = [
    "configs/default_compression.xml",
    "configs/wide_parts_only.xml",
    "configs/long_names.xml",
]

node1 = cluster.add_instance(
    "node1",
    # pass a copy so the two instances never share one mutable list
    main_configs=list(_REPLICA_CONFIGS),
    with_zookeeper=True,
)
node2 = cluster.add_instance(
    "node2",
    main_configs=list(_REPLICA_CONFIGS),
    with_zookeeper=True,
)
# node4 runs with default configs only (used for the compact-parts test).
node4 = cluster.add_instance("node4")
|
|
|
|
|
2020-08-28 17:40:45 +00:00
|
|
|
|
|
|
|
@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture: start the ClickHouse cluster once for all tests.

    Yields the running cluster; shutdown is guaranteed by the ``finally``
    even if ``cluster.start()`` or a test raises.
    """
    try:
        cluster.start()

        yield cluster
    finally:
        cluster.shutdown()
|
|
|
|
|
|
|
|
|
|
|
|
def get_compression_codec_byte(node, table_name, part_name):
    """Return the codec byte of ``data1.bin`` for one part, as hex text.

    Skips the first 16 bytes of the file (presumably the compressed-block
    checksum header — confirm against the on-disk format) and prints the
    next byte via ``od``.
    """
    shell_cmd = (
        "tail -c +17 /var/lib/clickhouse/data/default/{}/{}/data1.bin"
        " | od -x -N 1 | head -n 1 | awk '{{print $2}}'"
    ).format(table_name, part_name)
    raw = node.exec_in_container(["bash", "-c", shell_cmd])
    return raw.strip()
|
|
|
|
|
|
|
|
|
|
|
|
def get_second_multiple_codec_byte(node, table_name, part_name):
    """Return the byte at offset 11 past the 16-byte header of ``data1.bin``.

    Used for CODEC(Multiple, ...) chains, where the byte identifying the
    inner codec sits deeper in the block header than the first codec byte.
    """
    shell_cmd = (
        "tail -c +17 /var/lib/clickhouse/data/default/{}/{}/data1.bin"
        " | od -x -j 11 -N 1 | head -n 1 | awk '{{print $2}}'"
    ).format(table_name, part_name)
    raw = node.exec_in_container(["bash", "-c", shell_cmd])
    return raw.strip()
|
|
|
|
|
|
|
|
|
|
|
|
def get_random_string(length):
    """Return *length* random characters drawn from A-Z and 0-9.

    Not cryptographically secure (uses ``random``) — fine for test data.
    """
    # Hoist the alphabet: the original rebuilt the concatenation on every
    # generator iteration. RNG call sequence is unchanged.
    alphabet = string.ascii_uppercase + string.digits
    return "".join(random.choice(alphabet) for _ in range(length))
|
2020-08-28 17:40:45 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Expected codec identifier bytes as hex text, in the form produced by
# get_compression_codec_byte / get_second_multiple_codec_byte (od -x output).
CODECS_MAPPING = {
    "LZ4": "0082",
    "LZ4HC": "0082",  # not an error, same byte
    "ZSTD": "0090",
    # Marker for a CODEC(Multiple, ...) chain; the real inner codec's byte
    # follows later in the header (see get_second_multiple_codec_byte).
    "Multiple": "0091",
}
|
|
|
|
|
|
|
|
|
|
|
|
def test_default_codec_single(start_cluster):
    """CODEC(Default) must resolve to the size-dependent default codec.

    Parts are sized so that, after the final merge, the three partitions end
    up with ZSTD(10), LZ4HC(5) and LZ4 respectively (thresholds presumably
    come from configs/default_compression.xml — confirm there).
    """

    def check_part(part_name, codec_key, codec_str):
        # One part: on-disk codec byte (node1 only) plus the codec reported
        # by system.parts on both replicas.
        assert (
            get_compression_codec_byte(node1, "compression_table", part_name)
            == CODECS_MAPPING[codec_key]
        )
        for node in (node1, node2):
            assert (
                node.query(
                    "SELECT default_compression_codec FROM system.parts"
                    " WHERE table = 'compression_table' and name = '{}'".format(
                        part_name
                    )
                )
                == codec_str + "\n"
            )

    def reload_parts():
        # DETACH/ATTACH forces both replicas to re-read parts from disk;
        # query order matches the original test exactly.
        for node in (node1, node2):
            node.query("DETACH TABLE compression_table")
        for node in (node1, node2):
            node.query("ATTACH TABLE compression_table")
        for node in (node1, node2):
            node.query("SYSTEM FLUSH LOGS")

    for i, node in enumerate([node1, node2]):
        node.query(
            """
        CREATE TABLE compression_table (
            key UInt64,
            data1 String CODEC(Default)
        ) ENGINE = ReplicatedMergeTree('/t', '{}') ORDER BY tuple() PARTITION BY key;
        """.format(
                i
            )
        )

    # ZSTD(10) and ZSTD(10) after merge
    node1.query("INSERT INTO compression_table VALUES (1, 'x')")

    # ZSTD(10) and LZ4HC(10) after merge
    node1.query(
        "INSERT INTO compression_table VALUES (2, '{}')".format(get_random_string(2048))
    )

    # ZSTD(10) and LZ4 after merge
    node1.query(
        "INSERT INTO compression_table VALUES (3, '{}')".format(
            get_random_string(22048)
        )
    )

    node2.query("SYSTEM SYNC REPLICA compression_table", timeout=15)

    # to reload parts
    reload_parts()

    # Before the merge every fresh part is small, so all use ZSTD(10).
    for part in ("1_0_0_0", "2_0_0_0", "3_0_0_0"):
        check_part(part, "ZSTD", "ZSTD(10)")

    # just to be sure that replication works
    node1.query("OPTIMIZE TABLE compression_table FINAL")
    node2.query("SYSTEM SYNC REPLICA compression_table", timeout=15)

    # to reload parts
    reload_parts()

    # After the merge the codec depends on the merged part's size.
    check_part("1_0_0_1", "ZSTD", "ZSTD(10)")
    check_part("2_0_0_1", "LZ4HC", "LZ4HC(5)")
    check_part("3_0_0_1", "LZ4", "LZ4")

    assert node1.query("SELECT COUNT() FROM compression_table") == "3\n"
    assert node2.query("SELECT COUNT() FROM compression_table") == "3\n"

    node1.query("DROP TABLE compression_table SYNC")
    node2.query("DROP TABLE compression_table SYNC")
|
2020-08-28 17:40:45 +00:00
|
|
|
|
|
|
|
def test_default_codec_multiple(start_cluster):
    """CODEC(NONE, Default) must resolve Default inside a multi-codec chain.

    Every part must carry the "Multiple" marker byte; the second codec byte
    and the system.parts codec must track part size the same way as in the
    single-codec test.
    """

    def check_part(part_name, second_codec_key, codec_str):
        # Multi-codec chains start with the "Multiple" marker byte; the
        # second byte identifies the resolved default codec. Checked on
        # node1's disk, then via system.parts on both replicas.
        assert (
            get_compression_codec_byte(node1, "compression_table_multiple", part_name)
            == CODECS_MAPPING["Multiple"]
        )
        assert (
            get_second_multiple_codec_byte(
                node1, "compression_table_multiple", part_name
            )
            == CODECS_MAPPING[second_codec_key]
        )
        for node in (node1, node2):
            assert (
                node.query(
                    "SELECT default_compression_codec FROM system.parts"
                    " WHERE table = 'compression_table_multiple' and name = '{}'".format(
                        part_name
                    )
                )
                == codec_str + "\n"
            )

    for i, node in enumerate([node1, node2]):
        node.query(
            """
        CREATE TABLE compression_table_multiple (
            key UInt64,
            data1 String CODEC(NONE, Default)
        ) ENGINE = ReplicatedMergeTree('/d', '{}') ORDER BY tuple() PARTITION BY key;
        """.format(
                i
            ),
            # CODEC(NONE, ...) is normally rejected as suspicious
            settings={"allow_suspicious_codecs": 1},
        )

    # ZSTD(10) and ZSTD(10) after merge
    node1.query("INSERT INTO compression_table_multiple VALUES (1, 'x')")

    # ZSTD(10) and LZ4HC(10) after merge
    node1.query(
        "INSERT INTO compression_table_multiple VALUES (2, '{}')".format(
            get_random_string(2048)
        )
    )

    # ZSTD(10) and LZ4 after merge
    node1.query(
        "INSERT INTO compression_table_multiple VALUES (3, '{}')".format(
            get_random_string(22048)
        )
    )

    node2.query("SYSTEM SYNC REPLICA compression_table_multiple", timeout=15)

    # Same codec for all
    for part in ("1_0_0_0", "2_0_0_0", "3_0_0_0"):
        check_part(part, "ZSTD", "ZSTD(10)")

    node2.query("SYSTEM SYNC REPLICA compression_table_multiple", timeout=15)

    node1.query("OPTIMIZE TABLE compression_table_multiple FINAL")

    node2.query("SYSTEM SYNC REPLICA compression_table_multiple", timeout=15)

    # After the merge the inner codec depends on the merged part's size.
    check_part("1_0_0_1", "ZSTD", "ZSTD(10)")
    check_part("2_0_0_1", "LZ4HC", "LZ4HC(5)")
    check_part("3_0_0_1", "LZ4", "LZ4")

    assert node1.query("SELECT COUNT() FROM compression_table_multiple") == "3\n"
    assert node2.query("SELECT COUNT() FROM compression_table_multiple") == "3\n"

    node1.query("DROP TABLE compression_table_multiple SYNC")
    node2.query("DROP TABLE compression_table_multiple SYNC")
|
2020-08-28 09:07:20 +00:00
|
|
|
|
2020-12-23 11:53:49 +00:00
|
|
|
def test_default_codec_for_compact_parts(start_cluster):
    """A part must stay attachable after its default_compression_codec.txt
    marker file has been deleted from disk."""

    def row_count():
        # COUNT() result, including ClickHouse's trailing newline.
        return node4.query("SELECT COUNT() FROM compact_parts_table")

    node4.query(
        """
    CREATE TABLE compact_parts_table (
        key UInt64,
        data String
    )
    ENGINE MergeTree ORDER BY tuple()
    """
    )

    node4.query("INSERT INTO compact_parts_table VALUES (1, 'Hello world')")
    assert row_count() == "1\n"

    # Detach the only part and remove its codec marker file on disk.
    node4.query("ALTER TABLE compact_parts_table DETACH PART 'all_1_1_0'")
    node4.exec_in_container(
        [
            "bash",
            "-c",
            "rm /var/lib/clickhouse/data/default/compact_parts_table/detached/all_1_1_0/default_compression_codec.txt",
        ]
    )

    # The part must re-attach cleanly without the marker file...
    node4.query("ALTER TABLE compact_parts_table ATTACH PART 'all_1_1_0'")
    assert row_count() == "1\n"

    # ...and survive a full table reload as well.
    node4.query("DETACH TABLE compact_parts_table")
    node4.query("ATTACH TABLE compact_parts_table")
    assert row_count() == "1\n"

    node4.query("DROP TABLE compact_parts_table SYNC")
|