ClickHouse/tests/integration/test_manipulate_statistics/test.py

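"""Integration test for manipulating column statistics via ALTER TABLE.

Checks that the on-disk statistics files (statistics_<column>.stats) appear and
disappear as statistics are added, dropped, cleared, and materialized, and as
columns are dropped or renamed.
"""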
import pytest
import logging
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)

node1 = cluster.add_instance(
    "node1", user_configs=["config/config.xml"], with_zookeeper=False
)


@pytest.fixture(scope="module")
def started_cluster():
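    """Start the test cluster once per module and shut it down after all tests."""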
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()


def check_stat_file_on_disk(node, table, part_name, column_name, exist):
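    """Assert that the statistics file for `column_name` does (or does not)
    exist on disk inside the given part of `table`."""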
    part_path = node.query(
        "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(
            table, part_name
        )
    ).strip()

    assert len(part_path) != 0
    output = node.exec_in_container(
        [
            "bash",
            "-c",
            "find {p} -type f -name statistics_{col}.stats".format(
                p=part_path, col=column_name
            ),
        ],
        privileged=True,
    )
    logging.debug(
        f"Checking stats file in {part_path} for column {column_name}, got {output}"
    )
    if exist:
        assert len(output) != 0
    else:
        assert len(output) == 0


def run_test_single_node(started_cluster):
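    """Run a sequence of statistics manipulations on `test_stat` and verify
    which statistics files exist on disk after each mutation."""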
    node1.query("INSERT INTO test_stat VALUES (1,2,3), (4,5,6)")

    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0", "a", True)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0", "b", True)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0", "c", True)
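
    # Dropping the statistics declaration for `a` should remove its stats file
    # from the part produced by the mutation (all_1_1_0_2).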
    node1.query("ALTER TABLE test_stat DROP STATISTICS a")

    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_2", "a", False)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_2", "b", True)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_2", "c", True)
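
    # CLEAR STATISTICS removes the on-disk stats files for `b` and `c` while
    # keeping their declarations, so they can be rebuilt later.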
    node1.query("ALTER TABLE test_stat CLEAR STATISTICS b, c")

    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_3", "a", False)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_3", "b", False)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_3", "c", False)
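
    # MATERIALIZE STATISTICS rebuilds the stats files for `b` and `c`;
    # `a` still has no statistics declared, so nothing is written for it.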
    node1.query("ALTER TABLE test_stat MATERIALIZE STATISTICS b, c")

    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_4", "a", False)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_4", "b", True)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_4", "c", True)
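
    # Re-adding tdigest statistics for `a` and materializing them should bring
    # its stats file back.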
    node1.query("ALTER TABLE test_stat ADD STATISTICS a type tdigest")
    node1.query("ALTER TABLE test_stat MATERIALIZE STATISTICS a")

    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_5", "a", True)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_5", "b", True)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_5", "c", True)
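
    # Dropping a column should also remove its statistics file.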
node1.query("ALTER TABLE test_stat DROP COLUMN c")
check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_6", "a", True)
check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_6", "b", True)
check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_6", "c", False)
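
    # Renaming a column should carry its statistics file along with it.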
    node1.query("ALTER TABLE test_stat RENAME COLUMN b TO c")

    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_7", "a", True)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_7", "b", False)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_7", "c", True)

    node1.query("ALTER TABLE test_stat RENAME COLUMN c TO b")

    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_8", "a", True)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_8", "b", True)
    check_stat_file_on_disk(node1, "test_stat", "all_1_1_0_8", "c", False)


def test_single_node_wide(started_cluster):
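    # Same scenario on a table that always uses the Wide part format
    # (min_bytes_for_wide_part = 0).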
node1.query("DROP TABLE IF EXISTS test_stat")
node1.query(
"""
2024-04-24 16:20:48 +00:00
CREATE TABLE test_stat(a Int64 STATISTICS(tdigest), b Int64 STATISTICS(tdigest), c Int64 STATISTICS(tdigest))
ENGINE = MergeTree() ORDER BY a
SETTINGS min_bytes_for_wide_part = 0;
"""
)
run_test_single_node(started_cluster)
def test_single_node_normal(started_cluster):
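    # Same scenario with the default part format settings.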
node1.query("DROP TABLE IF EXISTS test_stat")
node1.query(
"""
2024-04-24 16:20:48 +00:00
CREATE TABLE test_stat(a Int64 STATISTICS(tdigest), b Int64 STATISTICS(tdigest), c Int64 STATISTICS(tdigest))
ENGINE = MergeTree() ORDER BY a;
"""
)
run_test_single_node(started_cluster)