ClickHouse/tests/integration/test_encrypted_disk/test.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

298 lines
9.0 KiB
Python
Raw Normal View History

import pytest
from helpers.cluster import ClickHouseCluster
from helpers.client import QueryRuntimeException
from helpers.test_tools import assert_eq_with_retry
FIRST_PART_NAME = "all_1_1_0"
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
"node",
main_configs=["configs/storage.xml"],
tmpfs=["/disk:size=100M"],
with_minio=True,
stay_alive=True,
)
@pytest.fixture(scope="module", autouse=True)
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
2021-05-23 13:31:48 +00:00
@pytest.fixture(autouse=True)
def cleanup_after_test():
try:
yield
finally:
node.query("DROP TABLE IF EXISTS encrypted_test NO DELAY")
@pytest.mark.parametrize(
"policy",
["encrypted_policy", "encrypted_policy_key192b", "local_policy", "s3_policy"],
)
def test_encrypted_disk(policy):
node.query(
"""
CREATE TABLE encrypted_test (
id Int64,
data String
) ENGINE=MergeTree()
ORDER BY id
SETTINGS storage_policy='{}'
""".format(
policy
)
)
node.query("INSERT INTO encrypted_test VALUES (0,'data'),(1,'data')")
select_query = "SELECT * FROM encrypted_test ORDER BY id FORMAT Values"
assert node.query(select_query) == "(0,'data'),(1,'data')"
node.query("INSERT INTO encrypted_test VALUES (2,'data'),(3,'data')")
node.query("OPTIMIZE TABLE encrypted_test FINAL")
assert node.query(select_query) == "(0,'data'),(1,'data'),(2,'data'),(3,'data')"
@pytest.mark.parametrize(
"policy, destination_disks",
[
(
"local_policy",
[
"disk_local_encrypted",
"disk_local_encrypted2",
"disk_local_encrypted_key192b",
"disk_local",
],
),
("s3_policy", ["disk_s3_encrypted", "disk_s3"]),
],
)
def test_part_move(policy, destination_disks):
node.query(
"""
CREATE TABLE encrypted_test (
id Int64,
data String
) ENGINE=MergeTree()
ORDER BY id
SETTINGS storage_policy='{}'
""".format(
policy
)
)
node.query("INSERT INTO encrypted_test VALUES (0,'data'),(1,'data')")
select_query = "SELECT * FROM encrypted_test ORDER BY id FORMAT Values"
assert node.query(select_query) == "(0,'data'),(1,'data')"
2021-07-17 13:35:15 +00:00
for destination_disk in destination_disks:
node.query(
"ALTER TABLE encrypted_test MOVE PART '{}' TO DISK '{}'".format(
FIRST_PART_NAME, destination_disk
)
)
2021-07-17 13:35:15 +00:00
assert node.query(select_query) == "(0,'data'),(1,'data')"
with pytest.raises(QueryRuntimeException) as exc:
node.query(
"ALTER TABLE encrypted_test MOVE PART '{}' TO DISK '{}'".format(
FIRST_PART_NAME, destination_disk
)
)
assert "Part '{}' is already on disk '{}'".format(
FIRST_PART_NAME, destination_disk
) in str(exc.value)
assert node.query(select_query) == "(0,'data'),(1,'data')"
@pytest.mark.parametrize(
"policy,encrypted_disk",
[("local_policy", "disk_local_encrypted"), ("s3_policy", "disk_s3_encrypted")],
)
def test_optimize_table(policy, encrypted_disk):
node.query(
"""
CREATE TABLE encrypted_test (
id Int64,
data String
) ENGINE=MergeTree()
ORDER BY id
SETTINGS storage_policy='{}'
""".format(
policy
)
)
node.query("INSERT INTO encrypted_test VALUES (0,'data'),(1,'data')")
select_query = "SELECT * FROM encrypted_test ORDER BY id FORMAT Values"
assert node.query(select_query) == "(0,'data'),(1,'data')"
node.query(
"ALTER TABLE encrypted_test MOVE PART '{}' TO DISK '{}'".format(
FIRST_PART_NAME, encrypted_disk
)
)
assert node.query(select_query) == "(0,'data'),(1,'data')"
node.query("INSERT INTO encrypted_test VALUES (2,'data'),(3,'data')")
node.query("OPTIMIZE TABLE encrypted_test FINAL")
with pytest.raises(QueryRuntimeException) as exc:
node.query(
"ALTER TABLE encrypted_test MOVE PART '{}' TO DISK '{}'".format(
FIRST_PART_NAME, encrypted_disk
)
)
assert "Part {} is not exists or not active".format(FIRST_PART_NAME) in str(
exc.value
)
assert node.query(select_query) == "(0,'data'),(1,'data'),(2,'data'),(3,'data')"
# Test adding encryption key on the fly.
def test_add_key():
def make_storage_policy_with_keys(policy_name, keys):
node.exec_in_container(
[
"bash",
"-c",
"""cat > /etc/clickhouse-server/config.d/storage_policy_{policy_name}.xml << EOF
<clickhouse>
<storage_configuration>
<disks>
<{policy_name}_disk>
<type>encrypted</type>
<disk>disk_local</disk>
<path>{policy_name}_dir/</path>
{keys}
</{policy_name}_disk>
</disks>
<policies>
<{policy_name}>
<volumes>
<main>
<disk>{policy_name}_disk</disk>
</main>
</volumes>
</{policy_name}>
</policies>
</storage_configuration>
</clickhouse>
EOF""".format(
policy_name=policy_name, keys=keys
),
]
)
node.query("SYSTEM RELOAD CONFIG")
# Add some data to an encrypted disk.
node.query("SELECT policy_name FROM system.storage_policies")
make_storage_policy_with_keys(
"encrypted_policy_multikeys", "<key>firstfirstfirstf</key>"
)
assert_eq_with_retry(
node,
"SELECT policy_name FROM system.storage_policies WHERE policy_name='encrypted_policy_multikeys'",
"encrypted_policy_multikeys",
)
node.query(
"""
CREATE TABLE encrypted_test (
id Int64,
data String
) ENGINE=MergeTree()
ORDER BY id
SETTINGS storage_policy='encrypted_policy_multikeys'
"""
)
node.query("INSERT INTO encrypted_test VALUES (0,'data'),(1,'data')")
select_query = "SELECT * FROM encrypted_test ORDER BY id FORMAT Values"
assert node.query(select_query) == "(0,'data'),(1,'data')"
# Add a second key and start using it.
make_storage_policy_with_keys(
"encrypted_policy_multikeys",
"""
<key id="0">firstfirstfirstf</key>
<key id="1">secondsecondseco</key>
<current_key_id>1</current_key_id>
""",
)
node.query("INSERT INTO encrypted_test VALUES (2,'data'),(3,'data')")
# Now "(0,'data'),(1,'data')" is encrypted with the first key and "(2,'data'),(3,'data')" is encrypted with the second key.
# All data are accessible.
assert node.query(select_query) == "(0,'data'),(1,'data'),(2,'data'),(3,'data')"
# Try to replace the first key with something wrong, and check that "(0,'data'),(1,'data')" cannot be read.
make_storage_policy_with_keys(
"encrypted_policy_multikeys",
"""
<key id="0">wrongwrongwrongw</key>
<key id="1">secondsecondseco</key>
<current_key_id>1</current_key_id>
""",
)
expected_error = "Wrong key"
assert expected_error in node.query_and_get_error(select_query)
# Detach the part encrypted with the wrong key and check that another part containing "(2,'data'),(3,'data')" still can be read.
node.query("ALTER TABLE encrypted_test DETACH PART '{}'".format(FIRST_PART_NAME))
assert node.query(select_query) == "(2,'data'),(3,'data')"
2022-07-28 20:08:53 +00:00
def test_read_in_order():
node.query(
"CREATE TABLE encrypted_test(`a` UInt64, `b` String(150)) ENGINE = MergeTree() ORDER BY (a, b) SETTINGS storage_policy='encrypted_policy'"
)
node.query(
"INSERT INTO encrypted_test SELECT * FROM generateRandom('a UInt64, b FixedString(150)') LIMIT 100000"
)
node.query(
"SELECT * FROM encrypted_test ORDER BY a, b SETTINGS optimize_read_in_order=1 FORMAT Null"
)
node.query(
"SELECT * FROM encrypted_test ORDER BY a, b SETTINGS optimize_read_in_order=0 FORMAT Null"
)
def test_restart():
2023-03-27 14:54:40 +00:00
for policy in ["disk_s3_encrypted_default_path", "encrypted_s3_cache"]:
node.query(
f"""
DROP TABLE IF EXISTS encrypted_test;
CREATE TABLE encrypted_test (
id Int64,
data String
) ENGINE=MergeTree()
ORDER BY id
SETTINGS disk='{policy}'
"""
)
2023-03-27 14:54:40 +00:00
node.query("INSERT INTO encrypted_test VALUES (0,'data'),(1,'data')")
select_query = "SELECT * FROM encrypted_test ORDER BY id FORMAT Values"
assert node.query(select_query) == "(0,'data'),(1,'data')"
2023-03-27 14:54:40 +00:00
node.restart_clickhouse()
2023-03-27 14:54:40 +00:00
assert node.query(select_query) == "(0,'data'),(1,'data')"
2023-03-24 17:53:00 +00:00
2023-03-27 14:54:40 +00:00
node.query("DROP TABLE encrypted_test NO DELAY;")