mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-15 03:53:41 +00:00
3903b442fb
The problem with chmod 000 is that it is simply ignored for the owner of the namespace (verified with a kprobe on security_capable [1]); previously it worked only because there was an explicit check for uid in FS::canRead/canWrite. [1]: cat-10561 [001] 1340776.172944: security_capable_retprobe: (capable_wrt_inode_uidgid+0x40/0x70 <- security_capable) arg1=0xffffffff (0xffffffff is -1, and it is EPERM). Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
128 lines
3.6 KiB
Python
128 lines
3.6 KiB
Python
import json
|
|
import random
|
|
import re
|
|
import string
|
|
import threading
|
|
import time
|
|
from multiprocessing.dummy import Pool
|
|
|
|
import pytest
|
|
from helpers.client import QueryRuntimeException
|
|
from helpers.cluster import ClickHouseCluster
|
|
|
|
cluster = ClickHouseCluster(__file__)


def _add_jbod_replica(name, replica):
    """Register one replica of shard 0 with three tmpfs-backed jbod mounts.

    Both replicas share every setting except the instance name and the
    ``replica`` macro, so the common part lives here. Fresh list/dict
    literals are built on every call, so the instances never share
    mutable arguments.
    """
    return cluster.add_instance(
        name,
        # presumably defines the 'jbod' storage policy used by the test table
        # -- verify against the config file
        main_configs=["configs/config.d/storage_configuration.xml"],
        with_zookeeper=True,
        # stay_alive is required because the test restarts clickhouse on node1
        stay_alive=True,
        tmpfs=["/jbod1:size=100M", "/jbod2:size=100M", "/jbod3:size=100M"],
        macros={"shard": 0, "replica": replica},
    )


node1 = _add_jbod_replica("node1", 1)
node2 = _add_jbod_replica("node2", 2)
|
|
|
|
|
|
@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture: start the cluster, yield it, always shut it down.

    ``cluster.start()`` is deliberately inside the ``try`` so that
    ``cluster.shutdown()`` also runs when startup itself raises.
    """
    try:
        cluster.start()
        yield cluster
    finally:
        # Reached after the last test of the module, or on any failure above.
        cluster.shutdown()
|
|
|
|
|
|
def _jbod1_total_space(node):
    """Return the ``total_space`` that *node* reports for disk ``jbod1``."""
    return int(
        node.query("select total_space from system.disks where name = 'jbod1'")
    )


def _run_as_root(node, command):
    """Run shell *command* inside *node*'s container as privileged root."""
    node.exec_in_container(["bash", "-c", command], privileged=True, user="root")


def test_jbod_ha(start_cluster):
    """Check that a replica survives the loss of one JBOD disk and recovers it.

    Steps:
      1. Create a replicated table with the ``jbod`` storage policy on both
         nodes and insert 50 blocks of 50 rows (2500 rows) through node1.
      2. Mask ``/jbod1`` on node1 by mounting ``proc`` over it, then expect
         the disk to be reported with ``total_space == 0`` while all 2500
         rows remain readable.
      3. Unmount ``proc``, restart clickhouse on node1 and expect ``jbod1``
         to report a non-zero ``total_space`` again.

    The ``proc`` mount is always removed in the cleanup path, so a failed
    assertion does not leave node1 with a masked disk.
    """
    # True while proc is mounted over /jbod1 on node1 and must be undone.
    jbod1_masked = False
    try:
        for i, node in enumerate([node1, node2]):
            node.query(
                """
                CREATE TABLE tbl (p UInt8, d String)
                ENGINE = ReplicatedMergeTree('/clickhouse/tbl', '{}')
                PARTITION BY p
                ORDER BY tuple()
                SETTINGS
                    storage_policy = 'jbod',
                    old_parts_lifetime = 1,
                    cleanup_delay_period = 1,
                    cleanup_delay_period_random_add = 2,
                    max_bytes_to_merge_at_max_space_in_pool = 4096
            """.format(
                    i
                )
            )

        for _ in range(50):
            # around 1k per block
            node1.query(
                "insert into tbl select randConstant() % 2, randomPrintableASCII(16) from numbers(50)"
            )

        node2.query("SYSTEM SYNC REPLICA tbl", timeout=10)

        # Mimic disk failure
        #
        # NOTE: you cannot do one of the following:
        # - chmod 000 - this will not block access to the owner of the namespace,
        #   and running clickhouse from non-root user is very tricky in this
        #   sandbox.
        # - unmount it, to replace with something else because in this case you
        #   will lose tmpfs and besides clickhouse works from root, so it will
        #   still be able to write/read from/to it.
        #
        # So it simply mounts proc over tmpfs, and this will throw an exception
        # for reads, because there is no such file, and does not allow writes
        # either.
        _run_as_root(node1, "mount -t proc proc /jbod1")
        jbod1_masked = True

        time.sleep(3)

        # after 3 seconds jbod1 will be set as broken disk. Let's wait for another 5 seconds for data to be recovered
        time.sleep(5)

        assert _jbod1_total_space(node1) == 0

        # 50 inserts x 50 rows each must all still be readable.
        assert int(node1.query("select count(p) from tbl")) == 2500

        # Mimic disk recovery
        #
        # NOTE: this will unmount only proc from /jbod1 and leave tmpfs
        _run_as_root(node1, "umount /jbod1")
        jbod1_masked = False

        node1.restart_clickhouse()
        time.sleep(5)

        assert _jbod1_total_space(node1) > 0

    finally:
        try:
            # If the test failed while the disk was masked, undo the proc
            # mount so the container is not left with a broken /jbod1.
            if jbod1_masked:
                _run_as_root(node1, "umount /jbod1")
        finally:
            # Drop the table even if the unmount above raised.
            for node in [node1, node2]:
                node.query("DROP TABLE IF EXISTS tbl SYNC")
|