Merge pull request #57977 from bianpengyuan/add-s3-read-only-setting

Add a setting to specify that an S3 disk is read-only.
This commit is contained in:
Alexey Milovidov 2023-12-28 01:24:50 +01:00 committed by GitHub
commit 30f6d03b37
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 154 additions and 2 deletions

View File

@ -22,11 +22,13 @@ struct S3ObjectStorageSettings
const S3Settings::RequestSettings & request_settings_,
uint64_t min_bytes_for_seek_,
int32_t list_object_keys_size_,
int32_t objects_chunk_size_to_delete_)
int32_t objects_chunk_size_to_delete_,
bool read_only_)
: request_settings(request_settings_)
, min_bytes_for_seek(min_bytes_for_seek_)
, list_object_keys_size(list_object_keys_size_)
, objects_chunk_size_to_delete(objects_chunk_size_to_delete_)
, read_only(read_only_)
{}
S3Settings::RequestSettings request_settings;
@ -34,6 +36,7 @@ struct S3ObjectStorageSettings
uint64_t min_bytes_for_seek;
int32_t list_object_keys_size;
int32_t objects_chunk_size_to_delete;
bool read_only;
};
@ -166,6 +169,8 @@ public:
ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override;
/// Reports whether this object storage is configured as read-only, i.e. the
/// `read_only` flag of the settings object currently returned by `s3_settings.get()`.
bool isReadOnly() const override
{
    const auto & current_settings = s3_settings.get();
    return current_settings->read_only;
}
private:
void setNewSettings(std::unique_ptr<S3ObjectStorageSettings> && s3_settings_);

View File

@ -34,7 +34,8 @@ std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractC
request_settings,
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
config.getInt(config_prefix + ".list_object_keys_size", 1000),
config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000));
config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000),
config.getBool(config_prefix + ".readonly", false));
}
std::unique_ptr<S3::Client> getClient(

View File

@ -0,0 +1,22 @@
<clickhouse>
<!-- Test storage configuration: one `s3_plain` disk flagged as read-only, -->
<!-- and one storage policy that places tables on that disk. -->
<storage_configuration>
<disks>
<disk_s3_plain_readonly>
<type>s3_plain</type>
<!-- MinIO endpoint and object-key prefix that this disk reads from. -->
<endpoint>http://minio1:9001/root/data/disks/disk_s3_plain/</endpoint>
<access_key_id>minio</access_key_id>
<secret_access_key>minio123</secret_access_key>
<!-- The setting under test: marks this disk as read-only (defaults to false when omitted). -->
<readonly>true</readonly>
</disk_s3_plain_readonly>
</disks>
<policies>
<!-- Policy name referenced by the test via `storage_policy = 's3_plain_readonly'`. -->
<s3_plain_readonly>
<volumes>
<main>
<disk>disk_s3_plain_readonly</disk>
</main>
</volumes>
</s3_plain_readonly>
</policies>
</storage_configuration>
</clickhouse>

View File

@ -0,0 +1,12 @@
<clickhouse>
<!-- User-level settings for the test instances. -->
<profiles>
<default>
<!-- Enables the experimental Replicated database engine for the default profile. -->
<allow_experimental_database_replicated>1</allow_experimental_database_replicated>
</default>
</profiles>
<users>
<!-- The `default` user is bound to the `default` profile defined above. -->
<default>
<profile>default</profile>
</default>
</users>
</clickhouse>

View File

@ -0,0 +1,112 @@
import re
import os
import logging
import pytest
from helpers.cluster import ClickHouseCluster
from minio.error import S3Error
from pathlib import Path
# Module-wide cluster: two instances that share the same configs and differ
# only in their instance name and in the "replica" macro.
cluster = ClickHouseCluster(__file__)


def _add_replica(instance_name, replica_index):
    """Register one instance (shard 1, the given replica index) on the shared cluster."""
    return cluster.add_instance(
        instance_name,
        main_configs=["configs/config.xml"],
        user_configs=["configs/settings.xml"],
        with_zookeeper=True,
        with_minio=True,
        stay_alive=True,
        macros={"shard": 1, "replica": replica_index},
    )


node1 = _add_replica("node1", 1)
node2 = _add_replica("node2", 2)

# Matches a lowercase, hyphenated 8-4-4-4-12 hexadecimal UUID.
uuid_regex = re.compile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}")
def upload_to_minio(minio_client, bucket_name, local_path, minio_path=""):
    """Recursively upload every file under ``local_path`` to a MinIO bucket.

    The directory layout below ``local_path`` is preserved in the object names.

    Args:
        minio_client: Client object exposing ``put_object(bucket, name, data, length)``.
        bucket_name: Name of the destination bucket.
        local_path: Root directory whose files are uploaded.
        minio_path: Prefix prepended verbatim to each object name. No separator is
            inserted, so pass a trailing "/" when a directory-like prefix is wanted.

    An ``S3Error`` raised while uploading a file is logged and the walk continues
    with the next file; it is not re-raised.
    """
    local_path = Path(local_path)
    for root, _, files in os.walk(local_path):
        for file in files:
            local_file_path = Path(root) / file
            # Object keys always use "/" as the separator, so render the relative
            # path in POSIX form rather than with the host's native separator.
            relative_key = local_file_path.relative_to(local_path).as_posix()
            minio_object_name = minio_path + relative_key
            try:
                with open(local_file_path, "rb") as data:
                    file_stat = os.stat(local_file_path)
                    minio_client.put_object(
                        bucket_name, minio_object_name, data, file_stat.st_size
                    )
                logging.info(f"Uploaded {local_file_path} to {minio_object_name}")
            except S3Error as e:
                logging.error(f"Error uploading {local_file_path}: {e}")
@pytest.fixture(scope="module")
def started_cluster():
    """Module-scoped fixture: start the shared cluster, yield it to the tests, then shut it down."""
    try:
        cluster.start()
        yield cluster
    finally:
        # Runs even if start-up or a test raised, so the cluster is always shut down.
        cluster.shutdown()
def test_attach_table_from_s3_plain_readonly(started_cluster):
    """Attach MergeTree data stored on the read-only ``s3_plain`` disk and read it on both nodes.

    Steps: write a one-row MergeTree table locally on node1, copy its on-disk
    ``store`` directory into the MinIO bucket, drop the local table, create a
    Replicated database on both nodes, attach the table by its original UUID
    with the ``s3_plain_readonly`` storage policy, and check that both nodes
    return the row.
    """
    # Seed a local database with a single-row MergeTree table.
    seed_sql = """
create database local_db;
create table local_db.test_table (num UInt32) engine=MergeTree() order by num;
insert into local_db.test_table (*) Values (5)
"""
    node1.query(seed_sql)
    assert int(node1.query("select num from local_db.test_table limit 1")) == 5

    # Mirror node1's local `store` directory into the bucket, under the prefix
    # that the read-only disk is configured to read from.
    store_dir = os.path.join(node1.path, "database/store")
    minio_client = cluster.minio_client
    upload_to_minio(
        minio_client,
        cluster.minio_bucket,
        store_dir,
        "data/disks/disk_s3_plain/store/",
    )

    # Remember the table's UUID, then drop the local table; it is no longer needed.
    uuid_sql = (
        "SELECT uuid FROM system.tables WHERE database='local_db' AND table='test_table'"
    )
    table_uuid = node1.query(uuid_sql).strip()
    node1.query("drop table local_db.test_table SYNC;")

    # Create the replicated database on each node, one replica name per node.
    for replica_node, replica_name in ((node1, "replica1"), (node2, "replica2")):
        replica_node.query(
            "create database s3_plain_test_db ENGINE = "
            f"Replicated('/test/s3_plain_test_db', 'shard1', '{replica_name}');"
        )

    # Attach the uploaded data as a MergeTree table, issuing the statement on node1 only.
    attach_sql = f"""
attach table s3_plain_test_db.test_table UUID '{table_uuid}' (num UInt32)
engine=MergeTree()
order by num
settings storage_policy = 's3_plain_readonly'
"""
    node1.query(attach_sql)

    # Both nodes must be able to read the attached row.
    for replica_node in (node1, node2):
        result = replica_node.query("select num from s3_plain_test_db.test_table limit 1")
        assert int(result) == 5