mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Merge pull request #68615 from pamarcos/secure-name-collections-storage
Add storage encryption for named collections
This commit is contained in:
commit
4965f3403b
@ -73,13 +73,21 @@ In the above example the `password_sha256_hex` value is the hexadecimal represen
|
||||
|
||||
### Storage for named collections
|
||||
|
||||
Named collections can either be stored on local disk or in zookeeper/keeper. By default local storage is used.
|
||||
Named collections can either be stored on local disk or in ZooKeeper/Keeper. By default local storage is used.
|
||||
They can also be stored using encryption with the same algorithms used for [disk encryption](storing-data#encrypted-virtual-file-system),
|
||||
where `aes_128_ctr` is used by default.
|
||||
|
||||
To configure named collections storage in keeper and a `type` (equal to either `keeper` or `zookeeper`) and `path` (path in keeper, where named collections will be stored) to `named_collections_storage` section in configuration file:
|
||||
To configure named collections storage you need to specify a `type`. This can be either `local` or `keeper`/`zookeeper`. For encrypted storage,
|
||||
you can use `local_encrypted` or `keeper_encrypted`/`zookeeper_encrypted`.
|
||||
|
||||
To use ZooKeeper/Keeper we also need to set up a `path` (path in ZooKeeper/Keeper, where named collections will be stored) to
|
||||
`named_collections_storage` section in configuration file. The following example uses encryption and ZooKeeper/Keeper:
|
||||
```
|
||||
<clickhouse>
|
||||
<named_collections_storage>
|
||||
<type>zookeeper</type>
|
||||
<type>zookeeper_encrypted</type>
|
||||
<key_hex>bebec0cabebec0cabebec0cabebec0ca</key_hex>
|
||||
<algorithm>aes_128_ctr</algorithm>
|
||||
<path>/named_collections_path/</path>
|
||||
<update_timeout_ms>1000</update_timeout_ms>
|
||||
</named_collections_storage>
|
||||
@ -315,7 +323,7 @@ The description of parameters see [postgresql](../sql-reference/table-functions/
|
||||
Parameter `addresses_expr` is used in a collection instead of `host:port`. The parameter is optional, because there are other optional ones: `host`, `hostname`, `port`. The following pseudo code explains the priority:
|
||||
|
||||
```sql
|
||||
CASE
|
||||
CASE
|
||||
WHEN collection['addresses_expr'] != '' THEN collection['addresses_expr']
|
||||
WHEN collection['host'] != '' THEN collection['host'] || ':' || if(collection['port'] != '', collection['port'], '5432')
|
||||
WHEN collection['hostname'] != '' THEN collection['hostname'] || ':' || if(collection['port'] != '', collection['port'], '5432')
|
||||
@ -496,7 +504,7 @@ kafka_topic_list = 'kafka_topic',
|
||||
kafka_group_name = 'consumer_group',
|
||||
kafka_format = 'JSONEachRow',
|
||||
kafka_max_block_size = '1048576';
|
||||
|
||||
|
||||
```
|
||||
### XML example
|
||||
|
||||
|
@ -6,14 +6,18 @@
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
#include <Common/ZooKeeper/ZooKeeper.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/FileEncryptionCommon.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <filesystem>
|
||||
#include <boost/algorithm/hex.hpp>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
@ -26,6 +30,7 @@ namespace ErrorCodes
|
||||
extern const int INVALID_CONFIG_PARAMETER;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
}
|
||||
|
||||
static const std::string named_collections_storage_config_path = "named_collections_storage";
|
||||
@ -74,9 +79,9 @@ public:
|
||||
};
|
||||
|
||||
|
||||
class NamedCollectionsMetadataStorage::LocalStorage : public INamedCollectionsStorage, private WithContext
|
||||
class NamedCollectionsMetadataStorage::LocalStorage : public INamedCollectionsStorage, protected WithContext
|
||||
{
|
||||
private:
|
||||
protected:
|
||||
std::string root_path;
|
||||
|
||||
public:
|
||||
@ -126,6 +131,11 @@ public:
|
||||
ReadBufferFromFile in(getPath(file_name));
|
||||
std::string data;
|
||||
readStringUntilEOF(data, in);
|
||||
return readHook(data);
|
||||
}
|
||||
|
||||
virtual std::string readHook(const std::string & data) const
|
||||
{
|
||||
return data;
|
||||
}
|
||||
|
||||
@ -142,8 +152,9 @@ public:
|
||||
fs::create_directories(root_path);
|
||||
|
||||
auto tmp_path = getPath(file_name + ".tmp");
|
||||
WriteBufferFromFile out(tmp_path, data.size(), O_WRONLY | O_CREAT | O_EXCL);
|
||||
writeString(data, out);
|
||||
auto write_data = writeHook(data);
|
||||
WriteBufferFromFile out(tmp_path, write_data.size(), O_WRONLY | O_CREAT | O_EXCL);
|
||||
writeString(write_data, out);
|
||||
|
||||
out.next();
|
||||
if (getContext()->getSettingsRef().fsync_metadata)
|
||||
@ -153,6 +164,11 @@ public:
|
||||
fs::rename(tmp_path, getPath(file_name));
|
||||
}
|
||||
|
||||
virtual std::string writeHook(const std::string & data) const
|
||||
{
|
||||
return data;
|
||||
}
|
||||
|
||||
void remove(const std::string & file_name) override
|
||||
{
|
||||
if (!removeIfExists(file_name))
|
||||
@ -168,7 +184,7 @@ public:
|
||||
return fs::remove(getPath(file_name));
|
||||
}
|
||||
|
||||
private:
|
||||
protected:
|
||||
std::string getPath(const std::string & file_name) const
|
||||
{
|
||||
const auto file_name_as_path = fs::path(file_name);
|
||||
@ -178,6 +194,7 @@ private:
|
||||
return fs::path(root_path) / file_name_as_path;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Delete .tmp files. They could be left undeleted in case of
|
||||
/// some exception or abrupt server restart.
|
||||
void cleanup()
|
||||
@ -194,8 +211,7 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class NamedCollectionsMetadataStorage::ZooKeeperStorage : public INamedCollectionsStorage, private WithContext
|
||||
class NamedCollectionsMetadataStorage::ZooKeeperStorage : public INamedCollectionsStorage, protected WithContext
|
||||
{
|
||||
private:
|
||||
std::string root_path;
|
||||
@ -275,18 +291,25 @@ public:
|
||||
|
||||
std::string read(const std::string & file_name) const override
|
||||
{
|
||||
return getClient()->get(getPath(file_name));
|
||||
auto data = getClient()->get(getPath(file_name));
|
||||
return readHook(data);
|
||||
}
|
||||
|
||||
virtual std::string readHook(const std::string & data) const
|
||||
{
|
||||
return data;
|
||||
}
|
||||
|
||||
void write(const std::string & file_name, const std::string & data, bool replace) override
|
||||
{
|
||||
auto write_data = writeHook(data);
|
||||
if (replace)
|
||||
{
|
||||
getClient()->createOrUpdate(getPath(file_name), data, zkutil::CreateMode::Persistent);
|
||||
getClient()->createOrUpdate(getPath(file_name), write_data, zkutil::CreateMode::Persistent);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto code = getClient()->tryCreate(getPath(file_name), data, zkutil::CreateMode::Persistent);
|
||||
auto code = getClient()->tryCreate(getPath(file_name), write_data, zkutil::CreateMode::Persistent);
|
||||
|
||||
if (code == Coordination::Error::ZNODEEXISTS)
|
||||
{
|
||||
@ -298,6 +321,11 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
virtual std::string writeHook(const std::string & data) const
|
||||
{
|
||||
return data;
|
||||
}
|
||||
|
||||
void remove(const std::string & file_name) override
|
||||
{
|
||||
getClient()->remove(getPath(file_name));
|
||||
@ -334,6 +362,93 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
#if USE_SSL
|
||||
|
||||
template <typename BaseMetadataStorage>
|
||||
class NamedCollectionsMetadataStorageEncrypted : public BaseMetadataStorage
|
||||
{
|
||||
public:
|
||||
NamedCollectionsMetadataStorageEncrypted(ContextPtr context_, const std::string & path_)
|
||||
: BaseMetadataStorage(context_, path_)
|
||||
{
|
||||
const auto & config = BaseMetadataStorage::getContext()->getConfigRef();
|
||||
auto key_hex = config.getRawString("named_collections_storage.key_hex", "");
|
||||
try
|
||||
{
|
||||
key = boost::algorithm::unhex(key_hex);
|
||||
key_fingerprint = FileEncryption::calculateKeyFingerprint(key);
|
||||
}
|
||||
catch (const std::exception &)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read key_hex, check for valid characters [0-9a-fA-F] and length");
|
||||
}
|
||||
|
||||
algorithm = FileEncryption::parseAlgorithmFromString(config.getString("named_collections_storage.algorithm", "aes_128_ctr"));
|
||||
}
|
||||
|
||||
std::string readHook(const std::string & data) const override
|
||||
{
|
||||
ReadBufferFromString in(data);
|
||||
Memory<> encrypted_buffer(data.length());
|
||||
|
||||
FileEncryption::Header header;
|
||||
try
|
||||
{
|
||||
header.read(in);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("While reading the header of encrypted data");
|
||||
throw;
|
||||
}
|
||||
|
||||
size_t bytes_read = 0;
|
||||
while (bytes_read < encrypted_buffer.size() && !in.eof())
|
||||
{
|
||||
bytes_read += in.read(encrypted_buffer.data() + bytes_read, encrypted_buffer.size() - bytes_read);
|
||||
}
|
||||
|
||||
std::string decrypted_buffer;
|
||||
decrypted_buffer.resize(bytes_read);
|
||||
FileEncryption::Encryptor encryptor(header.algorithm, key, header.init_vector);
|
||||
encryptor.decrypt(encrypted_buffer.data(), bytes_read, decrypted_buffer.data());
|
||||
|
||||
return decrypted_buffer;
|
||||
}
|
||||
|
||||
std::string writeHook(const std::string & data) const override
|
||||
{
|
||||
FileEncryption::Header header{
|
||||
.algorithm = algorithm,
|
||||
.key_fingerprint = key_fingerprint,
|
||||
.init_vector = FileEncryption::InitVector::random()
|
||||
};
|
||||
|
||||
FileEncryption::Encryptor encryptor(header.algorithm, key, header.init_vector);
|
||||
WriteBufferFromOwnString out;
|
||||
header.write(out);
|
||||
encryptor.encrypt(data.data(), data.size(), out);
|
||||
return std::string(out.str());
|
||||
}
|
||||
|
||||
private:
|
||||
std::string key;
|
||||
UInt128 key_fingerprint;
|
||||
FileEncryption::Algorithm algorithm;
|
||||
};
|
||||
|
||||
class NamedCollectionsMetadataStorage::LocalStorageEncrypted : public NamedCollectionsMetadataStorageEncrypted<NamedCollectionsMetadataStorage::LocalStorage>
|
||||
{
|
||||
using NamedCollectionsMetadataStorageEncrypted<NamedCollectionsMetadataStorage::LocalStorage>::NamedCollectionsMetadataStorageEncrypted;
|
||||
};
|
||||
|
||||
class NamedCollectionsMetadataStorage::ZooKeeperStorageEncrypted : public NamedCollectionsMetadataStorageEncrypted<NamedCollectionsMetadataStorage::ZooKeeperStorage>
|
||||
{
|
||||
using NamedCollectionsMetadataStorageEncrypted<NamedCollectionsMetadataStorage::ZooKeeperStorage>::NamedCollectionsMetadataStorageEncrypted;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
NamedCollectionsMetadataStorage::NamedCollectionsMetadataStorage(
|
||||
std::shared_ptr<INamedCollectionsStorage> storage_,
|
||||
ContextPtr context_)
|
||||
@ -495,7 +610,7 @@ std::unique_ptr<NamedCollectionsMetadataStorage> NamedCollectionsMetadataStorage
|
||||
const auto & config = context_->getConfigRef();
|
||||
const auto storage_type = config.getString(named_collections_storage_config_path + ".type", "local");
|
||||
|
||||
if (storage_type == "local")
|
||||
if (storage_type == "local" || storage_type == "local_encrypted")
|
||||
{
|
||||
const auto path = config.getString(
|
||||
named_collections_storage_config_path + ".path",
|
||||
@ -504,14 +619,36 @@ std::unique_ptr<NamedCollectionsMetadataStorage> NamedCollectionsMetadataStorage
|
||||
LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"),
|
||||
"Using local storage for named collections at path: {}", path);
|
||||
|
||||
auto local_storage = std::make_unique<NamedCollectionsMetadataStorage::LocalStorage>(context_, path);
|
||||
std::unique_ptr<INamedCollectionsStorage> local_storage;
|
||||
if (storage_type == "local")
|
||||
local_storage = std::make_unique<NamedCollectionsMetadataStorage::LocalStorage>(context_, path);
|
||||
else if (storage_type == "local_encrypted")
|
||||
{
|
||||
#if USE_SSL
|
||||
local_storage = std::make_unique<NamedCollectionsMetadataStorage::LocalStorageEncrypted>(context_, path);
|
||||
#else
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Named collections encryption requires building with SSL support");
|
||||
#endif
|
||||
}
|
||||
|
||||
return std::unique_ptr<NamedCollectionsMetadataStorage>(
|
||||
new NamedCollectionsMetadataStorage(std::move(local_storage), context_));
|
||||
}
|
||||
if (storage_type == "zookeeper" || storage_type == "keeper")
|
||||
if (storage_type == "zookeeper" || storage_type == "keeper" || storage_type == "zookeeper_encrypted" || storage_type == "keeper_encrypted")
|
||||
{
|
||||
const auto path = config.getString(named_collections_storage_config_path + ".path");
|
||||
auto zk_storage = std::make_unique<NamedCollectionsMetadataStorage::ZooKeeperStorage>(context_, path);
|
||||
|
||||
std::unique_ptr<INamedCollectionsStorage> zk_storage;
|
||||
if (!storage_type.ends_with("_encrypted"))
|
||||
zk_storage = std::make_unique<NamedCollectionsMetadataStorage::ZooKeeperStorage>(context_, path);
|
||||
else
|
||||
{
|
||||
#if USE_SSL
|
||||
zk_storage = std::make_unique<NamedCollectionsMetadataStorage::ZooKeeperStorageEncrypted>(context_, path);
|
||||
#else
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Named collections encryption requires building with SSL support");
|
||||
#endif
|
||||
}
|
||||
|
||||
LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"),
|
||||
"Using zookeeper storage for named collections at path: {}", path);
|
||||
|
@ -35,7 +35,9 @@ public:
|
||||
private:
|
||||
class INamedCollectionsStorage;
|
||||
class LocalStorage;
|
||||
class LocalStorageEncrypted;
|
||||
class ZooKeeperStorage;
|
||||
class ZooKeeperStorageEncrypted;
|
||||
|
||||
std::shared_ptr<INamedCollectionsStorage> storage;
|
||||
|
||||
|
@ -0,0 +1,12 @@
|
||||
<clickhouse>
|
||||
<named_collections_storage>
|
||||
<type>local_encrypted</type>
|
||||
<key_hex>bebec0cabebec0cabebec0cabebec0ca</key_hex>
|
||||
</named_collections_storage>
|
||||
|
||||
<named_collections>
|
||||
<collection1>
|
||||
<key1>value1</key1>
|
||||
</collection1>
|
||||
</named_collections>
|
||||
</clickhouse>
|
@ -0,0 +1,31 @@
|
||||
<clickhouse>
|
||||
<named_collections_storage>
|
||||
<type>zookeeper_encrypted</type>
|
||||
<key_hex>bebec0cabebec0cabebec0cabebec0ca</key_hex>
|
||||
<path>/named_collections_path/</path>
|
||||
<update_timeout_ms>5000</update_timeout_ms>
|
||||
</named_collections_storage>
|
||||
|
||||
<named_collections>
|
||||
<collection1>
|
||||
<key1>value1</key1>
|
||||
</collection1>
|
||||
</named_collections>
|
||||
|
||||
<remote_servers>
|
||||
<replicated_nc_nodes_cluster>
|
||||
<shard>
|
||||
<internal_replication>true</internal_replication>
|
||||
<replica>
|
||||
<host>node_with_keeper</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>node_with_keeper_2</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
<allow_distributed_ddl_queries>true</allow_distributed_ddl_queries>
|
||||
</replicated_nc_nodes_cluster>
|
||||
</remote_servers>
|
||||
</clickhouse>
|
@ -0,0 +1,17 @@
|
||||
<clickhouse>
|
||||
<profiles>
|
||||
<default>
|
||||
<ignore_on_cluster_for_replicated_named_collections_queries>0</ignore_on_cluster_for_replicated_named_collections_queries>
|
||||
</default>
|
||||
</profiles>
|
||||
<users>
|
||||
<default>
|
||||
<password></password>
|
||||
<profile>default</profile>
|
||||
<quota>default</quota>
|
||||
<named_collection_control>1</named_collection_control>
|
||||
<show_named_collections>1</show_named_collections>
|
||||
<show_named_collections_secrets>1</show_named_collections_secrets>
|
||||
</default>
|
||||
</users>
|
||||
</clickhouse>
|
123
tests/integration/test_named_collections_encrypted/test.py
Normal file
123
tests/integration/test_named_collections_encrypted/test.py
Normal file
@ -0,0 +1,123 @@
|
||||
import logging
|
||||
import pytest
|
||||
import os
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
|
||||
NAMED_COLLECTIONS_CONFIG = os.path.join(
|
||||
SCRIPT_DIR, "./configs/config.d/named_collections.xml"
|
||||
)
|
||||
|
||||
ZK_PATH = "/named_collections_path"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def cluster():
|
||||
try:
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
cluster.add_instance(
|
||||
"node_encrypted",
|
||||
main_configs=[
|
||||
"configs/config.d/named_collections_encrypted.xml",
|
||||
],
|
||||
user_configs=[
|
||||
"configs/users.d/users.xml",
|
||||
],
|
||||
stay_alive=True,
|
||||
)
|
||||
cluster.add_instance(
|
||||
"node_with_keeper_encrypted",
|
||||
main_configs=[
|
||||
"configs/config.d/named_collections_with_zookeeper_encrypted.xml",
|
||||
],
|
||||
user_configs=[
|
||||
"configs/users.d/users.xml",
|
||||
],
|
||||
stay_alive=True,
|
||||
with_zookeeper=True,
|
||||
)
|
||||
cluster.add_instance(
|
||||
"node_with_keeper_2_encrypted",
|
||||
main_configs=[
|
||||
"configs/config.d/named_collections_with_zookeeper_encrypted.xml",
|
||||
],
|
||||
user_configs=[
|
||||
"configs/users.d/users.xml",
|
||||
],
|
||||
stay_alive=True,
|
||||
with_zookeeper=True,
|
||||
)
|
||||
|
||||
logging.info("Starting cluster...")
|
||||
cluster.start()
|
||||
logging.info("Cluster started")
|
||||
|
||||
yield cluster
|
||||
finally:
|
||||
cluster.shutdown()
|
||||
|
||||
|
||||
def check_encrypted_content(node, zk=None):
|
||||
assert (
|
||||
"collection1\ncollection2"
|
||||
== node.query("select name from system.named_collections").strip()
|
||||
)
|
||||
|
||||
assert (
|
||||
"['key1','key2']"
|
||||
== node.query(
|
||||
"select mapKeys(collection) from system.named_collections where name = 'collection2'"
|
||||
).strip()
|
||||
)
|
||||
|
||||
assert (
|
||||
"1234\tvalue2"
|
||||
== node.query(
|
||||
"select collection['key1'], collection['key2'] from system.named_collections where name = 'collection2'"
|
||||
).strip()
|
||||
)
|
||||
|
||||
# Check that the underlying storage is encrypted
|
||||
content = (
|
||||
zk.get(ZK_PATH + "/collection2.sql")[0]
|
||||
if zk is not None
|
||||
else open(
|
||||
f"{node.path}/database/named_collections/collection2.sql", "rb"
|
||||
).read()
|
||||
)
|
||||
|
||||
assert (
|
||||
content[0:3] == b"ENC"
|
||||
) # file signature (aka magic number) of the encrypted file
|
||||
assert b"key1" not in content
|
||||
assert b"1234" not in content
|
||||
assert b"key2" not in content
|
||||
assert b"value2" not in content
|
||||
|
||||
|
||||
def test_local_storage_encrypted(cluster):
|
||||
node = cluster.instances["node_encrypted"]
|
||||
node.query("CREATE NAMED COLLECTION collection2 AS key1=1234, key2='value2'")
|
||||
|
||||
check_encrypted_content(node)
|
||||
node.restart_clickhouse()
|
||||
check_encrypted_content(node)
|
||||
|
||||
node.query("DROP NAMED COLLECTION collection2")
|
||||
|
||||
|
||||
def test_zookeper_storage_encrypted(cluster):
|
||||
node1 = cluster.instances["node_with_keeper_encrypted"]
|
||||
node2 = cluster.instances["node_with_keeper_2_encrypted"]
|
||||
zk = cluster.get_kazoo_client("zoo1")
|
||||
|
||||
node1.query("CREATE NAMED COLLECTION collection2 AS key1=1234, key2='value2'")
|
||||
|
||||
check_encrypted_content(node1, zk)
|
||||
check_encrypted_content(node2, zk)
|
||||
node1.restart_clickhouse()
|
||||
node2.restart_clickhouse()
|
||||
check_encrypted_content(node1, zk)
|
||||
check_encrypted_content(node2, zk)
|
||||
|
||||
node1.query("DROP NAMED COLLECTION collection2")
|
Loading…
Reference in New Issue
Block a user