Merge pull request #68615 from pamarcos/secure-name-collections-storage

Add storage encryption for named collections
This commit is contained in:
Pablo Marcos 2024-08-21 15:21:12 +00:00 committed by GitHub
commit 4965f3403b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 350 additions and 20 deletions

View File

@ -73,13 +73,21 @@ In the above example the `password_sha256_hex` value is the hexadecimal represen
### Storage for named collections
Named collections can either be stored on local disk or in zookeeper/keeper. By default local storage is used.
Named collections can either be stored on local disk or in ZooKeeper/Keeper. By default local storage is used.
They can also be stored using encryption with the same algorithms used for [disk encryption](storing-data#encrypted-virtual-file-system),
where `aes_128_ctr` is used by default.
To configure named collections storage in keeper and a `type` (equal to either `keeper` or `zookeeper`) and `path` (path in keeper, where named collections will be stored) to `named_collections_storage` section in configuration file:
To configure named collections storage you need to specify a `type`. This can be either `local` or `keeper`/`zookeeper`. For encrypted storage,
you can use `local_encrypted` or `keeper_encrypted`/`zookeeper_encrypted`.
To use ZooKeeper/Keeper we also need to set up a `path` (path in ZooKeeper/Keeper, where named collections will be stored) to
`named_collections_storage` section in configuration file. The following example uses encryption and ZooKeeper/Keeper:
```
<clickhouse>
<named_collections_storage>
<type>zookeeper</type>
<type>zookeeper_encrypted</type>
<key_hex>bebec0cabebec0cabebec0cabebec0ca</key_hex>
<algorithm>aes_128_ctr</algorithm>
<path>/named_collections_path/</path>
<update_timeout_ms>1000</update_timeout_ms>
</named_collections_storage>
@ -315,7 +323,7 @@ The description of parameters see [postgresql](../sql-reference/table-functions/
Parameter `addresses_expr` is used in a collection instead of `host:port`. The parameter is optional, because there are other optional ones: `host`, `hostname`, `port`. The following pseudo code explains the priority:
```sql
CASE
CASE
WHEN collection['addresses_expr'] != '' THEN collection['addresses_expr']
WHEN collection['host'] != '' THEN collection['host'] || ':' || if(collection['port'] != '', collection['port'], '5432')
WHEN collection['hostname'] != '' THEN collection['hostname'] || ':' || if(collection['port'] != '', collection['port'], '5432')
@ -496,7 +504,7 @@ kafka_topic_list = 'kafka_topic',
kafka_group_name = 'consumer_group',
kafka_format = 'JSONEachRow',
kafka_max_block_size = '1048576';
```
### XML example

View File

@ -6,14 +6,18 @@
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Core/Settings.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/FileEncryptionCommon.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/formatAST.h>
#include <Interpreters/Context.h>
#include <filesystem>
#include <boost/algorithm/hex.hpp>
namespace fs = std::filesystem;
@ -26,6 +30,7 @@ namespace ErrorCodes
extern const int INVALID_CONFIG_PARAMETER;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int SUPPORT_IS_DISABLED;
}
static const std::string named_collections_storage_config_path = "named_collections_storage";
@ -74,9 +79,9 @@ public:
};
class NamedCollectionsMetadataStorage::LocalStorage : public INamedCollectionsStorage, private WithContext
class NamedCollectionsMetadataStorage::LocalStorage : public INamedCollectionsStorage, protected WithContext
{
private:
protected:
std::string root_path;
public:
@ -126,6 +131,11 @@ public:
ReadBufferFromFile in(getPath(file_name));
std::string data;
readStringUntilEOF(data, in);
return readHook(data);
}
virtual std::string readHook(const std::string & data) const
{
return data;
}
@ -142,8 +152,9 @@ public:
fs::create_directories(root_path);
auto tmp_path = getPath(file_name + ".tmp");
WriteBufferFromFile out(tmp_path, data.size(), O_WRONLY | O_CREAT | O_EXCL);
writeString(data, out);
auto write_data = writeHook(data);
WriteBufferFromFile out(tmp_path, write_data.size(), O_WRONLY | O_CREAT | O_EXCL);
writeString(write_data, out);
out.next();
if (getContext()->getSettingsRef().fsync_metadata)
@ -153,6 +164,11 @@ public:
fs::rename(tmp_path, getPath(file_name));
}
virtual std::string writeHook(const std::string & data) const
{
return data;
}
void remove(const std::string & file_name) override
{
if (!removeIfExists(file_name))
@ -168,7 +184,7 @@ public:
return fs::remove(getPath(file_name));
}
private:
protected:
std::string getPath(const std::string & file_name) const
{
const auto file_name_as_path = fs::path(file_name);
@ -178,6 +194,7 @@ private:
return fs::path(root_path) / file_name_as_path;
}
private:
/// Delete .tmp files. They could be left undeleted in case of
/// some exception or abrupt server restart.
void cleanup()
@ -194,8 +211,7 @@ private:
}
};
class NamedCollectionsMetadataStorage::ZooKeeperStorage : public INamedCollectionsStorage, private WithContext
class NamedCollectionsMetadataStorage::ZooKeeperStorage : public INamedCollectionsStorage, protected WithContext
{
private:
std::string root_path;
@ -275,18 +291,25 @@ public:
std::string read(const std::string & file_name) const override
{
return getClient()->get(getPath(file_name));
auto data = getClient()->get(getPath(file_name));
return readHook(data);
}
virtual std::string readHook(const std::string & data) const
{
return data;
}
void write(const std::string & file_name, const std::string & data, bool replace) override
{
auto write_data = writeHook(data);
if (replace)
{
getClient()->createOrUpdate(getPath(file_name), data, zkutil::CreateMode::Persistent);
getClient()->createOrUpdate(getPath(file_name), write_data, zkutil::CreateMode::Persistent);
}
else
{
auto code = getClient()->tryCreate(getPath(file_name), data, zkutil::CreateMode::Persistent);
auto code = getClient()->tryCreate(getPath(file_name), write_data, zkutil::CreateMode::Persistent);
if (code == Coordination::Error::ZNODEEXISTS)
{
@ -298,6 +321,11 @@ public:
}
}
virtual std::string writeHook(const std::string & data) const
{
return data;
}
void remove(const std::string & file_name) override
{
getClient()->remove(getPath(file_name));
@ -334,6 +362,93 @@ private:
}
};
#if USE_SSL
template <typename BaseMetadataStorage>
class NamedCollectionsMetadataStorageEncrypted : public BaseMetadataStorage
{
public:
NamedCollectionsMetadataStorageEncrypted(ContextPtr context_, const std::string & path_)
: BaseMetadataStorage(context_, path_)
{
const auto & config = BaseMetadataStorage::getContext()->getConfigRef();
auto key_hex = config.getRawString("named_collections_storage.key_hex", "");
try
{
key = boost::algorithm::unhex(key_hex);
key_fingerprint = FileEncryption::calculateKeyFingerprint(key);
}
catch (const std::exception &)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read key_hex, check for valid characters [0-9a-fA-F] and length");
}
algorithm = FileEncryption::parseAlgorithmFromString(config.getString("named_collections_storage.algorithm", "aes_128_ctr"));
}
std::string readHook(const std::string & data) const override
{
ReadBufferFromString in(data);
Memory<> encrypted_buffer(data.length());
FileEncryption::Header header;
try
{
header.read(in);
}
catch (Exception & e)
{
e.addMessage("While reading the header of encrypted data");
throw;
}
size_t bytes_read = 0;
while (bytes_read < encrypted_buffer.size() && !in.eof())
{
bytes_read += in.read(encrypted_buffer.data() + bytes_read, encrypted_buffer.size() - bytes_read);
}
std::string decrypted_buffer;
decrypted_buffer.resize(bytes_read);
FileEncryption::Encryptor encryptor(header.algorithm, key, header.init_vector);
encryptor.decrypt(encrypted_buffer.data(), bytes_read, decrypted_buffer.data());
return decrypted_buffer;
}
std::string writeHook(const std::string & data) const override
{
FileEncryption::Header header{
.algorithm = algorithm,
.key_fingerprint = key_fingerprint,
.init_vector = FileEncryption::InitVector::random()
};
FileEncryption::Encryptor encryptor(header.algorithm, key, header.init_vector);
WriteBufferFromOwnString out;
header.write(out);
encryptor.encrypt(data.data(), data.size(), out);
return std::string(out.str());
}
private:
std::string key;
UInt128 key_fingerprint;
FileEncryption::Algorithm algorithm;
};
class NamedCollectionsMetadataStorage::LocalStorageEncrypted : public NamedCollectionsMetadataStorageEncrypted<NamedCollectionsMetadataStorage::LocalStorage>
{
using NamedCollectionsMetadataStorageEncrypted<NamedCollectionsMetadataStorage::LocalStorage>::NamedCollectionsMetadataStorageEncrypted;
};
class NamedCollectionsMetadataStorage::ZooKeeperStorageEncrypted : public NamedCollectionsMetadataStorageEncrypted<NamedCollectionsMetadataStorage::ZooKeeperStorage>
{
using NamedCollectionsMetadataStorageEncrypted<NamedCollectionsMetadataStorage::ZooKeeperStorage>::NamedCollectionsMetadataStorageEncrypted;
};
#endif
NamedCollectionsMetadataStorage::NamedCollectionsMetadataStorage(
std::shared_ptr<INamedCollectionsStorage> storage_,
ContextPtr context_)
@ -495,7 +610,7 @@ std::unique_ptr<NamedCollectionsMetadataStorage> NamedCollectionsMetadataStorage
const auto & config = context_->getConfigRef();
const auto storage_type = config.getString(named_collections_storage_config_path + ".type", "local");
if (storage_type == "local")
if (storage_type == "local" || storage_type == "local_encrypted")
{
const auto path = config.getString(
named_collections_storage_config_path + ".path",
@ -504,14 +619,36 @@ std::unique_ptr<NamedCollectionsMetadataStorage> NamedCollectionsMetadataStorage
LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"),
"Using local storage for named collections at path: {}", path);
auto local_storage = std::make_unique<NamedCollectionsMetadataStorage::LocalStorage>(context_, path);
std::unique_ptr<INamedCollectionsStorage> local_storage;
if (storage_type == "local")
local_storage = std::make_unique<NamedCollectionsMetadataStorage::LocalStorage>(context_, path);
else if (storage_type == "local_encrypted")
{
#if USE_SSL
local_storage = std::make_unique<NamedCollectionsMetadataStorage::LocalStorageEncrypted>(context_, path);
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Named collections encryption requires building with SSL support");
#endif
}
return std::unique_ptr<NamedCollectionsMetadataStorage>(
new NamedCollectionsMetadataStorage(std::move(local_storage), context_));
}
if (storage_type == "zookeeper" || storage_type == "keeper")
if (storage_type == "zookeeper" || storage_type == "keeper" || storage_type == "zookeeper_encrypted" || storage_type == "keeper_encrypted")
{
const auto path = config.getString(named_collections_storage_config_path + ".path");
auto zk_storage = std::make_unique<NamedCollectionsMetadataStorage::ZooKeeperStorage>(context_, path);
std::unique_ptr<INamedCollectionsStorage> zk_storage;
if (!storage_type.ends_with("_encrypted"))
zk_storage = std::make_unique<NamedCollectionsMetadataStorage::ZooKeeperStorage>(context_, path);
else
{
#if USE_SSL
zk_storage = std::make_unique<NamedCollectionsMetadataStorage::ZooKeeperStorageEncrypted>(context_, path);
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Named collections encryption requires building with SSL support");
#endif
}
LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"),
"Using zookeeper storage for named collections at path: {}", path);

View File

@ -35,7 +35,9 @@ public:
private:
class INamedCollectionsStorage;
class LocalStorage;
class LocalStorageEncrypted;
class ZooKeeperStorage;
class ZooKeeperStorageEncrypted;
std::shared_ptr<INamedCollectionsStorage> storage;

View File

@ -0,0 +1,12 @@
<clickhouse>
<named_collections_storage>
<type>local_encrypted</type>
<key_hex>bebec0cabebec0cabebec0cabebec0ca</key_hex>
</named_collections_storage>
<named_collections>
<collection1>
<key1>value1</key1>
</collection1>
</named_collections>
</clickhouse>

View File

@ -0,0 +1,31 @@
<clickhouse>
<named_collections_storage>
<type>zookeeper_encrypted</type>
<key_hex>bebec0cabebec0cabebec0cabebec0ca</key_hex>
<path>/named_collections_path/</path>
<update_timeout_ms>5000</update_timeout_ms>
</named_collections_storage>
<named_collections>
<collection1>
<key1>value1</key1>
</collection1>
</named_collections>
<remote_servers>
<replicated_nc_nodes_cluster>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>node_with_keeper</host>
<port>9000</port>
</replica>
<replica>
<host>node_with_keeper_2</host>
<port>9000</port>
</replica>
</shard>
<allow_distributed_ddl_queries>true</allow_distributed_ddl_queries>
</replicated_nc_nodes_cluster>
</remote_servers>
</clickhouse>

View File

@ -0,0 +1,17 @@
<clickhouse>
<profiles>
<default>
<ignore_on_cluster_for_replicated_named_collections_queries>0</ignore_on_cluster_for_replicated_named_collections_queries>
</default>
</profiles>
<users>
<default>
<password></password>
<profile>default</profile>
<quota>default</quota>
<named_collection_control>1</named_collection_control>
<show_named_collections>1</show_named_collections>
<show_named_collections_secrets>1</show_named_collections_secrets>
</default>
</users>
</clickhouse>

View File

@ -0,0 +1,123 @@
import logging
import pytest
import os
from helpers.cluster import ClickHouseCluster
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
NAMED_COLLECTIONS_CONFIG = os.path.join(
SCRIPT_DIR, "./configs/config.d/named_collections.xml"
)
ZK_PATH = "/named_collections_path"
@pytest.fixture(scope="module")
def cluster():
try:
cluster = ClickHouseCluster(__file__)
cluster.add_instance(
"node_encrypted",
main_configs=[
"configs/config.d/named_collections_encrypted.xml",
],
user_configs=[
"configs/users.d/users.xml",
],
stay_alive=True,
)
cluster.add_instance(
"node_with_keeper_encrypted",
main_configs=[
"configs/config.d/named_collections_with_zookeeper_encrypted.xml",
],
user_configs=[
"configs/users.d/users.xml",
],
stay_alive=True,
with_zookeeper=True,
)
cluster.add_instance(
"node_with_keeper_2_encrypted",
main_configs=[
"configs/config.d/named_collections_with_zookeeper_encrypted.xml",
],
user_configs=[
"configs/users.d/users.xml",
],
stay_alive=True,
with_zookeeper=True,
)
logging.info("Starting cluster...")
cluster.start()
logging.info("Cluster started")
yield cluster
finally:
cluster.shutdown()
def check_encrypted_content(node, zk=None):
assert (
"collection1\ncollection2"
== node.query("select name from system.named_collections").strip()
)
assert (
"['key1','key2']"
== node.query(
"select mapKeys(collection) from system.named_collections where name = 'collection2'"
).strip()
)
assert (
"1234\tvalue2"
== node.query(
"select collection['key1'], collection['key2'] from system.named_collections where name = 'collection2'"
).strip()
)
# Check that the underlying storage is encrypted
content = (
zk.get(ZK_PATH + "/collection2.sql")[0]
if zk is not None
else open(
f"{node.path}/database/named_collections/collection2.sql", "rb"
).read()
)
assert (
content[0:3] == b"ENC"
) # file signature (aka magic number) of the encrypted file
assert b"key1" not in content
assert b"1234" not in content
assert b"key2" not in content
assert b"value2" not in content
def test_local_storage_encrypted(cluster):
node = cluster.instances["node_encrypted"]
node.query("CREATE NAMED COLLECTION collection2 AS key1=1234, key2='value2'")
check_encrypted_content(node)
node.restart_clickhouse()
check_encrypted_content(node)
node.query("DROP NAMED COLLECTION collection2")
def test_zookeper_storage_encrypted(cluster):
node1 = cluster.instances["node_with_keeper_encrypted"]
node2 = cluster.instances["node_with_keeper_2_encrypted"]
zk = cluster.get_kazoo_client("zoo1")
node1.query("CREATE NAMED COLLECTION collection2 AS key1=1234, key2='value2'")
check_encrypted_content(node1, zk)
check_encrypted_content(node2, zk)
node1.restart_clickhouse()
node2.restart_clickhouse()
check_encrypted_content(node1, zk)
check_encrypted_content(node2, zk)
node1.query("DROP NAMED COLLECTION collection2")