Improve codec for encr 19896 (#27476)

* change syntax of encrypted command

* commit all encrypted changes

* correct encryption

* correct config for test

* add tests and correct code style and typos

* correct test

* fix unbundled build

* add log warning messages

* improve code according to review comments

* correct nonce

* correct errors found by fuzzing

* improve codec AES_128_GCM_SIV. Add AES_256_GCM_SIV. Add sections for last in tests. Improve documentation

* Update CompressionCodecEncrypted.h

* Update 01683_codec_encrypted.sql

* correct compression factory after changes in master

* correct behavior with wrong key in data

* correct fuzzer

* add connection for fuzzer with fix for compression_encrypted

* refactor code

* add load from config with throwing errors on server start

* fix typos and check style

* Update Server.cpp

* correct loading and reading

* refactor code. fix uninitialized value

* refactor code

* move defines from server to cpp file

* correct build

* remove repeated code

* correct namespace

* fix code style
This commit is contained in:
Filatenkov Artur 2021-09-13 11:25:36 +03:00 committed by GitHub
parent e9d7dfeeb1
commit c23fe5baf6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 1102 additions and 350 deletions

View File

@ -69,29 +69,85 @@ If no conditions met for a data part, ClickHouse uses the `lz4` compression.
</compression> </compression>
``` ```
<!--
## encryption {#server-settings-encryption} ## encryption {#server-settings-encryption}
Configures a command to obtain a key to be used by [encryption codecs](../../sql-reference/statements/create/table.md#create-query-encryption-codecs). The command, or a shell script, is expected to write a Base64-encoded key of any length to the stdout. Configures a command to obtain a key to be used by [encryption codecs](../../sql-reference/statements/create/table.md#create-query-encryption-codecs). Key (or keys) should be written in environment variables or be set in the configuration file.
Keys can be given in hex or as a plain string. Their length must be 16 bytes.
**Example** **Example**
For Linux with systemd: Load from config:
```xml ```xml
<encryption> <encryption_codecs>
<key_command>/usr/bin/systemd-ask-password --id="clickhouse-server" --timeout=0 "Enter the ClickHouse encryption passphrase:" | base64</key_command> <aes_128_gcm_siv>
</encryption> <key>1234567812345678</key>
</aes_128_gcm_siv>
</encryption_codecs>
``` ```
For other systems: !!! note "NOTE"
Storing keys in the configuration file is not recommended because it isn't secure. You can move the keys into a separate config file on a secure disk and put a symlink to that config file into the `config.d/` folder.
Load from config, when key is in hex:
```xml ```xml
<encryption> <encryption_codecs>
<key_command><![CDATA[IFS=; echo -n >/dev/tty "Enter the ClickHouse encryption passphrase: "; stty=`stty -F /dev/tty -g`; stty -F /dev/tty -echo; read k </dev/tty; stty -F /dev/tty "$stty"; echo -n $k | base64]]></key_command> <aes_128_gcm_siv>
</encryption> <key_hex>00112233445566778899aabbccddeeff</key_hex>
</aes_128_gcm_siv>
</encryption_codecs>
``` ```
-->
Load key from environment variable:
```xml
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex from_env="KEY"></key_hex>
</aes_128_gcm_siv>
</encryption_codecs>
```
Where current_key_id sets the current key for encryption, and all specified keys can be used for decryption.
All these methods can be applied for multiple keys:
```xml
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex id="0">00112233445566778899aabbccddeeff</key_hex>
<key_hex id="1" from_env=".."></key_hex>
<current_key_id>1</current_key_id>
</aes_128_gcm_siv>
</encryption_codecs>
```
Where `current_key_id` specifies the ID of the key that is currently used for encryption.
Also, users can add a nonce that must be 12 bytes long (by default, encryption and decryption use a nonce consisting of zero bytes):
```xml
<encryption_codecs>
<aes_128_gcm_siv>
<nonce>012345678910</nonce>
</aes_128_gcm_siv>
</encryption_codecs>
```
Or it can be set in hex:
```xml
<encryption_codecs>
<aes_128_gcm_siv>
<nonce_hex>abcdefabcdefabcdefabcdef</nonce_hex>
</aes_128_gcm_siv>
</encryption_codecs>
```
Everything above can be applied for `aes_256_gcm_siv` (but the key must be 32 bytes long).
## custom_settings_prefixes {#custom_settings_prefixes} ## custom_settings_prefixes {#custom_settings_prefixes}
List of prefixes for [custom settings](../../operations/settings/index.md#custom_settings). The prefixes must be separated with commas. List of prefixes for [custom settings](../../operations/settings/index.md#custom_settings). The prefixes must be separated with commas.

View File

@ -79,6 +79,7 @@
#include <Server/ProtocolServerAdapter.h> #include <Server/ProtocolServerAdapter.h>
#include <Server/HTTP/HTTPServer.h> #include <Server/HTTP/HTTPServer.h>
#include <filesystem> #include <filesystem>
#include <Compression/CompressionCodecEncrypted.h>
#if !defined(ARCADIA_BUILD) #if !defined(ARCADIA_BUILD)
# include "config_core.h" # include "config_core.h"
@ -251,7 +252,6 @@ namespace ErrorCodes
extern const int SUPPORT_IS_DISABLED; extern const int SUPPORT_IS_DISABLED;
extern const int ARGUMENT_OUT_OF_BOUND; extern const int ARGUMENT_OUT_OF_BOUND;
extern const int EXCESSIVE_ELEMENT_IN_CONFIG; extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
extern const int INCORRECT_DATA;
extern const int INVALID_CONFIG_PARAMETER; extern const int INVALID_CONFIG_PARAMETER;
extern const int SYSTEM_ERROR; extern const int SYSTEM_ERROR;
extern const int FAILED_TO_GETPWUID; extern const int FAILED_TO_GETPWUID;
@ -456,40 +456,6 @@ void checkForUsersNotInMainConfig(
} }
} }
static void loadEncryptionKey(const std::string & key_command [[maybe_unused]], Poco::Logger * log)
{
#if USE_BASE64 && USE_SSL && USE_INTERNAL_SSL_LIBRARY
auto process = ShellCommand::execute(key_command);
std::string b64_key;
readStringUntilEOF(b64_key, process->out);
process->wait();
// turbob64 doesn't like whitespace characters in input. Strip
// them before decoding.
std::erase_if(b64_key, [](char c)
{
return c == ' ' || c == '\t' || c == '\r' || c == '\n';
});
std::vector<char> buf(b64_key.size());
const size_t key_size = tb64dec(reinterpret_cast<const unsigned char *>(b64_key.data()), b64_key.size(),
reinterpret_cast<unsigned char *>(buf.data()));
if (!key_size)
throw Exception("Failed to decode encryption key", ErrorCodes::INCORRECT_DATA);
else if (key_size < 16)
LOG_WARNING(log, "The encryption key should be at least 16 octets long.");
const std::string_view key = std::string_view(buf.data(), key_size);
CompressionCodecEncrypted::setMasterKey(key);
#else
LOG_WARNING(log, "Server was built without Base64 or SSL support. Encryption is disabled.");
#endif
}
[[noreturn]] void forceShutdown() [[noreturn]] void forceShutdown()
{ {
#if defined(THREAD_SANITIZER) && defined(OS_LINUX) #if defined(THREAD_SANITIZER) && defined(OS_LINUX)
@ -904,6 +870,8 @@ if (ThreadFuzzer::instance().isEffective())
global_context->updateStorageConfiguration(*config); global_context->updateStorageConfiguration(*config);
global_context->updateInterserverCredentials(*config); global_context->updateInterserverCredentials(*config);
CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs");
}, },
/* already_loaded = */ false); /// Reload it right now (initial loading) /* already_loaded = */ false); /// Reload it right now (initial loading)
@ -976,9 +944,9 @@ if (ThreadFuzzer::instance().isEffective())
global_context->getMergeTreeSettings().sanityCheck(settings); global_context->getMergeTreeSettings().sanityCheck(settings);
global_context->getReplicatedMergeTreeSettings().sanityCheck(settings); global_context->getReplicatedMergeTreeSettings().sanityCheck(settings);
/// Set up encryption.
if (config().has("encryption.key_command")) /// try set up encryption. There are some errors in config, error will be printed and server wouldn't start.
loadEncryptionKey(config().getString("encryption.key_command"), log); CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs");
Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0); Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0);

View File

@ -1026,10 +1026,28 @@
defined, or encryption codecs will be disabled otherwise. The defined, or encryption codecs will be disabled otherwise. The
command is executed through /bin/sh and is expected to write command is executed through /bin/sh and is expected to write
a Base64-encoded key to the stdout. --> a Base64-encoded key to the stdout. -->
<encryption> <encryption_codecs>
<!-- <key_command>/usr/bin/systemd-ask-password &#45;&#45;id="clickhouse-server" &#45;&#45;timeout=0 "Enter the ClickHouse encryption passphrase:" | base64</key_command> --> <!-- aes_128_gcm_siv -->
<!-- <key_command><![CDATA[IFS=; echo -n >/dev/tty "Enter the ClickHouse encryption passphrase: "; stty=`stty -F /dev/tty -g`; stty -F /dev/tty -echo; read k </dev/tty; stty -F /dev/tty "$stty"; echo -n $k | base64]]></key_command> --> <!-- Example of getting hex key from env -->
</encryption> <!-- the code should use this key and throw an exception if its length is not 16 bytes -->
<!--key_hex from_env="..."></key_hex -->
<!-- Example of multiple hex keys. They can be imported from env or be written down in config-->
<!-- the code should use these keys and throw an exception if their length is not 16 bytes -->
<!-- key_hex id="0">...</key_hex -->
<!-- key_hex id="1" from_env=".."></key_hex -->
<!-- key_hex id="2">...</key_hex -->
<!-- current_key_id>2</current_key_id -->
<!-- Example of getting hex key from config -->
<!-- the code should use this key and throw an exception if its length is not 16 bytes -->
<!-- key>...</key -->
<!-- example of adding nonce -->
<!-- nonce>...</nonce -->
<!-- /aes_128_gcm_siv -->
</encryption_codecs>
<!-- Allow to execute distributed DDL queries (CREATE, DROP, ALTER, RENAME) on cluster. <!-- Allow to execute distributed DDL queries (CREATE, DROP, ALTER, RENAME) on cluster.
Works only if ZooKeeper is enabled. Comment it if such functionality isn't required. --> Works only if ZooKeeper is enabled. Comment it if such functionality isn't required. -->

View File

@ -1,215 +1,553 @@
#if !defined(ARCADIA_BUILD) #if !defined(ARCADIA_BUILD)
# include <Common/config.h> # include <Common/config.h>
#endif #endif
#include "Common/Exception.h"
#include "common/types.h"
#include "IO/VarInt.h"
#include <Compression/CompressionFactory.h> #include <Compression/CompressionFactory.h>
#if USE_SSL && USE_INTERNAL_SSL_LIBRARY
#include <Compression/CompressionCodecEncrypted.h> #include <Compression/CompressionCodecEncrypted.h>
#include <Poco/Logger.h>
#include <common/logger_useful.h>
#include <Common/ErrorCodes.h>
// This depends on BoringSSL-specific API, notably <openssl/aead.h>.
#if USE_SSL && USE_INTERNAL_SSL_LIBRARY
#include <Parsers/ASTLiteral.h> #include <Parsers/ASTLiteral.h>
#include <cassert>
#include <openssl/digest.h> // Y_IGNORE #include <openssl/digest.h> // Y_IGNORE
#include <openssl/err.h> #include <openssl/err.h>
#include <openssl/hkdf.h> // Y_IGNORE #include <boost/algorithm/hex.hpp>
#include <string_view> #include <openssl/aead.h> // Y_IGNORE
#endif
// Common part for both parts (with SSL and without)
namespace DB namespace DB
{ {
namespace ErrorCodes
{ namespace ErrorCodes
extern const int ILLEGAL_CODEC_PARAMETER; {
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int OPENSSL_ERROR; extern const int OPENSSL_ERROR;
} }
void CompressionCodecEncrypted::setMasterKey(const std::string_view & master_key) namespace
{
/// Get string name for method. Return empty string for undefined Method
String getMethodName(EncryptionMethod Method)
{
if (Method == AES_128_GCM_SIV)
{ {
keys.emplace(master_key); return "AES_128_GCM_SIV";
} }
else if (Method == AES_256_GCM_SIV)
CompressionCodecEncrypted::KeyHolder::KeyHolder(const std::string_view & master_key)
{ {
// Derive a key from it. return "AES_256_GCM_SIV";
keygen_key = deriveKey(master_key);
// EVP_AEAD_CTX is not stateful so we can create an
// instance now.
EVP_AEAD_CTX_zero(&ctx);
const int ok = EVP_AEAD_CTX_init(&ctx, EVP_aead_aes_128_gcm(),
reinterpret_cast<const uint8_t*>(keygen_key.data()), keygen_key.size(),
16 /* tag size */, nullptr);
if (!ok)
throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
} }
CompressionCodecEncrypted::KeyHolder::~KeyHolder()
{
EVP_AEAD_CTX_cleanup(&ctx);
}
const CompressionCodecEncrypted::KeyHolder & CompressionCodecEncrypted::getKeys()
{
if (keys)
return *keys;
else else
throw Exception("There is no configuration for encryption in the server config",
ErrorCodes::NO_ELEMENTS_IN_CONFIG);
}
CompressionCodecEncrypted::CompressionCodecEncrypted(const std::string_view & cipher)
{ {
setCodecDescription("Encrypted", {std::make_shared<ASTLiteral>(cipher)}); return "";
} }
}
uint8_t CompressionCodecEncrypted::getMethodByte() const /// Get method code (used for codec, to understand which one we are using)
uint8_t getMethodCode(EncryptionMethod Method)
{
if (Method == AES_128_GCM_SIV)
{ {
return static_cast<uint8_t>(CompressionMethodByte::Encrypted); return uint8_t(CompressionMethodByte::AES_128_GCM_SIV);
} }
else if (Method == AES_256_GCM_SIV)
void CompressionCodecEncrypted::updateHash(SipHash & hash) const
{ {
getCodecDesc()->updateTreeHash(hash); return uint8_t(CompressionMethodByte::AES_256_GCM_SIV);
} }
else
UInt32 CompressionCodecEncrypted::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{ {
// The GCM mode is a stream cipher. No paddings are throw Exception("Wrong encryption Method. Got " + getMethodName(Method), ErrorCodes::BAD_ARGUMENTS);
// involved. There will be a tag at the end of ciphertext (16
// octets).
return uncompressed_size + 16;
} }
}
UInt32 CompressionCodecEncrypted::doCompressData(const char * source, UInt32 source_size, char * dest) const } // end of namespace
} // end of namespace DB
#if USE_SSL && USE_INTERNAL_SSL_LIBRARY
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
extern const int INCORRECT_DATA;
}
namespace
{
constexpr size_t tag_size = 16; /// AES-GCM-SIV always appends a 16-byte authentication tag to the ciphertext.
constexpr size_t key_id_max_size = 8; /// Space reserved for the varint-encoded key ID (max size of varint).
constexpr size_t nonce_max_size = 13; /// 12-byte nonce plus one flag byte that shows whether the nonce is stored in the text.
/// Get encryption/decryption algorithms.
auto getMethod(EncryptionMethod Method)
{
if (Method == AES_128_GCM_SIV)
{ {
// Generate an IV out of the data block and the key-generation return EVP_aead_aes_128_gcm_siv;
// key. It is completely deterministic, but does not leak any
// information about the data block except for equivalence of
// identical blocks (under the same master key). The IV will
// be used as an authentication tag. The ciphertext and the
// tag will be written directly in the dest buffer.
const std::string_view plaintext = std::string_view(source, source_size);
encrypt(plaintext, dest);
return source_size + 16;
} }
else if (Method == AES_256_GCM_SIV)
void CompressionCodecEncrypted::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size [[maybe_unused]]) const
{ {
// Extract the IV from the encrypted data block. Decrypt the return EVP_aead_aes_256_gcm_siv;
// block with the extracted IV, and compare the tag. Throw an
// exception if tags don't match.
const std::string_view ciphertext_and_tag = std::string_view(source, source_size);
assert(ciphertext_and_tag.size() == uncompressed_size + 16);
decrypt(ciphertext_and_tag, dest);
} }
else
std::string CompressionCodecEncrypted::lastErrorString()
{ {
std::array<char, 1024> buffer{}; throw Exception("Wrong encryption Method. Got " + getMethodName(Method), ErrorCodes::BAD_ARGUMENTS);
}
}
/// Key length in bytes required by the given encryption method:
/// 16 for AES_128_GCM_SIV, 32 for AES_256_GCM_SIV.
/// Throws BAD_ARGUMENTS for any other method value.
UInt64 methodKeySize(EncryptionMethod Method)
{
    if (Method == AES_128_GCM_SIV)
        return 16;

    if (Method == AES_256_GCM_SIV)
        return 32;

    throw Exception("Wrong encryption Method. Got " + getMethodName(Method), ErrorCodes::BAD_ARGUMENTS);
}
std::string lastErrorString()
{
std::array<char, 1024> buffer;
ERR_error_string_n(ERR_get_error(), buffer.data(), buffer.size()); ERR_error_string_n(ERR_get_error(), buffer.data(), buffer.size());
return std::string(buffer.data()); return std::string(buffer.data());
} }
std::string CompressionCodecEncrypted::deriveKey(const std::string_view & master_key) /// Encrypt plaintext with particular algorithm and put result into ciphertext_and_tag.
{ /// This function get key and nonce and encrypt text with their help.
/// No salt: derive keys in a deterministic manner. /// If something went wrong (can't init context or can't encrypt data) it throws exception.
std::string_view salt(""); // NOLINT /// It returns length of encrypted text.
std::string_view info("Codec Encrypted('AES-128-GCM-SIV') key generation key"); size_t encrypt(const std::string_view & plaintext, char * ciphertext_and_tag, EncryptionMethod method, const String & key, const String & nonce)
std::array<char, 32> result; {
/// Init context for encryption, using key.
const int ok = HKDF(reinterpret_cast<uint8_t *>(result.data()), result.size(), EVP_AEAD_CTX encrypt_ctx;
EVP_sha256(), EVP_AEAD_CTX_zero(&encrypt_ctx);
reinterpret_cast<const uint8_t *>(master_key.data()), master_key.size(), const int ok_init = EVP_AEAD_CTX_init(&encrypt_ctx, getMethod(method)(),
reinterpret_cast<const uint8_t *>(salt.data()), salt.size(), reinterpret_cast<const uint8_t*>(key.data()), key.size(),
reinterpret_cast<const uint8_t *>(info.data()), info.size()); 16 /* tag size */, nullptr);
if (!ok) if (!ok_init)
throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR); throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
return std::string(result.data(), 16); /// encrypt data using context and given nonce.
}
void CompressionCodecEncrypted::encrypt(const std::string_view & plaintext, char * ciphertext_and_tag)
{
// Fixed nonce. Yes this is unrecommended, but we have to live
// with it.
std::string_view nonce("\0\0\0\0\0\0\0\0\0\0\0\0", 12);
size_t out_len; size_t out_len;
const int ok = EVP_AEAD_CTX_seal(&getKeys().ctx, const int ok_open = EVP_AEAD_CTX_seal(&encrypt_ctx,
reinterpret_cast<uint8_t *>(ciphertext_and_tag), reinterpret_cast<uint8_t *>(ciphertext_and_tag),
&out_len, plaintext.size() + 16, &out_len, plaintext.size() + 16,
reinterpret_cast<const uint8_t *>(nonce.data()), nonce.size(), reinterpret_cast<const uint8_t *>(nonce.data()), nonce.size(),
reinterpret_cast<const uint8_t *>(plaintext.data()), plaintext.size(), reinterpret_cast<const uint8_t *>(plaintext.data()), plaintext.size(),
nullptr, 0); nullptr, 0);
if (!ok) if (!ok_open)
throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR); throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
assert(out_len == plaintext.size() + 16); return out_len;
} }
void CompressionCodecEncrypted::decrypt(const std::string_view & ciphertext, char * plaintext) /// Encrypt plaintext with particular algorithm and put result into ciphertext_and_tag.
{ /// This function get key and nonce and encrypt text with their help.
std::string_view nonce("\0\0\0\0\0\0\0\0\0\0\0\0", 12); /// If something went wrong (can't init context or can't encrypt data) it throws exception.
/// It returns length of encrypted text.
size_t decrypt(const std::string_view & ciphertext, char * plaintext, EncryptionMethod method, const String & key, const String & nonce)
{
/// Init context for decryption with given key.
EVP_AEAD_CTX decrypt_ctx;
EVP_AEAD_CTX_zero(&decrypt_ctx);
const int ok_init = EVP_AEAD_CTX_init(&decrypt_ctx, getMethod(method)(),
reinterpret_cast<const uint8_t*>(key.data()), key.size(),
16 /* tag size */, nullptr);
if (!ok_init)
throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
/// decrypt data using given nonce
size_t out_len; size_t out_len;
const int ok = EVP_AEAD_CTX_open(&getKeys().ctx, const int ok_open = EVP_AEAD_CTX_open(&decrypt_ctx,
reinterpret_cast<uint8_t *>(plaintext), reinterpret_cast<uint8_t *>(plaintext),
&out_len, ciphertext.size(), &out_len, ciphertext.size(),
reinterpret_cast<const uint8_t *>(nonce.data()), nonce.size(), reinterpret_cast<const uint8_t *>(nonce.data()), nonce.size(),
reinterpret_cast<const uint8_t *>(ciphertext.data()), ciphertext.size(), reinterpret_cast<const uint8_t *>(ciphertext.data()), ciphertext.size(),
nullptr, 0); nullptr, 0);
if (!ok) if (!ok_open)
throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR); throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
assert(out_len == ciphertext.size() - 16); return out_len;
} }
void registerCodecEncrypted(CompressionCodecFactory & factory) /// Register codec in factory
{ void registerEncryptionCodec(CompressionCodecFactory & factory, EncryptionMethod Method)
const auto method_code = uint8_t(CompressionMethodByte::Encrypted); {
factory.registerCompressionCodec("Encrypted", method_code, [&](const ASTPtr & arguments) -> CompressionCodecPtr const auto method_code = getMethodCode(Method); /// Codec need to know its code
factory.registerCompressionCodec(getMethodName(Method), method_code, [&, Method](const ASTPtr & arguments) -> CompressionCodecPtr
{ {
if (arguments) if (arguments)
{ {
if (arguments->children.size() != 1) if (!arguments->children.empty())
throw Exception("Codec Encrypted() must have 1 parameter, given " + throw Exception("Codec " + getMethodName(Method) + " must not have parameters, given " +
std::to_string(arguments->children.size()), std::to_string(arguments->children.size()),
ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE); ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
}
return std::make_shared<CompressionCodecEncrypted>(Method);
});
}
const auto children = arguments->children; String unhexKey(const String & hex)
const auto * literal = children[0]->as<ASTLiteral>(); {
if (!literal) try
throw Exception("Wrong argument for codec Encrypted(). Expected a string literal", {
ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE); return boost::algorithm::unhex(hex);
}
catch (const std::exception &)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read key_hex, check for valid characters [0-9a-fA-F] and length");
}
}
const String cipher = literal->value.safeGet<String>(); /// Firstly, write a byte, which shows if the nonce will be put in text (if it was defined in config)
if (cipher == "AES-128-GCM-SIV") /// Secondly, write nonce in text (this step depends from first step)
return std::make_shared<CompressionCodecEncrypted>(cipher); /// return new position to write
else inline char* writeNonce(const String& nonce, char* dest)
throw Exception("Cipher '" + cipher + "' is not supported", {
ErrorCodes::ILLEGAL_CODEC_PARAMETER); /// If nonce consists of nul bytes, it shouldn't be in dest. Zero byte is the only byte that should be written.
/// Otherwise, 1 is written and data from nonce is copied
if (nonce != String("\0\0\0\0\0\0\0\0\0\0\0\0", 12))
{
*dest = 1;
++dest;
size_t copied_symbols = nonce.copy(dest, nonce.size());
if (copied_symbols != nonce.size())
throw Exception(ErrorCodes::INCORRECT_DATA, "Can't copy nonce into destination. Count of copied symbols {}, need to copy {}", copied_symbols, nonce.size());
dest += copied_symbols;
return dest;
} }
else else
{ {
/* The factory is asking us to construct the codec *dest = 0;
* only from the method code. How can that be return ++dest;
* possible? For now we only support a single cipher
* so it's not really a problem, but if we were to
* support more ciphers it would be catastrophic. */
return std::make_shared<CompressionCodecEncrypted>("AES-128-GCM-SIV");
} }
}); }
/// Parse the nonce header located at source.
/// The first byte is a flag: zero means that no nonce was stored and the 12-byte all-zero nonce is used;
/// any other value means that the following 12 bytes are the nonce itself.
/// Returns the position right after the header (the next byte to read).
inline const char* readNonce(String& nonce, const char* source)
{
    const bool nonce_is_stored = (*source != 0);

    /// The flag byte is consumed in both cases.
    ++source;

    if (!nonce_is_stored)
    {
        nonce = {"\0\0\0\0\0\0\0\0\0\0\0\0", 12};
        return source;
    }

    /// NOTE(review): nothing here verifies that 12 bytes are actually available at source — confirm the caller guarantees it.
    nonce = {source, 12};
    return source + 12;
}
}
/// Accessor for the single shared Configuration object.
CompressionCodecEncrypted::Configuration & CompressionCodecEncrypted::Configuration::instance()
{
    /// Function-local static: every call returns a reference to the same object.
    static CompressionCodecEncrypted::Configuration configuration;
    return configuration;
}
/// Read the settings of one encryption method from the config subtree at config_prefix
/// and store them into new_params: all keys (plain `key` or hex `key_hex` entries, each with an optional
/// `id` attribute that defaults to 0), the `current_key_id` (defaults to 0) and the nonce (`nonce_hex` or `nonce`).
/// Throws BAD_ARGUMENTS when the method is out of range, a key ID repeats, a key has the wrong size,
/// no keys are present, the current key ID has no key, or a non-empty nonce is not 12 bytes long.
void CompressionCodecEncrypted::Configuration::loadImpl(
    const Poco::Util::AbstractConfiguration & config, const String & config_prefix, EncryptionMethod method, std::unique_ptr<Params> & new_params)
{
    /// Only values strictly below MAX_ENCRYPTION_METHOD are accepted.
    if (method >= MAX_ENCRYPTION_METHOD)
        throw Exception("Wrong argument for loading configurations.", ErrorCodes::BAD_ARGUMENTS);

    auto & method_keys = new_params->keys_storage[method];

    /// Walk over every child entry of the section and pick up the ones that describe keys.
    Strings entries;
    config.keys(config_prefix, entries);
    for (const std::string & entry : entries)
    {
        const bool is_plain_key = (entry == "key") || entry.starts_with("key[");
        const bool is_hex_key = !is_plain_key && ((entry == "key_hex") || entry.starts_with("key_hex["));
        if (!is_plain_key && !is_hex_key)
            continue;

        const String entry_path = config_prefix + "." + entry;

        /// The key value is read (and unhexed if needed) before its ID.
        String key = config.getString(entry_path, "");
        if (is_hex_key)
            key = unhexKey(key);
        const UInt64 key_id = config.getUInt64(entry_path + "[@id]", 0);

        /// Key IDs must be unique inside one method.
        if (method_keys.contains(key_id))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple keys have the same ID {}", key_id);

        /// The required key length depends on the encryption algorithm.
        if (key.size() != methodKeySize(method))
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "Got an encryption key with unexpected size {}, the size should be {}",
                key.size(), methodKeySize(method));

        method_keys[key_id] = key;
    }

    /// A method that is mentioned in the config must have at least one key.
    if (method_keys.empty())
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "No keys, an encryption needs keys to work");

    /// The key used for encryption; ID 0 is assumed when current_key_id is not specified.
    auto & current_id = new_params->current_key_id[method];
    current_id = config.getUInt64(config_prefix + ".current_key_id", 0);

    /// The current key must be among the loaded keys.
    if (!method_keys.contains(current_id))
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not found a key with the current ID {}", current_id);

    /// Nonce: the hex form takes precedence over the plain string form; empty means "not configured".
    auto & method_nonce = new_params->nonce[method];
    if (config.has(config_prefix + ".nonce_hex"))
        method_nonce = unhexKey(config.getString(config_prefix + ".nonce_hex"));
    else
        method_nonce = config.getString(config_prefix + ".nonce", "");

    if (!method_nonce.empty() && method_nonce.size() != 12)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got nonce with unexpected size {}, the size should be 12", method_nonce.size());
}
/// Non-throwing variant of load(): any exception raised while loading is logged and swallowed.
/// Returns true when the new parameters were loaded, false when loading failed.
bool CompressionCodecEncrypted::Configuration::tryLoad(const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
{
    bool loaded = false;
    try
    {
        load(config, config_prefix);
        loaded = true;
    }
    catch (...)
    {
        /// Report the problem to the log so the user knows that the new parameters are not available.
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }
    return loaded;
}
/// Build a fresh set of parameters from the config and publish it via params.set().
/// Only the method sections that are present in the config (aes_128_gcm_siv, aes_256_gcm_siv) are loaded.
/// Any error raised by loadImpl propagates to the caller, in which case params.set() is not reached.
void CompressionCodecEncrypted::Configuration::load(const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
{
    std::unique_ptr<Params> new_params(new Params);

    /// Load one method section if the config has it.
    const auto load_section_if_present = [&](const char * section_name, EncryptionMethod method)
    {
        const String section_prefix = config_prefix + section_name;
        if (config.has(section_prefix))
            loadImpl(config, section_prefix, method, new_params);
    };

    load_section_if_present(".aes_128_gcm_siv", AES_128_GCM_SIV);
    load_section_if_present(".aes_256_gcm_siv", AES_256_GCM_SIV);

    params.set(std::move(new_params));
}
/// Fetch everything that is needed to encrypt with the given method:
///  - current_key_id: ID of the key currently selected for encryption;
///  - current_key: the key stored under that ID;
///  - nonce: the configured nonce, or 12 zero bytes when no nonce is configured.
/// Throws BAD_ARGUMENTS if parameters were never set or if there is no key with the current ID.
void CompressionCodecEncrypted::Configuration::getCurrentKeyAndNonce(EncryptionMethod method, UInt64 & current_key_id, String &current_key, String & nonce) const
{
    /// Take exactly one snapshot of the parameters: they can be replaced at any moment,
    /// so both the emptiness check and all reads below must use the same version.
    const auto current_params = params.get();
    if (!current_params)
        throw Exception("Empty params in CompressionCodecEncrypted configuration", ErrorCodes::BAD_ARGUMENTS);

    current_key_id = current_params->current_key_id[method];

    /// Parameters can be created empty, so the current key may be absent.
    const auto & method_keys = current_params->keys_storage[method];
    const auto key_it = method_keys.find(current_key_id);
    if (key_it == method_keys.end())
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no current_key {} in config. Please, put it in config and reload.", current_key_id);
    current_key = key_it->second;

    /// If there is no nonce in config, a fixed one (all zero bytes) is used instead of a random one:
    /// a random nonce would differ after every config reload.
    nonce = current_params->nonce[method];
    if (nonce.empty())
        nonce = {"\0\0\0\0\0\0\0\0\0\0\0\0", 12};
}
/// Return the key stored under key_id for the given method.
/// Throws BAD_ARGUMENTS if parameters were never set or if there is no key with this ID.
String CompressionCodecEncrypted::Configuration::getKey(EncryptionMethod method, const UInt64 & key_id) const
{
    /// Take exactly one snapshot of the parameters: they can be replaced at any moment,
    /// so the emptiness check and the lookup below must use the same version.
    const auto current_params = params.get();
    if (!current_params)
        throw Exception("Empty params in CompressionCodecEncrypted configuration", ErrorCodes::BAD_ARGUMENTS);

    /// Look the key up once instead of checking for presence and then fetching it.
    const auto & method_keys = current_params->keys_storage[method];
    const auto key_it = method_keys.find(key_id);
    if (key_it == method_keys.end())
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config", key_id);

    return key_it->second;
}
/// Create a codec bound to one encryption method; the codec description is the method's name.
CompressionCodecEncrypted::CompressionCodecEncrypted(EncryptionMethod Method)
    : encryption_method(Method)
{
    const auto codec_name = getMethodName(encryption_method);
    setCodecDescription(codec_name);
}
/// Method byte of this codec: the code that corresponds to its encryption method.
uint8_t CompressionCodecEncrypted::getMethodByte() const
{
    const uint8_t method_byte = getMethodCode(encryption_method);
    return method_byte;
}
/// Feed the codec description into the given hash.
void CompressionCodecEncrypted::updateHash(SipHash & hash) const
{
    const auto description = getCodecDesc();
    description->updateTreeHash(hash);
}
/// Upper bound for the size of the encrypted block.
/// No padding is involved, so the ciphertext is as long as the plaintext; on top of that come:
///  - the key ID written as a varint at the beginning (key_id_max_size bytes at most),
///  - one flag byte that tells whether a nonce follows, plus the 12-byte nonce itself (nonce_max_size),
///  - the authentication tag at the end of the ciphertext (tag_size bytes).
UInt32 CompressionCodecEncrypted::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
    constexpr size_t overhead = tag_size + key_id_max_size + nonce_max_size;
    return uncompressed_size + overhead;
}
/// Encrypt source_size bytes from source into dest.
/// Layout written to dest: varint key ID, nonce header (flag byte and, if the nonce is not all zeros, the nonce),
/// then the ciphertext followed by the authentication tag.
/// Returns the total number of bytes written. Throws LOGICAL_ERROR if the encrypted length is unexpected.
UInt32 CompressionCodecEncrypted::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
    /// Key and nonce currently configured for this method.
    UInt64 key_id;
    String key;
    String nonce;
    Configuration::instance().getCurrentKeyAndNonce(encryption_method, key_id, key, nonce);

    /// The key ID goes first: it allows decrypting the data after the current key is changed.
    char * const after_key_id = writeVarUInt(key_id, dest);

    /// Then the nonce header: it allows reading the data even after the nonce is changed in config.
    char * const payload = writeNonce(nonce, after_key_id);

    /// Everything written before the ciphertext (key ID + nonce header).
    const size_t header_size = payload - dest;

    /// The ciphertext and the tag are written directly into the dest buffer.
    const size_t encrypted_size = encrypt(std::string_view(source, source_size), payload, encryption_method, key, nonce);

    /// The algorithm adds exactly tag_size bytes to the text length.
    const size_t expected_size = source_size + tag_size;
    if (encrypted_size != expected_size)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't encrypt data, length after encryption {} is wrong, expected {}", encrypted_size, expected_size);

    return encrypted_size + header_size;
}
void CompressionCodecEncrypted::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
{
/// The key is needed for decrypting. That's why it is read at the beginning of process.
UInt64 key_id;
const char * ciphertext_with_nonce = readVarUInt(key_id, source, source_size);
/// Size of text should be decreased by key_size, because key_size bytes were not participating in encryption process.
size_t keyid_size = ciphertext_with_nonce - source;
String nonce;
String key = Configuration::instance().getKey(encryption_method, key_id);
/// try to read nonce from file (if it was set while encrypting)
const char * ciphertext = readNonce(nonce, ciphertext_with_nonce);
/// Size of text should be decreased by nonce_size, because nonce_size bytes were not participating in encryption process.
UInt64 nonce_size = ciphertext - ciphertext_with_nonce;
/// Count text size (nonce and key_id was read from source)
size_t ciphertext_size = source_size - keyid_size - nonce_size;
if (ciphertext_size != uncompressed_size + tag_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't decrypt data, uncompressed_size {} is wrong, expected {}", uncompressed_size, ciphertext_size - tag_size);
size_t out_len = decrypt({ciphertext, ciphertext_size}, dest, encryption_method, key, nonce);
if (out_len != ciphertext_size - tag_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't decrypt data, out length after decryption {} is wrong, expected {}", out_len, ciphertext_size - tag_size);
}
} }
#else /* USE_SSL && USE_INTERNAL_SSL_LIBRARY */ #else /* USE_SSL && USE_INTERNAL_SSL_LIBRARY */
namespace DB namespace DB
{ {
void registerCodecEncrypted(CompressionCodecFactory &)
{ namespace
} {
/// Registers the codec name and method code in the factory for the build without SSL.
/// The registered creator never produces a codec: it always throws OPENSSL_ERROR.
void registerEncryptionCodec(CompressionCodecFactory & factory, EncryptionMethod Method)
{
    /// Codec need to know its code
    const auto method_code = getMethodCode(Method);

    auto reject_without_ssl = [](const ASTPtr &) -> CompressionCodecPtr
    {
        throw Exception(ErrorCodes::OPENSSL_ERROR, "Server was built without SSL support. Encryption is disabled.");
    };

    factory.registerCompressionCodec(getMethodName(Method), method_code, reject_without_ssl);
}
}
/// Returns the single shared Configuration object (a function-local static).
CompressionCodecEncrypted::Configuration & CompressionCodecEncrypted::Configuration::instance()
{
    static CompressionCodecEncrypted::Configuration configuration;
    return configuration;
}
/// Variant for the build where encryption is disabled: nothing is read from the config
/// and the call always reports failure.
bool CompressionCodecEncrypted::Configuration::tryLoad(
    const Poco::Util::AbstractConfiguration & config [[maybe_unused]],
    const String & config_prefix [[maybe_unused]])
{
    return false;
}
/// Variant for the build where encryption is disabled: the config is ignored
/// and a warning about the missing SSL support is logged instead.
void CompressionCodecEncrypted::Configuration::load(
    const Poco::Util::AbstractConfiguration & config [[maybe_unused]],
    const String & config_prefix [[maybe_unused]])
{
    auto * log = &Poco::Logger::get("CompressionCodecEncrypted");
    LOG_WARNING(log, "Server was built without SSL support. Encryption is disabled.");
}
} }
#endif /* USE_SSL && USE_INTERNAL_SSL_LIBRARY */ #endif /* USE_SSL && USE_INTERNAL_SSL_LIBRARY */
namespace DB
{

/// Registers one codec per supported encryption algorithm,
/// in the order AES_128_GCM_SIV, AES_256_GCM_SIV.
void registerCodecEncrypted(CompressionCodecFactory & factory)
{
    constexpr EncryptionMethod methods[] = {AES_128_GCM_SIV, AES_256_GCM_SIV};
    for (const auto method : methods)
        registerEncryptionCodec(factory, method);
}

}

View File

@ -1,17 +1,25 @@
#pragma once #pragma once
// This depends on BoringSSL-specific API, notably <openssl/aead.h>. #if !defined(ARCADIA_BUILD)
#include <Common/config.h> #include <string_view>
#if USE_SSL && USE_INTERNAL_SSL_LIBRARY && !defined(ARCADIA_BUILD) #include <unordered_map>
#include <common/types.h>
#include <Compression/ICompressionCodec.h> #include <Compression/ICompressionCodec.h>
#include <boost/noncopyable.hpp> #include <Poco/Util/LayeredConfiguration.h>
#include <openssl/aead.h> // Y_IGNORE #include <Common/MultiVersion.h>
#include <optional>
namespace DB namespace DB
{ {
/** This codec encrypts and decrypts blocks with AES-128 in
/// Encryption algorithms known to the codec (two at the moment).
/// The enumerators are also used as array indexes, so they must stay dense and start at zero.
enum EncryptionMethod
{
    AES_128_GCM_SIV = 0,
    AES_256_GCM_SIV,
    /// Not an algorithm: the number of algorithms above.
    MAX_ENCRYPTION_METHOD
};
/** This codec encrypts and decrypts blocks with AES-128 in
* GCM-SIV mode (RFC-8452), which is the only cipher currently * GCM-SIV mode (RFC-8452), which is the only cipher currently
* supported. Although it is implemented as a compression codec * supported. Although it is implemented as a compression codec
* it doesn't actually compress data. In fact encrypted data will * it doesn't actually compress data. In fact encrypted data will
@ -19,7 +27,7 @@ namespace DB
* you want to apply both compression and encryption to your * you want to apply both compression and encryption to your
* columns, you need to put this codec at the end of the chain * columns, you need to put this codec at the end of the chain
* like "column Int32 Codec(Delta, LZ4, * like "column Int32 Codec(Delta, LZ4,
* Encrypted('AES-128-GCM-SIV'))". * AES_128_GCM_SIV)".
* *
* The key is obtained by executing a command specified in the * The key is obtained by executing a command specified in the
* configuration file at startup, and if it doesn't specify a * configuration file at startup, and if it doesn't specify a
@ -37,21 +45,62 @@ namespace DB
* as otherwise our engines like ReplicatedMergeTree cannot * as otherwise our engines like ReplicatedMergeTree cannot
* deduplicate data blocks. * deduplicate data blocks.
*/ */
class CompressionCodecEncrypted : public ICompressionCodec class CompressionCodecEncrypted : public ICompressionCodec
{ {
public: public:
/** If a master key is available, the server is supposed to /** If a key is available, the server is supposed to
* invoke this static method at the startup. The codec will * invoke this static method at the startup. The codec will
* refuse to compress or decompress any data until that. The * refuse to compress or decompress any data until that. The
* key can be an arbitrary octet string, but it is * key can be an arbitrary octet string, but it is
* recommended that the key is at least 16 octets long. * recommended that the key is at least 16 octets long.
* *
* Note that the master key is currently not guarded by a * Note that the key is currently not guarded by a
* mutex. This method should be invoked no more than once. * mutex. This method should be invoked no more than once.
*/ */
static void setMasterKey(const std::string_view & master_key); explicit CompressionCodecEncrypted(EncryptionMethod Method);
explicit CompressionCodecEncrypted(const std::string_view & cipher); /**
* This is utility class. It holds information about encryption configuration.
*/
class Configuration
{
public:
/// Access point to the single shared Configuration object.
static Configuration & instance();
/// Try to load encryption settings from the config section `config_prefix`.
/// NOTE(review): presumably returns true on success and false otherwise; the build
/// without SSL always returns false - confirm against the SSL implementation.
bool tryLoad(const Poco::Util::AbstractConfiguration & config, const String & config_prefix);
/// Load encryption settings and throw an exception if something went wrong.
void load(const Poco::Util::AbstractConfiguration & config, const String & config_prefix);
/// Get the current key id, key and nonce for `method`; the results are stored in the
/// variables passed to this function.
/// All values are returned by one call to prevent situations
/// when the config changes and key and nonce are read from different versions.
/// If nonce is empty, it will return 12 null bytes.
void getCurrentKeyAndNonce(EncryptionMethod method, UInt64 & current_key_id, String & current_key, String & nonce) const;
/// Same as getCurrentKeyAndNonce, but returns the key with the given id.
/// It is needed for decryption, where the key id comes from the data; that is why nonce is not returned.
String getKey(EncryptionMethod method, const UInt64 & key_id) const;
private:
/// struct Params consists of:
/// 1) hash-table of keys and their ids
/// 2) id of the current key for encryption
/// 3) nonce for encryption
/// Each of these members has MAX_ENCRYPTION_METHOD entries (one per algorithm),
/// because all algorithms can be described in config and used for different tables.
struct Params
{
std::unordered_map<UInt64, String> keys_storage[MAX_ENCRYPTION_METHOD];
UInt64 current_key_id[MAX_ENCRYPTION_METHOD] = {0, 0};
String nonce[MAX_ENCRYPTION_METHOD];
};
// Reads the settings of one algorithm (`method`) from config into `new_params`.
static void loadImpl(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, EncryptionMethod method, std::unique_ptr<Params>& new_params);
// Versioned storage of the loaded parameters.
MultiVersion<Params> params;
};
uint8_t getMethodByte() const override; uint8_t getMethodByte() const override;
void updateHash(SipHash & hash) const override; void updateHash(SipHash & hash) const override;
@ -66,44 +115,24 @@ namespace DB
return false; return false;
} }
bool isPostProcessing() const override bool isEncryption() const override
{ {
return true; return true;
} }
protected:
protected:
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
/// Encrypt data with chosen method.
/// Throws exception if encryption is impossible or size of encrypted text is incorrect
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
/// Decrypt data with chosen method
/// Throws exception if decryption is impossible or size of decrypted text is incorrect
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
private:
EncryptionMethod encryption_method;
};
private:
static std::string lastErrorString();
static std::string deriveKey(const std::string_view & master_key);
static void encrypt(const std::string_view & plaintext, char * ciphertext_and_tag);
static void decrypt(const std::string_view & ciphertext_and_tag, char * plaintext);
/** A private class that holds keys derived from the master
* key.
*/
struct KeyHolder : private boost::noncopyable
{
explicit KeyHolder(const std::string_view & master_key);
~KeyHolder();
std::string keygen_key;
EVP_AEAD_CTX ctx;
};
static const KeyHolder & getKeys();
static inline std::optional<KeyHolder> keys;
};
inline CompressionCodecPtr getCompressionCodecEncrypted(const std::string_view & master_key)
{
return std::make_shared<CompressionCodecEncrypted>(master_key);
}
} }
#endif /* USE_SSL && USE_INTERNAL_SSL_LIBRARY */ #endif /* NOT Arcadia_build */

View File

@ -62,7 +62,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
bool is_compression = false; bool is_compression = false;
bool has_none = false; bool has_none = false;
std::optional<size_t> generic_compression_codec_pos; std::optional<size_t> generic_compression_codec_pos;
std::set<size_t> post_processing_codecs; std::set<size_t> encryption_codecs;
bool can_substitute_codec_arguments = true; bool can_substitute_codec_arguments = true;
for (size_t i = 0, size = func->arguments->children.size(); i < size; ++i) for (size_t i = 0, size = func->arguments->children.size(); i < size; ++i)
@ -141,8 +141,8 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
if (!generic_compression_codec_pos && result_codec->isGenericCompression()) if (!generic_compression_codec_pos && result_codec->isGenericCompression())
generic_compression_codec_pos = i; generic_compression_codec_pos = i;
if (result_codec->isPostProcessing()) if (result_codec->isEncryption())
post_processing_codecs.insert(i); encryption_codecs.insert(i);
} }
String codec_description = queryToString(codecs_descriptions); String codec_description = queryToString(codecs_descriptions);
@ -157,8 +157,8 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
/// Allow to explicitly specify single NONE codec if user don't want any compression. /// Allow to explicitly specify single NONE codec if user don't want any compression.
/// But applying other transformations solely without compression (e.g. Delta) does not make sense. /// But applying other transformations solely without compression (e.g. Delta) does not make sense.
/// It's okay to apply post-processing codecs solely without anything else. /// It's okay to apply encryption codecs solely without anything else.
if (!is_compression && !has_none && post_processing_codecs.size() != codecs_descriptions->children.size()) if (!is_compression && !has_none && encryption_codecs.size() != codecs_descriptions->children.size())
throw Exception( throw Exception(
"Compression codec " + codec_description "Compression codec " + codec_description
+ " does not compress anything." + " does not compress anything."
@ -168,10 +168,10 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", " (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
ErrorCodes::BAD_ARGUMENTS); ErrorCodes::BAD_ARGUMENTS);
/// It does not make sense to apply any non-post-processing codecs /// It does not make sense to apply any non-encryption codecs
/// after post-processing one. /// after encryption one.
if (!post_processing_codecs.empty() && if (!encryption_codecs.empty() &&
*post_processing_codecs.begin() != codecs_descriptions->children.size() - post_processing_codecs.size()) *encryption_codecs.begin() != codecs_descriptions->children.size() - encryption_codecs.size())
throw Exception("The combination of compression codecs " + codec_description + " is meaningless," throw Exception("The combination of compression codecs " + codec_description + " is meaningless,"
" because it does not make sense to apply any non-post-processing codecs after" " because it does not make sense to apply any non-post-processing codecs after"
" post-processing ones. (Note: you can enable setting 'allow_suspicious_codecs'" " post-processing ones. (Note: you can enable setting 'allow_suspicious_codecs'"
@ -180,7 +180,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
/// It does not make sense to apply any transformations after generic compression algorithm /// It does not make sense to apply any transformations after generic compression algorithm
/// So, generic compression can be only one and only at the end. /// So, generic compression can be only one and only at the end.
if (generic_compression_codec_pos && if (generic_compression_codec_pos &&
*generic_compression_codec_pos != codecs_descriptions->children.size() - 1 - post_processing_codecs.size()) *generic_compression_codec_pos != codecs_descriptions->children.size() - 1 - encryption_codecs.size())
throw Exception("The combination of compression codecs " + codec_description + " is meaningless," throw Exception("The combination of compression codecs " + codec_description + " is meaningless,"
" because it does not make sense to apply any transformations after generic compression algorithm." " because it does not make sense to apply any transformations after generic compression algorithm."
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", ErrorCodes::BAD_ARGUMENTS); " (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", ErrorCodes::BAD_ARGUMENTS);

View File

@ -43,7 +43,8 @@ enum class CompressionMethodByte : uint8_t
T64 = 0x93, T64 = 0x93,
DoubleDelta = 0x94, DoubleDelta = 0x94,
Gorilla = 0x95, Gorilla = 0x95,
Encrypted = 0x96, AES_128_GCM_SIV = 0x96,
AES_256_GCM_SIV = 0x97
}; };
} }

View File

@ -76,7 +76,7 @@ public:
virtual bool isGenericCompression() const = 0; virtual bool isGenericCompression() const = 0;
/// If it is a post-processing codec such as encryption. Usually it does not make sense to apply non-post-processing codecs after this. /// If it is a post-processing codec such as encryption. Usually it does not make sense to apply non-post-processing codecs after this.
virtual bool isPostProcessing() const { return false; } virtual bool isEncryption() const { return false; }
/// It is a codec available only for evaluation purposes and not meant to be used in production. /// It is a codec available only for evaluation purposes and not meant to be used in production.
/// It will not be allowed to use unless the user will turn off the safety switch. /// It will not be allowed to use unless the user will turn off the safety switch.

View File

@ -1,49 +1,301 @@
#include <cstddef>
#include <cstdint>
#include <iostream> #include <iostream>
#include <memory>
#include <string> #include <string>
#include <Compression/ICompressionCodec.h> #include <Compression/ICompressionCodec.h>
#include <Compression/CompressionCodecEncrypted.h> #include <Compression/CompressionCodecEncrypted.h>
#include <IO/BufferWithOwnMemory.h> #include <IO/BufferWithOwnMemory.h>
#include <Poco/DOM/AutoPtr.h>
#include <Poco/DOM/Document.h>
#include <Poco/DOM/Element.h>
#include <Poco/DOM/Text.h>
#include <Poco/NumericString.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Poco/Util/XMLConfiguration.h>
#include "Common/Exception.h"
namespace DB inline DB::CompressionCodecPtr getCompressionCodecEncrypted(DB::EncryptionMethod Method)
{ {
CompressionCodecPtr getCompressionCodecEncrypted(const std::string_view & master_key); return std::make_shared<DB::CompressionCodecEncrypted>(Method);
} }
constexpr size_t key_size = 20; namespace
struct AuxiliaryRandomData
{ {
char key[key_size];
size_t decompressed_size; using namespace Poco;
using namespace Poco::XML;
/*
Fuzzing data consists of:
first byte:
1) length of nonce is in data (for correct work with wrong data from user)
2) length of key is in data (for correct work with wrong data from user)
3) is 128 turn on
4) multiple keys for 128
5) is 256 turn on
6) multiple keys for 256
7) nonce is set
8) current_key is set
read_key() will consist of the following steps:
if (2):
read 8 bytes to know size
if multiple_keys:
read id
else:
size is chosen correctly according to algorithm
read size_bytes as a key
next bytes will have information in this order:
if (3):
if (4):
read count
for i in range(count):
read_key()
else:
read_key()
if (7):
read_nonce (similar to read_key)
if (8):
set current_key
same for AES_256_GCM_SIV with (5) and (6) instead of (3) and (4)
This class reads the data and generates an XML configuration document.
*/
/// Turns raw fuzzer input into an XML configuration for the encryption codecs.
class XMLGenerator
{
public:
/// Remembers the input buffer and its size; nothing is parsed here.
XMLGenerator(const uint8_t * data, size_t& size);
/// Try to generate config from input data using the algorithm described before the class declaration
void generate();
/// Size of the part of input which was used for generating config
size_t keySize() const;
/// Get config
const Poco::AutoPtr<Poco::Util::XMLConfiguration>& getResult() const;
/// If something went wrong in generator, it will be true
bool hasError() const;
private:
/// generate algorithm section with key and nonce
bool generateAlgorithmKeys(AutoPtr<Poco::XML::Element>& document_root, std::string name,
uint8_t mask_for_algo, uint8_t mask_for_multiple_keys);
/// move the stream forward by count bytes and increase the counter
/// returns false if some error occurred
bool next(ssize_t count=1);
/// Create a key from data
ssize_t generateKey(std::string name, bool multiple=false);
/// Current read position in the input
const uint8_t * data;
/// Total size of the input
size_t start_size;
/// Number of input bytes consumed so far
size_t keys_size;
/// Document that is being built
AutoPtr<Poco::XML::Document> xml_document;
/// Element of the algorithm section that is currently being filled
AutoPtr<Poco::XML::Element> algo;
/// Resulting configuration, loaded from xml_document at the end of generate()
AutoPtr<Poco::Util::XMLConfiguration> conf;
/// First byte of the input: the bit flags described before the class declaration
uint8_t first_byte;
/// Set when generation failed
bool error;
};
/// Remembers the input buffer and starts with an empty document and configuration.
/// Every member that is read later is initialized here:
///  - keys_size is only ever incremented by next(), so it has to start from zero;
///  - xml_document must point to a real document, because generate() and the
///    key helpers call createElement()/createTextNode() on it;
///  - first_byte is zeroed defensively (generate() overwrites it before use).
XMLGenerator::XMLGenerator(const uint8_t * Data, size_t& Size)
    : data(Data)
    , start_size(Size)
    , keys_size(0)
    , xml_document(new Poco::XML::Document())
    , conf(new Poco::Util::XMLConfiguration())
    , first_byte(0)
    , error(false)
{
}
/// Number of input bytes consumed while generating the config.
size_t XMLGenerator::keySize() const
{
    return keys_size;
}
/// The configuration object owned by the generator.
const Poco::AutoPtr<Poco::Util::XMLConfiguration>& XMLGenerator::getResult() const
{
    return conf;
}
bool XMLGenerator::hasError() const { return error; }
/// Advances the read position by `count` bytes and accounts for them in keys_size.
/// Returns false and sets the error flag if `count` is negative (helpers report
/// failure with a negative value) or if the step reaches the end of the input.
bool XMLGenerator::next(ssize_t count)
{
    /// Any negative step means something went wrong. Checking only for -1 would let
    /// other negative values (e.g. a huge size converted to ssize_t) move the
    /// pointer backwards and wrap the counter.
    if (count < 0)
    {
        error = true;
        return false;
    }

    /// move data and increase counter
    keys_size += count;

    /// If we got to the end of input or past it
    if (keys_size >= start_size)
    {
        error = true;
        return false;
    }

    data += count;
    return true;
}
/*
<Key>key</key>
or
<key id=..>key</key>
*/
ssize_t XMLGenerator::generateKey(std::string name, bool multiple)
{
/// set traditional key size for algorithms
uint64_t size = 0;
if (name == "aes_128_gcm_siv")
size = 16;
if (name == "aes_256_gcm_siv")
size = 32;
/// try to read size from data
if (first_byte & 0x40)
{
size = *(reinterpret_cast<const uint64_t*>(data));
if (!next(8))
return -1;
}
/// if it is not defined, leave
if (!size)
return -1;
AutoPtr<Poco::XML::Element> key_holder;
if (multiple)
{
/// multiple keys have ids.
uint64_t id = *(reinterpret_cast<const uint64_t*>(data));
if (!next(8))
return -1;
key_holder = xml_document->createElement("key[id=" + std::to_string(id) + "]");
}
else
{
key_holder = xml_document->createElement("key");
}
AutoPtr<Text> key(xml_document->createTextNode(std::string(data, data + size)));
key_holder->appendChild(key);
algo->appendChild(key_holder);
if (!next(size))
return -1;
return size;
}
/// Generates the config section of one algorithm: keys, then optional nonce and current key id.
/// document_root          - element the algorithm section is appended to.
/// name                   - name of the section (algorithm name).
/// mask_for_algo          - bit of first_byte that enables this algorithm.
/// mask_for_multiple_keys - bit of first_byte that selects several keys with ids.
/// Returns false and sets the error flag when the input cannot be parsed.
bool XMLGenerator::generateAlgorithmKeys(
    AutoPtr<Poco::XML::Element>& document_root, std::string name, uint8_t mask_for_algo, uint8_t mask_for_multiple_keys)
{
    /// A disabled algorithm has no section, so there is nothing to attach nonce or
    /// current key id to (`algo` would be null or would still point to another section).
    if (!(first_byte & mask_for_algo))
        return true;

    /// Input bytes that have not been consumed yet.
    const auto remaining = [this]() -> size_t
    {
        return keys_size < start_size ? start_size - keys_size : 0;
    };

    const auto fail = [this]() -> bool
    {
        error = true;
        return false;
    };

    /// Reads an 8-byte value at the current position and moves past it.
    const auto read_u64 = [&](uint64_t & value) -> bool
    {
        if (remaining() < sizeof(uint64_t))
            return fail();
        value = *(reinterpret_cast<const uint64_t*>(data));
        return next(8);
    };

    algo = xml_document->createElement(name);
    document_root->appendChild(algo);

    /// add multiple keys or single key
    /// generateKey() consumes the key bytes itself, so its result is only checked for failure here.
    if (first_byte & mask_for_multiple_keys)
    {
        uint64_t count = 0;
        if (!read_u64(count))
            return false;

        for (uint64_t i = 0; i < count; ++i)
        {
            if (generateKey(name, true) < 0)
                return fail();
        }
    }
    else
    {
        if (generateKey(name) < 0)
            return fail();
    }

    /// add nonce
    if (first_byte & 0x02)
    {
        uint64_t nonce_size = 12;
        if (first_byte & 0x80)
        {
            if (!read_u64(nonce_size))
                return false;
        }

        /// The nonce bytes must be inside the input before they are copied.
        if (nonce_size >= remaining())
            return fail();

        AutoPtr<Poco::XML::Element> nonce_holder(xml_document->createElement("nonce"));
        AutoPtr<Text> nonce(xml_document->createTextNode(std::string(data, data + nonce_size)));
        nonce_holder->appendChild(nonce);
        algo->appendChild(nonce_holder);

        /// Consume the nonce, otherwise the same bytes would be parsed again as the next field.
        if (!next(nonce_size))
            return false;
    }

    /// add current key id
    if (first_byte & 0x01)
    {
        uint64_t current_key = 0;
        if (!read_u64(current_key))
            return false;

        AutoPtr<Poco::XML::Element> cur_key_holder(xml_document->createElement("current_key_id"));
        AutoPtr<Text> cur_key(xml_document->createTextNode(std::to_string(current_key)));
        cur_key_holder->appendChild(cur_key);
        algo->appendChild(cur_key_holder);
    }

    return true;
}
void XMLGenerator::generate()
{
AutoPtr<Poco::XML::Element> document_root(xml_document->createElement("encryption_codecs"));
xml_document->appendChild(document_root);
/// read first byte for parsing
first_byte = *data;
if (!next())
return;
if (!generateAlgorithmKeys(document_root, "aes_128_gmc_siv", 0x20, 0x10))
return;
if (!generateAlgorithmKeys(document_root, "aes_256_gmc_siv", 0x08, 0x04))
return;
conf->load(xml_document);
}
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
try try
{ {
if (size < sizeof(AuxiliaryRandomData)) XMLGenerator generator(data, size);
generator.generate();
if (generator.hasError())
return 0; return 0;
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data); auto config = generator.getResult();
auto codec_128 = getCompressionCodecEncrypted(DB::AES_128_GCM_SIV);
auto codec_256 = getCompressionCodecEncrypted(DB::AES_256_GCM_SIV);
DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "");
std::string key = std::string(p->key, key_size); size_t data_size = size - generator.keySize();
auto codec = DB::getCompressionCodecEncrypted(key);
size_t output_buffer_size = p->decompressed_size % 65536; std::string input = std::string(reinterpret_cast<const char*>(data), data_size);
size -= sizeof(AuxiliaryRandomData); fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, data_size, input.size() - 31);
data += sizeof(AuxiliaryRandomData) / sizeof(uint8_t);
std::string input = std::string(reinterpret_cast<const char*>(data), size);
fmt::print(stderr, "Using input {} of size {}, output size is {}. \n", input, size, output_buffer_size);
if (output_buffer_size < size)
return 0;
DB::Memory<> memory; DB::Memory<> memory;
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer()); memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
codec_128->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31);
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
codec_256->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31);
return 0; return 0;
} }
catch (...) catch (...)

View File

@ -35,6 +35,11 @@ SRCS(
CompressionFactoryAdditions.cpp CompressionFactoryAdditions.cpp
ICompressionCodec.cpp ICompressionCodec.cpp
LZ4_decompress_faster.cpp LZ4_decompress_faster.cpp
fuzzers/compressed_buffer_fuzzer.cpp
fuzzers/delta_decompress_fuzzer.cpp
fuzzers/double_delta_decompress_fuzzer.cpp
fuzzers/encrypted_decompress_fuzzer.cpp
fuzzers/lz4_decompress_fuzzer.cpp
getCompressionCodecForFile.cpp getCompressionCodecForFile.cpp
) )

View File

@ -1,6 +1,10 @@
<yandex> <yandex>
<encryption> <encryption_codecs>
<!-- "Some fixed key that is at least 16 bytes long" in Base64 --> <aes_128_gcm_siv>
<key_command>echo "U29tZSBmaXhlZCBrZXkgdGhhdCBpcyBhdCBsZWFzdCAxNiBieXRlcyBsb25n"</key_command> <key_hex>a32902703dab1cedd7ff7287067787ca</key_hex>
</encryption> </aes_128_gcm_siv>
<aes_256_gcm_siv>
<key_hex>a32902703dab1cedd7ff7287067787caa32902703dab1cedd7ff7287067787ca</key_hex>
</aes_256_gcm_siv>
</encryption_codecs>
</yandex> </yandex>

View File

@ -0,0 +1,73 @@
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.client import QueryRuntimeException
from helpers.test_tools import assert_eq_with_retry
# One cluster with a single instance named 'node'; both are shared by everything in this module.
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node')
@pytest.fixture(scope="module")
def start_cluster():
    """Start the cluster once per module and always shut it down afterwards."""
    try:
        cluster.start()
        # Tests run while the generator is suspended here.
        yield cluster
    finally:
        cluster.shutdown()
def make_storage_with_key(id):
    """Write an encryption_codecs config with keys 0 and 1 for both codecs,
    set `id` as current_key_id for both, and make the server reload its config."""
    config_script = """cat > /etc/clickhouse-server/config.d/storage_keys_config.xml << EOF
<?xml version="1.0"?>
<yandex>
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex id="0">83e84e9a4eb11535c0670dc62d808ee0</key_hex>
<key id="1">abcdefghijklmnop</key>
<current_key_id>{cur_id}</current_key_id>
</aes_128_gcm_siv>
<aes_256_gcm_siv>
<key_hex id="0">83e84e9a4eb11535c0670dc62d808ee083e84e9a4eb11535c0670dc62d808ee0</key_hex>
<key id="1">abcdefghijklmnopabcdefghijklmnop</key>
<current_key_id>{cur_id}</current_key_id>
</aes_256_gcm_siv>
</encryption_codecs>
</yandex>
EOF"""
    write_config_command = ["bash", "-c", config_script.format(cur_id=id)]
    node.exec_in_container(write_config_command)
    node.query("SYSTEM RELOAD CONFIG")
def test_different_keys(start_cluster):
    """Data written under key 0 must stay readable after the current key is switched to 1.

    The scenario is run for both codecs. The current key is reset to 0 before each
    table is filled; otherwise the second table would only ever be written with
    key 1 and key rotation would not be checked for it.
    """
    tables = (
        ("encrypted_test_128", "AES_128_GCM_SIV"),
        ("encrypted_test_256", "AES_256_GCM_SIV"),
    )

    make_storage_with_key(0)
    for table, codec in tables:
        node.query("""
            CREATE TABLE {table} (
                id Int64,
                data String Codec({codec})
            ) ENGINE=MergeTree()
            ORDER BY id
            """.format(table=table, codec=codec))

    for table, _ in tables:
        select_query = "SELECT * FROM {} ORDER BY id FORMAT Values".format(table)

        # Write and read with key 0.
        make_storage_with_key(0)
        node.query("INSERT INTO {} VALUES (0,'data'),(1,'data')".format(table))
        assert node.query(select_query) == "(0,'data'),(1,'data')"

        # Switch to key 1: old rows (key 0) and new rows (key 1) must both be readable.
        make_storage_with_key(1)
        node.query("INSERT INTO {} VALUES (3,'text'),(4,'text')".format(table))
        assert node.query(select_query) == "(0,'data'),(1,'data'),(3,'text'),(4,'text')"

View File

@ -1 +1,2 @@
1 Some plaintext 1 Some plaintext
1 Some plaintext

View File

@ -2,7 +2,14 @@
-- Tag no-fasttest: Depends on OpenSSL -- Tag no-fasttest: Depends on OpenSSL
DROP TABLE IF EXISTS encryption_test; DROP TABLE IF EXISTS encryption_test;
CREATE TABLE encryption_test (i Int, s String Codec(Encrypted('AES-128-GCM-SIV'))) ENGINE = MergeTree ORDER BY i; CREATE TABLE encryption_test (i Int, s String Codec(AES_128_GCM_SIV)) ENGINE = MergeTree ORDER BY i;
INSERT INTO encryption_test VALUES (1, 'Some plaintext');
SELECT * FROM encryption_test;
DROP TABLE encryption_test;
CREATE TABLE encryption_test (i Int, s String Codec(AES_256_GCM_SIV)) ENGINE = MergeTree ORDER BY i;
INSERT INTO encryption_test VALUES (1, 'Some plaintext'); INSERT INTO encryption_test VALUES (1, 'Some plaintext');
SELECT * FROM encryption_test; SELECT * FROM encryption_test;