ClickHouse/src/Compression/CompressionCodecEncrypted.cpp
Filatenkov Artur c23fe5baf6
Improve codec for encr 19896 (#27476)
* change syntax of encrypted command

* commit all encrypted changes

* correct encryption

* correct config for test

* add tests and correct code style and typos

* correct test

* fix unbundled build

* add log warning messages

* improve code according to review comments

* correct nonce

* correct errors found by fuzzing

* improve codec AES_128_GCM_SIV. Add AES_256_GCM_SIV. Add sections for last in tests. Improve documentation

* Update CompressionCodecEncrypted.h

* Update 01683_codec_encrypted.sql

* correct compression factory after changes in master

* correct behavior with wrong key in data

* correct fuzzer

* add connection for fuzzer with fix for compression_encrypted

* refactor code

* add load from config with throwing errors on server start

* fix typos and check style

* Update Server.cpp

* correct loading and reading

* refactor code. fix uninitialized value

* refactor code

* move defines from server to cpp file

* correct build

* remove repeated code

* correct namespace

* fix code style
2021-09-13 11:25:36 +03:00

554 lines
21 KiB
C++

#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include "Common/Exception.h"
#include "common/types.h"
#include "IO/VarInt.h"
#include <Compression/CompressionFactory.h>
#include <Compression/CompressionCodecEncrypted.h>
#include <Poco/Logger.h>
#include <common/logger_useful.h>
#include <Common/ErrorCodes.h>
// This depends on BoringSSL-specific API, notably <openssl/aead.h>.
#if USE_SSL && USE_INTERNAL_SSL_LIBRARY
#include <Parsers/ASTLiteral.h>
#include <openssl/digest.h> // Y_IGNORE
#include <openssl/err.h>
#include <boost/algorithm/hex.hpp>
#include <openssl/aead.h> // Y_IGNORE
#endif
// Common part for both parts (with SSL and without)
namespace DB
{
namespace ErrorCodes
{
extern const int OPENSSL_ERROR;
}
namespace
{
/// Get string name for method. Return empty string for undefined Method
String getMethodName(EncryptionMethod Method)
{
if (Method == AES_128_GCM_SIV)
{
return "AES_128_GCM_SIV";
}
else if (Method == AES_256_GCM_SIV)
{
return "AES_256_GCM_SIV";
}
else
{
return "";
}
}
/// Get method code (used for codec, to understand which one we are using)
uint8_t getMethodCode(EncryptionMethod Method)
{
if (Method == AES_128_GCM_SIV)
{
return uint8_t(CompressionMethodByte::AES_128_GCM_SIV);
}
else if (Method == AES_256_GCM_SIV)
{
return uint8_t(CompressionMethodByte::AES_256_GCM_SIV);
}
else
{
throw Exception("Wrong encryption Method. Got " + getMethodName(Method), ErrorCodes::BAD_ARGUMENTS);
}
}
} // end of namespace
} // end of namespace DB
#if USE_SSL && USE_INTERNAL_SSL_LIBRARY
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
extern const int INCORRECT_DATA;
}
namespace
{
constexpr size_t tag_size = 16; /// AES-GCM-SIV always uses a tag of 16 bytes length
constexpr size_t key_id_max_size = 8; /// Max size of varint.
constexpr size_t nonce_max_size = 13; /// Nonce size and one byte to show if nonce in in text
/// Get encryption/decryption algorithms.
auto getMethod(EncryptionMethod Method)
{
if (Method == AES_128_GCM_SIV)
{
return EVP_aead_aes_128_gcm_siv;
}
else if (Method == AES_256_GCM_SIV)
{
return EVP_aead_aes_256_gcm_siv;
}
else
{
throw Exception("Wrong encryption Method. Got " + getMethodName(Method), ErrorCodes::BAD_ARGUMENTS);
}
}
/// Find out key size for each algorithm
UInt64 methodKeySize(EncryptionMethod Method)
{
if (Method == AES_128_GCM_SIV)
{
return 16;
}
else if (Method == AES_256_GCM_SIV)
{
return 32;
}
else
{
throw Exception("Wrong encryption Method. Got " + getMethodName(Method), ErrorCodes::BAD_ARGUMENTS);
}
}
std::string lastErrorString()
{
std::array<char, 1024> buffer;
ERR_error_string_n(ERR_get_error(), buffer.data(), buffer.size());
return std::string(buffer.data());
}
/// Encrypt plaintext with particular algorithm and put result into ciphertext_and_tag.
/// This function get key and nonce and encrypt text with their help.
/// If something went wrong (can't init context or can't encrypt data) it throws exception.
/// It returns length of encrypted text.
size_t encrypt(const std::string_view & plaintext, char * ciphertext_and_tag, EncryptionMethod method, const String & key, const String & nonce)
{
/// Init context for encryption, using key.
EVP_AEAD_CTX encrypt_ctx;
EVP_AEAD_CTX_zero(&encrypt_ctx);
const int ok_init = EVP_AEAD_CTX_init(&encrypt_ctx, getMethod(method)(),
reinterpret_cast<const uint8_t*>(key.data()), key.size(),
16 /* tag size */, nullptr);
if (!ok_init)
throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
/// encrypt data using context and given nonce.
size_t out_len;
const int ok_open = EVP_AEAD_CTX_seal(&encrypt_ctx,
reinterpret_cast<uint8_t *>(ciphertext_and_tag),
&out_len, plaintext.size() + 16,
reinterpret_cast<const uint8_t *>(nonce.data()), nonce.size(),
reinterpret_cast<const uint8_t *>(plaintext.data()), plaintext.size(),
nullptr, 0);
if (!ok_open)
throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
return out_len;
}
/// Encrypt plaintext with particular algorithm and put result into ciphertext_and_tag.
/// This function get key and nonce and encrypt text with their help.
/// If something went wrong (can't init context or can't encrypt data) it throws exception.
/// It returns length of encrypted text.
size_t decrypt(const std::string_view & ciphertext, char * plaintext, EncryptionMethod method, const String & key, const String & nonce)
{
/// Init context for decryption with given key.
EVP_AEAD_CTX decrypt_ctx;
EVP_AEAD_CTX_zero(&decrypt_ctx);
const int ok_init = EVP_AEAD_CTX_init(&decrypt_ctx, getMethod(method)(),
reinterpret_cast<const uint8_t*>(key.data()), key.size(),
16 /* tag size */, nullptr);
if (!ok_init)
throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
/// decrypt data using given nonce
size_t out_len;
const int ok_open = EVP_AEAD_CTX_open(&decrypt_ctx,
reinterpret_cast<uint8_t *>(plaintext),
&out_len, ciphertext.size(),
reinterpret_cast<const uint8_t *>(nonce.data()), nonce.size(),
reinterpret_cast<const uint8_t *>(ciphertext.data()), ciphertext.size(),
nullptr, 0);
if (!ok_open)
throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
return out_len;
}
/// Register codec in factory
void registerEncryptionCodec(CompressionCodecFactory & factory, EncryptionMethod Method)
{
const auto method_code = getMethodCode(Method); /// Codec need to know its code
factory.registerCompressionCodec(getMethodName(Method), method_code, [&, Method](const ASTPtr & arguments) -> CompressionCodecPtr
{
if (arguments)
{
if (!arguments->children.empty())
throw Exception("Codec " + getMethodName(Method) + " must not have parameters, given " +
std::to_string(arguments->children.size()),
ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
}
return std::make_shared<CompressionCodecEncrypted>(Method);
});
}
String unhexKey(const String & hex)
{
try
{
return boost::algorithm::unhex(hex);
}
catch (const std::exception &)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read key_hex, check for valid characters [0-9a-fA-F] and length");
}
}
/// Firstly, write a byte, which shows if the nonce will be put in text (if it was defined in config)
/// Secondly, write nonce in text (this step depends from first step)
/// return new position to write
inline char* writeNonce(const String& nonce, char* dest)
{
/// If nonce consists of nul bytes, it shouldn't be in dest. Zero byte is the only byte that should be written.
/// Otherwise, 1 is written and data from nonce is copied
if (nonce != String("\0\0\0\0\0\0\0\0\0\0\0\0", 12))
{
*dest = 1;
++dest;
size_t copied_symbols = nonce.copy(dest, nonce.size());
if (copied_symbols != nonce.size())
throw Exception(ErrorCodes::INCORRECT_DATA, "Can't copy nonce into destination. Count of copied symbols {}, need to copy {}", copied_symbols, nonce.size());
dest += copied_symbols;
return dest;
}
else
{
*dest = 0;
return ++dest;
}
}
/// Firstly, read a byte, which shows if the nonce will be put in text (if it was defined in config)
/// Secondly, read nonce in text (this step depends from first step)
/// return new position to read
inline const char* readNonce(String& nonce, const char* source)
{
/// If first is zero byte: move source and set zero-bytes nonce
if (!*source)
{
nonce = {"\0\0\0\0\0\0\0\0\0\0\0\0", 12};
return ++source;
}
/// Move to next byte. Nonce will begin from there
++source;
/// Otherwise, use data from source in nonce
nonce = {source, 12};
source += 12;
return source;
}
}
CompressionCodecEncrypted::Configuration & CompressionCodecEncrypted::Configuration::instance()
{
static CompressionCodecEncrypted::Configuration ret;
return ret;
}
void CompressionCodecEncrypted::Configuration::loadImpl(
const Poco::Util::AbstractConfiguration & config, const String & config_prefix, EncryptionMethod method, std::unique_ptr<Params> & new_params)
{
// if method is not smaller than MAX_ENCRYPTION_METHOD it is incorrect
if (method >= MAX_ENCRYPTION_METHOD)
throw Exception("Wrong argument for loading configurations.", ErrorCodes::BAD_ARGUMENTS);
/// Scan all keys in config and add them into storage. If key is in hex, transform it.
/// Remember key ID for each key, because it will be used in encryption/decryption
Strings config_keys;
config.keys(config_prefix, config_keys);
for (const std::string & config_key : config_keys)
{
String key;
UInt64 key_id;
if ((config_key == "key") || config_key.starts_with("key["))
{
key = config.getString(config_prefix + "." + config_key, "");
key_id = config.getUInt64(config_prefix + "." + config_key + "[@id]", 0);
}
else if ((config_key == "key_hex") || config_key.starts_with("key_hex["))
{
key = unhexKey(config.getString(config_prefix + "." + config_key, ""));
key_id = config.getUInt64(config_prefix + "." + config_key + "[@id]", 0);
}
else
continue;
/// For each key its id should be unique.
if (new_params->keys_storage[method].contains(key_id))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple keys have the same ID {}", key_id);
/// Check size of key. Its length depends on encryption algorithm.
if (key.size() != methodKeySize(method))
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Got an encryption key with unexpected size {}, the size should be {}",
key.size(), methodKeySize(method));
new_params->keys_storage[method][key_id] = key;
}
/// Check that we have at least one key for this method (otherwise it is incorrect to use it).
if (new_params->keys_storage[method].empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "No keys, an encryption needs keys to work");
/// Try to find which key will be used for encryption. If there is no current_key,
/// first key will be used for encryption (its index equals to zero).
new_params->current_key_id[method] = config.getUInt64(config_prefix + ".current_key_id", 0);
/// Check that we have current key. Otherwise config is incorrect.
if (!new_params->keys_storage[method].contains(new_params->current_key_id[method]))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not found a key with the current ID {}", new_params->current_key_id[method]);
/// Read nonce (in hex or in string). Its length should be 12 bytes.
if (config.has(config_prefix + ".nonce_hex"))
new_params->nonce[method] = unhexKey(config.getString(config_prefix + ".nonce_hex"));
else
new_params->nonce[method] = config.getString(config_prefix + ".nonce", "");
if (new_params->nonce[method].size() != 12 && !new_params->nonce[method].empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got nonce with unexpected size {}, the size should be 12", new_params->nonce[method].size());
}
bool CompressionCodecEncrypted::Configuration::tryLoad(const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
{
/// Try to create new parameters and fill them from config.
/// If there will be some errors, print their message to notify user that
/// something went wrong and new parameters are not available
try
{
load(config, config_prefix);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
return false;
}
return true;
}
void CompressionCodecEncrypted::Configuration::load(const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
{
/// Try to create new parameters and fill them from config.
/// If there will be some errors, throw error
std::unique_ptr<Params> new_params(new Params);
if (config.has(config_prefix + ".aes_128_gcm_siv"))
{
loadImpl(config, config_prefix + ".aes_128_gcm_siv", AES_128_GCM_SIV, new_params);
}
if (config.has(config_prefix + ".aes_256_gcm_siv"))
{
loadImpl(config, config_prefix + ".aes_256_gcm_siv", AES_256_GCM_SIV, new_params);
}
params.set(std::move(new_params));
}
void CompressionCodecEncrypted::Configuration::getCurrentKeyAndNonce(EncryptionMethod method, UInt64 & current_key_id, String &current_key, String & nonce) const
{
/// It parameters were not set, throw exception
if (!params.get())
throw Exception("Empty params in CompressionCodecEncrypted configuration", ErrorCodes::BAD_ARGUMENTS);
/// Save parameters in variable, because they can always change.
/// As this function not atomic, we should be certain that we get information from one particular version for correct work.
const auto current_params = params.get();
current_key_id = current_params->current_key_id[method];
/// As parameters can be created empty, we need to check that this key is available.
if (current_params->keys_storage[method].contains(current_key_id))
current_key = current_params->keys_storage[method].at(current_key_id);
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no current_key {} in config. Please, put it in config and reload.", current_key_id);
/// If there is no nonce in config, we need to generate particular one,
/// because all encryptions should have nonce and random nonce generation will lead to cases
/// when nonce after config reload (nonce is not defined in config) will differ from previously generated one.
/// This will lead to data loss.
nonce = current_params->nonce[method];
if (nonce.empty())
nonce = {"\0\0\0\0\0\0\0\0\0\0\0\0", 12};
}
String CompressionCodecEncrypted::Configuration::getKey(EncryptionMethod method, const UInt64 & key_id) const
{
String key;
/// See description of previous finction, logic is the same.
if (!params.get())
throw Exception("Empty params in CompressionCodecEncrypted configuration", ErrorCodes::BAD_ARGUMENTS);
const auto current_params = params.get();
/// check if there is current key in storage
if (current_params->keys_storage[method].contains(key_id))
key = current_params->keys_storage[method].at(key_id);
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config", key_id);
return key;
}
CompressionCodecEncrypted::CompressionCodecEncrypted(EncryptionMethod Method): encryption_method(Method)
{
setCodecDescription(getMethodName(encryption_method));
}
uint8_t CompressionCodecEncrypted::getMethodByte() const
{
return getMethodCode(encryption_method);
}
void CompressionCodecEncrypted::updateHash(SipHash & hash) const
{
getCodecDesc()->updateTreeHash(hash);
}
UInt32 CompressionCodecEncrypted::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
// The GCM mode is a stream cipher. No paddings are
// involved. There will be a tag at the end of ciphertext (16
// octets). Also it has not more than 8 bytes for key_id in the beginning
// KeyID is followed by byte, that shows if nonce was set in config (and also will be put into data)
// and 12 bytes nonce or this byte will be equal to zero and no nonce will follow it.
return uncompressed_size + tag_size + key_id_max_size + nonce_max_size;
}
UInt32 CompressionCodecEncrypted::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
// Generate an IV out of the data block and the key-generation
// key. It is completely deterministic, but does not leak any
// information about the data block except for equivalence of
// identical blocks (under the same key).
const std::string_view plaintext = std::string_view(source, source_size);
/// Get key and nonce for encryption
UInt64 current_key_id;
String current_key, nonce;
Configuration::instance().getCurrentKeyAndNonce(encryption_method, current_key_id, current_key, nonce);
/// Write current key id to support multiple keys.
/// (key id in the beginning will help to decrypt data after changing current key)
char* ciphertext_with_nonce = writeVarUInt(current_key_id, dest);
size_t keyid_size = ciphertext_with_nonce - dest;
/// write nonce in data. This will help to read data even after changing nonce in config
/// If there were no nonce in data, one zero byte will be written
char* ciphertext = writeNonce(nonce, ciphertext_with_nonce);
UInt64 nonce_size = ciphertext - ciphertext_with_nonce;
// The IV will be used as an authentication tag. The ciphertext and the
// tag will be written directly in the dest buffer.
size_t out_len = encrypt(plaintext, ciphertext, encryption_method, current_key, nonce);
/// Length of encrypted text should be equal to text length plus tag_size (which was added by algorithm).
if (out_len != source_size + tag_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't encrypt data, length after encryption {} is wrong, expected {}", out_len, source_size + tag_size);
return out_len + keyid_size + nonce_size;
}
void CompressionCodecEncrypted::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
{
/// The key is needed for decrypting. That's why it is read at the beginning of process.
UInt64 key_id;
const char * ciphertext_with_nonce = readVarUInt(key_id, source, source_size);
/// Size of text should be decreased by key_size, because key_size bytes were not participating in encryption process.
size_t keyid_size = ciphertext_with_nonce - source;
String nonce;
String key = Configuration::instance().getKey(encryption_method, key_id);
/// try to read nonce from file (if it was set while encrypting)
const char * ciphertext = readNonce(nonce, ciphertext_with_nonce);
/// Size of text should be decreased by nonce_size, because nonce_size bytes were not participating in encryption process.
UInt64 nonce_size = ciphertext - ciphertext_with_nonce;
/// Count text size (nonce and key_id was read from source)
size_t ciphertext_size = source_size - keyid_size - nonce_size;
if (ciphertext_size != uncompressed_size + tag_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't decrypt data, uncompressed_size {} is wrong, expected {}", uncompressed_size, ciphertext_size - tag_size);
size_t out_len = decrypt({ciphertext, ciphertext_size}, dest, encryption_method, key, nonce);
if (out_len != ciphertext_size - tag_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't decrypt data, out length after decryption {} is wrong, expected {}", out_len, ciphertext_size - tag_size);
}
}
#else /* USE_SSL && USE_INTERNAL_SSL_LIBRARY */
namespace DB
{
namespace
{
/// Register codec in factory
void registerEncryptionCodec(CompressionCodecFactory & factory, EncryptionMethod Method)
{
auto throw_no_ssl = [](const ASTPtr &) -> CompressionCodecPtr { throw Exception(ErrorCodes::OPENSSL_ERROR, "Server was built without SSL support. Encryption is disabled."); };
const auto method_code = getMethodCode(Method); /// Codec need to know its code
factory.registerCompressionCodec(getMethodName(Method), method_code, throw_no_ssl);
}
}
CompressionCodecEncrypted::Configuration & CompressionCodecEncrypted::Configuration::instance()
{
static CompressionCodecEncrypted::Configuration ret;
return ret;
}
/// if encryption is disabled.
bool CompressionCodecEncrypted::Configuration::tryLoad(const Poco::Util::AbstractConfiguration & config [[maybe_unused]], const String & config_prefix [[maybe_unused]])
{
return false;
}
/// if encryption is disabled, print warning about this.
void CompressionCodecEncrypted::Configuration::load(const Poco::Util::AbstractConfiguration & config [[maybe_unused]], const String & config_prefix [[maybe_unused]])
{
LOG_WARNING(&Poco::Logger::get("CompressionCodecEncrypted"), "Server was built without SSL support. Encryption is disabled.");
}
}
#endif /* USE_SSL && USE_INTERNAL_SSL_LIBRARY */
namespace DB
{
/// Register codecs for all algorithms
void registerCodecEncrypted(CompressionCodecFactory & factory)
{
registerEncryptionCodec(factory, AES_128_GCM_SIV);
registerEncryptionCodec(factory, AES_256_GCM_SIV);
}
}