Merge pull request #42484 from vitlibar/mask-sensitive-info-in-logs

Mask some information in logs
This commit is contained in:
Vitaly Baranov 2022-11-04 14:09:38 +01:00 committed by GitHub
commit 52b1f4aed9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
42 changed files with 1346 additions and 264 deletions

View File

@ -1336,17 +1336,13 @@
name - name for the rule (optional)
regexp - RE2 compatible regular expression (mandatory)
replace - substitution string for sensitive data (optional, by default - six asterisks)
-->
<query_masking_rules>
<rule>
<name>hide encrypt/decrypt arguments</name>
<regexp>((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\)</regexp>
<!-- or more secure, but also more invasive:
(aes_\w+)\s*\(.*\)
-->
<replace>\1(???)</replace>
</rule>
</query_masking_rules>
</query_masking_rules> -->
<!-- Uncomment to use custom http handlers.
rules are checked from top to bottom, first match runs the handler

View File

@ -138,12 +138,12 @@ private:
BackupImpl::BackupImpl(
const String & backup_name_,
const String & backup_name_for_logging_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupReader> reader_,
const ContextPtr & context_)
: backup_name(backup_name_)
: backup_name_for_logging(backup_name_for_logging_)
, archive_params(archive_params_)
, use_archives(!archive_params.archive_name.empty())
, open_mode(OpenMode::READ)
@ -158,7 +158,7 @@ BackupImpl::BackupImpl(
BackupImpl::BackupImpl(
const String & backup_name_,
const String & backup_name_for_logging_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupWriter> writer_,
@ -166,7 +166,7 @@ BackupImpl::BackupImpl(
bool is_internal_backup_,
const std::shared_ptr<IBackupCoordination> & coordination_,
const std::optional<UUID> & backup_uuid_)
: backup_name(backup_name_)
: backup_name_for_logging(backup_name_for_logging_)
, archive_params(archive_params_)
, use_archives(!archive_params.archive_name.empty())
, open_mode(OpenMode::WRITE)
@ -225,10 +225,19 @@ void BackupImpl::open(const ContextPtr & context)
base_backup = BackupFactory::instance().createBackup(params);
if (open_mode == OpenMode::WRITE)
{
base_backup_uuid = base_backup->getUUID();
}
else if (base_backup_uuid != base_backup->getUUID())
throw Exception(ErrorCodes::WRONG_BASE_BACKUP, "Backup {}: The base backup {} has different UUID ({} != {})",
backup_name, base_backup->getName(), toString(base_backup->getUUID()), (base_backup_uuid ? toString(*base_backup_uuid) : ""));
{
throw Exception(
ErrorCodes::WRONG_BASE_BACKUP,
"Backup {}: The base backup {} has different UUID ({} != {})",
backup_name_for_logging,
base_backup->getNameForLogging(),
toString(base_backup->getUUID()),
(base_backup_uuid ? toString(*base_backup_uuid) : ""));
}
}
}
@ -349,14 +358,14 @@ void BackupImpl::readBackupMetadata()
if (use_archives)
{
if (!reader->fileExists(archive_params.archive_name))
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name);
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name_for_logging);
setCompressedSize();
in = getArchiveReader("")->readFile(".backup");
}
else
{
if (!reader->fileExists(".backup"))
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name);
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name_for_logging);
in = reader->readFile(".backup");
}
@ -369,7 +378,8 @@ void BackupImpl::readBackupMetadata()
version = config->getInt("version");
if ((version < INITIAL_BACKUP_VERSION) || (version > CURRENT_BACKUP_VERSION))
throw Exception(ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", backup_name, version);
throw Exception(
ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", backup_name_for_logging, version);
timestamp = parse<LocalDateTime>(config->getString("timestamp")).to_time_t();
uuid = parse<UUID>(config->getString("uuid"));
@ -400,7 +410,13 @@ void BackupImpl::readBackupMetadata()
use_base = true;
if (info.base_size > info.size)
throw Exception(ErrorCodes::BACKUP_DAMAGED, "Backup {}: Base size must not be greater than the size of entry {}", backup_name, quoteString(info.file_name));
{
throw Exception(
ErrorCodes::BACKUP_DAMAGED,
"Backup {}: Base size must not be greater than the size of entry {}",
backup_name_for_logging,
quoteString(info.file_name));
}
if (use_base)
{
@ -436,14 +452,14 @@ void BackupImpl::checkBackupDoesntExist() const
file_name_to_check_existence = ".backup";
if (writer->fileExists(file_name_to_check_existence))
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", backup_name);
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", backup_name_for_logging);
/// Check that no other backup (excluding internal backups) is writing to the same destination.
if (!is_internal_backup)
{
assert(!lock_file_name.empty());
if (writer->fileExists(lock_file_name))
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} is being written already", backup_name);
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} is being written already", backup_name_for_logging);
}
}
@ -466,8 +482,16 @@ bool BackupImpl::checkLockFile(bool throw_if_failed) const
if (throw_if_failed)
{
if (!writer->fileExists(lock_file_name))
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Lock file {} suddenly disappeared while writing backup {}", lock_file_name, backup_name);
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "A concurrent backup writing to the same destination {} detected", backup_name);
{
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Lock file {} suddenly disappeared while writing backup {}",
lock_file_name,
backup_name_for_logging);
}
throw Exception(
ErrorCodes::BACKUP_ALREADY_EXISTS, "A concurrent backup writing to the same destination {} detected", backup_name_for_logging);
}
return false;
}
@ -514,8 +538,13 @@ UInt64 BackupImpl::getFileSize(const String & file_name) const
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
ErrorCodes::BACKUP_ENTRY_NOT_FOUND,
"Backup {}: Entry {} not found in the backup",
backup_name_for_logging,
quoteString(file_name));
}
return info->size;
}
@ -525,8 +554,13 @@ UInt128 BackupImpl::getFileChecksum(const String & file_name) const
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
ErrorCodes::BACKUP_ENTRY_NOT_FOUND,
"Backup {}: Entry {} not found in the backup",
backup_name_for_logging,
quoteString(file_name));
}
return info->checksum;
}
@ -536,8 +570,13 @@ SizeAndChecksum BackupImpl::getFileSizeAndChecksum(const String & file_name) con
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
ErrorCodes::BACKUP_ENTRY_NOT_FOUND,
"Backup {}: Entry {} not found in the backup",
backup_name_for_logging,
quoteString(file_name));
}
return {info->size, info->checksum};
}
@ -560,8 +599,13 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
auto info_opt = coordination->getFileInfo(size_and_checksum);
if (!info_opt)
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, formatSizeAndChecksum(size_and_checksum));
ErrorCodes::BACKUP_ENTRY_NOT_FOUND,
"Backup {}: Entry {} not found in the backup",
backup_name_for_logging,
formatSizeAndChecksum(size_and_checksum));
}
const auto & info = *info_opt;
@ -577,7 +621,7 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
throw Exception(
ErrorCodes::NO_BASE_BACKUP,
"Backup {}: Entry {} is marked to be read from a base backup, but there is no base backup specified",
backup_name, formatSizeAndChecksum(size_and_checksum));
backup_name_for_logging, formatSizeAndChecksum(size_and_checksum));
}
if (!base_backup->fileExists(std::pair(info.base_size, info.base_checksum)))
@ -585,7 +629,7 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
throw Exception(
ErrorCodes::WRONG_BASE_BACKUP,
"Backup {}: Entry {} is marked to be read from a base backup, but doesn't exist there",
backup_name, formatSizeAndChecksum(size_and_checksum));
backup_name_for_logging, formatSizeAndChecksum(size_and_checksum));
}
auto base_entry = base_backup->readFile(std::pair{info.base_size, info.base_checksum});
@ -695,9 +739,12 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
LOG_TRACE(log, "Writing backup for file {} from {}", file_name, from_file_name);
auto adjusted_path = removeLeadingSlash(file_name);
if (coordination->getFileInfo(adjusted_path))
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_ALREADY_EXISTS, "Backup {}: Entry {} already exists", backup_name, quoteString(file_name));
ErrorCodes::BACKUP_ENTRY_ALREADY_EXISTS, "Backup {}: Entry {} already exists", backup_name_for_logging, quoteString(file_name));
}
FileInfo info
{
@ -893,12 +940,12 @@ void BackupImpl::finalizeWriting()
if (!is_internal_backup)
{
LOG_TRACE(log, "Finalizing backup {}", backup_name);
LOG_TRACE(log, "Finalizing backup {}", backup_name_for_logging);
writeBackupMetadata();
closeArchives();
setCompressedSize();
removeLockFile();
LOG_TRACE(log, "Finalized backup {}", backup_name);
LOG_TRACE(log, "Finalized backup {}", backup_name_for_logging);
}
writing_finalized = true;
@ -971,7 +1018,7 @@ void BackupImpl::removeAllFilesAfterFailure()
try
{
LOG_INFO(log, "Removing all files of backup {} after failure", backup_name);
LOG_INFO(log, "Removing all files of backup {} after failure", backup_name_for_logging);
Strings files_to_remove;
if (use_archives)

View File

@ -35,14 +35,14 @@ public:
};
BackupImpl(
const String & backup_name_,
const String & backup_name_for_logging_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupReader> reader_,
const ContextPtr & context_);
BackupImpl(
const String & backup_name_,
const String & backup_name_for_logging_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupWriter> writer_,
@ -53,7 +53,7 @@ public:
~BackupImpl() override;
const String & getName() const override { return backup_name; }
const String & getNameForLogging() const override { return backup_name_for_logging; }
OpenMode getOpenMode() const override { return open_mode; }
time_t getTimestamp() const override { return timestamp; }
UUID getUUID() const override { return *uuid; }
@ -107,7 +107,7 @@ private:
/// Calculates and sets `compressed_size`.
void setCompressedSize();
const String backup_name;
const String backup_name_for_logging;
const ArchiveParams archive_params;
const bool use_archives;
const OpenMode open_mode;

View File

@ -6,6 +6,7 @@
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Interpreters/maskSensitiveInfoInQueryForLogging.h>
namespace DB
@ -92,4 +93,10 @@ BackupInfo BackupInfo::fromAST(const IAST & ast)
}
String BackupInfo::toStringForLogging(const ContextPtr & context) const
{
ASTPtr ast = toAST();
return maskSensitiveInfoInBackupNameForLogging(serializeAST(*ast), ast, context);
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Core/Field.h>
#include <Interpreters/Context_fwd.h>
namespace DB
@ -20,6 +21,8 @@ struct BackupInfo
ASTPtr toAST() const;
static BackupInfo fromAST(const IAST & ast);
String toStringForLogging(const ContextPtr & context) const;
};
}

View File

@ -166,9 +166,10 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
}
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
String backup_name_for_logging = backup_info.toStringForLogging(context);
try
{
addInfo(backup_id, backup_info.toString(), backup_settings.internal, BackupStatus::CREATING_BACKUP);
addInfo(backup_id, backup_name_for_logging, backup_settings.internal, BackupStatus::CREATING_BACKUP);
/// Prepare context to use.
ContextPtr context_in_use = context;
@ -184,13 +185,14 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
if (backup_settings.async)
{
backups_thread_pool.scheduleOrThrowOnError(
[this, backup_query, backup_id, backup_settings, backup_info, backup_coordination, context_in_use, mutable_context]
[this, backup_query, backup_id, backup_name_for_logging, backup_info, backup_settings, backup_coordination, context_in_use, mutable_context]
{
doBackup(
backup_query,
backup_id,
backup_settings,
backup_name_for_logging,
backup_info,
backup_settings,
backup_coordination,
context_in_use,
mutable_context,
@ -202,8 +204,9 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
doBackup(
backup_query,
backup_id,
backup_settings,
backup_name_for_logging,
backup_info,
backup_settings,
backup_coordination,
context_in_use,
mutable_context,
@ -214,7 +217,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
}
catch (...)
{
tryLogCurrentException(log, fmt::format("Failed to start {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
tryLogCurrentException(log, fmt::format("Failed to start {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
/// Something bad happened, the backup has not built.
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
@ -226,8 +229,9 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
void BackupsWorker::doBackup(
const std::shared_ptr<ASTBackupQuery> & backup_query,
const OperationID & backup_id,
BackupSettings backup_settings,
const String & backup_name_for_logging,
const BackupInfo & backup_info,
BackupSettings backup_settings,
std::shared_ptr<IBackupCoordination> backup_coordination,
const ContextPtr & context,
ContextMutablePtr mutable_context,
@ -336,7 +340,7 @@ void BackupsWorker::doBackup(
/// Close the backup.
backup.reset();
LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_info.toString());
LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_name_for_logging);
setStatus(backup_id, BackupStatus::BACKUP_CREATED);
setNumFilesAndSize(backup_id, num_files, uncompressed_size, compressed_size);
}
@ -345,7 +349,7 @@ void BackupsWorker::doBackup(
/// Something bad happened, the backup has not built.
if (called_async)
{
tryLogCurrentException(log, fmt::format("Failed to make {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
tryLogCurrentException(log, fmt::format("Failed to make {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
}
@ -384,7 +388,8 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
try
{
auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
addInfo(restore_id, backup_info.toString(), restore_settings.internal, BackupStatus::RESTORING);
String backup_name_for_logging = backup_info.toStringForLogging(context);
addInfo(restore_id, backup_name_for_logging, restore_settings.internal, BackupStatus::RESTORING);
/// Prepare context to use.
ContextMutablePtr context_in_use = context;
@ -399,12 +404,14 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
if (restore_settings.async)
{
backups_thread_pool.scheduleOrThrowOnError(
[this, restore_query, restore_id, restore_settings, backup_info, restore_coordination, context_in_use] {
[this, restore_query, restore_id, backup_name_for_logging, backup_info, restore_settings, restore_coordination, context_in_use]
{
doRestore(
restore_query,
restore_id,
restore_settings,
backup_name_for_logging,
backup_info,
restore_settings,
restore_coordination,
context_in_use,
/* called_async= */ true);
@ -415,8 +422,9 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
doRestore(
restore_query,
restore_id,
restore_settings,
backup_name_for_logging,
backup_info,
restore_settings,
restore_coordination,
context_in_use,
/* called_async= */ false);
@ -437,8 +445,9 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
void BackupsWorker::doRestore(
const std::shared_ptr<ASTBackupQuery> & restore_query,
const OperationID & restore_id,
RestoreSettings restore_settings,
const String & backup_name_for_logging,
const BackupInfo & backup_info,
RestoreSettings restore_settings,
std::shared_ptr<IRestoreCoordination> restore_coordination,
ContextMutablePtr context,
bool called_async)
@ -541,7 +550,7 @@ void BackupsWorker::doRestore(
restore_coordination->setStage(restore_settings.host_id, Stage::COMPLETED, "");
}
LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_info.toString());
LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging);
setStatus(restore_id, BackupStatus::RESTORED);
}
catch (...)
@ -549,7 +558,7 @@ void BackupsWorker::doRestore(
/// Something bad happened, the backup has not built.
if (called_async)
{
tryLogCurrentException(log, fmt::format("Failed to restore from {} {}", (restore_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
tryLogCurrentException(log, fmt::format("Failed to restore from {} {}", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED);
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
}

View File

@ -76,14 +76,28 @@ public:
private:
OperationID startMakingBackup(const ASTPtr & query, const ContextPtr & context);
void doBackup(const std::shared_ptr<ASTBackupQuery> & backup_query, const OperationID & backup_id, BackupSettings backup_settings,
const BackupInfo & backup_info, std::shared_ptr<IBackupCoordination> backup_coordination, const ContextPtr & context,
ContextMutablePtr mutable_context, bool called_async);
void doBackup(
const std::shared_ptr<ASTBackupQuery> & backup_query,
const OperationID & backup_id,
const String & backup_name_for_logging,
const BackupInfo & backup_info,
BackupSettings backup_settings,
std::shared_ptr<IBackupCoordination> backup_coordination,
const ContextPtr & context,
ContextMutablePtr mutable_context,
bool called_async);
OperationID startRestoring(const ASTPtr & query, ContextMutablePtr context);
void doRestore(const std::shared_ptr<ASTBackupQuery> & restore_query, const OperationID & restore_id, RestoreSettings restore_settings, const BackupInfo & backup_info,
std::shared_ptr<IRestoreCoordination> restore_coordination, ContextMutablePtr context, bool called_async);
void doRestore(
const std::shared_ptr<ASTBackupQuery> & restore_query,
const OperationID & restore_id,
const String & backup_name_for_logging,
const BackupInfo & backup_info,
RestoreSettings restore_settings,
std::shared_ptr<IRestoreCoordination> restore_coordination,
ContextMutablePtr context,
bool called_async);
void addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status);
void setStatus(const OperationID & id, BackupStatus status, bool throw_if_error = true);

View File

@ -19,7 +19,8 @@ public:
virtual ~IBackup() = default;
/// Name of the backup.
virtual const String & getName() const = 0;
//virtual const String & getName() const = 0;
virtual const String & getNameForLogging() const = 0;
enum class OpenMode
{

View File

@ -47,7 +47,7 @@ void registerBackupEngineS3(BackupFactory & factory)
auto creator_fn = []([[maybe_unused]] const BackupFactory::CreateParams & params) -> std::unique_ptr<IBackup>
{
#if USE_AWS_S3
String backup_name = params.backup_info.toString();
String backup_name_for_logging = params.backup_info.toStringForLogging(params.context);
const String & id_arg = params.backup_info.id_arg;
const auto & args = params.backup_info.args;
@ -111,12 +111,12 @@ void registerBackupEngineS3(BackupFactory & factory)
if (params.open_mode == IBackup::OpenMode::READ)
{
auto reader = std::make_shared<BackupReaderS3>(S3::URI{Poco::URI{s3_uri}}, access_key_id, secret_access_key, params.context);
return std::make_unique<BackupImpl>(backup_name, archive_params, params.base_backup_info, reader, params.context);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context);
}
else
{
auto writer = std::make_shared<BackupWriterS3>(S3::URI{Poco::URI{s3_uri}}, access_key_id, secret_access_key, params.context);
return std::make_unique<BackupImpl>(backup_name, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid);
}
#else
throw Exception("S3 support is disabled", ErrorCodes::SUPPORT_IS_DISABLED);

View File

@ -99,7 +99,7 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
{
auto creator_fn = [](const BackupFactory::CreateParams & params) -> std::unique_ptr<IBackup>
{
String backup_name = params.backup_info.toString();
String backup_name_for_logging = params.backup_info.toStringForLogging(params.context);
const String & engine_name = params.backup_info.backup_engine_name;
if (!params.backup_info.id_arg.empty())
@ -172,7 +172,7 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
reader = std::make_shared<BackupReaderFile>(path);
else
reader = std::make_shared<BackupReaderDisk>(disk, path);
return std::make_unique<BackupImpl>(backup_name, archive_params, params.base_backup_info, reader, params.context);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context);
}
else
{
@ -181,7 +181,7 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
writer = std::make_shared<BackupWriterFile>(path);
else
writer = std::make_shared<BackupWriterDisk>(disk, path);
return std::make_unique<BackupImpl>(backup_name, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid);
}
};

View File

@ -62,11 +62,12 @@ std::pair<String, StoragePtr> createTableFromAST(
if (ast_create_query.as_table_function)
{
const auto & factory = TableFunctionFactory::instance();
auto table_function = factory.get(ast_create_query.as_table_function, context);
auto table_function_ast = ast_create_query.as_table_function->ptr();
auto table_function = factory.get(table_function_ast, context);
ColumnsDescription columns;
if (ast_create_query.columns_list && ast_create_query.columns_list->columns)
columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true);
StoragePtr storage = table_function->execute(ast_create_query.as_table_function, context, ast_create_query.getTable(), std::move(columns));
StoragePtr storage = table_function->execute(table_function_ast, context, ast_create_query.getTable(), std::move(columns));
storage->renameInMemory(ast_create_query);
return {ast_create_query.getTable(), storage};
}

View File

@ -87,7 +87,7 @@ restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_
parser,
statement_def.data(),
statement_def.data() + statement_def.size(),
"in file " + filepath + " from backup " + backup->getName(),
"in file " + filepath + " from backup " + backup->getNameForLogging(),
0,
context->getSettingsRef().max_parser_depth);
break;

View File

@ -8,12 +8,14 @@
#include <IO/ReadBufferFromString.h>
#include <Poco/Net/NetException.h>
#include <Common/logger_useful.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ASTQueryWithOnCluster.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTQueryWithTableAndOutput.h>
#include <Databases/DatabaseReplicated.h>
#include <Interpreters/maskSensitiveInfoInQueryForLogging.h>
namespace DB
@ -168,6 +170,13 @@ void DDLTaskBase::parseQueryFromEntry(ContextPtr context)
query = parseQuery(parser_query, begin, end, description, 0, settings.max_parser_depth);
}
void DDLTaskBase::formatRewrittenQuery(ContextPtr context)
{
/// Convert rewritten AST back to string.
query_str = queryToString(*query);
query_for_logging = maskSensitiveInfoInQueryForLogging(query_str, query, context);
}
ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & /*zookeeper*/)
{
auto query_context = Context::createCopy(from_context);
@ -265,6 +274,7 @@ void DDLTask::setClusterInfo(ContextPtr context, Poco::Logger * log)
host_id.readableString(), entry_name, address_in_cluster.readableString(), cluster_name);
}
/// Rewrite AST without ON CLUSTER.
WithoutOnClusterASTRewriteParams params;
params.default_database = address_in_cluster.default_database;
params.host_id = address_in_cluster.toString();
@ -405,6 +415,7 @@ void DatabaseReplicatedTask::parseQueryFromEntry(ContextPtr context)
chassert(!ddl_query->database);
ddl_query->setDatabase(database->getDatabaseName());
}
formatRewrittenQuery(context);
}
ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper)

View File

@ -99,6 +99,9 @@ struct DDLTaskBase
String host_id_str;
ASTPtr query;
String query_str;
String query_for_logging;
bool is_initial_query = false;
bool is_circular_replicated = false;
bool execute_on_leader = false;
@ -114,6 +117,7 @@ struct DDLTaskBase
virtual ~DDLTaskBase() = default;
virtual void parseQueryFromEntry(ContextPtr context);
void formatRewrittenQuery(ContextPtr context);
virtual String getShardID() const = 0;

View File

@ -10,8 +10,6 @@
#include <Parsers/ASTCreateIndexQuery.h>
#include <Parsers/ASTDropIndexQuery.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
@ -207,6 +205,8 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
task->parseQueryFromEntry(context);
/// Stage 3.2: check cluster and find the host in cluster
task->setClusterInfo(context, log);
/// Stage 3.3: output rewritten query back to string
task->formatRewrittenQuery(context);
}
catch (...)
{
@ -431,11 +431,12 @@ DDLTaskBase & DDLWorker::saveTask(DDLTaskPtr && task)
return *current_tasks.back();
}
bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
bool DDLWorker::tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
{
/// Add special comment at the start of query to easily identify DDL-produced queries in query_log
String query_prefix = "/* ddl_entry=" + task.entry_name + " */ ";
String query_to_execute = query_prefix + query;
String query_to_execute = query_prefix + task.query_str;
String query_to_show_in_logs = query_prefix + task.query_for_logging;
ReadBufferFromString istr(query_to_execute);
String dummy_string;
@ -463,7 +464,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const
throw;
task.execution_status = ExecutionStatus::fromCurrentException();
tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
tryLogCurrentException(log, "Query " + query_to_show_in_logs + " wasn't finished successfully");
/// We use return value of tryExecuteQuery(...) in tryExecuteQueryOnLeaderReplica(...) to determine
/// if replica has stopped being leader and we should retry query.
@ -484,7 +485,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const
throw;
task.execution_status = ExecutionStatus::fromCurrentException();
tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
tryLogCurrentException(log, "Query " + query_to_show_in_logs + " wasn't finished successfully");
/// We don't know what exactly happened, but maybe it's Poco::NetException or std::bad_alloc,
/// so we consider unknown exception as retryable error.
@ -492,7 +493,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const
}
task.execution_status = ExecutionStatus(0);
LOG_DEBUG(log, "Executed query: {}", query);
LOG_DEBUG(log, "Executed query: {}", query_to_show_in_logs);
return true;
}
@ -514,7 +515,7 @@ void DDLWorker::updateMaxDDLEntryID(const String & entry_name)
void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
{
LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query);
LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.query_for_logging);
chassert(!task.completely_processed);
/// Setup tracing context on current thread for current DDL
@ -587,8 +588,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
try
{
String rewritten_query = queryToString(task.query);
LOG_DEBUG(log, "Executing query: {}", rewritten_query);
LOG_DEBUG(log, "Executing query: {}", task.query_for_logging);
StoragePtr storage;
if (auto * query_with_table = dynamic_cast<ASTQueryWithTableAndOutput *>(task.query.get()); query_with_table)
@ -605,12 +605,12 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
if (task.execute_on_leader)
{
tryExecuteQueryOnLeaderReplica(task, storage, rewritten_query, task.entry_path, zookeeper, execute_on_leader_lock);
tryExecuteQueryOnLeaderReplica(task, storage, task.entry_path, zookeeper, execute_on_leader_lock);
}
else
{
storage.reset();
tryExecuteQuery(rewritten_query, task, zookeeper);
tryExecuteQuery(task, zookeeper);
}
}
catch (const Coordination::Exception &)
@ -694,7 +694,6 @@ bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr & ast_ddl, const Stora
bool DDLWorker::tryExecuteQueryOnLeaderReplica(
DDLTaskBase & task,
StoragePtr storage,
const String & rewritten_query,
const String & /*node_path*/,
const ZooKeeperPtr & zookeeper,
std::unique_ptr<zkutil::ZooKeeperLock> & execute_on_leader_lock)
@ -793,7 +792,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
/// If the leader will unexpectedly changed this method will return false
/// and on the next iteration new leader will take lock
if (tryExecuteQuery(rewritten_query, task, zookeeper))
if (tryExecuteQuery(task, zookeeper))
{
executed_by_us = true;
break;

View File

@ -101,12 +101,11 @@ protected:
bool tryExecuteQueryOnLeaderReplica(
DDLTaskBase & task,
StoragePtr storage,
const String & rewritten_query,
const String & node_path,
const ZooKeeperPtr & zookeeper,
std::unique_ptr<zkutil::ZooKeeperLock> & execute_on_leader_lock);
bool tryExecuteQuery(const String & query, DDLTaskBase & task, const ZooKeeperPtr & zookeeper);
bool tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeeper);
/// Checks and cleanups queue's nodes
void cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper);

View File

@ -25,13 +25,47 @@ public:
{}
void visit(T & ast)
{
if (ostr)
visitImpl</* with_dump= */ true>(ast);
else
visitImpl</* with_dump= */ false>(ast);
}
private:
Data & data;
size_t visit_depth;
WriteBuffer * ostr;
template <bool with_dump>
void visitImpl(T & ast)
{
checkStackSize();
DumpASTNode dump(*ast, ostr, visit_depth, typeid(Matcher).name());
if constexpr (with_dump)
{
DumpASTNode dump(*ast, ostr, visit_depth, typeid(Matcher).name());
visitImplMain</* with_dump= */ true>(ast);
}
else
{
visitImplMain</* with_dump= */ false>(ast);
}
}
template <bool with_dump>
void visitImplMain(T & ast)
{
if constexpr (!_top_to_bottom)
visitChildren(ast);
visitChildren<with_dump>(ast);
doVisit(ast);
if constexpr (_top_to_bottom)
visitChildren<with_dump>(ast);
}
void doVisit(T & ast)
{
try
{
Matcher::visit(ast, data);
@ -41,16 +75,9 @@ public:
e.addMessage("While processing {}", ast->formatForErrorMessage());
throw;
}
if constexpr (_top_to_bottom)
visitChildren(ast);
}
private:
Data & data;
size_t visit_depth;
WriteBuffer * ostr;
template <bool with_dump>
void visitChildren(T & ast)
{
for (auto & child : ast->children)
@ -62,7 +89,7 @@ private:
need_visit_child = Matcher::needChildVisit(ast, child);
if (need_visit_child)
visit(child);
visitImpl<with_dump>(child);
}
}
};

View File

@ -726,7 +726,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
else if (create.as_table_function)
{
/// Table function without columns list.
auto table_function = TableFunctionFactory::instance().get(create.as_table_function, getContext());
auto table_function_ast = create.as_table_function->ptr();
auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext());
properties.columns = table_function->getActualTableStructure(getContext());
}
else if (create.is_dictionary)
@ -967,7 +968,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
if (as_create.storage)
create.set(create.storage, as_create.storage->ptr());
else if (as_create.as_table_function)
create.as_table_function = as_create.as_table_function->clone();
create.set(create.as_table_function, as_create.as_table_function->ptr());
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set engine, it's a bug.");
@ -1343,12 +1344,12 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
/// NOTE: CREATE query may be rewritten by Storage creator or table function
if (create.as_table_function)
{
const auto & factory = TableFunctionFactory::instance();
auto table_func = factory.get(create.as_table_function, getContext());
auto table_function_ast = create.as_table_function->ptr();
auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext());
/// In case of CREATE AS table_function() query we should use global context
/// in storage creation because there will be no query context on server startup
/// and because storage lifetime is bigger than query context lifetime.
res = table_func->execute(create.as_table_function, getContext(), create.getTable(), properties.columns, /*use_global_context=*/true);
res = table_function->execute(table_function_ast, getContext(), create.getTable(), properties.columns, /*use_global_context=*/true);
res->renameInMemory({create.getDatabase(), create.getTable(), create.uuid});
}
else

View File

@ -34,7 +34,6 @@
#include <Parsers/queryToString.h>
#include <Parsers/formatAST.h>
#include <Parsers/toOneLineQuery.h>
#include <Parsers/wipePasswordFromQuery.h>
#include <Formats/FormatFactory.h>
#include <Storages/StorageInput.h>
@ -56,9 +55,9 @@
#include <Interpreters/SelectQueryOptions.h>
#include <Interpreters/TransactionLog.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/maskSensitiveInfoInQueryForLogging.h>
#include <Common/ProfileEvents.h>
#include <Common/SensitiveDataMasker.h>
#include <IO/CompressionMethod.h>
#include <Processors/Transforms/LimitsCheckingTransform.h>
@ -77,7 +76,6 @@
namespace ProfileEvents
{
extern const Event QueryMaskingRulesMatch;
extern const Event FailedQuery;
extern const Event FailedInsertQuery;
extern const Event FailedSelectQuery;
@ -109,37 +107,6 @@ static void checkASTSizeLimits(const IAST & ast, const Settings & settings)
}
/// Makes a version of a query without sensitive information (e.g. passwords) for logging.
/// The parameter `parsed query` can be nullptr if the query cannot be parsed.
static String prepareQueryForLogging(const String & query, const ASTPtr & parsed_query, ContextPtr context)
{
String res = query;
// Wiping a password or hash from CREATE/ALTER USER query because we don't want it to go to logs.
if (parsed_query && canContainPassword(*parsed_query))
{
ASTPtr ast_for_logging = parsed_query->clone();
wipePasswordFromQuery(ast_for_logging);
res = serializeAST(*ast_for_logging);
}
// Wiping sensitive data before cropping query by log_queries_cut_to_length,
// otherwise something like credit card without last digit can go to log.
if (auto * masker = SensitiveDataMasker::getInstance())
{
auto matches = masker->wipeSensitiveData(res);
if (matches > 0)
{
ProfileEvents::increment(ProfileEvents::QueryMaskingRulesMatch, matches);
}
}
res = res.substr(0, context->getSettingsRef().log_queries_cut_to_length);
return res;
}
/// Log query into text log (not into system table).
static void logQuery(const String & query, ContextPtr context, bool internal, QueryProcessingStage::Enum stage)
{
@ -425,14 +392,14 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// MUST go before any modification (except for prepared statements,
/// since it substitute parameters and without them query does not contain
/// parameters), to keep query as-is in query_log and server log.
query_for_logging = prepareQueryForLogging(query, ast, context);
query_for_logging = maskSensitiveInfoInQueryForLogging(query, ast, context);
}
catch (...)
{
/// Anyway log the query.
if (query.empty())
query.assign(begin, std::min(end - begin, static_cast<ptrdiff_t>(max_query_size)));
query_for_logging = prepareQueryForLogging(query, ast, context);
query_for_logging = maskSensitiveInfoInQueryForLogging(query, ast, context);
logQuery(query_for_logging, context, internal, stage);

View File

@ -0,0 +1,623 @@
#include <Interpreters/maskSensitiveInfoInQueryForLogging.h>
#include <Formats/FormatFactory.h>
#include <Interpreters/Context.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTBackupQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/Access/ASTCreateUserQuery.h>
#include <Parsers/formatAST.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Common/ProfileEvents.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/typeid_cast.h>
namespace ProfileEvents
{
extern const Event QueryMaskingRulesMatch;
}
namespace DB
{
namespace
{
enum class PasswordWipingMode
{
Query,
BackupName,
};
template <bool check_only>
class PasswordWipingVisitor
{
public:
struct Data
{
bool can_contain_password = false;
bool password_was_hidden = false;
bool is_create_table_query = false;
bool is_create_database_query = false;
bool is_create_dictionary_query = false;
ContextPtr context;
PasswordWipingMode mode = PasswordWipingMode::Query;
};
using Visitor = std::conditional_t<
check_only,
ConstInDepthNodeVisitor<PasswordWipingVisitor, /* top_to_bottom= */ true, /* need_child_accept_data= */ true>,
InDepthNodeVisitor<PasswordWipingVisitor, /* top_to_bottom= */ true, /* need_child_accept_data= */ true>>;
static bool needChildVisit(const ASTPtr & /* ast */, const ASTPtr & /* child */, Data & data)
{
if constexpr (check_only)
{
return !data.can_contain_password;
}
else
{
return true;
}
}
static void visit(ASTPtr ast, Data & data)
{
if (auto * create_user_query = ast->as<ASTCreateUserQuery>())
{
visitCreateUserQuery(*create_user_query, data);
}
else if (auto * create_query = ast->as<ASTCreateQuery>())
{
visitCreateQuery(*create_query, data);
}
else if (auto * backup_query = ast->as<ASTBackupQuery>())
{
visitBackupQuery(*backup_query, data);
}
else if (auto * storage = ast->as<ASTStorage>())
{
if (data.is_create_table_query)
visitTableEngine(*storage, data);
else if (data.is_create_database_query)
visitDatabaseEngine(*storage, data);
}
else if (auto * dictionary = ast->as<ASTDictionary>())
{
if (data.is_create_dictionary_query)
visitDictionaryDef(*dictionary, data);
}
else if (auto * function = ast->as<ASTFunction>())
{
if (data.mode == PasswordWipingMode::BackupName)
wipePasswordFromBackupEngineArguments(*function, data);
else
visitFunction(*function, data);
}
}
private:
static void visitCreateUserQuery(ASTCreateUserQuery & query, Data & data)
{
if (!query.auth_data)
return;
auto auth_type = query.auth_data->getType();
if (auth_type == AuthenticationType::NO_PASSWORD || auth_type == AuthenticationType::LDAP
|| auth_type == AuthenticationType::KERBEROS || auth_type == AuthenticationType::SSL_CERTIFICATE)
return; /// No password, nothing to hide.
if constexpr (check_only)
{
data.can_contain_password = true;
return;
}
query.show_password = false;
data.password_was_hidden = true;
}
static void visitCreateQuery(ASTCreateQuery & query, Data & data)
{
if (query.is_dictionary)
data.is_create_dictionary_query = true;
else if (query.table)
data.is_create_table_query = true;
else if (query.database)
data.is_create_database_query = true;
}
static void visitTableEngine(ASTStorage & storage, Data & data)
{
if (!storage.engine)
return;
const String & engine_name = storage.engine->name;
if (engine_name == "ExternalDistributed")
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
wipePasswordFromArgument(*storage.engine, data, 5);
}
else if (engine_name == "MySQL")
{
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
wipePasswordFromArgument(*storage.engine, data, 4);
}
else if (engine_name == "PostgreSQL")
{
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
wipePasswordFromArgument(*storage.engine, data, 4);
}
else if (engine_name == "MaterializedPostgreSQL")
{
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
wipePasswordFromArgument(*storage.engine, data, 4);
}
else if (engine_name == "MongoDB")
{
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
wipePasswordFromArgument(*storage.engine, data, 4);
}
else if (engine_name == "S3" || engine_name == "COSN")
{
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
wipePasswordFromS3TableEngineArguments(*storage.engine, data);
}
}
static void wipePasswordFromS3TableEngineArguments(ASTFunction & engine, Data & data)
{
/// We replace 'aws_secret_access_key' with '[HIDDEN'] for the following signatures:
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
/// But we should check the number of arguments first because we don't need to do that replacements in case of
/// S3('url' [, 'format' [, 'compression']])
size_t num_arguments;
if (!tryGetNumArguments(engine, &num_arguments) || (num_arguments < 4))
return;
wipePasswordFromArgument(engine, data, 2);
}
static void visitDatabaseEngine(ASTStorage & storage, Data & data)
{
if (!storage.engine)
return;
const String & engine_name = storage.engine->name;
if (engine_name == "MySQL" || engine_name == "MaterializeMySQL" || engine_name == "MaterializedMySQL")
{
/// MySQL('host:port', 'database', 'user', 'password')
wipePasswordFromArgument(*storage.engine, data, 3);
}
else if (engine_name == "PostgreSQL" || engine_name == "MaterializedPostgreSQL")
{
/// PostgreSQL('host:port', 'database', 'user', 'password', ...)
wipePasswordFromArgument(*storage.engine, data, 3);
}
}
static void visitFunction(ASTFunction & function, Data & data)
{
if (function.name == "mysql")
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
wipePasswordFromArgument(function, data, 4);
}
else if (function.name == "postgresql")
{
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
wipePasswordFromArgument(function, data, 4);
}
else if (function.name == "mongodb")
{
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
wipePasswordFromArgument(function, data, 4);
}
else if (function.name == "s3" || function.name == "cosn")
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
wipePasswordFromS3FunctionArguments(function, data, /* is_cluster_function= */ false);
}
else if (function.name == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
wipePasswordFromS3FunctionArguments(function, data, /* is_cluster_function= */ true);
}
else if (function.name == "remote" || function.name == "remoteSecure")
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
wipePasswordFromRemoteFunctionArguments(function, data);
}
else if (
function.name == "encrypt" || function.name == "decrypt" || function.name == "aes_encrypt_mysql"
|| function.name == "aes_decrypt_mysql" || function.name == "tryDecrypt")
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
wipePasswordFromEncryptionFunctionArguments(function, data);
}
}
static void wipePasswordFromS3FunctionArguments(ASTFunction & function, Data & data, bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN'] for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
/// But we should check the number of arguments first because we don't need to do any replacements in case of
/// s3('url' [, 'format']) or s3Cluster('cluster_name', 'url' [, 'format'])
size_t num_arguments;
if (!tryGetNumArguments(function, &num_arguments) || (num_arguments < url_arg_idx + 3))
return;
if (num_arguments >= url_arg_idx + 5)
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'structure', ...)
wipePasswordFromArgument(function, data, url_arg_idx + 2);
}
else
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// We need to distinguish that from s3('url', 'format', 'structure' [, 'compression_method']).
/// So we will check whether the argument after 'url' is a format.
String format;
if (!tryGetEvaluatedConstStringFromArgument(function, data, url_arg_idx + 1, &format))
return;
if (FormatFactory::instance().getAllFormats().contains(format))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
/// The argument after 'url' is not a format so we do our replacement:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) -> s3('url', 'aws_access_key_id', '[HIDDEN]', ...)
wipePasswordFromArgument(function, data, url_arg_idx + 2);
}
}
static void wipePasswordFromRemoteFunctionArguments(ASTFunction & function, Data & data)
{
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in case of
/// remote('addresses_expr', db.table)
size_t num_arguments;
if (!tryGetNumArguments(function, &num_arguments) || (num_arguments < 3))
return;
auto & arguments = assert_cast<ASTExpressionList &>(*function.arguments).children;
size_t arg_num = 1;
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
const auto * table_function = arguments[arg_num]->as<ASTFunction>();
if (table_function && TableFunctionFactory::instance().isTableFunctionName(table_function->name))
{
++arg_num;
}
else
{
String database;
if (!tryGetEvaluatedConstDatabaseNameFromArgument(function, data, arg_num, &database))
return;
++arg_num;
auto qualified_name = QualifiedTableName::parseFromString(database);
if (qualified_name.database.empty())
++arg_num; /// skip 'table' argument
}
/// Check if username and password are specified
/// (sharding_key can be of any type so while we're getting string literals they're username & password).
String username, password;
bool username_specified = tryGetStringFromArgument(function, arg_num, &username);
bool password_specified = username_specified && tryGetStringFromArgument(function, arg_num + 1, &password);
if (password_specified)
{
/// Password is specified so we do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
wipePasswordFromArgument(function, data, arg_num + 1);
}
}
static void wipePasswordFromEncryptionFunctionArguments(ASTFunction & function, Data & data)
{
/// We replace all arguments after 'mode' with '[HIDDEN]':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
wipePasswordFromArgument(function, data, 1);
removeArgumentsAfter(function, data, 2);
}
static void visitBackupQuery(ASTBackupQuery & query, Data & data)
{
if (query.backup_name)
{
if (auto * backup_engine = query.backup_name->as<ASTFunction>())
wipePasswordFromBackupEngineArguments(*backup_engine, data);
}
if (query.base_backup_name)
{
if (auto * base_backup_engine = query.base_backup_name->as<ASTFunction>())
wipePasswordFromBackupEngineArguments(*base_backup_engine, data);
}
}
static void wipePasswordFromBackupEngineArguments(ASTFunction & engine, Data & data)
{
if (engine.name == "S3")
{
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
wipePasswordFromArgument(engine, data, 2);
}
}
static void wipePasswordFromArgument(ASTFunction & function, Data & data, size_t arg_idx)
{
if (!function.arguments)
return;
auto * expr_list = function.arguments->as<ASTExpressionList>();
if (!expr_list)
return; /// return because we don't want to validate query here
auto & arguments = expr_list->children;
if (arg_idx >= arguments.size())
return;
if constexpr (check_only)
{
data.can_contain_password = true;
return;
}
arguments[arg_idx] = std::make_shared<ASTLiteral>("[HIDDEN]");
data.password_was_hidden = true;
}
static void removeArgumentsAfter(ASTFunction & function, Data & data, size_t new_num_arguments)
{
if (!function.arguments)
return;
auto * expr_list = function.arguments->as<ASTExpressionList>();
if (!expr_list)
return; /// return because we don't want to validate query here
auto & arguments = expr_list->children;
if (new_num_arguments >= arguments.size())
return;
if constexpr (check_only)
{
data.can_contain_password = true;
return;
}
arguments.resize(new_num_arguments);
data.password_was_hidden = true;
}
static bool tryGetNumArguments(const ASTFunction & function, size_t * num_arguments)
{
if (!function.arguments)
return false;
auto * expr_list = function.arguments->as<ASTExpressionList>();
if (!expr_list)
return false; /// return false because we don't want to validate query here
const auto & arguments = expr_list->children;
*num_arguments = arguments.size();
return true;
}
static bool tryGetStringFromArgument(const ASTFunction & function, size_t arg_idx, String * value)
{
if (!function.arguments)
return false;
const auto * expr_list = function.arguments->as<ASTExpressionList>();
if (!expr_list)
return false; /// return false because we don't want to validate query here
const auto & arguments = expr_list->children;
if (arg_idx >= arguments.size())
return false;
const auto * literal = arguments[arg_idx]->as<ASTLiteral>();
if (!literal || literal->value.getType() != Field::Types::String)
return false;
*value = literal->value.safeGet<String>();
return true;
}
static bool tryGetEvaluatedConstStringFromArgument(const ASTFunction & function, Data & data, size_t arg_idx, String * value)
{
if (!function.arguments)
return false;
const auto * expr_list = function.arguments->as<ASTExpressionList>();
if (!expr_list)
return false; /// return false because we don't want to validate query here
const auto & arguments = expr_list->children;
if (arg_idx >= arguments.size())
return false;
if constexpr (check_only)
{
data.can_contain_password = true;
return false;
}
ASTPtr argument = arguments[arg_idx];
try
{
argument = evaluateConstantExpressionOrIdentifierAsLiteral(argument, data.context);
}
catch (...)
{
return false;
}
const auto & literal = assert_cast<const ASTLiteral &>(*argument);
if (literal.value.getType() != Field::Types::String)
return false;
*value = literal.value.safeGet<String>();
return true;
}
static bool tryGetEvaluatedConstDatabaseNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx, String * value)
{
if (!function.arguments)
return false;
const auto * expr_list = function.arguments->as<ASTExpressionList>();
if (!expr_list)
return false; /// return false because we don't want to validate query here
const auto & arguments = expr_list->children;
if (arg_idx >= arguments.size())
return false;
if constexpr (check_only)
{
data.can_contain_password = true;
return false;
}
ASTPtr argument = arguments[arg_idx];
try
{
argument = evaluateConstantExpressionForDatabaseName(argument, data.context);
}
catch (...)
{
return false;
}
const auto & literal = assert_cast<const ASTLiteral &>(*argument);
if (literal.value.getType() != Field::Types::String)
return false;
*value = literal.value.safeGet<String>();
return true;
}
static void visitDictionaryDef(ASTDictionary & dictionary, Data & data)
{
if (!dictionary.source || !dictionary.source->elements)
return;
const auto * expr_list = dictionary.source->elements->as<ASTExpressionList>();
if (!expr_list)
return; /// return because we don't want to validate query here
const auto & elements = expr_list->children;
/// We replace password in the dictionary's definition:
/// SOURCE(CLICKHOUSE(host 'example01-01-1' port 9000 user 'default' password 'qwe123' db 'default' table 'ids')) ->
/// SOURCE(CLICKHOUSE(host 'example01-01-1' port 9000 user 'default' password '[HIDDEN]' db 'default' table 'ids'))
for (const auto & element : elements)
{
auto * pair = element->as<ASTPair>();
if (!pair)
continue; /// just skip because we don't want to validate query here
if (pair->first == "password")
{
if constexpr (check_only)
{
data.can_contain_password = true;
return;
}
pair->set(pair->second, std::make_shared<ASTLiteral>("[HIDDEN]"));
data.password_was_hidden = true;
}
}
}
};
/// Checks the type of a specified AST and returns true if it can contain a password.
bool canContainPassword(const IAST & ast, PasswordWipingMode mode)
{
using WipingVisitor = PasswordWipingVisitor</*check_only= */ true>;
WipingVisitor::Data data;
data.mode = mode;
WipingVisitor::Visitor visitor{data};
ASTPtr ast_ptr = std::const_pointer_cast<IAST>(ast.shared_from_this());
visitor.visit(ast_ptr);
return data.can_contain_password;
}
/// Removes a password or its hash from a query if it's specified there or replaces it with some placeholder.
/// This function is used to prepare a query for storing in logs (we don't want logs to contain sensitive information).
bool wipePasswordFromQuery(ASTPtr ast, PasswordWipingMode mode, const ContextPtr & context)
{
using WipingVisitor = PasswordWipingVisitor</*check_only= */ false>;
WipingVisitor::Data data;
data.context = context;
data.mode = mode;
WipingVisitor::Visitor visitor{data};
visitor.visit(ast);
return data.password_was_hidden;
}
/// Common utility for masking sensitive information.
String maskSensitiveInfoImpl(const String & query, const ASTPtr & parsed_query, PasswordWipingMode mode, const ContextPtr & context)
{
String res = query;
// Wiping a password or hash from the query because we don't want it to go to logs.
if (parsed_query && canContainPassword(*parsed_query, mode))
{
ASTPtr ast_without_password = parsed_query->clone();
if (wipePasswordFromQuery(ast_without_password, mode, context))
res = serializeAST(*ast_without_password);
}
// Wiping sensitive data before cropping query by log_queries_cut_to_length,
// otherwise something like credit card without last digit can go to log.
if (auto * masker = SensitiveDataMasker::getInstance())
{
auto matches = masker->wipeSensitiveData(res);
if (matches > 0)
{
ProfileEvents::increment(ProfileEvents::QueryMaskingRulesMatch, matches);
}
}
res = res.substr(0, context->getSettingsRef().log_queries_cut_to_length);
return res;
}
}
String maskSensitiveInfoInQueryForLogging(const String & query, const ASTPtr & parsed_query, const ContextPtr & context)
{
return maskSensitiveInfoImpl(query, parsed_query, PasswordWipingMode::Query, context);
}
String maskSensitiveInfoInBackupNameForLogging(const String & backup_name, const ASTPtr & ast, const ContextPtr & context)
{
return maskSensitiveInfoImpl(backup_name, ast, PasswordWipingMode::BackupName, context);
}
}

View File

@ -0,0 +1,19 @@
#pragma once
#include <Parsers/IAST_fwd.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
/// Makes a version of a query without sensitive information (e.g. passwords) for logging.
/// The parameter `parsed query` is allowed to be nullptr if the query cannot be parsed.
/// Does not validate AST, works a best-effort way.
String maskSensitiveInfoInQueryForLogging(const String & query, const ASTPtr & parsed_query, const ContextPtr & context);
/// Makes a version of backup name without sensitive information (e.g. passwords) for logging.
/// Does not validate AST, works a best-effort way.
String maskSensitiveInfoInBackupNameForLogging(const String & backup_name, const ASTPtr & ast, const ContextPtr & context);
}

View File

@ -245,7 +245,21 @@ String ASTBackupQuery::getID(char) const
ASTPtr ASTBackupQuery::clone() const
{
return std::make_shared<ASTBackupQuery>(*this);
auto res = std::make_shared<ASTBackupQuery>(*this);
if (backup_name)
res->backup_name = backup_name->clone();
if (base_backup_name)
res->base_backup_name = base_backup_name->clone();
if (cluster_host_ids)
res->cluster_host_ids = cluster_host_ids->clone();
if (settings)
res->settings = settings->clone();
return res;
}

View File

@ -210,6 +210,8 @@ ASTPtr ASTCreateQuery::clone() const
res->set(res->dictionary, dictionary->clone());
}
if (as_table_function)
res->set(res->as_table_function, as_table_function->clone());
if (comment)
res->set(res->comment, comment->clone());

View File

@ -83,7 +83,7 @@ public:
ASTPtr lateness_function;
String as_database;
String as_table;
ASTPtr as_table_function;
IAST * as_table_function = nullptr;
ASTSelectWithUnionQuery * select = nullptr;
IAST * comment = nullptr;

View File

@ -141,7 +141,12 @@ String ASTCreateQuotaQuery::getID(char) const
ASTPtr ASTCreateQuotaQuery::clone() const
{
return std::make_shared<ASTCreateQuotaQuery>(*this);
auto res = std::make_shared<ASTCreateQuotaQuery>(*this);
if (roles)
res->roles = std::static_pointer_cast<ASTRolesOrUsersSet>(roles->clone());
return res;
}

View File

@ -42,7 +42,12 @@ String ASTCreateRoleQuery::getID(char) const
ASTPtr ASTCreateRoleQuery::clone() const
{
return std::make_shared<ASTCreateRoleQuery>(*this);
auto res = std::make_shared<ASTCreateRoleQuery>(*this);
if (settings)
res->settings = std::static_pointer_cast<ASTSettingsProfileElements>(settings->clone());
return res;
}

View File

@ -124,7 +124,25 @@ String ASTCreateRowPolicyQuery::getID(char) const
ASTPtr ASTCreateRowPolicyQuery::clone() const
{
return std::make_shared<ASTCreateRowPolicyQuery>(*this);
auto res = std::make_shared<ASTCreateRowPolicyQuery>(*this);
if (names)
res->names = std::static_pointer_cast<ASTRowPolicyNames>(names->clone());
if (roles)
res->roles = std::static_pointer_cast<ASTRolesOrUsersSet>(roles->clone());
/// `res->filters` is already initialized by the copy constructor of ASTCreateRowPolicyQuery (see the first line of this function).
/// But the copy constructor just copied the pointers inside `filters` instead of cloning.
/// We need to make a deep copy and not a shallow copy, so we have to manually clone each pointer in `res->filters`.
chassert(res->filters.size() == filters.size());
for (auto & [_, res_filter] : res->filters)
{
if (res_filter)
res_filter = res_filter->clone();
}
return res;
}

View File

@ -49,7 +49,15 @@ String ASTCreateSettingsProfileQuery::getID(char) const
ASTPtr ASTCreateSettingsProfileQuery::clone() const
{
return std::make_shared<ASTCreateSettingsProfileQuery>(*this);
auto res = std::make_shared<ASTCreateSettingsProfileQuery>(*this);
if (to_roles)
res->to_roles = std::static_pointer_cast<ASTRolesOrUsersSet>(to_roles->clone());
if (settings)
res->settings = std::static_pointer_cast<ASTSettingsProfileElements>(settings->clone());
return res;
}

View File

@ -275,7 +275,24 @@ String ASTCreateUserQuery::getID(char) const
ASTPtr ASTCreateUserQuery::clone() const
{
return std::make_shared<ASTCreateUserQuery>(*this);
auto res = std::make_shared<ASTCreateUserQuery>(*this);
if (names)
res->names = std::static_pointer_cast<ASTUserNamesWithHost>(names->clone());
if (default_roles)
res->default_roles = std::static_pointer_cast<ASTRolesOrUsersSet>(default_roles->clone());
if (default_database)
res->default_database = std::static_pointer_cast<ASTDatabaseOrNone>(default_database->clone());
if (grantees)
res->grantees = std::static_pointer_cast<ASTRolesOrUsersSet>(grantees->clone());
if (settings)
res->settings = std::static_pointer_cast<ASTSettingsProfileElements>(settings->clone());
return res;
}

View File

@ -29,7 +29,12 @@ String ASTDropAccessEntityQuery::getID(char) const
ASTPtr ASTDropAccessEntityQuery::clone() const
{
return std::make_shared<ASTDropAccessEntityQuery>(*this);
auto res = std::make_shared<ASTDropAccessEntityQuery>(*this);
if (row_policy_names)
res->row_policy_names = std::static_pointer_cast<ASTRowPolicyNames>(row_policy_names->clone());
return res;
}

View File

@ -96,7 +96,15 @@ String ASTGrantQuery::getID(char) const
ASTPtr ASTGrantQuery::clone() const
{
return std::make_shared<ASTGrantQuery>(*this);
auto res = std::make_shared<ASTGrantQuery>(*this);
if (roles)
res->roles = std::static_pointer_cast<ASTRolesOrUsersSet>(roles->clone());
if (grantees)
res->grantees = std::static_pointer_cast<ASTRolesOrUsersSet>(grantees->clone());
return res;
}

View File

@ -14,7 +14,15 @@ String ASTSetRoleQuery::getID(char) const
ASTPtr ASTSetRoleQuery::clone() const
{
return std::make_shared<ASTSetRoleQuery>(*this);
auto res = std::make_shared<ASTSetRoleQuery>(*this);
if (roles)
res->roles = std::static_pointer_cast<ASTRolesOrUsersSet>(roles->clone());
if (to_users)
res->to_users = std::static_pointer_cast<ASTRolesOrUsersSet>(to_users->clone());
return res;
}

View File

@ -38,7 +38,12 @@ String ASTShowCreateAccessEntityQuery::getID(char) const
ASTPtr ASTShowCreateAccessEntityQuery::clone() const
{
return std::make_shared<ASTShowCreateAccessEntityQuery>(*this);
auto res = std::make_shared<ASTShowCreateAccessEntityQuery>(*this);
if (row_policy_names)
res->row_policy_names = std::static_pointer_cast<ASTRowPolicyNames>(row_policy_names->clone());
return res;
}

View File

@ -14,7 +14,12 @@ String ASTShowGrantsQuery::getID(char) const
ASTPtr ASTShowGrantsQuery::clone() const
{
return std::make_shared<ASTShowGrantsQuery>(*this);
auto res = std::make_shared<ASTShowGrantsQuery>(*this);
if (for_roles)
res->for_roles = std::static_pointer_cast<ASTRolesOrUsersSet>(for_roles->clone());
return res;
}

View File

@ -640,9 +640,6 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
auto query = std::make_shared<ASTCreateQuery>();
node = query;
if (as_table_function)
query->as_table_function = as_table_function;
query->attach = attach;
query->replace_table = replace;
query->create_or_replace = or_replace;
@ -661,6 +658,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
query->set(query->columns_list, columns_list);
query->set(query->storage, storage);
query->set(query->as_table_function, as_table_function);
if (comment)
query->set(query->comment, comment);

View File

@ -1,22 +0,0 @@
#include <Parsers/Access/ASTCreateUserQuery.h>
#include <Parsers/wipePasswordFromQuery.h>
#include <Common/typeid_cast.h>
namespace DB
{
bool canContainPassword(const IAST & ast)
{
return ast.as<ASTCreateUserQuery>();
}
void wipePasswordFromQuery(ASTPtr ast)
{
if (auto * create_query = ast->as<ASTCreateUserQuery>())
{
create_query->show_password = false;
}
}
}

View File

@ -1,18 +0,0 @@
#pragma once
#include <Parsers/IAST_fwd.h>
namespace DB
{
/// Checks the type of a specified AST and returns true if it can contain a password.
bool canContainPassword(const IAST & ast);
/// Removes a password or its hash from a query if it's specified there or replaces it with some placeholder.
/// This function is used to prepare a query for storing in logs (we don't want logs to contain sensitive information).
/// The function changes only following types of queries:
/// CREATE/ALTER USER.
void wipePasswordFromQuery(ASTPtr ast);
}

View File

@ -1111,6 +1111,14 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt
}
else
{
/// Supported signatures:
///
/// S3('url')
/// S3('url', 'format')
/// S3('url', 'format', 'compression')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (engine_args.empty() || engine_args.size() > 5)
throw Exception(
"Storage S3 requires 1 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].",

View File

@ -94,6 +94,30 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr
}
else
{
/// Supported signatures:
///
/// remote('addresses_expr', db.table)
/// remote('addresses_expr', 'db', 'table')
/// remote('addresses_expr', db.table, 'user')
/// remote('addresses_expr', 'db', 'table', 'user')
/// remote('addresses_expr', db.table, 'user', 'password')
/// remote('addresses_expr', 'db', 'table', 'user', 'password')
/// remote('addresses_expr', db.table, sharding_key)
/// remote('addresses_expr', 'db', 'table', sharding_key)
/// remote('addresses_expr', db.table, 'user', sharding_key)
/// remote('addresses_expr', 'db', 'table', 'user', sharding_key)
/// remote('addresses_expr', db.table, 'user', 'password', sharding_key)
/// remote('addresses_expr', 'db', 'table', 'user', 'password', sharding_key)
///
/// remoteSecure() - same as remote()
///
/// cluster('cluster_name', db.table)
/// cluster('cluster_name', 'db', 'table')
/// cluster('cluster_name', db.table, sharding_key)
/// cluster('cluster_name', 'db', 'table', sharding_key)
///
/// clusterAllReplicas() - same as cluster()
if (args.size() < 2 || args.size() > max_args)
throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
@ -318,7 +342,6 @@ TableFunctionRemote::TableFunctionRemote(const std::string & name_, bool secure_
is_cluster_function ? " [, sharding_key]" : " [, username[, password], sharding_key]");
}
void registerTableFunctionRemote(TableFunctionFactory & factory)
{
factory.registerFunction("remote", [] () -> TableFunctionPtr { return std::make_shared<TableFunctionRemote>("remote"); });

View File

@ -1,75 +0,0 @@
import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance("node")
@pytest.fixture(scope="module", autouse=True)
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def check_logs(must_contain, must_not_contain):
node.query("SYSTEM FLUSH LOGS")
for str in must_contain:
assert node.contains_in_log(str)
assert (
int(
node.query(
f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{str}%'"
).strip()
)
>= 1
)
for str in must_not_contain:
assert not node.contains_in_log(str)
assert (
int(
node.query(
f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{str}%'"
).strip()
)
== 0
)
# Passwords in CREATE/ALTER queries must be hidden in logs.
def test_create_alter_user():
node.query("CREATE USER u1 IDENTIFIED BY 'qwe123' SETTINGS custom_a = 'a'")
node.query("ALTER USER u1 IDENTIFIED BY '123qwe' SETTINGS custom_b = 'b'")
node.query(
"CREATE USER u2 IDENTIFIED WITH plaintext_password BY 'plainpasswd' SETTINGS custom_c = 'c'"
)
assert (
node.query("SHOW CREATE USER u1")
== "CREATE USER u1 IDENTIFIED WITH sha256_password SETTINGS custom_b = \\'b\\'\n"
)
assert (
node.query("SHOW CREATE USER u2")
== "CREATE USER u2 IDENTIFIED WITH plaintext_password SETTINGS custom_c = \\'c\\'\n"
)
check_logs(
must_contain=[
"CREATE USER u1 IDENTIFIED WITH sha256_password",
"ALTER USER u1 IDENTIFIED WITH sha256_password",
"CREATE USER u2 IDENTIFIED WITH plaintext_password",
],
must_not_contain=[
"qwe123",
"123qwe",
"plainpasswd",
"IDENTIFIED WITH sha256_password BY",
"IDENTIFIED WITH sha256_hash BY",
"IDENTIFIED WITH plaintext_password BY",
],
)

View File

@ -0,0 +1,340 @@
import pytest
import random, string
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance("node", with_zookeeper=True)
@pytest.fixture(scope="module", autouse=True)
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def check_logs(must_contain=[], must_not_contain=[]):
node.query("SYSTEM FLUSH LOGS")
for str in must_contain:
escaped_str = str.replace("`", "\\`").replace("[", "\\[").replace("]", "\\]")
assert node.contains_in_log(escaped_str)
for str in must_not_contain:
escaped_str = str.replace("`", "\\`").replace("[", "\\[").replace("]", "\\]")
assert not node.contains_in_log(escaped_str)
for str in must_contain:
escaped_str = str.replace("'", "\\'")
assert system_query_log_contains_search_pattern(escaped_str)
for str in must_not_contain:
escaped_str = str.replace("'", "\\'")
assert not system_query_log_contains_search_pattern(escaped_str)
# Returns true if "system.query_log" has a query matching a specified pattern.
def system_query_log_contains_search_pattern(search_pattern):
return (
int(
node.query(
f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{search_pattern}%'"
).strip()
)
>= 1
)
# Generates a random string.
def new_password(len=16):
return "".join(
random.choice(string.ascii_uppercase + string.digits) for _ in range(len)
)
# Passwords in CREATE/ALTER queries must be hidden in logs.
def test_create_alter_user():
password = new_password()
node.query(f"CREATE USER u1 IDENTIFIED BY '{password}' SETTINGS custom_a = 'a'")
node.query(
f"ALTER USER u1 IDENTIFIED BY '{password}{password}' SETTINGS custom_b = 'b'"
)
node.query(
f"CREATE USER u2 IDENTIFIED WITH plaintext_password BY '{password}' SETTINGS custom_c = 'c'"
)
assert (
node.query("SHOW CREATE USER u1")
== "CREATE USER u1 IDENTIFIED WITH sha256_password SETTINGS custom_b = \\'b\\'\n"
)
assert (
node.query("SHOW CREATE USER u2")
== "CREATE USER u2 IDENTIFIED WITH plaintext_password SETTINGS custom_c = \\'c\\'\n"
)
check_logs(
must_contain=[
"CREATE USER u1 IDENTIFIED WITH sha256_password",
"ALTER USER u1 IDENTIFIED WITH sha256_password",
"CREATE USER u2 IDENTIFIED WITH plaintext_password",
],
must_not_contain=[
password,
"IDENTIFIED WITH sha256_password BY",
"IDENTIFIED WITH sha256_hash BY",
"IDENTIFIED WITH plaintext_password BY",
],
)
node.query("DROP USER u1, u2")
def test_create_table():
password = new_password()
table_engines = [
f"MySQL('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')",
f"PostgreSQL('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '{password}')",
f"MongoDB('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '{password}')",
f"S3('http://minio1:9001/root/data/test1.csv')",
f"S3('http://minio1:9001/root/data/test2.csv', 'CSV')",
f"S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')",
f"S3('http://minio1:9001/root/data/test4.csv', 'minio', '{password}', 'CSV')",
f"S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '{password}', 'CSV', 'gzip')",
]
for i, table_engine in enumerate(table_engines):
node.query(f"CREATE TABLE table{i} (x int) ENGINE = {table_engine}")
check_logs(
must_contain=[
"CREATE TABLE table0 (`x` int) ENGINE = MySQL('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')",
"CREATE TABLE table1 (`x` int) ENGINE = PostgreSQL('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '[HIDDEN]')",
"CREATE TABLE table2 (`x` int) ENGINE = MongoDB('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '[HIDDEN]')",
"CREATE TABLE table3 (x int) ENGINE = S3('http://minio1:9001/root/data/test1.csv')",
"CREATE TABLE table4 (x int) ENGINE = S3('http://minio1:9001/root/data/test2.csv', 'CSV')",
"CREATE TABLE table5 (x int) ENGINE = S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')",
"CREATE TABLE table6 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test4.csv', 'minio', '[HIDDEN]', 'CSV')",
"CREATE TABLE table7 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '[HIDDEN]', 'CSV', 'gzip')",
],
must_not_contain=[password],
)
for i in range(0, len(table_engines)):
node.query(f"DROP TABLE table{i}")
def test_create_database():
password = new_password()
database_engines = [
f"MySQL('localhost:3306', 'mysql_db', 'mysql_user', '{password}') SETTINGS connect_timeout=1, connection_max_tries=1",
# f"PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '{password}')",
]
for i, database_engine in enumerate(database_engines):
# query_and_get_answer_with_error() is used here because we don't want to stop on error "Cannot connect to MySQL server".
# We test logging here and not actual work with MySQL server.
node.query_and_get_answer_with_error(
f"CREATE DATABASE database{i} ENGINE = {database_engine}"
)
check_logs(
must_contain=[
"CREATE DATABASE database0 ENGINE = MySQL('localhost:3306', 'mysql_db', 'mysql_user', '[HIDDEN]')",
# "CREATE DATABASE database1 ENGINE = PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '[HIDDEN]')",
],
must_not_contain=[password],
)
for i in range(0, len(database_engines)):
node.query(f"DROP DATABASE IF EXISTS database{i}")
def test_table_functions():
password = new_password()
table_functions = [
f"mysql('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')",
f"postgresql('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '{password}')",
f"mongodb('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '{password}', 'x int')",
f"s3('http://minio1:9001/root/data/test1.csv')",
f"s3('http://minio1:9001/root/data/test2.csv', 'CSV')",
f"s3('http://minio1:9001/root/data/test3.csv', 'minio', '{password}')",
f"s3('http://minio1:9001/root/data/test4.csv', 'CSV', 'x int')",
f"s3('http://minio1:9001/root/data/test5.csv.gz', 'CSV', 'x int', 'gzip')",
f"s3('http://minio1:9001/root/data/test6.csv', 'minio', '{password}', 'CSV')",
f"s3('http://minio1:9001/root/data/test7.csv', 'minio', '{password}', 'CSV', 'x int')",
f"s3('http://minio1:9001/root/data/test8.csv.gz', 'minio', '{password}', 'CSV', 'x int', 'gzip')",
f"s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test1.csv', 'minio', '{password}')",
f"s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test2.csv', 'CSV', 'x int')",
f"s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test3.csv', 'minio', '{password}', 'CSV')",
f"remote('127.{{2..11}}', default.remote_table)",
f"remote('127.{{2..11}}', default.remote_table, rand())",
f"remote('127.{{2..11}}', default.remote_table, 'remote_user')",
f"remote('127.{{2..11}}', default.remote_table, 'remote_user', '{password}')",
f"remote('127.{{2..11}}', default.remote_table, 'remote_user', rand())",
f"remote('127.{{2..11}}', default.remote_table, 'remote_user', '{password}', rand())",
f"remote('127.{{2..11}}', 'default.remote_table', 'remote_user', '{password}', rand())",
f"remote('127.{{2..11}}', 'default', 'remote_table', 'remote_user', '{password}', rand())",
f"remote('127.{{2..11}}', numbers(10), 'remote_user', '{password}', rand())",
f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', '{password}')",
f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', rand())",
]
for i, table_function in enumerate(table_functions):
node.query(f"CREATE TABLE tablefunc{i} (x int) AS {table_function}")
check_logs(
must_contain=[
"CREATE TABLE tablefunc0 (`x` int) AS mysql('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')",
"CREATE TABLE tablefunc1 (`x` int) AS postgresql('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '[HIDDEN]')",
"CREATE TABLE tablefunc2 (`x` int) AS mongodb('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '[HIDDEN]', 'x int')",
"CREATE TABLE tablefunc3 (x int) AS s3('http://minio1:9001/root/data/test1.csv')",
"CREATE TABLE tablefunc4 (x int) AS s3('http://minio1:9001/root/data/test2.csv', 'CSV')",
"CREATE TABLE tablefunc5 (`x` int) AS s3('http://minio1:9001/root/data/test3.csv', 'minio', '[HIDDEN]')",
"CREATE TABLE tablefunc6 (x int) AS s3('http://minio1:9001/root/data/test4.csv', 'CSV', 'x int')",
"CREATE TABLE tablefunc7 (x int) AS s3('http://minio1:9001/root/data/test5.csv.gz', 'CSV', 'x int', 'gzip')",
"CREATE TABLE tablefunc8 (`x` int) AS s3('http://minio1:9001/root/data/test6.csv', 'minio', '[HIDDEN]', 'CSV')",
"CREATE TABLE tablefunc9 (`x` int) AS s3('http://minio1:9001/root/data/test7.csv', 'minio', '[HIDDEN]', 'CSV', 'x int')",
"CREATE TABLE tablefunc10 (`x` int) AS s3('http://minio1:9001/root/data/test8.csv.gz', 'minio', '[HIDDEN]', 'CSV', 'x int', 'gzip')",
"CREATE TABLE tablefunc11 (`x` int) AS s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test1.csv', 'minio', '[HIDDEN]')",
"CREATE TABLE tablefunc12 (x int) AS s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test2.csv', 'CSV', 'x int')",
"CREATE TABLE tablefunc13 (`x` int) AS s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test3.csv', 'minio', '[HIDDEN]', 'CSV')",
"CREATE TABLE tablefunc14 (x int) AS remote('127.{2..11}', default.remote_table)",
"CREATE TABLE tablefunc15 (x int) AS remote('127.{2..11}', default.remote_table, rand())",
"CREATE TABLE tablefunc16 (x int) AS remote('127.{2..11}', default.remote_table, 'remote_user')",
"CREATE TABLE tablefunc17 (`x` int) AS remote('127.{2..11}', default.remote_table, 'remote_user', '[HIDDEN]')",
"CREATE TABLE tablefunc18 (x int) AS remote('127.{2..11}', default.remote_table, 'remote_user', rand())",
"CREATE TABLE tablefunc19 (`x` int) AS remote('127.{2..11}', default.remote_table, 'remote_user', '[HIDDEN]', rand())",
"CREATE TABLE tablefunc20 (`x` int) AS remote('127.{2..11}', 'default.remote_table', 'remote_user', '[HIDDEN]', rand())",
"CREATE TABLE tablefunc21 (`x` int) AS remote('127.{2..11}', 'default', 'remote_table', 'remote_user', '[HIDDEN]', rand())",
"CREATE TABLE tablefunc22 (`x` int) AS remote('127.{2..11}', numbers(10), 'remote_user', '[HIDDEN]', rand())",
"CREATE TABLE tablefunc23 (`x` int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', '[HIDDEN]')",
"CREATE TABLE tablefunc24 (x int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', rand())",
],
must_not_contain=[password],
)
for i in range(0, len(table_functions)):
node.query(f"DROP TABLE tablefunc{i}")
def test_encryption_functions():
plaintext = new_password()
cipher = new_password()
key = new_password(32)
iv8 = new_password(8)
iv16 = new_password(16)
add = new_password()
encryption_functions = [
f"encrypt('aes-256-ofb', '{plaintext}', '{key}')",
f"encrypt('aes-256-ofb', '{plaintext}', '{key}', '{iv16}')",
f"encrypt('aes-256-gcm', '{plaintext}', '{key}', '{iv8}')",
f"encrypt('aes-256-gcm', '{plaintext}', '{key}', '{iv8}', '{add}')",
f"decrypt('aes-256-ofb', '{cipher}', '{key}', '{iv16}')",
f"aes_encrypt_mysql('aes-256-ofb', '{plaintext}', '{key}', '{iv16}')",
f"aes_decrypt_mysql('aes-256-ofb', '{cipher}', '{key}', '{iv16}')",
f"tryDecrypt('aes-256-ofb', '{cipher}', '{key}', '{iv16}')",
]
for encryption_function in encryption_functions:
node.query(f"SELECT {encryption_function}")
check_logs(
must_contain=[
"SELECT encrypt('aes-256-ofb', '[HIDDEN]')",
"SELECT encrypt('aes-256-gcm', '[HIDDEN]')",
"SELECT decrypt('aes-256-ofb', '[HIDDEN]')",
"SELECT aes_encrypt_mysql('aes-256-ofb', '[HIDDEN]')",
"SELECT aes_decrypt_mysql('aes-256-ofb', '[HIDDEN]')",
"SELECT tryDecrypt('aes-256-ofb', '[HIDDEN]')",
],
must_not_contain=[plaintext, cipher, key, iv8, iv16, add],
)
def test_create_dictionary():
password = new_password()
node.query(
f"CREATE DICTIONARY dict1 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n "
f"SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'user1' TABLE 'test' PASSWORD '{password}' DB 'default')) "
f"LIFETIME(MIN 0 MAX 10) LAYOUT(FLAT())"
)
check_logs(
must_contain=[
"CREATE DICTIONARY dict1 (`n` int DEFAULT 0, `m` int DEFAULT 1) PRIMARY KEY n "
"SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'user1' TABLE 'test' PASSWORD '[HIDDEN]' DB 'default')) "
"LIFETIME(MIN 0 MAX 10) LAYOUT(FLAT())"
],
must_not_contain=[password],
)
node.query("DROP DICTIONARY dict1")
def test_backup_to_s3():
node.query("CREATE TABLE temptbl (x int) ENGINE=Log")
password = new_password()
queries = [
f"BACKUP TABLE temptbl TO S3('http://minio1:9001/root/data/backups/backup1', 'minio', '{password}')",
f"RESTORE TABLE temptbl AS temptbl2 FROM S3('http://minio1:9001/root/data/backups/backup1', 'minio', '{password}')",
]
for query in queries:
# query_and_get_answer_with_error() is used here because we don't want to stop on error "Cannot connect to AWS".
# We test logging here and not actual work with AWS server.
node.query_and_get_answer_with_error(query)
check_logs(
must_contain=[
"BACKUP TABLE temptbl TO S3('http://minio1:9001/root/data/backups/backup1', 'minio', '[HIDDEN]')",
"RESTORE TABLE temptbl AS temptbl2 FROM S3('http://minio1:9001/root/data/backups/backup1', 'minio', '[HIDDEN]')",
],
must_not_contain=[password],
)
node.query("DROP TABLE IF EXISTS temptbl")
node.query("DROP TABLE IF EXISTS temptbl2")
def test_on_cluster():
password = new_password()
node.query(
f"CREATE TABLE table_oncl ON CLUSTER 'test_shard_localhost' (x int) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')"
)
check_logs(
must_contain=[
"CREATE TABLE table_oncl ON CLUSTER test_shard_localhost (`x` int) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')",
],
must_not_contain=[password],
)
# Check logs of DDLWorker during executing of this query.
assert node.contains_in_log(
"DDLWorker: Processing task .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')"
)
assert node.contains_in_log(
"DDLWorker: Executing query: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')"
)
assert node.contains_in_log(
"executeQuery: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')"
)
assert node.contains_in_log(
"DDLWorker: Executed query: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')"
)
assert system_query_log_contains_search_pattern(
"%CREATE TABLE default.table_oncl UUID \\'%\\' (`x` Int32) ENGINE = MySQL(\\'mysql57:3307\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')"
)
node.query(f"DROP TABLE table_oncl")