This commit is contained in:
Alexey Katsman 2024-09-16 21:26:53 +08:00 committed by GitHub
commit 9e7fb84800
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 200 additions and 25 deletions

View File

@ -7,8 +7,10 @@
#include <Analyzer/IdentifierNode.h>
#include <Analyzer/ListNode.h>
#include <Common/KnownObjectNames.h>
#include <Common/re2.h>
#include <Core/QualifiedTableName.h>
#include <azure/storage/common/storage_credential.hpp>
#include <boost/algorithm/string/predicate.hpp>
@ -89,6 +91,16 @@ private:
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if (name == "azureBlobStorage")
{
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (name == "azureBlobStorageCluster")
{
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((name == "remote") || (name == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
@ -177,6 +189,86 @@ private:
markSecretArgument(url_arg_idx + 2);
}
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
{
/// azureBlobStorageCluster('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
if (maskAzureConnectionString(-1, true, 1))
return;
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 1);
return;
}
else if (is_cluster_function && isNamedCollectionName(1))
{
if (maskAzureConnectionString(-1, true, 2))
return;
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 2);
return;
}
if (maskAzureConnectionString(url_arg_idx))
return;
/// We should check other arguments first because we don't need to do any replacement in case of
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...])
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...])
size_t count = arguments.getNodes().size();
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
{
String fourth_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &fourth_arg))
{
if (fourth_arg == "auto" || KnownFormatNames::instance().exists(fourth_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
if (url_arg_idx + 4 < count)
markSecretArgument(url_arg_idx + 4);
}
bool maskAzureConnectionString(ssize_t url_arg_idx, bool argument_is_named = false, size_t start = 0)
{
String * url_arg = nullptr;
if (argument_is_named)
{
url_arg_idx = findNamedArgument(url_arg, "connection_string", start);
if (url_arg_idx == -1 || !url_arg)
url_arg_idx = findNamedArgument(url_arg, "storage_account_url", start);
if (url_arg_idx == -1 || !url_arg)
return false;
}
else
{
if (!tryGetStringFromArgument(url_arg_idx, url_arg))
return false;
}
if (!(*url_arg).starts_with("http"))
{
auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(*url_arg);
if (!parsed_connection_string.AccountKey.empty())
{
result.start = url_arg_idx;
result.are_named = argument_is_named;
result.count = 1;
static re2::RE2 account_key_pattern = "AccountKey=.*?(;|$)";
String url_arg_copy = *url_arg;
RE2::Replace(&url_arg_copy, account_key_pattern, "AccountKey=[HIDDEN]\\1");
result.replacement = &url_arg_copy;
return true;
}
}
return false;
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
@ -341,8 +433,8 @@ private:
return identifier != nullptr;
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
/// Looks for an argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
ssize_t findNamedArgument(String * res, const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < arguments.getNodes().size(); ++i)
{
@ -364,9 +456,22 @@ private:
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
{
tryGetStringFromArgument(equal_args[1], res);
return i;
}
}
return -1;
}
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
String arg;
ssize_t arg_idx = findNamedArgument(&arg, key, start);
if (arg_idx >= 0)
markSecretArgument(arg_idx, /* argument_is_named= */ true);
}
};
}

View File

@ -722,7 +722,14 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
assert_cast<const ASTFunction *>(argument.get())->arguments->children[0]->formatImpl(settings, state, nested_dont_need_parens);
settings.ostr << (settings.hilite ? hilite_operator : "") << " = " << (settings.hilite ? hilite_none : "");
}
if (secret_arguments.replacement)
{
settings.ostr << "'" << (*secret_arguments.replacement) << "'";
}
else
{
settings.ostr << "'[HIDDEN]'";
}
if (size <= secret_arguments.start + secret_arguments.count && !secret_arguments.are_named)
break; /// All other arguments should also be hidden.
continue;

View File

@ -17,6 +17,7 @@ public:
bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments.
/// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))`
std::vector<std::string> nested_maps;
std::string * replacement = nullptr;
bool hasSecrets() const
{

View File

@ -6,7 +6,9 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <Common/KnownObjectNames.h>
#include <Common/re2.h>
#include <azure/storage/common/storage_credential.hpp>
#include <boost/algorithm/string/predicate.hpp>
@ -184,32 +186,39 @@ private:
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
{
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
/// azureBlobStorageCluster('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
if (maskAzureConnectionString(-1, true, 1))
return;
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 1);
return;
}
else if (is_cluster_function && isNamedCollectionName(1))
{
if (maskAzureConnectionString(-1, true, 2))
return;
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 2);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
if (maskAzureConnectionString(url_arg_idx))
return;
/// We should check other arguments first because we don't need to do any replacement in case of
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...])
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...])
size_t count = arguments->size();
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
String fourth_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &fourth_arg))
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
if (fourth_arg == "auto" || KnownFormatNames::instance().exists(fourth_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
@ -219,6 +228,42 @@ private:
markSecretArgument(url_arg_idx + 4);
}
bool maskAzureConnectionString(ssize_t url_arg_idx, bool argument_is_named = false, size_t start = 0)
{
String url_arg;
if (argument_is_named)
{
url_arg_idx = findNamedArgument(&url_arg, "connection_string", start);
if (url_arg_idx == -1 || url_arg.empty())
url_arg_idx = findNamedArgument(&url_arg, "storage_account_url", start);
if (url_arg_idx == -1 || url_arg.empty())
return false;
}
else
{
if (!tryGetStringFromArgument(url_arg_idx, &url_arg))
return false;
}
if (!url_arg.starts_with("http"))
{
auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(url_arg);
if (!parsed_connection_string.AccountKey.empty())
{
result.start = url_arg_idx;
result.are_named = argument_is_named;
result.count = 1;
static re2::RE2 account_key_pattern = "AccountKey=.*?(;|$)";
String url_arg_copy = url_arg;
RE2::Replace(&url_arg_copy, account_key_pattern, "AccountKey=[HIDDEN]\\1");
result.replacement = &url_arg_copy;
return true;
}
}
return false;
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
@ -518,8 +563,8 @@ private:
return identifier != nullptr;
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
/// Looks for an argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
ssize_t findNamedArgument(String * res, const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < arguments->size(); ++i)
{
@ -541,9 +586,21 @@ private:
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
{
tryGetStringFromArgument(*equal_args[1], res);
return i;
}
}
return -1;
}
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
ssize_t arg_idx = findNamedArgument(nullptr, key, start);
if (arg_idx >= 0)
markSecretArgument(arg_idx, /* argument_is_named= */ true);
}
};
}

View File

@ -223,7 +223,7 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context,
{
account_name = fourth_arg;
account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/structure");
if (is_format_arg(sixth_arg))
{
format = sixth_arg;
@ -257,10 +257,10 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context,
}
else if (with_structure && engine_args.size() == 8)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "account_name");
account_name = fourth_arg;
account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format");
if (!is_format_arg(sixth_arg))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg);
format = sixth_arg;

View File

@ -1,5 +1,6 @@
import pytest
import random, string
import re
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
@ -336,6 +337,10 @@ def test_create_database():
def test_table_functions():
password = new_password()
azure_conn_string = cluster.env_variables["AZURITE_CONNECTION_STRING"]
account_key_pattern = re.compile("AccountKey=.*?(;|$)")
masked_azure_conn_string = re.sub(
account_key_pattern, "AccountKey=[HIDDEN]\\1", azure_conn_string
)
azure_storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]
azure_account_name = "devstoreaccount1"
azure_account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
@ -467,23 +472,23 @@ def test_table_functions():
"CREATE TABLE tablefunc30 (x int) AS s3('http://minio1:9001/root/data/test9.csv.gz', 'NOSIGN', 'CSV')",
"CREATE TABLE tablefunc31 (`x` int) AS s3('http://minio1:9001/root/data/test10.csv.gz', 'minio', '[HIDDEN]')",
"CREATE TABLE tablefunc32 (`x` int) AS deltaLake('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')",
f"CREATE TABLE tablefunc33 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')",
f"CREATE TABLE tablefunc34 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')",
f"CREATE TABLE tablefunc35 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_2.csv', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc33 (`x` int) AS azureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')",
f"CREATE TABLE tablefunc34 (`x` int) AS azureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')",
f"CREATE TABLE tablefunc35 (`x` int) AS azureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple_2.csv', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc36 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_3.csv', '{azure_account_name}', '[HIDDEN]')",
f"CREATE TABLE tablefunc37 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_4.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')",
f"CREATE TABLE tablefunc38 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_5.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none')",
f"CREATE TABLE tablefunc39 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_6.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc40 (x int) AS azureBlobStorage(named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_7.csv', format = 'CSV')",
f"CREATE TABLE tablefunc40 (`x` int) AS azureBlobStorage(named_collection_2, connection_string = '{masked_azure_conn_string}', container = 'cont', blob_path = 'test_simple_7.csv', format = 'CSV')",
f"CREATE TABLE tablefunc41 (`x` int) AS azureBlobStorage(named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_8.csv', account_name = '{azure_account_name}', account_key = '[HIDDEN]')",
f"CREATE TABLE tablefunc42 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_9.csv', 'CSV')",
f"CREATE TABLE tablefunc43 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_10.csv', 'CSV', 'none')",
f"CREATE TABLE tablefunc44 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_11.csv', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc42 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{masked_azure_conn_string}', 'cont', 'test_simple_9.csv', 'CSV')",
f"CREATE TABLE tablefunc43 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{masked_azure_conn_string}', 'cont', 'test_simple_10.csv', 'CSV', 'none')",
f"CREATE TABLE tablefunc44 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{masked_azure_conn_string}', 'cont', 'test_simple_11.csv', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc45 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_12.csv', '{azure_account_name}', '[HIDDEN]')",
f"CREATE TABLE tablefunc46 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_13.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')",
f"CREATE TABLE tablefunc47 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_14.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none')",
f"CREATE TABLE tablefunc48 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_15.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc49 (x int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')",
f"CREATE TABLE tablefunc49 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{masked_azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')",
f"CREATE TABLE tablefunc50 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '[HIDDEN]')",
"CREATE TABLE tablefunc51 (`x` int) AS iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')",
],