diff --git a/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h b/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h index 439ddffe5e5..fb7bba8d3c6 100644 --- a/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h +++ b/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h @@ -7,8 +7,10 @@ #include #include #include +#include #include +#include #include @@ -89,6 +91,16 @@ private: /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) findS3FunctionSecretArguments(/* is_cluster_function= */ true); } + else if (name == "azureBlobStorage") + { + /// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) + findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false); + } + else if (name == "azureBlobStorageCluster") + { + /// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) + findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true); + } else if ((name == "remote") || (name == "remoteSecure")) { /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) @@ -177,6 +189,86 @@ private: markSecretArgument(url_arg_idx + 2); } + void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function) + { + /// azureBlobStorageCluster('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument. + size_t url_arg_idx = is_cluster_function ? 1 : 0; + + if (!is_cluster_function && isNamedCollectionName(0)) + { + if (maskAzureConnectionString(-1, true, 1)) + return; + /// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...) + findSecretNamedArgument("account_key", 1); + return; + } + else if (is_cluster_function && isNamedCollectionName(1)) + { + if (maskAzureConnectionString(-1, true, 2)) + return; + /// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...) + findSecretNamedArgument("account_key", 2); + return; + } + + if (maskAzureConnectionString(url_arg_idx)) + return; + + /// We should check other arguments first because we don't need to do any replacement in case of + /// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...]) + /// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...]) + size_t count = arguments.getNodes().size(); + if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7)) + { + String fourth_arg; + if (tryGetStringFromArgument(url_arg_idx + 3, &fourth_arg)) + { + if (fourth_arg == "auto" || KnownFormatNames::instance().exists(fourth_arg)) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) + } + } + + /// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature + if (url_arg_idx + 4 < count) + markSecretArgument(url_arg_idx + 4); + } + + bool maskAzureConnectionString(ssize_t url_arg_idx, bool argument_is_named = false, size_t start = 0) + { + String * url_arg = nullptr; + if (argument_is_named) + { + url_arg_idx = findNamedArgument(url_arg, "connection_string", start); + if (url_arg_idx == -1 || !url_arg) + url_arg_idx = findNamedArgument(url_arg, "storage_account_url", start); + if (url_arg_idx == -1 || !url_arg) + return false; + } + else + { + if (!tryGetStringFromArgument(url_arg_idx, url_arg)) + return false; + } + + if (!(*url_arg).starts_with("http")) + { + auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(*url_arg); + if (!parsed_connection_string.AccountKey.empty()) + { + result.start = url_arg_idx; + result.are_named = argument_is_named; + result.count = 1; + static re2::RE2 account_key_pattern = "AccountKey=.*?(;|$)"; + String url_arg_copy = *url_arg; + RE2::Replace(&url_arg_copy, account_key_pattern, "AccountKey=[HIDDEN]\\1"); + result.replacement = &url_arg_copy; + return true; + } + } + + return false; + } + void findURLSecretArguments() { if (!isNamedCollectionName(0)) @@ -341,8 +433,8 @@ private: return identifier != nullptr; } - /// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. - void findSecretNamedArgument(const std::string_view & key, size_t start = 0) + /// Looks for an argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. + ssize_t findNamedArgument(String * res, const std::string_view & key, size_t start = 0) { for (size_t i = start; i < arguments.getNodes().size(); ++i) { @@ -364,8 +456,21 @@ private: continue; if (found_key == key) - markSecretArgument(i, /* argument_is_named= */ true); + { + tryGetStringFromArgument(equal_args[1], res); + return i; + } } + + return -1; + } + + void findSecretNamedArgument(const std::string_view & key, size_t start = 0) + { + String arg; + ssize_t arg_idx = findNamedArgument(&arg, key, start); + if (arg_idx >= 0) + markSecretArgument(arg_idx, /* argument_is_named= */ true); } }; diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index d42728addb7..0eb898b1bd8 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -722,7 +722,14 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format assert_cast(argument.get())->arguments->children[0]->formatImpl(settings, state, nested_dont_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << " = " << (settings.hilite ? hilite_none : ""); } - settings.ostr << "'[HIDDEN]'"; + if (secret_arguments.replacement) + { + settings.ostr << "'" << (*secret_arguments.replacement) << "'"; + } + else + { + settings.ostr << "'[HIDDEN]'"; + } if (size <= secret_arguments.start + secret_arguments.count && !secret_arguments.are_named) break; /// All other arguments should also be hidden. continue; diff --git a/src/Parsers/FunctionSecretArgumentsFinder.h b/src/Parsers/FunctionSecretArgumentsFinder.h index 002ad94f6ea..c4af0c59615 100644 --- a/src/Parsers/FunctionSecretArgumentsFinder.h +++ b/src/Parsers/FunctionSecretArgumentsFinder.h @@ -17,6 +17,7 @@ public: bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments. /// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))` std::vector nested_maps; + std::string * replacement = nullptr; bool hasSecrets() const { diff --git a/src/Parsers/FunctionSecretArgumentsFinderAST.h b/src/Parsers/FunctionSecretArgumentsFinderAST.h index 15d9a8d5add..c4486acc6fe 100644 --- a/src/Parsers/FunctionSecretArgumentsFinderAST.h +++ b/src/Parsers/FunctionSecretArgumentsFinderAST.h @@ -6,7 +6,9 @@ #include #include #include +#include +#include #include @@ -184,32 +186,39 @@ private: void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function) { - /// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument. + /// azureBlobStorageCluster('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument. size_t url_arg_idx = is_cluster_function ? 1 : 0; if (!is_cluster_function && isNamedCollectionName(0)) { + if (maskAzureConnectionString(-1, true, 1)) + return; /// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...) findSecretNamedArgument("account_key", 1); return; } else if (is_cluster_function && isNamedCollectionName(1)) { + if (maskAzureConnectionString(-1, true, 2)) + return; /// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...) findSecretNamedArgument("account_key", 2); return; } - /// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used - /// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) - /// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) + if (maskAzureConnectionString(url_arg_idx)) + return; + + /// We should check other arguments first because we don't need to do any replacement in case of + /// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...]) + /// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...]) size_t count = arguments->size(); if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7)) { - String second_arg; - if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg)) + String fourth_arg; + if (tryGetStringFromArgument(url_arg_idx + 3, &fourth_arg)) { - if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + if (fourth_arg == "auto" || KnownFormatNames::instance().exists(fourth_arg)) return; /// The argument after 'url' is a format: s3('url', 'format', ...) } } @@ -219,6 +228,42 @@ private: markSecretArgument(url_arg_idx + 4); } + bool maskAzureConnectionString(ssize_t url_arg_idx, bool argument_is_named = false, size_t start = 0) + { + String url_arg; + if (argument_is_named) + { + url_arg_idx = findNamedArgument(&url_arg, "connection_string", start); + if (url_arg_idx == -1 || url_arg.empty()) + url_arg_idx = findNamedArgument(&url_arg, "storage_account_url", start); + if (url_arg_idx == -1 || url_arg.empty()) + return false; + } + else + { + if (!tryGetStringFromArgument(url_arg_idx, &url_arg)) + return false; + } + + if (!url_arg.starts_with("http")) + { + auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(url_arg); + if (!parsed_connection_string.AccountKey.empty()) + { + result.start = url_arg_idx; + result.are_named = argument_is_named; + result.count = 1; + static re2::RE2 account_key_pattern = "AccountKey=.*?(;|$)"; + String url_arg_copy = url_arg; + RE2::Replace(&url_arg_copy, account_key_pattern, "AccountKey=[HIDDEN]\\1"); + result.replacement = &url_arg_copy; + return true; + } + } + + return false; + } + void findURLSecretArguments() { if (!isNamedCollectionName(0)) @@ -518,8 +563,8 @@ private: return identifier != nullptr; } - /// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. - void findSecretNamedArgument(const std::string_view & key, size_t start = 0) + /// Looks for an argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. + ssize_t findNamedArgument(String * res, const std::string_view & key, size_t start = 0) { for (size_t i = start; i < arguments->size(); ++i) { @@ -541,8 +586,20 @@ private: continue; if (found_key == key) - markSecretArgument(i, /* argument_is_named= */ true); + { + tryGetStringFromArgument(*equal_args[1], res); + return i; + } } + + return -1; + } + + void findSecretNamedArgument(const std::string_view & key, size_t start = 0) + { + ssize_t arg_idx = findNamedArgument(nullptr, key, start); + if (arg_idx >= 0) + markSecretArgument(arg_idx, /* argument_is_named= */ true); } }; diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp index 8121f389a8d..33d6af9c26b 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -223,7 +223,7 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, { account_name = fourth_arg; account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/structure"); if (is_format_arg(sixth_arg)) { format = sixth_arg; @@ -257,10 +257,10 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, } else if (with_structure && engine_args.size() == 8) { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "account_name"); account_name = fourth_arg; account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format"); if (!is_format_arg(sixth_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); format = sixth_arg; diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 5366de39ea7..797986bbf01 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -1,5 +1,6 @@ import pytest import random, string +import re from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV @@ -336,6 +337,10 @@ def test_create_database(): def test_table_functions(): password = new_password() azure_conn_string = cluster.env_variables["AZURITE_CONNECTION_STRING"] + account_key_pattern = re.compile("AccountKey=.*?(;|$)") + masked_azure_conn_string = re.sub( + account_key_pattern, "AccountKey=[HIDDEN]\\1", azure_conn_string + ) azure_storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"] azure_account_name = "devstoreaccount1" azure_account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" @@ -467,23 +472,23 @@ def test_table_functions(): "CREATE TABLE tablefunc30 (x int) AS s3('http://minio1:9001/root/data/test9.csv.gz', 'NOSIGN', 'CSV')", "CREATE TABLE tablefunc31 (`x` int) AS s3('http://minio1:9001/root/data/test10.csv.gz', 'minio', '[HIDDEN]')", "CREATE TABLE tablefunc32 (`x` int) AS deltaLake('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')", - f"CREATE TABLE tablefunc33 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')", - f"CREATE TABLE tablefunc34 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')", - f"CREATE TABLE tablefunc35 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_2.csv', 'CSV', 'none', 'auto')", + f"CREATE TABLE tablefunc33 (`x` int) AS azureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')", + f"CREATE TABLE tablefunc34 (`x` int) AS azureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')", + f"CREATE TABLE tablefunc35 (`x` int) AS azureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple_2.csv', 'CSV', 'none', 'auto')", f"CREATE TABLE tablefunc36 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_3.csv', '{azure_account_name}', '[HIDDEN]')", f"CREATE TABLE tablefunc37 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_4.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')", f"CREATE TABLE tablefunc38 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_5.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none')", f"CREATE TABLE tablefunc39 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_6.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')", - f"CREATE TABLE tablefunc40 (x int) AS azureBlobStorage(named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_7.csv', format = 'CSV')", + f"CREATE TABLE tablefunc40 (`x` int) AS azureBlobStorage(named_collection_2, connection_string = '{masked_azure_conn_string}', container = 'cont', blob_path = 'test_simple_7.csv', format = 'CSV')", f"CREATE TABLE tablefunc41 (`x` int) AS azureBlobStorage(named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_8.csv', account_name = '{azure_account_name}', account_key = '[HIDDEN]')", - f"CREATE TABLE tablefunc42 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_9.csv', 'CSV')", - f"CREATE TABLE tablefunc43 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_10.csv', 'CSV', 'none')", - f"CREATE TABLE tablefunc44 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_11.csv', 'CSV', 'none', 'auto')", + f"CREATE TABLE tablefunc42 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{masked_azure_conn_string}', 'cont', 'test_simple_9.csv', 'CSV')", + f"CREATE TABLE tablefunc43 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{masked_azure_conn_string}', 'cont', 'test_simple_10.csv', 'CSV', 'none')", + f"CREATE TABLE tablefunc44 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{masked_azure_conn_string}', 'cont', 'test_simple_11.csv', 'CSV', 'none', 'auto')", f"CREATE TABLE tablefunc45 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_12.csv', '{azure_account_name}', '[HIDDEN]')", f"CREATE TABLE tablefunc46 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_13.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')", f"CREATE TABLE tablefunc47 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_14.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none')", f"CREATE TABLE tablefunc48 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_15.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')", - f"CREATE TABLE tablefunc49 (x int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')", + f"CREATE TABLE tablefunc49 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{masked_azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')", f"CREATE TABLE tablefunc50 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '[HIDDEN]')", "CREATE TABLE tablefunc51 (`x` int) AS iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')", ],