From 6ee997193121882e0e2255b77633ef8ce4c5266f Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 25 Jan 2023 14:24:53 +0100 Subject: [PATCH 1/5] Mask sensitive info in named collection's overrides. --- src/Parsers/ASTFunction.cpp | 288 ++++++++++++------ .../test_mask_sensitive_info/test.py | 34 ++- 2 files changed, 228 insertions(+), 94 deletions(-) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index fccef01a2bc..ee9c9858e83 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -37,81 +37,117 @@ namespace { /// Finds arguments of a specified function which should not be displayed for most users for security reasons. /// That involves passwords and secret keys. - /// The member function getRange() returns a pair of numbers [first, last) specifying arguments - /// which must be hidden. If the function returns {-1, -1} that means no arguments must be hidden. class FunctionSecretArgumentsFinder { public: explicit FunctionSecretArgumentsFinder(const ASTFunction & function_) : function(function_) { - if (function.arguments) - { - if (const auto * expr_list = function.arguments->as()) - arguments = &expr_list->children; - } - } + if (!function.arguments) + return; - std::pair getRange() const - { - if (!arguments) - return npos; + const auto * expr_list = function.arguments->as(); + if (!expr_list) + return; + arguments = &expr_list->children; switch (function.kind) { - case ASTFunction::Kind::ORDINARY_FUNCTION: return findOrdinaryFunctionSecretArguments(); - case ASTFunction::Kind::WINDOW_FUNCTION: return npos; - case ASTFunction::Kind::LAMBDA_FUNCTION: return npos; - case ASTFunction::Kind::TABLE_ENGINE: return findTableEngineSecretArguments(); - case ASTFunction::Kind::DATABASE_ENGINE: return findDatabaseEngineSecretArguments(); - case ASTFunction::Kind::BACKUP_NAME: return findBackupNameSecretArguments(); + case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break; + case ASTFunction::Kind::WINDOW_FUNCTION: break; + case ASTFunction::Kind::LAMBDA_FUNCTION: break; + case ASTFunction::Kind::TABLE_ENGINE: findTableEngineSecretArguments(); break; + case ASTFunction::Kind::DATABASE_ENGINE: findDatabaseEngineSecretArguments(); break; + case ASTFunction::Kind::BACKUP_NAME: findBackupNameSecretArguments(); break; } } - static const constexpr std::pair npos{static_cast(-1), static_cast(-1)}; + struct Result + { + /// Result constructed by default means no arguments will be hidden. + size_t start = static_cast(-1); + size_t count = 0; + bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments. + }; + + Result getResult() const { return result; } private: - std::pair findOrdinaryFunctionSecretArguments() const + const ASTFunction & function; + const ASTs * arguments = nullptr; + Result result; + + void markSecretArgument(size_t index, bool argument_is_named = false) + { + if (!result.count) + { + result.start = index; + result.are_named = argument_is_named; + } + chassert(index >= result.start); + result.count = index + 1 - result.start; + if (!argument_is_named) + result.are_named = false; + } + + void findOrdinaryFunctionSecretArguments() { if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb")) { /// mysql('host:port', 'database', 'table', 'user', 'password', ...) /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) - return {4, 5}; + findMySQLFunctionSecretArguments(); } else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss")) { /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) - return findS3FunctionSecretArguments(/* is_cluster_function= */ false); + findS3FunctionSecretArguments(/* is_cluster_function= */ false); } else if (function.name == "s3Cluster") { /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) - return findS3FunctionSecretArguments(/* is_cluster_function= */ true); + findS3FunctionSecretArguments(/* is_cluster_function= */ true); } else if ((function.name == "remote") || (function.name == "remoteSecure")) { /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) - return findRemoteFunctionSecretArguments(); + findRemoteFunctionSecretArguments(); } else if ((function.name == "encrypt") || (function.name == "decrypt") || (function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") || (function.name == "tryDecrypt")) { /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) - return findEncryptionFunctionSecretArguments(); - } - else - { - return npos; + findEncryptionFunctionSecretArguments(); } } - std::pair findS3FunctionSecretArguments(bool is_cluster_function) const + void findMySQLFunctionSecretArguments() + { + if (isCollectionNameArgument(0)) + { + /// mysql(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + markSecretArgument(4); + } + } + + void findS3FunctionSecretArguments(bool is_cluster_function) { /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. size_t url_arg_idx = is_cluster_function ? 1 : 0; + if (!is_cluster_function && isCollectionNameArgument(0)) + { + /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...) + findSecretNamedArgument("secret_access_key", 1); + return; + } + /// We're going to replace 'aws_secret_access_key' with '[HIDDEN'] for the following signatures: /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') @@ -119,12 +155,12 @@ namespace /// But we should check the number of arguments first because we don't need to do any replacements in case of /// s3('url' [, 'format']) or s3Cluster('cluster_name', 'url' [, 'format']) if (arguments->size() < url_arg_idx + 3) - return npos; + return; if (arguments->size() >= url_arg_idx + 5) { /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'structure', ...) - return {url_arg_idx + 2, url_arg_idx + 3}; + markSecretArgument(url_arg_idx + 2); } else { @@ -136,15 +172,16 @@ namespace { /// We couldn't evaluate the argument after 'url' so we don't know whether it is a format or `aws_access_key_id`. /// So it's safer to wipe the next argument just in case. - return {url_arg_idx + 2, url_arg_idx + 3}; /// Wipe either `aws_secret_access_key` or `structure`. + markSecretArgument(url_arg_idx + 2); /// Wipe either `aws_secret_access_key` or `structure`. + return; } if (KnownFormatNames::instance().exists(format)) - return npos; /// The argument after 'url' is a format: s3('url', 'format', ...) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) /// The argument after 'url' is not a format so we do our replacement: /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) -> s3('url', 'aws_access_key_id', '[HIDDEN]', ...) - return {url_arg_idx + 2, url_arg_idx + 3}; + markSecretArgument(url_arg_idx + 2); } } @@ -153,8 +190,12 @@ namespace if (arg_idx >= arguments->size()) return false; - ASTPtr argument = (*arguments)[arg_idx]; - if (const auto * literal = argument->as()) + return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier); + } + + static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true) + { + if (const auto * literal = argument.as()) { if (literal->value.getType() != Field::Types::String) return false; @@ -165,7 +206,7 @@ namespace if (allow_identifier) { - if (const auto * id = argument->as()) + if (const auto * id = argument.as()) { if (res) *res = id->name(); @@ -176,8 +217,15 @@ namespace return false; } - std::pair findRemoteFunctionSecretArguments() const + void findRemoteFunctionSecretArguments() { + if (isCollectionNameArgument(0)) + { + /// remote(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + return; + } + /// We're going to replace 'password' with '[HIDDEN'] for the following signatures: /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key]) /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key]) @@ -186,7 +234,7 @@ namespace /// But we should check the number of arguments first because we don't need to do any replacements in case of /// remote('addresses_expr', db.table) if (arguments->size() < 3) - return npos; + return; size_t arg_num = 1; @@ -207,20 +255,17 @@ namespace /// before the argument 'password'. So it's safer to wipe two arguments just in case. /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string /// before wiping it (because the `password` argument is always a literal string). - auto res = npos; if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false)) { /// Wipe either `password` or `user`. - res = {arg_num + 2, arg_num + 3}; + markSecretArgument(arg_num + 2); } if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false)) { /// Wipe either `password` or `sharding_key`. - if (res == npos) - res.first = arg_num + 3; - res.second = arg_num + 4; + markSecretArgument(arg_num + 3); } - return res; + return; } /// Skip the current argument (which is either a database name or a qualified table name). @@ -241,9 +286,7 @@ namespace /// before wiping it (because the `password` argument is always a literal string). bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false); if (can_be_password) - return {arg_num, arg_num + 1}; - - return npos; + markSecretArgument(arg_num); } /// Tries to get either a database name or a qualified table name from an argument. @@ -278,20 +321,24 @@ namespace return true; } - std::pair findEncryptionFunctionSecretArguments() const + void findEncryptionFunctionSecretArguments() { + if (arguments->empty()) + return; + /// We replace all arguments after 'mode' with '[HIDDEN]': /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]') - return {1, arguments->size()}; + result.start = 1; + result.count = arguments->size() - 1; } - std::pair findTableEngineSecretArguments() const + void findTableEngineSecretArguments() { const String & engine_name = function.name; if (engine_name == "ExternalDistributed") { /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') - return {5, 6}; + findExternalDistributedTableEngineSecretArguments(); } else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") || (engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB")) @@ -300,21 +347,38 @@ namespace /// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) /// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) /// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...) - return {4, 5}; + findMySQLFunctionSecretArguments(); } else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS")) { /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...) - return findS3TableEngineSecretArguments(); - } - else - { - return npos; + findS3TableEngineSecretArguments(); } } - std::pair findS3TableEngineSecretArguments() const + void findExternalDistributedTableEngineSecretArguments() { + if (isCollectionNameArgument(1)) + { + /// ExternalDistributed('engine', named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 2); + } + else + { + /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') + markSecretArgument(5); + } + } + + void findS3TableEngineSecretArguments() + { + if (isCollectionNameArgument(0)) + { + /// S3(named_collection, ..., secret_access_key = 'secret_access_key') + findSecretNamedArgument("secret_access_key", 1); + return; + } + /// We replace 'aws_secret_access_key' with '[HIDDEN'] for the following signatures: /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') @@ -322,12 +386,12 @@ namespace /// But we should check the number of arguments first because we don't need to do that replacements in case of /// S3('url' [, 'format' [, 'compression']]) if (arguments->size() < 4) - return npos; + return; - return {2, 3}; + markSecretArgument(2); } - std::pair findDatabaseEngineSecretArguments() const + void findDatabaseEngineSecretArguments() { const String & engine_name = function.name; if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") || @@ -335,31 +399,71 @@ namespace (engine_name == "MaterializedPostgreSQL")) { /// MySQL('host:port', 'database', 'user', 'password') - /// PostgreSQL('host:port', 'database', 'user', 'password', ...) - return {3, 4}; - } - else - { - return npos; + /// PostgreSQL('host:port', 'database', 'user', 'password') + findMySQLDatabaseSecretArguments(); } } - std::pair findBackupNameSecretArguments() const + void findMySQLDatabaseSecretArguments() + { + if (isCollectionNameArgument(0)) + { + /// MySQL(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// MySQL('host:port', 'database', 'user', 'password') + markSecretArgument(3); + } + } + + void findBackupNameSecretArguments() { const String & engine_name = function.name; if (engine_name == "S3") { /// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key]) - return {2, 3}; - } - else - { - return npos; + markSecretArgument(2); } } - const ASTFunction & function; - const ASTs * arguments = nullptr; + /// Whether a specified argument can be the name of a named collection? + bool isCollectionNameArgument(size_t arg_idx) const + { + if (arguments->size() <= arg_idx) + return false; + + const auto * identifier = (*arguments)[arg_idx]->as(); + return identifier != nullptr; + } + + /// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. + void findSecretNamedArgument(const std::string_view & key, size_t start = 0) + { + for (size_t i = start; i < arguments->size(); ++i) + { + const auto & argument = (*arguments)[i]; + const auto * equals_func = argument->as(); + if (!equals_func || (equals_func->name != "equals")) + continue; + + const auto * expr_list = equals_func->arguments->as(); + if (!expr_list) + continue; + + const auto & equal_args = expr_list->children; + if (equal_args.size() != 2) + continue; + + String found_key; + if (!tryGetStringFromArgument(*equal_args[0], &found_key)) + continue; + + if (found_key == key) + markSecretArgument(i, /* argument_is_named= */ true); + } + } }; } @@ -966,32 +1070,39 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format && (name == "match" || name == "extract" || name == "extractAll" || name == "replaceRegexpOne" || name == "replaceRegexpAll"); - auto secret_arguments = std::make_pair(static_cast(-1), static_cast(-1)); + FunctionSecretArgumentsFinder::Result secret_arguments; if (!settings.show_secrets) - secret_arguments = FunctionSecretArgumentsFinder(*this).getRange(); + secret_arguments = FunctionSecretArgumentsFinder{*this}.getResult(); for (size_t i = 0, size = arguments->children.size(); i < size; ++i) { if (i != 0) settings.ostr << ", "; - if (arguments->children[i]->as()) + + const auto & argument = arguments->children[i]; + if (argument->as()) settings.ostr << "SETTINGS "; - if (!settings.show_secrets && (secret_arguments.first <= i) && (i < secret_arguments.second)) + if (!settings.show_secrets && (secret_arguments.start <= i) && (i < secret_arguments.start + secret_arguments.count)) { + if (secret_arguments.are_named) + { + assert_cast(argument.get())->arguments->children[0]->formatImpl(settings, state, nested_dont_need_parens); + settings.ostr << (settings.hilite ? hilite_operator : "") << " = " << (settings.hilite ? hilite_none : ""); + } settings.ostr << "'[HIDDEN]'"; - if (size - 1 < secret_arguments.second) + if (size <= secret_arguments.start + secret_arguments.count && !secret_arguments.are_named) break; /// All other arguments should also be hidden. continue; } if ((i == 1) && special_hilite_regexp - && highlightStringLiteralWithMetacharacters(arguments->children[i], settings, "|()^$.[]?*+{:-")) + && highlightStringLiteralWithMetacharacters(argument, settings, "|()^$.[]?*+{:-")) { continue; } - arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); + argument->formatImpl(settings, state, nested_dont_need_parens); } } @@ -1005,14 +1116,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format bool ASTFunction::hasSecretParts() const { - if (arguments) - { - size_t num_arguments = arguments->children.size(); - auto secret_arguments = FunctionSecretArgumentsFinder(*this).getRange(); - if ((secret_arguments.first < num_arguments) && (secret_arguments.first < secret_arguments.second)) - return true; - } - return childrenHaveSecretParts(); + return (FunctionSecretArgumentsFinder{*this}.getResult().count > 0) || childrenHaveSecretParts(); } String getFunctionName(const IAST * ast) diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index f938148e5a0..3f71b047213 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -4,7 +4,13 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node", with_zookeeper=True) +node = cluster.add_instance( + "node", + main_configs=[ + "configs/named_collections.xml", + ], + with_zookeeper=True, +) @pytest.fixture(scope="module", autouse=True) @@ -116,6 +122,12 @@ def test_create_table(): f"S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')", f"S3('http://minio1:9001/root/data/test4.csv', 'minio', '{password}', 'CSV')", f"S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '{password}', 'CSV', 'gzip')", + f"MySQL(named_collection_1, host = 'mysql57', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '{password}')", + f"MySQL(named_collection_2, database = 'mysql_db', host = 'mysql57', port = 3306, password = '{password}', table = 'mysql_table', user = 'mysql_user')", + f"MySQL(named_collection_3, database = 'mysql_db', host = 'mysql57', port = 3306, table = 'mysql_table')", + f"PostgreSQL(named_collection_4, host = 'postgres1', port = 5432, database = 'postgres_db', table = 'postgres_table', user = 'postgres_user', password = '{password}')", + f"MongoDB(named_collection_5, host = 'mongo1', port = 5432, database = 'mongo_db', collection = 'mongo_col', user = 'mongo_user', password = '{password}')", + f"S3(named_collection_6, url = 'http://minio1:9001/root/data/test8.csv', access_key_id = 'minio', secret_access_key = '{password}', format = 'CSV')", ] for i, table_engine in enumerate(table_engines): @@ -147,6 +159,12 @@ def test_create_table(): "CREATE TABLE table5 (x int) ENGINE = S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')", "CREATE TABLE table6 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test4.csv', 'minio', '[HIDDEN]', 'CSV')", "CREATE TABLE table7 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '[HIDDEN]', 'CSV', 'gzip')", + "CREATE TABLE table8 (`x` int) ENGINE = MySQL(named_collection_1, host = 'mysql57', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '[HIDDEN]')", + "CREATE TABLE table9 (`x` int) ENGINE = MySQL(named_collection_2, database = 'mysql_db', host = 'mysql57', port = 3306, password = '[HIDDEN]', table = 'mysql_table', user = 'mysql_user')", + "CREATE TABLE table10 (x int) ENGINE = MySQL(named_collection_3, database = 'mysql_db', host = 'mysql57', port = 3306, table = 'mysql_table')", + "CREATE TABLE table11 (`x` int) ENGINE = PostgreSQL(named_collection_4, host = 'postgres1', port = 5432, database = 'postgres_db', table = 'postgres_table', user = 'postgres_user', password = '[HIDDEN]')", + "CREATE TABLE table12 (`x` int) ENGINE = MongoDB(named_collection_5, host = 'mongo1', port = 5432, database = 'mongo_db', collection = 'mongo_col', user = 'mongo_user', password = '[HIDDEN]'", + "CREATE TABLE table13 (`x` int) ENGINE = S3(named_collection_6, url = 'http://minio1:9001/root/data/test8.csv', access_key_id = 'minio', secret_access_key = '[HIDDEN]', format = 'CSV')", ], must_not_contain=[password], ) @@ -160,6 +178,7 @@ def test_create_database(): database_engines = [ f"MySQL('localhost:3306', 'mysql_db', 'mysql_user', '{password}') SETTINGS connect_timeout=1, connection_max_tries=1", + f"MySQL(named_collection_1, host = 'localhost', port = 3306, database = 'mysql_db', user = 'mysql_user', password = '{password}') SETTINGS connect_timeout=1, connection_max_tries=1", # f"PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '{password}')", ] @@ -173,7 +192,8 @@ def test_create_database(): check_logs( must_contain=[ "CREATE DATABASE database0 ENGINE = MySQL('localhost:3306', 'mysql_db', 'mysql_user', '[HIDDEN]')", - # "CREATE DATABASE database1 ENGINE = PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '[HIDDEN]')", + "CREATE DATABASE database1 ENGINE = MySQL(named_collection_1, host = 'localhost', port = 3306, database = 'mysql_db', user = 'mysql_user', password = '[HIDDEN]')", + # "CREATE DATABASE database2 ENGINE = PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '[HIDDEN]')", ], must_not_contain=[password], ) @@ -211,6 +231,11 @@ def test_table_functions(): f"remote('127.{{2..11}}', numbers(10), 'remote_user', '{password}', rand())", f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', '{password}')", f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', rand())", + f"mysql(named_collection_1, host = 'mysql57', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '{password}')", + f"postgresql(named_collection_2, password = '{password}', host = 'postgres1', port = 5432, database = 'postgres_db', table = 'postgres_table', user = 'postgres_user')", + f"s3(named_collection_3, url = 'http://minio1:9001/root/data/test4.csv', access_key_id = 'minio', secret_access_key = '{password}')", + f"remote(named_collection_4, addresses_expr = '127.{{2..11}}', database = 'default', table = 'remote_table', user = 'remote_user', password = '{password}', sharding_key = rand())", + f"remoteSecure(named_collection_5, addresses_expr = '127.{{2..11}}', database = 'default', table = 'remote_table', user = 'remote_user', password = '{password}')", ] for i, table_function in enumerate(table_functions): @@ -259,6 +284,11 @@ def test_table_functions(): "CREATE TABLE tablefunc22 (`x` int) AS remote('127.{2..11}', numbers(10), 'remote_user', '[HIDDEN]', rand())", "CREATE TABLE tablefunc23 (`x` int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', '[HIDDEN]')", "CREATE TABLE tablefunc24 (x int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', rand())", + "CREATE TABLE tablefunc25 (`x` int) AS mysql(named_collection_1, host = 'mysql57', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '[HIDDEN]')", + "CREATE TABLE tablefunc26 (`x` int) AS postgresql(named_collection_2, password = '[HIDDEN]', host = 'postgres1', port = 5432, database = 'postgres_db', table = 'postgres_table', user = 'postgres_user')", + "CREATE TABLE tablefunc27 (`x` int) AS s3(named_collection_3, url = 'http://minio1:9001/root/data/test4.csv', access_key_id = 'minio', secret_access_key = '[HIDDEN]')", + "CREATE TABLE tablefunc28 (`x` int) AS remote(named_collection_4, addresses_expr = '127.{2..11}', database = 'default', table = 'remote_table', user = 'remote_user', password = '[HIDDEN]', sharding_key = rand())", + "CREATE TABLE tablefunc29 (`x` int) AS remoteSecure(named_collection_5, addresses_expr = '127.{2..11}', database = 'default', table = 'remote_table', user = 'remote_user', password = '[HIDDEN]')", ], must_not_contain=[password], ) From d95d8bcb262fb044511b5f2c4c1fbe7c0967dfc1 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 30 Jan 2023 19:31:32 +0100 Subject: [PATCH 2/5] Rename isCollectionNameArgument() -> isNamedCollectionName() and add comments. --- src/Parsers/ASTFunction.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index ee9c9858e83..a711acf9b13 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -65,7 +65,8 @@ namespace { /// Result constructed by default means no arguments will be hidden. size_t start = static_cast(-1); - size_t count = 0; + size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`). + /// In all known cases secret arguments are consequent. bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments. }; @@ -83,7 +84,7 @@ namespace result.start = index; result.are_named = argument_is_named; } - chassert(index >= result.start); + chassert(index >= result.start); /// We always check arguments consequently. result.count = index + 1 - result.start; if (!argument_is_named) result.are_named = false; @@ -124,7 +125,7 @@ namespace void findMySQLFunctionSecretArguments() { - if (isCollectionNameArgument(0)) + if (isNamedCollectionName(0)) { /// mysql(named_collection, ..., password = 'password', ...) findSecretNamedArgument("password", 1); @@ -141,7 +142,7 @@ namespace /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. size_t url_arg_idx = is_cluster_function ? 1 : 0; - if (!is_cluster_function && isCollectionNameArgument(0)) + if (!is_cluster_function && isNamedCollectionName(0)) { /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...) findSecretNamedArgument("secret_access_key", 1); @@ -219,7 +220,7 @@ namespace void findRemoteFunctionSecretArguments() { - if (isCollectionNameArgument(0)) + if (isNamedCollectionName(0)) { /// remote(named_collection, ..., password = 'password', ...) findSecretNamedArgument("password", 1); @@ -358,7 +359,7 @@ namespace void findExternalDistributedTableEngineSecretArguments() { - if (isCollectionNameArgument(1)) + if (isNamedCollectionName(1)) { /// ExternalDistributed('engine', named_collection, ..., password = 'password', ...) findSecretNamedArgument("password", 2); @@ -372,7 +373,7 @@ namespace void findS3TableEngineSecretArguments() { - if (isCollectionNameArgument(0)) + if (isNamedCollectionName(0)) { /// S3(named_collection, ..., secret_access_key = 'secret_access_key') findSecretNamedArgument("secret_access_key", 1); @@ -406,7 +407,7 @@ namespace void findMySQLDatabaseSecretArguments() { - if (isCollectionNameArgument(0)) + if (isNamedCollectionName(0)) { /// MySQL(named_collection, ..., password = 'password', ...) findSecretNamedArgument("password", 1); @@ -429,7 +430,7 @@ namespace } /// Whether a specified argument can be the name of a named collection? - bool isCollectionNameArgument(size_t arg_idx) const + bool isNamedCollectionName(size_t arg_idx) const { if (arguments->size() <= arg_idx) return false; From 48023a297f0bdcc28ecb4a62f8f931f84c191bb9 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 30 Jan 2023 21:47:30 +0100 Subject: [PATCH 3/5] Add missed test config. --- .../configs/named_collections.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 tests/integration/test_mask_sensitive_info/configs/named_collections.xml diff --git a/tests/integration/test_mask_sensitive_info/configs/named_collections.xml b/tests/integration/test_mask_sensitive_info/configs/named_collections.xml new file mode 100644 index 00000000000..ee923a90171 --- /dev/null +++ b/tests/integration/test_mask_sensitive_info/configs/named_collections.xml @@ -0,0 +1,10 @@ + + + + + + + + + + From c6be69568ad8db3ff69213dd40abc79114e9dca4 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 31 Jan 2023 14:43:30 +0100 Subject: [PATCH 4/5] Update src/Parsers/ASTFunction.cpp Co-authored-by: SmitaRKulkarni <64093672+SmitaRKulkarni@users.noreply.github.com> --- src/Parsers/ASTFunction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index a711acf9b13..c1fb19dd921 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -84,7 +84,7 @@ namespace result.start = index; result.are_named = argument_is_named; } - chassert(index >= result.start); /// We always check arguments consequently. + chassert(index >= result.start); /// We always check arguments consecutively result.count = index + 1 - result.start; if (!argument_is_named) result.are_named = false; From 6165625e278217f5497d4a06c74188b43e4ae1e1 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 31 Jan 2023 14:43:48 +0100 Subject: [PATCH 5/5] Update src/Parsers/ASTFunction.cpp Co-authored-by: SmitaRKulkarni <64093672+SmitaRKulkarni@users.noreply.github.com> --- src/Parsers/ASTFunction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index c1fb19dd921..7a19cba0f75 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -66,7 +66,7 @@ namespace /// Result constructed by default means no arguments will be hidden. size_t start = static_cast(-1); size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`). - /// In all known cases secret arguments are consequent. + /// In all known cases secret arguments are consecutive bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments. };