From 679a0e1300a72bcb770d7f8831906f4e797a336f Mon Sep 17 00:00:00 2001 From: Shani Elharrar Date: Thu, 14 Dec 2023 10:05:01 +0200 Subject: [PATCH] StorageS3 / TableFunctionS3: Allow passing session_token to AuthSettings This can help users that want to pass temporary credentials that issued by AWS in order to load data from S3 without changing configuration or creating an IAM User. Fixes #57848 --- docs/en/sql-reference/table-functions/s3.md | 4 +- .../table-functions/s3Cluster.md | 5 +- docs/ru/sql-reference/table-functions/s3.md | 2 +- .../table-functions/s3Cluster.md | 4 +- docs/zh/sql-reference/table-functions/s3.md | 2 +- src/Storages/StorageS3.cpp | 42 +++++++++++-- src/TableFunctions/TableFunctionS3.cpp | 62 ++++++++++++++++--- src/TableFunctions/TableFunctionS3.h | 6 +- src/TableFunctions/TableFunctionS3Cluster.h | 1 + 9 files changed, 107 insertions(+), 21 deletions(-) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index dc11259c626..61a9187575d 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -16,7 +16,7 @@ When using the `s3 table function` with [`INSERT INTO...SELECT`](../../sql-refer **Syntax** ``` sql -s3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +s3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key [,session_token]] [,format] [,structure] [,compression]) ``` :::tip GCS @@ -38,6 +38,8 @@ For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_ ::: - `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed. +- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `session_token` - Session token to use with the given keys. Optional when passing keys. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 799eb31446a..080c9860519 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -10,14 +10,15 @@ Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) and Google **Syntax** ``` sql -s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [,structure]) +s3Cluster(cluster_name, source, [,access_key_id, secret_access_key, [session_token]] [,format] [,structure]) ``` **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. - `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `session_token` - Session token to use with the given keys. Optional when passing keys. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 7deef68f47f..fe40cb0c507 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -11,7 +11,7 @@ sidebar_label: s3 **Синтаксис** ``` sql -s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +s3(path [,access_key_id, secret_access_key [,session_token]] [,format] [,structure] [,compression]) ``` **Aргументы** diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md index b8f34d805ff..b382bf5e384 100644 --- a/docs/ru/sql-reference/table-functions/s3Cluster.md +++ b/docs/ru/sql-reference/table-functions/s3Cluster.md @@ -11,14 +11,14 @@ sidebar_label: s3Cluster **Синтаксис** ``` sql -s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [,structure]) +s3Cluster(cluster_name, source, [,access_key_id, secret_access_key [,session_token]] [,format] [,structure]) ``` **Аргументы** - `cluster_name` — имя кластера, используемое для создания набора адресов и параметров подключения к удаленным и локальным серверам. - `source` — URL файла или нескольких файлов. Поддерживает следующие символы подстановки: `*`, `?`, `{'abc','def'}` и `{N..M}`, где `N`, `M` — числа, `abc`, `def` — строки. Подробнее смотрите в разделе [Символы подстановки](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `access_key_id` и `secret_access_key` — ключи, указывающие на учетные данные для использования с точкой приема запроса. Необязательные параметры. +- `access_key_id`, `secret_access_key` и `session_token` — ключи, указывающие на учетные данные для использования с точкой приема запроса. Необязательные параметры. - `format` — [формат](../../interfaces/formats.md#formats) файла. - `structure` — структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/zh/sql-reference/table-functions/s3.md b/docs/zh/sql-reference/table-functions/s3.md index a62fa9ebb19..f7384a7526e 100644 --- a/docs/zh/sql-reference/table-functions/s3.md +++ b/docs/zh/sql-reference/table-functions/s3.md @@ -11,7 +11,7 @@ sidebar_label: s3 **语法** ``` sql -s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +s3(path [,access_key_id, secret_access_key [,session_token]] ,format, structure, [compression]) ``` **参数** diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index e8f460525db..096e2e88f91 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -104,6 +104,7 @@ static const std::unordered_set optional_configuration_keys = "structure", "access_key_id", "secret_access_key", + "session_token", "filename", "use_environment_credentials", "max_single_read_retries", @@ -1521,11 +1522,14 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context /// S3('url', NOSIGN, 'format') /// S3('url', NOSIGN, 'format', 'compression') /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token') /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format') /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression') /// with optional headers() function - if (engine_args.empty() || engine_args.size() > 5) + if (engine_args.empty() || engine_args.size() > 6) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage S3 requires 1 to 5 arguments: " "url, [NOSIGN | access_key_id, secret_access_key], name of used format and [compression_method]"); @@ -1541,7 +1545,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context static std::unordered_map> size_to_engine_args { {1, {{}}}, - {5, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression_method", 4}}} + {6, {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}} }; std::unordered_map engine_args_to_idx; @@ -1577,7 +1581,8 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context else engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; } - /// For 4 arguments we support 2 possible variants: + /// For 4 arguments we support 3 possible variants: + /// - s3(source, access_key_id, secret_access_key, session_token) /// - s3(source, access_key_id, secret_access_key, format) /// - s3(source, NOSIGN, format, compression_method) /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. @@ -1590,7 +1595,32 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context engine_args_to_idx = {{"format", 2}, {"compression_method", 3}}; } else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; + } + } + } + /// For 5 arguments we support 2 possible variants: + /// - s3(source, access_key_id, secret_access_key, session_token, format) + /// - s3(source, access_key_id, secret_access_key, format, compression) + else if (engine_args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } } else { @@ -1612,6 +1642,10 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context if (engine_args_to_idx.contains("secret_access_key")) configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); + if (engine_args_to_idx.contains("session_token")) + configuration.auth_settings.session_token = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["session_token"]], "session_token"); + + configuration.auth_settings.no_sign_request = no_sign_request; } diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index e6ae75a5fd5..c52256fb984 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -71,7 +71,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context if (header_it != args.end()) args.erase(header_it); - if (args.empty() || args.size() > 6) + if (args.empty() || args.size() > 7) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); for (auto & arg : args) @@ -81,7 +81,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context static std::unordered_map> size_to_args { {1, {{}}}, - {6, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}} + {7, {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}} }; std::unordered_map args_to_idx; @@ -118,11 +118,12 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context else args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; } - /// For 4 arguments we support 3 possible variants: + /// For 4 arguments we support 4 possible variants: /// - s3(source, format, structure, compression_method), - /// - s3(source, access_key_id, access_key_id, format) + /// - s3(source, access_key_id, access_key_id, format), + /// - s3(source, access_key_id, access_key_id, session_token) /// - s3(source, NOSIGN, format, structure) - /// We can distinguish them by looking at the 2-nd argument: check if it's a format name or not. + /// We can distinguish them by looking at the 2-nd and 4-th argument: check if it's a format name or not. else if (args.size() == 4) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); @@ -132,14 +133,28 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context args_to_idx = {{"format", 2}, {"structure", 3}}; } else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + { args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; + } else - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + } + else + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; + } + } } - /// For 5 arguments we support 2 possible variants: + /// For 5 arguments we support 3 possible variants: /// - s3(source, access_key_id, access_key_id, format, structure) + /// - s3(source, access_key_id, access_key_id, session_token, format) /// - s3(source, NOSIGN, format, structure, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not. + /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or no, + /// and by the 4-th argument, check if it's a format name or not else if (args.size() == 5) { auto second_arg = checkAndGetLiteralArgument(args[1], "NOSIGN/access_key_id"); @@ -149,7 +164,33 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context args_to_idx = {{"format", 2}, {"structure", 3}, {"compression_method", 4}}; } else - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; + } + else + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } + } + } + // For 6 arguments we support 2 possible variants: + /// - s3(source, access_key_id, access_key_id, format, structure, compression_method) + /// - s3(source, access_key_id, access_key_id, session_token, format, structure) + /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; + } + else + { + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}}; + } } else { @@ -181,6 +222,9 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context if (args_to_idx.contains("secret_access_key")) configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(args[args_to_idx["secret_access_key"]], "secret_access_key"); + if (args_to_idx.contains("session_token")) + configuration.auth_settings.session_token = checkAndGetLiteralArgument(args[args_to_idx["session_token"]], "session_token"); + configuration.auth_settings.no_sign_request = no_sign_request; if (configuration.format == "auto") diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index fc384176007..fa73c1d313e 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -22,11 +22,15 @@ public: static constexpr auto signature = " - url\n" " - url, format\n" " - url, format, structure\n" - " - url, access_key_id, secret_access_key\n" " - url, format, structure, compression_method\n" + " - url, access_key_id, secret_access_key\n" + " - url, access_key_id, secret_access_key, session_token\n" " - url, access_key_id, secret_access_key, format\n" + " - url, access_key_id, secret_access_key, session_token, format\n" " - url, access_key_id, secret_access_key, format, structure\n" + " - url, access_key_id, secret_access_key, session_token, format, structure\n" " - url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; static size_t getMaxNumberOfArguments() { return 6; } diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h index 4fe25079cf4..718b0d90de8 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ b/src/TableFunctions/TableFunctionS3Cluster.h @@ -35,6 +35,7 @@ public: " - cluster, url, access_key_id, secret_access_key, format\n" " - cluster, url, access_key_id, secret_access_key, format, structure\n" " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; String getName() const override