Merge pull request #18812 from excitoon-favorites/betters3endpoints

Added prefix-based S3 endpoint settings
This commit is contained in:
vdimir 2021-01-26 20:47:57 +03:00 committed by GitHub
commit fc8b8d23b2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 90 additions and 31 deletions

View File

@ -114,6 +114,10 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.
- `_path` — Path to the file.
- `_file` — Name of the file.
**See Also**
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
## S3-related settings {#settings}
The following settings can be set before query execution or placed into configuration file.
@ -124,8 +128,29 @@ The following settings can be set before query execution or placed into configur
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
**See Also**
### Endpoint-based settings {#endpointsettings}
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL):
- `endpoint` — Mandatory. Specifies prefix of an endpoint.
- `access_key_id` and `secret_access_key` — Optional. Specifies credentials to use with given endpoint.
- `use_environment_credentials` — Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint.
- `header` — Optional, can be speficied multiple times. Adds specified HTTP header to a request to given endpoint.
This configuration also applies to S3 disks in `MergeTree` table engine family.
Example:
```
<s3>
<endpoint-name>
<endpoint>https://storage.yandexcloud.net/my-test-bucket-768/</endpoint>
<!-- <access_key_id>ACCESS_KEY_ID</access_key_id> -->
<!-- <secret_access_key>SECRET_ACCESS_KEY</secret_access_key> -->
<!-- <use_environment_credentials>false</use_environment_credentials> -->
<!-- <header>Authorization: Bearer SOME-TOKEN</header> -->
</endpoint-name>
</s3>
```
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/s3/) <!--hide-->

View File

@ -216,7 +216,8 @@ StorageS3::StorageS3(
storage_metadata.setConstraints(constraints_);
setInMemoryMetadata(storage_metadata);
auto settings = context_.getStorageS3Settings().getSettings(uri.endpoint);
auto settings = context_.getStorageS3Settings().getSettings(uri.uri.toString());
Aws::Auth::AWSCredentials credentials(access_key_id_, secret_access_key_);
if (access_key_id_.empty())
credentials = Aws::Auth::AWSCredentials(std::move(settings.access_key_id), std::move(settings.secret_access_key));

View File

@ -3,6 +3,8 @@
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/Exception.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB
{
@ -22,6 +24,8 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U
config.keys(config_elem, config_keys);
for (const String & key : config_keys)
{
if (config.has(config_elem + "." + key + ".endpoint"))
{
auto endpoint = config.getString(config_elem + "." + key + ".endpoint");
auto access_key_id = config.getString(config_elem + "." + key + ".access_key_id", "");
@ -49,13 +53,22 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U
settings.emplace(endpoint, S3AuthSettings{std::move(access_key_id), std::move(secret_access_key), std::move(headers), use_environment_credentials});
}
}
}
S3AuthSettings StorageS3Settings::getSettings(const String & endpoint) const
{
std::lock_guard lock(mutex);
if (auto setting = settings.find(endpoint); setting != settings.end())
return setting->second;
auto next_prefix_setting = settings.upper_bound(endpoint);
/// Linear time algorithm may be replaced with logarithmic with prefix tree map.
for (auto possible_prefix_setting = next_prefix_setting; possible_prefix_setting != settings.begin();)
{
std::advance(possible_prefix_setting, -1);
if (boost::algorithm::starts_with(endpoint, possible_prefix_setting->first))
return possible_prefix_setting->second;
}
return {};
}

View File

@ -61,7 +61,10 @@ class QueryTimeoutExceedException(Exception):
class QueryRuntimeException(Exception):
pass
def __init__(self, message, returncode, stderr):
super(QueryRuntimeException, self).__init__(message)
self.returncode = returncode
self.stderr = stderr
class CommandRequest:
@ -106,7 +109,7 @@ class CommandRequest:
if (self.process.returncode != 0 or stderr) and not self.ignore_error:
raise QueryRuntimeException(
'Client failed! Return code: {}, stderr: {}'.format(self.process.returncode, stderr))
'Client failed! Return code: {}, stderr: {}'.format(self.process.returncode, stderr), self.process.returncode, stderr)
return stdout
@ -122,7 +125,7 @@ class CommandRequest:
raise QueryTimeoutExceedException('Client timed out!')
if (self.process.returncode == 0):
raise QueryRuntimeException('Client expected to be failed but succeeded! stdout: {}'.format(stdout))
raise QueryRuntimeException('Client expected to be failed but succeeded! stdout: {}'.format(stdout), self.process.returncode, stderr)
return stderr

View File

@ -4,5 +4,8 @@
<endpoint>http://resolver:8080</endpoint>
<header>Authorization: Bearer TOKEN</header>
</s3_mock>
<s3_mock_restricted_directory>
<endpoint>http://resolver:8080/root-with-auth/restricteddirectory/</endpoint>
</s3_mock_restricted_directory>
</s3>
</yandex>

View File

@ -1,14 +1,14 @@
from bottle import abort, route, run, request, response
@route('/redirected/<_path>')
@route('/redirected/<_path:path>')
def infinite_redirect(_path):
response.set_header("Location", request.url)
response.status = 307
return 'Redirected'
@route('/<_bucket>/<_path>')
@route('/<_bucket>/<_path:path>')
def server(_bucket, _path):
for name in request.headers:
if name == 'Authorization' and request.headers[name] == 'Bearer TOKEN':

View File

@ -410,6 +410,20 @@ def test_custom_auth_headers(cluster):
assert result == '1\t2\t3\n'
def test_custom_auth_headers_exclusion(cluster):
table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
filename = "test.csv"
get_query = f"SELECT * FROM s3('http://resolver:8080/{cluster.minio_restricted_bucket}/restricteddirectory/{filename}', 'CSV', '{table_format}')"
instance = cluster.instances["dummy"] # type: ClickHouseInstance
with pytest.raises(helpers.client.QueryRuntimeException) as ei:
result = run_query(instance, get_query)
print(result)
assert ei.value.returncode == 243
assert '403 Forbidden' in ei.value.stderr
def test_infinite_redirect(cluster):
bucket = "redirected"
table_format = "column1 UInt32, column2 UInt32, column3 UInt32"