Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-23 16:12:01 +00:00)
Merge pull request #46083 from kssenii/simplify-storage-s3-configuration
Simplify code around storages s3/hudi/delta-lake
Commit 693006ba42
@@ -30,6 +30,7 @@ struct URI
     bool is_virtual_hosted_style;
 
+    URI() = default;
     explicit URI(const std::string & uri_);
 
     static void validateBucket(const std::string & bucket, const Poco::URI & uri);
@@ -109,18 +109,6 @@ struct URLBasedDataSourceConfiguration
     void set(const URLBasedDataSourceConfiguration & conf);
 };
 
-struct StorageS3Configuration : URLBasedDataSourceConfiguration
-{
-    S3::AuthSettings auth_settings;
-    S3Settings::RequestSettings request_settings;
-};
-
-struct StorageS3ClusterConfiguration : StorageS3Configuration
-{
-    String cluster_name;
-};
-
 struct URLBasedDataSourceConfig
 {
     URLBasedDataSourceConfiguration configuration;
src/Storages/StorageConfiguration.h (new file, 15 lines)
@@ -0,0 +1,15 @@
+#pragma once
+#include <Core/Types.h>
+
+namespace DB
+{
+
+/// A base class for stateless table engines configurations.
+struct StatelessTableEngineConfiguration
+{
+    String format = "auto";
+    String compression_method = "auto";
+    String structure = "auto";
+};
+
+}
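As an aside, here is a minimal sketch (not part of this commit) of how an engine-specific configuration could derive from the new StatelessTableEngineConfiguration base; the struct name and the extra fields are illustrative assumptions only.

#include <Storages/StorageConfiguration.h>

namespace DB
{

// Illustrative only: a hypothetical engine configuration reusing the new base,
// inheriting format/compression_method/structure and adding engine-specific fields.
struct HypotheticalEngineConfiguration : public StatelessTableEngineConfiguration
{
    String path;             // assumed engine-specific field
    bool read_only = false;  // assumed engine-specific field
};

}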
@@ -57,8 +57,8 @@ std::vector<String> DeltaLakeMetadata::listCurrentFiles() &&
     return keys;
 }
 
-JsonMetadataGetter::JsonMetadataGetter(StorageS3::S3Configuration & configuration_, const String & table_path_, ContextPtr context)
-    : base_configuration(configuration_), table_path(table_path_)
+JsonMetadataGetter::JsonMetadataGetter(const StorageS3::Configuration & configuration_, ContextPtr context)
+    : base_configuration(configuration_)
 {
     init(context);
 }
@@ -94,17 +94,17 @@ void JsonMetadataGetter::init(ContextPtr context)
     }
 }
 
-std::vector<String> JsonMetadataGetter::getJsonLogFiles()
+std::vector<String> JsonMetadataGetter::getJsonLogFiles() const
 {
-    std::vector<String> keys;
-
     const auto & client = base_configuration.client;
+    const auto table_path = base_configuration.url.key;
+    const auto bucket = base_configuration.url.bucket;
 
+    std::vector<String> keys;
     S3::ListObjectsV2Request request;
     Aws::S3::Model::ListObjectsV2Outcome outcome;
 
     bool is_finished{false};
-    const auto bucket{base_configuration.uri.bucket};
 
     request.SetBucket(bucket);
@@ -154,9 +154,9 @@ std::shared_ptr<ReadBuffer> JsonMetadataGetter::createS3ReadBuffer(const String
     request_settings.max_single_read_retries = 10;
     return std::make_shared<ReadBufferFromS3>(
         base_configuration.client,
-        base_configuration.uri.bucket,
+        base_configuration.url.bucket,
         key,
-        base_configuration.uri.version_id,
+        base_configuration.url.version_id,
         request_settings,
         context->getReadSettings());
 }
@@ -182,11 +182,6 @@ void JsonMetadataGetter::handleJSON(const JSON & json)
 namespace
 {
 
-StorageS3::S3Configuration getBaseConfiguration(const StorageS3Configuration & configuration)
-{
-    return {configuration.url, configuration.auth_settings, configuration.request_settings, configuration.headers};
-}
-
 // DeltaLake stores data in parts in different files
 // keys is vector of parts with latest version
 // generateQueryFromKeys constructs query from parts filenames for
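To make the comment above concrete, here is a standalone sketch of the kind of string such a helper produces: the current part-file keys are joined into a single brace-expansion glob that the underlying StorageS3 engine can then read. This is a simplified illustration, not the exact function body from the commit.

#include <string>
#include <vector>

// Sketch: join part-file keys into a "{key1,key2,...}" pattern understood by StorageS3.
static std::string makeQueryFromKeys(const std::vector<std::string> & keys)
{
    std::string list_of_keys;
    for (const auto & key : keys)
    {
        if (!list_of_keys.empty())
            list_of_keys += ",";
        list_of_keys += key;
    }
    return "{" + list_of_keys + "}";
}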
@@ -198,35 +193,25 @@ String generateQueryFromKeys(const std::vector<String> & keys)
 }
 
-StorageS3Configuration getAdjustedS3Configuration(
-    const ContextPtr & context,
-    StorageS3::S3Configuration & base_configuration,
-    const StorageS3Configuration & configuration,
-    const std::string & table_path,
-    Poco::Logger * log)
+StorageS3::Configuration getAdjustedS3Configuration(
+    const ContextPtr & context, const StorageS3::Configuration & configuration, Poco::Logger * log)
 {
-    JsonMetadataGetter getter{base_configuration, table_path, context};
-
-    auto keys = getter.getFiles();
-    auto new_uri = base_configuration.uri.uri.toString() + generateQueryFromKeys(keys);
-
-    LOG_DEBUG(log, "New uri: {}", new_uri);
-    LOG_DEBUG(log, "Table path: {}", table_path);
+    JsonMetadataGetter getter{configuration, context};
+    const auto keys = getter.getFiles();
+    const auto new_uri = configuration.url.uri.toString() + generateQueryFromKeys(keys);
 
     // set new url in configuration
-    StorageS3Configuration new_configuration;
-    new_configuration.url = new_uri;
-    new_configuration.auth_settings.access_key_id = configuration.auth_settings.access_key_id;
-    new_configuration.auth_settings.secret_access_key = configuration.auth_settings.secret_access_key;
-    new_configuration.format = configuration.format;
+    StorageS3::Configuration new_configuration(configuration);
+    new_configuration.url = S3::URI(new_uri);
 
+    LOG_DEBUG(log, "Table path: {}, new uri: {}", configuration.url.key, new_uri);
     return new_configuration;
 }
 
 }
 
 StorageDeltaLake::StorageDeltaLake(
-    const StorageS3Configuration & configuration_,
+    const StorageS3::Configuration & configuration_,
     const StorageID & table_id_,
     ColumnsDescription columns_,
     const ConstraintsDescription & constraints_,
@@ -234,14 +219,13 @@ StorageDeltaLake::StorageDeltaLake(
     ContextPtr context_,
     std::optional<FormatSettings> format_settings_)
     : IStorage(table_id_)
-    , base_configuration{getBaseConfiguration(configuration_)}
+    , base_configuration{configuration_}
     , log(&Poco::Logger::get("StorageDeltaLake (" + table_id_.table_name + ")"))
-    , table_path(base_configuration.uri.key)
+    , table_path(base_configuration.url.key)
 {
     StorageInMemoryMetadata storage_metadata;
     StorageS3::updateS3Configuration(context_, base_configuration);
 
-    auto new_configuration = getAdjustedS3Configuration(context_, base_configuration, configuration_, table_path, log);
+    auto new_configuration = getAdjustedS3Configuration(context_, base_configuration, log);
 
     if (columns_.empty())
     {
@@ -284,14 +268,11 @@ Pipe StorageDeltaLake::read(
 }
 
 ColumnsDescription StorageDeltaLake::getTableStructureFromData(
-    const StorageS3Configuration & configuration, const std::optional<FormatSettings> & format_settings, ContextPtr ctx)
+    StorageS3::Configuration & configuration, const std::optional<FormatSettings> & format_settings, ContextPtr ctx)
 {
-    auto base_configuration = getBaseConfiguration(configuration);
-    StorageS3::updateS3Configuration(ctx, base_configuration);
-    auto new_configuration = getAdjustedS3Configuration(
-        ctx, base_configuration, configuration, base_configuration.uri.key, &Poco::Logger::get("StorageDeltaLake"));
-    return StorageS3::getTableStructureFromData(
-        new_configuration, /*distributed processing*/ false, format_settings, ctx, /*object_infos*/ nullptr);
+    StorageS3::updateS3Configuration(ctx, configuration);
+    auto new_configuration = getAdjustedS3Configuration(ctx, configuration, &Poco::Logger::get("StorageDeltaLake"));
+    return StorageS3::getTableStructureFromData(new_configuration, /*distributed processing*/ false, format_settings, ctx, /*object_infos*/ nullptr);
 }
 
 void registerStorageDeltaLake(StorageFactory & factory)
@@ -306,9 +287,9 @@ void registerStorageDeltaLake(StorageFactory & factory)
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                 "Storage DeltaLake requires 3 to 4 arguments: table_url, access_key, secret_access_key, [format]");
 
-        StorageS3Configuration configuration;
+        StorageS3::Configuration configuration;
 
-        configuration.url = checkAndGetLiteralArgument<String>(engine_args[0], "url");
+        configuration.url = S3::URI(checkAndGetLiteralArgument<String>(engine_args[0], "url"));
         configuration.auth_settings.access_key_id = checkAndGetLiteralArgument<String>(engine_args[1], "access_key_id");
         configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument<String>(engine_args[2], "secret_access_key");
@@ -37,21 +37,20 @@ private:
 class JsonMetadataGetter
 {
 public:
-    JsonMetadataGetter(StorageS3::S3Configuration & configuration_, const String & table_path_, ContextPtr context);
+    JsonMetadataGetter(const StorageS3::Configuration & configuration_, ContextPtr context);
 
     std::vector<String> getFiles() { return std::move(metadata).listCurrentFiles(); }
 
 private:
     void init(ContextPtr context);
 
-    std::vector<String> getJsonLogFiles();
+    std::vector<String> getJsonLogFiles() const;
 
     std::shared_ptr<ReadBuffer> createS3ReadBuffer(const String & key, ContextPtr context);
 
     void handleJSON(const JSON & json);
 
-    StorageS3::S3Configuration base_configuration;
-    String table_path;
+    StorageS3::Configuration base_configuration;
     DeltaLakeMetadata metadata;
 };
@@ -62,7 +61,7 @@ public:
     // 2. Finds out parts with latest version
     // 3. Creates url for underlying StorageS3 enigne to handle reads
     StorageDeltaLake(
-        const StorageS3Configuration & configuration_,
+        const StorageS3::Configuration & configuration_,
         const StorageID & table_id_,
         ColumnsDescription columns_,
         const ConstraintsDescription & constraints_,
@@ -83,11 +82,12 @@ public:
         size_t num_streams) override;
 
     static ColumnsDescription getTableStructureFromData(
-        const StorageS3Configuration & configuration,
+        StorageS3::Configuration & configuration,
         const std::optional<FormatSettings> & format_settings,
         ContextPtr ctx);
 
 private:
-    StorageS3::S3Configuration base_configuration;
+    StorageS3::Configuration base_configuration;
     std::shared_ptr<StorageS3> s3engine;
     Poco::Logger * log;
     String table_path;
@@ -30,11 +30,6 @@ namespace ErrorCodes
 namespace
 {
 
-StorageS3::S3Configuration getBaseConfiguration(const StorageS3Configuration & configuration)
-{
-    return {configuration.url, configuration.auth_settings, configuration.request_settings, configuration.headers};
-}
-
 /// Apache Hudi store parts of data in different files.
 /// Every part file has timestamp in it.
 /// Every partition(directory) in Apache Hudi has different versions of part.
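For illustration, a simplified sketch of the selection rule those comments describe: for every Hudi partition keep only the newest part file. The real implementation in this file parses the timestamp embedded in the file name; the sketch below approximates that by comparing key names lexicographically, so treat it as an assumption-laden outline rather than the committed code.

#include <map>
#include <string>
#include <vector>

// Sketch: keep, per partition (directory prefix), only the newest part key.
static std::vector<std::string> latestPartPerPartition(const std::vector<std::string> & keys)
{
    std::map<std::string, std::string> latest;  // partition prefix -> newest key seen so far
    for (const auto & key : keys)
    {
        const auto slash = key.find_last_of('/');
        const std::string partition = (slash == std::string::npos) ? "" : key.substr(0, slash);
        auto & best = latest[partition];
        if (best.empty() || key > best)  // crude stand-in for comparing embedded timestamps
            best = key;
    }
    std::vector<std::string> result;
    result.reserve(latest.size());
    for (const auto & kv : latest)
        result.push_back(kv.second);
    return result;
}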
@@ -91,17 +86,17 @@ String generateQueryFromKeys(const std::vector<std::string> & keys, const String
     return "{" + list_of_keys + "}";
 }
 
-std::vector<std::string> getKeysFromS3(const StorageS3::S3Configuration & base_configuration, const std::string & table_path, Poco::Logger * log)
+std::vector<std::string> getKeysFromS3(const StorageS3::Configuration & configuration, Poco::Logger * log)
 {
+    const auto & client = configuration.client;
+    const auto & table_path = configuration.url.key;
+    const auto & bucket = configuration.url.bucket;
+
     std::vector<std::string> keys;
 
-    const auto & client = base_configuration.client;
-
     S3::ListObjectsV2Request request;
     Aws::S3::Model::ListObjectsV2Outcome outcome;
 
     bool is_finished{false};
-    const auto bucket{base_configuration.uri.bucket};
 
     request.SetBucket(bucket);
     request.SetPrefix(table_path);
@@ -134,31 +129,22 @@ std::vector<std::string> getKeysFromS3(const StorageS3::S3Configuration & base_c
 }
 
-StorageS3Configuration getAdjustedS3Configuration(
-    StorageS3::S3Configuration & base_configuration,
-    const StorageS3Configuration & configuration,
-    const std::string & table_path,
-    Poco::Logger * log)
+StorageS3::Configuration getAdjustedS3Configuration(const StorageS3::Configuration & configuration, Poco::Logger * log)
 {
-    auto keys = getKeysFromS3(base_configuration, table_path, log);
-    auto new_uri = base_configuration.uri.uri.toString() + generateQueryFromKeys(keys, configuration.format);
+    const auto keys = getKeysFromS3(configuration, log);
+    const auto new_uri = configuration.url.uri.toString() + generateQueryFromKeys(keys, configuration.format);
 
-    LOG_DEBUG(log, "New uri: {}", new_uri);
-    LOG_DEBUG(log, "Table path: {}", table_path);
-
-    StorageS3Configuration new_configuration;
-    new_configuration.url = new_uri;
-    new_configuration.auth_settings.access_key_id = configuration.auth_settings.access_key_id;
-    new_configuration.auth_settings.secret_access_key = configuration.auth_settings.secret_access_key;
-    new_configuration.format = configuration.format;
+    StorageS3::Configuration new_configuration(configuration);
+    new_configuration.url = S3::URI(new_uri);
 
+    LOG_DEBUG(log, "Table path: {}, new uri: {}", configuration.url.key, new_uri);
     return new_configuration;
 }
 
 }
 
 StorageHudi::StorageHudi(
-    const StorageS3Configuration & configuration_,
+    const StorageS3::Configuration & configuration_,
     const StorageID & table_id_,
     ColumnsDescription columns_,
     const ConstraintsDescription & constraints_,
@@ -166,14 +152,13 @@ StorageHudi::StorageHudi(
     ContextPtr context_,
     std::optional<FormatSettings> format_settings_)
     : IStorage(table_id_)
-    , base_configuration{getBaseConfiguration(configuration_)}
+    , base_configuration{configuration_}
     , log(&Poco::Logger::get("StorageHudi (" + table_id_.table_name + ")"))
-    , table_path(base_configuration.uri.key)
 {
     StorageInMemoryMetadata storage_metadata;
     StorageS3::updateS3Configuration(context_, base_configuration);
 
-    auto new_configuration = getAdjustedS3Configuration(base_configuration, configuration_, table_path, log);
+    auto new_configuration = getAdjustedS3Configuration(base_configuration, log);
 
     if (columns_.empty())
     {
@@ -214,14 +199,11 @@ Pipe StorageHudi::read(
 }
 
 ColumnsDescription StorageHudi::getTableStructureFromData(
-    const StorageS3Configuration & configuration, const std::optional<FormatSettings> & format_settings, ContextPtr ctx)
+    StorageS3::Configuration & configuration, const std::optional<FormatSettings> & format_settings, ContextPtr ctx)
 {
-    auto base_configuration = getBaseConfiguration(configuration);
-    StorageS3::updateS3Configuration(ctx, base_configuration);
-    auto new_configuration = getAdjustedS3Configuration(
-        base_configuration, configuration, base_configuration.uri.key, &Poco::Logger::get("StorageDeltaLake"));
-    return StorageS3::getTableStructureFromData(
-        new_configuration, /*distributed processing*/ false, format_settings, ctx, /*object_infos*/ nullptr);
+    StorageS3::updateS3Configuration(ctx, configuration);
+    auto new_configuration = getAdjustedS3Configuration(configuration, &Poco::Logger::get("StorageDeltaLake"));
+    return StorageS3::getTableStructureFromData(new_configuration, /*distributed processing*/ false, format_settings, ctx, /*object_infos*/ nullptr);
 }
 
 void registerStorageHudi(StorageFactory & factory)
@@ -236,9 +218,9 @@ void registerStorageHudi(StorageFactory & factory)
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                 "Storage Hudi requires 3 to 4 arguments: table_url, access_key, secret_access_key, [format]");
 
-        StorageS3Configuration configuration;
+        StorageS3::Configuration configuration;
 
-        configuration.url = checkAndGetLiteralArgument<String>(engine_args[0], "url");
+        configuration.url = S3::URI(checkAndGetLiteralArgument<String>(engine_args[0], "url"));
         configuration.auth_settings.access_key_id = checkAndGetLiteralArgument<String>(engine_args[1], "access_key_id");
         configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument<String>(engine_args[2], "secret_access_key");
@@ -22,7 +22,7 @@ public:
     /// 2. Finds out parts with latest version.
     /// 3. Creates url for underlying StorageS3 enigne to handle reads.
     StorageHudi(
-        const StorageS3Configuration & configuration_,
+        const StorageS3::Configuration & configuration_,
         const StorageID & table_id_,
         ColumnsDescription columns_,
         const ConstraintsDescription & constraints_,
@@ -44,14 +44,14 @@ public:
         size_t num_streams) override;
 
     static ColumnsDescription getTableStructureFromData(
-        const StorageS3Configuration & configuration,
+        StorageS3::Configuration & configuration,
         const std::optional<FormatSettings> & format_settings,
         ContextPtr ctx);
 
 private:
-    StorageS3::S3Configuration base_configuration;
+    StorageS3::Configuration base_configuration;
     std::shared_ptr<StorageS3> s3engine;
     Poco::Logger * log;
     String table_path;
 };
 
 }
@@ -764,7 +764,7 @@ public:
         ContextPtr context,
         std::optional<FormatSettings> format_settings_,
         const CompressionMethod compression_method,
-        const StorageS3::S3Configuration & s3_configuration_,
+        const StorageS3::Configuration & s3_configuration_,
         const String & bucket,
         const String & key)
         : SinkToStorage(sample_block_)
@@ -855,7 +855,7 @@ public:
         ContextPtr context_,
         std::optional<FormatSettings> format_settings_,
         const CompressionMethod compression_method_,
-        const StorageS3::S3Configuration & s3_configuration_,
+        const StorageS3::Configuration & s3_configuration_,
         const String & bucket_,
         const String & key_)
         : PartitionedSink(partition_by, context_, sample_block_)
@@ -895,7 +895,7 @@ private:
     const Block sample_block;
     ContextPtr context;
     const CompressionMethod compression_method;
-    const StorageS3::S3Configuration & s3_configuration;
+    const StorageS3::Configuration & s3_configuration;
     const String bucket;
     const String key;
     std::optional<FormatSettings> format_settings;
@@ -930,7 +930,7 @@ private:
 
 
 StorageS3::StorageS3(
-    const StorageS3Configuration & configuration_,
+    const StorageS3::Configuration & configuration_,
     const StorageID & table_id_,
     const ColumnsDescription & columns_,
     const ConstraintsDescription & constraints_,
@@ -940,18 +940,18 @@ StorageS3::StorageS3(
     bool distributed_processing_,
     ASTPtr partition_by_)
     : IStorage(table_id_)
-    , s3_configuration{configuration_.url, configuration_.auth_settings, configuration_.request_settings, configuration_.headers}
-    , keys({s3_configuration.uri.key})
+    , s3_configuration{configuration_}
+    , keys({s3_configuration.url.key})
     , format_name(configuration_.format)
     , compression_method(configuration_.compression_method)
-    , name(s3_configuration.uri.storage_name)
+    , name(s3_configuration.url.storage_name)
     , distributed_processing(distributed_processing_)
     , format_settings(format_settings_)
     , partition_by(partition_by_)
-    , is_key_with_globs(s3_configuration.uri.key.find_first_of("*?{") != std::string::npos)
+    , is_key_with_globs(s3_configuration.url.key.find_first_of("*?{") != std::string::npos)
 {
     FormatFactory::instance().checkFormatName(format_name);
-    context_->getGlobalContext()->getRemoteHostFilter().checkURL(s3_configuration.uri.uri);
+    context_->getGlobalContext()->getRemoteHostFilter().checkURL(s3_configuration.url.uri);
     StorageInMemoryMetadata storage_metadata;
 
     updateS3Configuration(context_, s3_configuration);
@@ -987,7 +987,7 @@ StorageS3::StorageS3(
 }
 
 std::shared_ptr<StorageS3Source::IIterator> StorageS3::createFileIterator(
-    const S3Configuration & s3_configuration,
+    const Configuration & s3_configuration,
     const std::vector<String> & keys,
     bool is_key_with_globs,
     bool distributed_processing,
@@ -1005,14 +1005,14 @@ std::shared_ptr<StorageS3Source::IIterator> StorageS3::createFileIterator(
     {
         /// Iterate through disclosed globs and make a source for each file
         return std::make_shared<StorageS3Source::DisclosedGlobIterator>(
-            *s3_configuration.client, s3_configuration.uri, query, virtual_block,
+            *s3_configuration.client, s3_configuration.url, query, virtual_block,
             local_context, object_infos, read_keys, s3_configuration.request_settings);
     }
     else
    {
         return std::make_shared<StorageS3Source::KeysIterator>(
-            *s3_configuration.client, s3_configuration.uri.version_id, keys,
-            s3_configuration.uri.bucket, s3_configuration.request_settings, query, virtual_block, local_context,
+            *s3_configuration.client, s3_configuration.url.version_id, keys,
+            s3_configuration.url.bucket, s3_configuration.request_settings, query, virtual_block, local_context,
             object_infos, read_keys);
     }
 }
@@ -1036,7 +1036,7 @@ Pipe StorageS3::read(
     size_t max_block_size,
     size_t num_streams)
 {
-    bool has_wildcards = s3_configuration.uri.bucket.find(PARTITION_ID_WILDCARD) != String::npos
+    bool has_wildcards = s3_configuration.url.bucket.find(PARTITION_ID_WILDCARD) != String::npos
         || keys.back().find(PARTITION_ID_WILDCARD) != String::npos;
     if (partition_by && has_wildcards)
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned S3 storage is not implemented yet");
@@ -1102,8 +1102,8 @@ Pipe StorageS3::read(
             s3_configuration.request_settings,
             compression_method,
             s3_configuration.client,
-            s3_configuration.uri.bucket,
-            s3_configuration.uri.version_id,
+            s3_configuration.url.bucket,
+            s3_configuration.url.version_id,
             iterator_wrapper,
             max_download_threads));
     }
@@ -1120,7 +1120,7 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr
 
     auto sample_block = metadata_snapshot->getSampleBlock();
     auto chosen_compression_method = chooseCompressionMethod(keys.back(), compression_method);
-    bool has_wildcards = s3_configuration.uri.bucket.find(PARTITION_ID_WILDCARD) != String::npos || keys.back().find(PARTITION_ID_WILDCARD) != String::npos;
+    bool has_wildcards = s3_configuration.url.bucket.find(PARTITION_ID_WILDCARD) != String::npos || keys.back().find(PARTITION_ID_WILDCARD) != String::npos;
     auto insert_query = std::dynamic_pointer_cast<ASTInsertQuery>(query);
 
     auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr;
@@ -1136,18 +1136,18 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr
             format_settings,
             chosen_compression_method,
             s3_configuration,
-            s3_configuration.uri.bucket,
+            s3_configuration.url.bucket,
             keys.back());
     }
     else
     {
         if (is_key_with_globs)
             throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED,
-                "S3 key '{}' contains globs, so the table is in readonly mode", s3_configuration.uri.key);
+                "S3 key '{}' contains globs, so the table is in readonly mode", s3_configuration.url.key);
 
         bool truncate_in_insert = local_context->getSettingsRef().s3_truncate_on_insert;
 
-        if (!truncate_in_insert && S3::objectExists(*s3_configuration.client, s3_configuration.uri.bucket, keys.back(), s3_configuration.uri.version_id, s3_configuration.request_settings))
+        if (!truncate_in_insert && S3::objectExists(*s3_configuration.client, s3_configuration.url.bucket, keys.back(), s3_configuration.url.version_id, s3_configuration.request_settings))
         {
             if (local_context->getSettingsRef().s3_create_new_file_on_insert)
             {
@@ -1159,7 +1159,7 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr
                     new_key = keys[0].substr(0, pos) + "." + std::to_string(index) + (pos == std::string::npos ? "" : keys[0].substr(pos));
                     ++index;
                 }
-                while (S3::objectExists(*s3_configuration.client, s3_configuration.uri.bucket, new_key, s3_configuration.uri.version_id, s3_configuration.request_settings));
+                while (S3::objectExists(*s3_configuration.client, s3_configuration.url.bucket, new_key, s3_configuration.url.version_id, s3_configuration.request_settings));
                 keys.push_back(new_key);
             }
             else
@@ -1168,7 +1168,7 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr
                     "Object in bucket {} with key {} already exists. "
                     "If you want to overwrite it, enable setting s3_truncate_on_insert, if you "
                     "want to create a new file on each insert, enable setting s3_create_new_file_on_insert",
-                    s3_configuration.uri.bucket,
+                    s3_configuration.url.bucket,
                     keys.back());
             }
 
@@ -1179,7 +1179,7 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr
             format_settings,
             chosen_compression_method,
             s3_configuration,
-            s3_configuration.uri.bucket,
+            s3_configuration.url.bucket,
             keys.back());
     }
 }
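As a side note on the s3_create_new_file_on_insert path above: the loop derives candidate keys by inserting a counter before the first extension, e.g. data.parquet -> data.1.parquet -> data.2.parquet, until a non-existent object is found. Below is a self-contained sketch of that naming rule; the objectExists callback is a stand-in for the real S3 existence check, not an API from this codebase.

#include <functional>
#include <string>

// Sketch of the key-suffixing rule used when a key already exists and
// s3_create_new_file_on_insert is enabled.
static std::string nextFreeKey(const std::string & key, const std::function<bool(const std::string &)> & objectExists)
{
    const auto pos = key.find_first_of('.');
    size_t index = 1;
    std::string new_key;
    do
    {
        new_key = key.substr(0, pos) + "." + std::to_string(index)
            + (pos == std::string::npos ? "" : key.substr(pos));
        ++index;
    }
    while (objectExists(new_key));
    return new_key;
}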
@@ -1190,7 +1190,7 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &,
 
     if (is_key_with_globs)
         throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED,
-            "S3 key '{}' contains globs, so the table is in readonly mode", s3_configuration.uri.key);
+            "S3 key '{}' contains globs, so the table is in readonly mode", s3_configuration.url.key);
 
     Aws::S3::Model::Delete delkeys;
 
@@ -1203,7 +1203,7 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &,
 
     ProfileEvents::increment(ProfileEvents::S3DeleteObjects);
     S3::DeleteObjectsRequest request;
-    request.SetBucket(s3_configuration.uri.bucket);
+    request.SetBucket(s3_configuration.url.bucket);
     request.SetDelete(delkeys);
 
     auto response = s3_configuration.client->DeleteObjects(request);
@@ -1218,9 +1218,9 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &,
 }
 
 
-void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration & upd)
+void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::Configuration & upd)
 {
-    auto settings = ctx->getStorageS3Settings().getSettings(upd.uri.uri.toString());
+    auto settings = ctx->getStorageS3Settings().getSettings(upd.url.uri.toString());
     upd.request_settings = settings.request_settings;
     upd.request_settings.updateFromSettings(ctx->getSettings());
 
@@ -1244,7 +1244,7 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration
         upd.request_settings.get_request_throttler,
         upd.request_settings.put_request_throttler);
 
-    client_configuration.endpointOverride = upd.uri.endpoint;
+    client_configuration.endpointOverride = upd.url.endpoint;
     client_configuration.maxConnections = static_cast<unsigned>(upd.request_settings.max_connections);
 
     auto credentials = Aws::Auth::AWSCredentials(upd.auth_settings.access_key_id, upd.auth_settings.secret_access_key);
@@ -1254,7 +1254,7 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration
 
     upd.client = S3::ClientFactory::instance().create(
         client_configuration,
-        upd.uri.is_virtual_hosted_style,
+        upd.url.is_virtual_hosted_style,
         credentials.GetAWSAccessKeyId(),
         credentials.GetAWSSecretKey(),
         upd.auth_settings.server_side_encryption_customer_key_base64,
@@ -1263,15 +1263,15 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration
         upd.auth_settings.use_insecure_imds_request.value_or(ctx->getConfigRef().getBool("s3.use_insecure_imds_request", false)));
 }
 
-void StorageS3::processNamedCollectionResult(StorageS3Configuration & configuration, const NamedCollection & collection)
+void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection)
 {
     validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys);
 
-    configuration.url = collection.get<String>("url");
-
     auto filename = collection.getOrDefault<String>("filename", "");
     if (!filename.empty())
-        configuration.url = std::filesystem::path(configuration.url) / filename;
+        configuration.url = S3::URI(std::filesystem::path(collection.get<String>("url")) / filename);
+    else
+        configuration.url = S3::URI(collection.get<String>("url"));
 
     configuration.auth_settings.access_key_id = collection.getOrDefault<String>("access_key_id", "");
     configuration.auth_settings.secret_access_key = collection.getOrDefault<String>("secret_access_key", "");
@@ -1284,9 +1284,9 @@ void StorageS3::processNamedCollectionResult(StorageS3Configuration & configurat
     configuration.request_settings = S3Settings::RequestSettings(collection);
 }
 
-StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPtr local_context)
+StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPtr local_context)
 {
-    StorageS3Configuration configuration;
+    StorageS3::Configuration configuration;
 
     if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args))
     {
@@ -1307,14 +1307,14 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt
                 "Storage S3 requires 1 to 5 arguments: "
                 "url, [access_key_id, secret_access_key], name of used format and [compression_method].");
 
-        auto * header_it = StorageURL::collectHeaders(engine_args, configuration.headers, local_context);
+        auto * header_it = StorageURL::collectHeaders(engine_args, configuration.headers_from_ast, local_context);
         if (header_it != engine_args.end())
             engine_args.erase(header_it);
 
         for (auto & engine_arg : engine_args)
             engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context);
 
-        configuration.url = checkAndGetLiteralArgument<String>(engine_args[0], "url");
+        configuration.url = S3::URI(checkAndGetLiteralArgument<String>(engine_args[0], "url"));
         if (engine_args.size() >= 4)
         {
             configuration.auth_settings.access_key_id = checkAndGetLiteralArgument<String>(engine_args[1], "access_key_id");
@@ -1332,36 +1332,30 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt
             configuration.format = checkAndGetLiteralArgument<String>(engine_args.back(), "format");
         }
     }
+    configuration.static_configuration = !configuration.auth_settings.access_key_id.empty();
 
     if (configuration.format == "auto")
-        configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url, true);
+        configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url.key, true);
 
     return configuration;
 }
 
 ColumnsDescription StorageS3::getTableStructureFromData(
-    const StorageS3Configuration & configuration,
+    StorageS3::Configuration & configuration,
     bool distributed_processing,
     const std::optional<FormatSettings> & format_settings,
     ContextPtr ctx,
     ObjectInfos * object_infos)
 {
-    S3Configuration s3_configuration{
-        configuration.url,
-        configuration.auth_settings,
-        S3Settings::RequestSettings(ctx->getSettingsRef()),
-        configuration.headers};
-
-    updateS3Configuration(ctx, s3_configuration);
-
+    updateS3Configuration(ctx, configuration);
     return getTableStructureFromDataImpl(
-        configuration.format, s3_configuration, configuration.compression_method, distributed_processing,
-        s3_configuration.uri.key.find_first_of("*?{") != std::string::npos, format_settings, ctx, object_infos);
+        configuration.format, configuration, configuration.compression_method, distributed_processing,
+        configuration.url.key.find_first_of("*?{") != std::string::npos, format_settings, ctx, object_infos);
 }
 
 ColumnsDescription StorageS3::getTableStructureFromDataImpl(
     const String & format,
-    const S3Configuration & s3_configuration,
+    const Configuration & s3_configuration,
     const String & compression_method,
     bool distributed_processing,
     bool is_key_with_globs,
@@ -1373,7 +1367,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
 
     auto file_iterator = createFileIterator(
         s3_configuration,
-        {s3_configuration.uri.key},
+        {s3_configuration.url.key},
         is_key_with_globs,
         distributed_processing,
         ctx, nullptr,
@@ -1415,7 +1409,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
         int zstd_window_log_max = static_cast<int>(ctx->getSettingsRef().zstd_window_log_max);
         return wrapReadBufferWithCompressionMethod(
             std::make_unique<ReadBufferFromS3>(
-                s3_configuration.client, s3_configuration.uri.bucket, key, s3_configuration.uri.version_id, s3_configuration.request_settings, ctx->getReadSettings()),
+                s3_configuration.client, s3_configuration.url.bucket, key, s3_configuration.url.version_id, s3_configuration.request_settings, ctx->getReadSettings()),
             chooseCompressionMethod(key, compression_method),
             zstd_window_log_max);
     };
@@ -1525,7 +1519,7 @@ SchemaCache & StorageS3::getSchemaCache(const ContextPtr & ctx)
 std::optional<ColumnsDescription> StorageS3::tryGetColumnsFromCache(
     const Strings::const_iterator & begin,
     const Strings::const_iterator & end,
-    const S3Configuration & s3_configuration,
+    const Configuration & s3_configuration,
     ObjectInfos * object_infos,
     const String & format_name,
     const std::optional<FormatSettings> & format_settings,
@@ -1534,7 +1528,7 @@ std::optional<ColumnsDescription> StorageS3::tryGetColumnsFromCache(
     auto & schema_cache = getSchemaCache(ctx);
     for (auto it = begin; it < end; ++it)
     {
-        String path = fs::path(s3_configuration.uri.bucket) / *it;
+        String path = fs::path(s3_configuration.url.bucket) / *it;
         auto get_last_mod_time = [&]() -> std::optional<time_t>
         {
             S3::ObjectInfo info;
@@ -1547,7 +1541,7 @@ std::optional<ColumnsDescription> StorageS3::tryGetColumnsFromCache(
             /// Note that in case of exception in getObjectInfo returned info will be empty,
             /// but schema cache will handle this case and won't return columns from cache
             /// because we can't say that it's valid without last modification time.
-            info = S3::getObjectInfo(*s3_configuration.client, s3_configuration.uri.bucket, *it, s3_configuration.uri.version_id, s3_configuration.request_settings,
+            info = S3::getObjectInfo(*s3_configuration.client, s3_configuration.url.bucket, *it, s3_configuration.url.version_id, s3_configuration.request_settings,
                 {}, {}, /* throw_on_error= */ false);
             if (object_infos)
                 (*object_infos)[path] = info;
@@ -1559,7 +1553,7 @@ std::optional<ColumnsDescription> StorageS3::tryGetColumnsFromCache(
             return std::nullopt;
         };
 
-        String source = fs::path(s3_configuration.uri.uri.getHost() + std::to_string(s3_configuration.uri.uri.getPort())) / path;
+        String source = fs::path(s3_configuration.url.uri.getHost() + std::to_string(s3_configuration.url.uri.getPort())) / path;
         auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, ctx);
         auto columns = schema_cache.tryGet(cache_key, get_last_mod_time);
         if (columns)
@@ -1571,13 +1565,13 @@ std::optional<ColumnsDescription> StorageS3::tryGetColumnsFromCache(
 
 void StorageS3::addColumnsToCache(
     const Strings & keys,
-    const S3Configuration & s3_configuration,
+    const Configuration & s3_configuration,
     const ColumnsDescription & columns,
     const String & format_name,
     const std::optional<FormatSettings> & format_settings,
     const ContextPtr & ctx)
 {
-    auto host_and_bucket = fs::path(s3_configuration.uri.uri.getHost() + std::to_string(s3_configuration.uri.uri.getPort())) / s3_configuration.uri.bucket;
+    auto host_and_bucket = fs::path(s3_configuration.url.uri.getHost() + std::to_string(s3_configuration.url.uri.getPort())) / s3_configuration.url.bucket;
     Strings sources;
     sources.reserve(keys.size());
     std::transform(keys.begin(), keys.end(), std::back_inserter(sources), [&](const String & key){ return host_and_bucket / key; });
@@ -21,6 +21,7 @@
 #include <Interpreters/threadPoolCallbackRunner.h>
 #include <Storages/ExternalDataSourceConfiguration.h>
 #include <Storages/Cache/SchemaCache.h>
+#include <Storages/StorageConfiguration.h>
 
 namespace Aws::S3
 {
@@ -238,8 +239,21 @@ private:
 class StorageS3 : public IStorage, WithContext
 {
 public:
+    struct Configuration : public StatelessTableEngineConfiguration
+    {
+        S3::URI url;
+        std::shared_ptr<const S3::Client> client;
+        S3::AuthSettings auth_settings;
+        S3Settings::RequestSettings request_settings;
+        /// If s3 configuration was passed from ast, then it is static.
+        /// If from config - it can be changed with config reload.
+        bool static_configuration = true;
+        /// Headers from ast is a part of static configuration.
+        HTTPHeaderEntries headers_from_ast;
+    };
+
     StorageS3(
-        const StorageS3Configuration & configuration_,
+        const StorageS3::Configuration & configuration_,
         const StorageID & table_id_,
         const ColumnsDescription & columns_,
         const ConstraintsDescription & constraints_,
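For orientation, a hedged sketch (not code from this commit) of how the new StorageS3::Configuration is typically filled in before updateS3Configuration() creates the client: URL and credentials come straight from the query arguments, and static_configuration records that they did, mirroring the comment inside the struct. The literal values below are illustrative.

#include <Storages/StorageS3.h>

// Illustrative only: populate a Configuration from explicit (AST-style) arguments.
StorageS3::Configuration makeConfigurationSketch()
{
    StorageS3::Configuration configuration;
    configuration.url = S3::URI("https://my-bucket.s3.amazonaws.com/data/*.parquet");  // assumed URL
    configuration.auth_settings.access_key_id = "AKIA...";                             // assumed credentials
    configuration.auth_settings.secret_access_key = "secret";
    configuration.format = "Parquet";
    /// Credentials were given explicitly, so a config reload will not change them.
    configuration.static_configuration = !configuration.auth_settings.access_key_id.empty();
    return configuration;
}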
@@ -271,45 +285,18 @@ public:
 
     bool supportsPartitionBy() const override;
 
-    static StorageS3Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context);
+    static StorageS3::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context);
 
     using ObjectInfos = StorageS3Source::ObjectInfos;
 
     static ColumnsDescription getTableStructureFromData(
-        const StorageS3Configuration & configuration,
+        StorageS3::Configuration & configuration,
         bool distributed_processing,
         const std::optional<FormatSettings> & format_settings,
         ContextPtr ctx,
         ObjectInfos * object_infos = nullptr);
 
-    static void processNamedCollectionResult(StorageS3Configuration & configuration, const NamedCollection & collection);
-
-    struct S3Configuration
-    {
-        const S3::URI uri;
-        std::shared_ptr<const S3::Client> client;
-
-        S3::AuthSettings auth_settings;
-        S3Settings::RequestSettings request_settings;
-
-        /// If s3 configuration was passed from ast, then it is static.
-        /// If from config - it can be changed with config reload.
-        bool static_configuration = true;
-
-        /// Headers from ast is a part of static configuration.
-        HTTPHeaderEntries headers_from_ast;
-
-        S3Configuration(
-            const String & url_,
-            const S3::AuthSettings & auth_settings_,
-            const S3Settings::RequestSettings & request_settings_,
-            const HTTPHeaderEntries & headers_from_ast_)
-            : uri(S3::URI(url_))
-            , auth_settings(auth_settings_)
-            , request_settings(request_settings_)
-            , static_configuration(!auth_settings_.access_key_id.empty())
-            , headers_from_ast(headers_from_ast_) {}
-    };
+    static void processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection);
 
     static SchemaCache & getSchemaCache(const ContextPtr & ctx);
@@ -319,7 +306,7 @@ private:
     friend class StorageHudi;
     friend class StorageDeltaLake;
 
-    S3Configuration s3_configuration;
+    Configuration s3_configuration;
     std::vector<String> keys;
     NamesAndTypesList virtual_columns;
     Block virtual_block;
@@ -334,10 +321,10 @@ private:
 
     ObjectInfos object_infos;
 
-    static void updateS3Configuration(ContextPtr, S3Configuration &);
+    static void updateS3Configuration(ContextPtr, Configuration &);
 
     static std::shared_ptr<StorageS3Source::IIterator> createFileIterator(
-        const S3Configuration & s3_configuration,
+        const Configuration & s3_configuration,
         const std::vector<String> & keys,
         bool is_key_with_globs,
         bool distributed_processing,
@@ -349,7 +336,7 @@ private:
 
     static ColumnsDescription getTableStructureFromDataImpl(
         const String & format,
-        const S3Configuration & s3_configuration,
+        const Configuration & s3_configuration,
         const String & compression_method,
         bool distributed_processing,
         bool is_key_with_globs,
@@ -364,7 +351,7 @@ private:
     static std::optional<ColumnsDescription> tryGetColumnsFromCache(
         const Strings::const_iterator & begin,
         const Strings::const_iterator & end,
-        const S3Configuration & s3_configuration,
+        const Configuration & s3_configuration,
         ObjectInfos * object_infos,
         const String & format_name,
         const std::optional<FormatSettings> & format_settings,
@@ -372,7 +359,7 @@ private:
 
     static void addColumnsToCache(
         const Strings & keys,
-        const S3Configuration & s3_configuration,
+        const Configuration & s3_configuration,
         const ColumnsDescription & columns,
         const String & format_name,
         const std::optional<FormatSettings> & format_settings,
@@ -41,26 +41,26 @@ namespace DB
 {
 
 StorageS3Cluster::StorageS3Cluster(
-    const StorageS3ClusterConfiguration & configuration_,
+    const Configuration & configuration_,
     const StorageID & table_id_,
     const ColumnsDescription & columns_,
     const ConstraintsDescription & constraints_,
     ContextPtr context_,
     bool structure_argument_was_provided_)
     : IStorageCluster(table_id_)
-    , s3_configuration{configuration_.url, configuration_.auth_settings, configuration_.request_settings, configuration_.headers}
-    , filename(configuration_.url)
+    , s3_configuration{configuration_}
     , cluster_name(configuration_.cluster_name)
     , format_name(configuration_.format)
     , compression_method(configuration_.compression_method)
     , structure_argument_was_provided(structure_argument_was_provided_)
 {
-    context_->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI{filename});
+    context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri);
     StorageInMemoryMetadata storage_metadata;
     StorageS3::updateS3Configuration(context_, s3_configuration);
 
     if (columns_.empty())
     {
+        const auto & filename = configuration_.url.uri.getPath();
         const bool is_key_with_globs = filename.find_first_of("*?{") != std::string::npos;
 
         /// `distributed_processing` is set to false, because this code is executed on the initiator, so there is no callback set
@@ -173,7 +173,7 @@ ClusterPtr StorageS3Cluster::getCluster(ContextPtr context) const
 RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, ContextPtr context) const
 {
     auto iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(
-        *s3_configuration.client, s3_configuration.uri, query, virtual_block, context);
+        *s3_configuration.client, s3_configuration.url, query, virtual_block, context);
     auto callback = std::make_shared<std::function<String()>>([iterator]() mutable -> String { return iterator->next().key; });
     return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) };
 }
@@ -21,8 +21,13 @@ class Context;
 class StorageS3Cluster : public IStorageCluster
 {
 public:
+    struct Configuration : public StorageS3::Configuration
+    {
+        std::string cluster_name;
+    };
+
     StorageS3Cluster(
-        const StorageS3ClusterConfiguration & configuration_,
+        const Configuration & configuration_,
         const StorageID & table_id_,
         const ColumnsDescription & columns_,
         const ConstraintsDescription & constraints_,
@@ -43,8 +48,7 @@ public:
     ClusterPtr getCluster(ContextPtr context) const override;
 
 private:
-    StorageS3::S3Configuration s3_configuration;
-    String filename;
+    StorageS3::Configuration s3_configuration;
     String cluster_name;
     String format_name;
     String compression_method;
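A small, hedged illustration of what this inheritance buys: the cluster configuration only adds cluster_name, so the shared S3 arguments can be handled through the base-class slice, which is how TableFunctionS3Cluster reuses TableFunctionS3::parseArgumentsImpl further down. The literal values below are assumptions.

#include <Storages/StorageS3Cluster.h>

// Illustrative only: the cluster variant is the plain S3 configuration plus a cluster name.
void clusterConfigurationSketch()
{
    StorageS3Cluster::Configuration configuration;
    configuration.cluster_name = "my_cluster";                                          // assumed name
    configuration.url = S3::URI("https://my-bucket.s3.amazonaws.com/data/*.parquet");   // assumed URL
    configuration.format = "Parquet";
    StorageS3::Configuration & base = configuration;  // safe upcast for shared parsing code
    (void)base;
}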
@@ -10,6 +10,7 @@
 #include <Storages/StorageFactory.h>
 #include <Storages/ExternalDataSourceConfiguration.h>
 #include <Storages/Cache/SchemaCache.h>
+#include <Storages/StorageConfiguration.h>
 
 
 namespace DB
@@ -181,15 +182,10 @@ public:
 
     static FormatSettings getFormatSettingsFromArgs(const StorageFactory::Arguments & args);
 
-    struct Configuration
+    struct Configuration : public StatelessTableEngineConfiguration
     {
         std::string url;
         std::string http_method;
 
-        std::string format = "auto";
-        std::string compression_method = "auto";
-        std::string structure = "auto";
-
         HTTPHeaderEntries headers;
     };
 
@@ -28,12 +28,12 @@ namespace ErrorCodes
 
 
 void TableFunctionDeltaLake::parseArgumentsImpl(
-    const String & error_message, ASTs & args, ContextPtr context, StorageS3Configuration & base_configuration)
+    const String & error_message, ASTs & args, ContextPtr context, StorageS3::Configuration & base_configuration)
 {
     if (args.empty() || args.size() > 6)
         throw Exception::createDeprecated(error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-    auto * header_it = StorageURL::collectHeaders(args, base_configuration.headers, context);
+    auto * header_it = StorageURL::collectHeaders(args, base_configuration.headers_from_ast, context);
     if (header_it != args.end())
         args.erase(header_it);
 
@@ -77,7 +77,7 @@ void TableFunctionDeltaLake::parseArgumentsImpl(
     }
 
     /// This argument is always the first
-    base_configuration.url = checkAndGetLiteralArgument<String>(args[0], "url");
+    base_configuration.url = S3::URI(checkAndGetLiteralArgument<String>(args[0], "url"));
 
     if (args_to_idx.contains("format"))
         base_configuration.format = checkAndGetLiteralArgument<String>(args[args_to_idx["format"]], "format");
@@ -34,9 +34,9 @@ protected:
     ColumnsDescription getActualTableStructure(ContextPtr context) const override;
     void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
 
-    static void parseArgumentsImpl(const String & error_message, ASTs & args, ContextPtr context, StorageS3Configuration & configuration);
+    static void parseArgumentsImpl(const String & error_message, ASTs & args, ContextPtr context, StorageS3::Configuration & configuration);
 
-    StorageS3Configuration configuration;
+    mutable StorageS3::Configuration configuration;
 };
 
 }
@@ -28,12 +28,12 @@ namespace ErrorCodes
 
 
 void TableFunctionHudi::parseArgumentsImpl(
-    const String & error_message, ASTs & args, ContextPtr context, StorageS3Configuration & base_configuration)
+    const String & error_message, ASTs & args, ContextPtr context, StorageS3::Configuration & base_configuration)
 {
     if (args.empty() || args.size() > 6)
         throw Exception::createDeprecated(error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-    auto * header_it = StorageURL::collectHeaders(args, base_configuration.headers, context);
+    auto * header_it = StorageURL::collectHeaders(args, base_configuration.headers_from_ast, context);
     if (header_it != args.end())
         args.erase(header_it);
 
@@ -77,7 +77,7 @@ void TableFunctionHudi::parseArgumentsImpl(
     }
 
     /// This argument is always the first
-    base_configuration.url = checkAndGetLiteralArgument<String>(args[0], "url");
+    base_configuration.url = S3::URI(checkAndGetLiteralArgument<String>(args[0], "url"));
 
     if (args_to_idx.contains("format"))
         base_configuration.format = checkAndGetLiteralArgument<String>(args[args_to_idx["format"]], "format");
@@ -34,9 +34,9 @@ protected:
     ColumnsDescription getActualTableStructure(ContextPtr context) const override;
     void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
 
-    static void parseArgumentsImpl(const String & error_message, ASTs & args, ContextPtr context, StorageS3Configuration & configuration);
+    static void parseArgumentsImpl(const String & error_message, ASTs & args, ContextPtr context, StorageS3::Configuration & configuration);
 
-    StorageS3Configuration configuration;
+    mutable StorageS3::Configuration configuration;
 };
 
 }
@@ -29,7 +29,7 @@ namespace ErrorCodes
 
 
 /// This is needed to avoid copy-pase. Because s3Cluster arguments only differ in additional argument (first) - cluster name
-void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & args, ContextPtr context, StorageS3Configuration & s3_configuration)
+void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & args, ContextPtr context, StorageS3::Configuration & s3_configuration)
 {
     if (auto named_collection = tryGetNamedCollectionWithOverrides(args))
     {
@@ -40,7 +40,7 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar
         if (args.empty() || args.size() > 6)
             throw Exception::createDeprecated(error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-        auto * header_it = StorageURL::collectHeaders(args, s3_configuration.headers, context);
+        auto * header_it = StorageURL::collectHeaders(args, s3_configuration.headers_from_ast, context);
         if (header_it != args.end())
             args.erase(header_it);
 
@@ -87,7 +87,7 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar
         }
 
         /// This argument is always the first
-        s3_configuration.url = checkAndGetLiteralArgument<String>(args[0], "url");
+        s3_configuration.url = S3::URI(checkAndGetLiteralArgument<String>(args[0], "url"));
 
         if (args_to_idx.contains("format"))
             s3_configuration.format = checkAndGetLiteralArgument<String>(args[args_to_idx["format"]], "format");
@@ -106,7 +106,7 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar
     }
 
     if (s3_configuration.format == "auto")
-        s3_configuration.format = FormatFactory::instance().getFormatFromFileName(s3_configuration.url, true);
+        s3_configuration.format = FormatFactory::instance().getFormatFromFileName(s3_configuration.url.uri.getPath(), true);
 }
 
 void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context)
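One detail worth spelling out: with url now held as an S3::URI, the "auto" format is resolved from the path component of the URL rather than from the raw string. Below is a rough, assumption-labelled sketch of what extension-based detection amounts to; the real logic lives in FormatFactory and is richer than this.

#include <string>

// Illustrative only: map a file-name extension to a format name, as a stand-in
// for FormatFactory::getFormatFromFileName. Returns "auto" when nothing matches.
static std::string guessFormatFromPath(const std::string & path)
{
    const auto dot = path.find_last_of('.');
    const std::string ext = (dot == std::string::npos) ? "" : path.substr(dot + 1);
    if (ext == "parquet")
        return "Parquet";
    if (ext == "csv")
        return "CSV";
    if (ext == "json" || ext == "jsonl" || ext == "ndjson")
        return "JSONEachRow";
    return "auto";
}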
@@ -5,7 +5,7 @@
 #if USE_AWS_S3
 
 #include <TableFunctions/ITableFunction.h>
-#include <Storages/ExternalDataSourceConfiguration.h>
+#include <Storages/StorageS3.h>
 
 
 namespace DB
@@ -51,9 +51,9 @@ protected:
     ColumnsDescription getActualTableStructure(ContextPtr context) const override;
     void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
 
-    static void parseArgumentsImpl(const String & error_message, ASTs & args, ContextPtr context, StorageS3Configuration & configuration);
+    static void parseArgumentsImpl(const String & error_message, ASTs & args, ContextPtr context, StorageS3::Configuration & configuration);
 
-    StorageS3Configuration configuration;
+    mutable StorageS3::Configuration configuration;
     ColumnsDescription structure_hint;
 };
 
@@ -2,7 +2,6 @@
 
 #if USE_AWS_S3
 
 #include <Storages/StorageS3Cluster.h>
-#include <Storages/StorageS3.h>
 #include <Storages/checkAndGetLiteralArgument.h>
 
@@ -73,8 +72,8 @@ void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, Context
     clipped_args.reserve(args.size());
     std::copy(args.begin() + 1, args.end(), std::back_inserter(clipped_args));
 
-    /// StorageS3ClusterConfiguration inherints from StorageS3Configuration, so it is safe to upcast it.
-    TableFunctionS3::parseArgumentsImpl(message.text, clipped_args, context, static_cast<StorageS3Configuration & >(configuration));
+    /// StorageS3ClusterConfiguration inherints from StorageS3::Configuration, so it is safe to upcast it.
+    TableFunctionS3::parseArgumentsImpl(message.text, clipped_args, context, static_cast<StorageS3::Configuration & >(configuration));
 }
 
|
@ -5,7 +5,7 @@
|
||||
#if USE_AWS_S3
|
||||
|
||||
#include <TableFunctions/ITableFunction.h>
|
||||
#include <Storages/ExternalDataSourceConfiguration.h>
|
||||
#include <Storages/StorageS3Cluster.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -50,7 +50,7 @@ protected:
|
||||
ColumnsDescription getActualTableStructure(ContextPtr) const override;
|
||||
void parseArguments(const ASTPtr &, ContextPtr) override;
|
||||
|
||||
StorageS3ClusterConfiguration configuration;
|
||||
mutable StorageS3Cluster::Configuration configuration;
|
||||
ColumnsDescription structure_hint;
|
||||
};
|
||||
|
||||
|