This commit is contained in:
kssenii 2024-04-20 17:27:54 +01:00
parent a4daf2b454
commit 399414bb40
37 changed files with 427 additions and 536 deletions

View File

@ -168,6 +168,9 @@
M(ObjectStorageS3Threads, "Number of threads in the S3ObjectStorage thread pool.") \ M(ObjectStorageS3Threads, "Number of threads in the S3ObjectStorage thread pool.") \
M(ObjectStorageS3ThreadsActive, "Number of threads in the S3ObjectStorage thread pool running a task.") \ M(ObjectStorageS3ThreadsActive, "Number of threads in the S3ObjectStorage thread pool running a task.") \
M(ObjectStorageS3ThreadsScheduled, "Number of queued or active jobs in the S3ObjectStorage thread pool.") \ M(ObjectStorageS3ThreadsScheduled, "Number of queued or active jobs in the S3ObjectStorage thread pool.") \
M(StorageObjectStorageThreads, "Number of threads in the remote table engines thread pools.") \
M(StorageObjectStorageThreadsActive, "Number of threads in the remote table engines thread pool running a task.") \
M(StorageObjectStorageThreadsScheduled, "Number of queued or active jobs in remote table engines thread pool.") \
M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \ M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \
M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \ M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \
M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \ M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \

View File

@ -53,6 +53,9 @@
#include <Storages/StorageFile.h> #include <Storages/StorageFile.h>
#include <Storages/StorageURL.h> #include <Storages/StorageURL.h>
#include <Storages/ObjectStorage/StorageObjectStorage.h> #include <Storages/ObjectStorage/StorageObjectStorage.h>
#include <Storages/ObjectStorage/S3/Configuration.h>
#include <Storages/ObjectStorage/HDFS/Configuration.h>
#include <Storages/ObjectStorage/AzureBlob/Configuration.h>
#include <Storages/MaterializedView/RefreshTask.h> #include <Storages/MaterializedView/RefreshTask.h>
#include <Storages/System/StorageSystemFilesystemCache.h> #include <Storages/System/StorageSystemFilesystemCache.h>
#include <Parsers/ASTSystemQuery.h> #include <Parsers/ASTSystemQuery.h>
@ -489,17 +492,17 @@ BlockIO InterpreterSystemQuery::execute()
StorageFile::getSchemaCache(getContext()).clear(); StorageFile::getSchemaCache(getContext()).clear();
#if USE_AWS_S3 #if USE_AWS_S3
if (caches_to_drop.contains("S3")) if (caches_to_drop.contains("S3"))
StorageS3::getSchemaCache(getContext()).clear(); StorageObjectStorage::getSchemaCache(getContext(), StorageS3Configuration::type_name).clear();
#endif #endif
#if USE_HDFS #if USE_HDFS
if (caches_to_drop.contains("HDFS")) if (caches_to_drop.contains("HDFS"))
StorageHDFS::getSchemaCache(getContext()).clear(); StorageObjectStorage::getSchemaCache(getContext(), StorageHDFSConfiguration::type_name).clear();
#endif #endif
if (caches_to_drop.contains("URL")) if (caches_to_drop.contains("URL"))
StorageURL::getSchemaCache(getContext()).clear(); StorageURL::getSchemaCache(getContext()).clear();
#if USE_AZURE_BLOB_STORAGE #if USE_AZURE_BLOB_STORAGE
if (caches_to_drop.contains("AZURE")) if (caches_to_drop.contains("AZURE"))
StorageAzureBlob::getSchemaCache(getContext()).clear(); StorageObjectStorage::getSchemaCache(getContext(), StorageAzureBlobConfiguration::type_name).clear();
#endif #endif
break; break;
} }

View File

@ -101,6 +101,21 @@ AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(Co
return settings_ptr; return settings_ptr;
} }
StorageObjectStorage::QuerySettings StorageAzureBlobConfiguration::getQuerySettings(const ContextPtr & context) const
{
const auto & settings = context->getSettingsRef();
return StorageObjectStorage::QuerySettings{
.truncate_on_insert = settings.azure_truncate_on_insert,
.create_new_file_on_insert = settings.azure_create_new_file_on_insert,
.schema_inference_use_cache = settings.schema_inference_use_cache_for_azure,
.schema_inference_mode = settings.schema_inference_mode,
.skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure
.list_object_keys_size = settings.azure_list_object_keys_size,
.throw_on_zero_files_match = settings.s3_throw_on_zero_files_match,
.ignore_non_existent_file = settings.azure_ignore_file_doesnt_exist,
};
}
ObjectStoragePtr StorageAzureBlobConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT ObjectStoragePtr StorageAzureBlobConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT
{ {
assertInitialized(); assertInitialized();

View File

@ -18,9 +18,15 @@ class StorageAzureBlobConfiguration : public StorageObjectStorageConfiguration
friend void registerBackupEngineAzureBlobStorage(BackupFactory & factory); friend void registerBackupEngineAzureBlobStorage(BackupFactory & factory);
public: public:
static constexpr auto type_name = "azure";
static constexpr auto engine_name = "Azure";
StorageAzureBlobConfiguration() = default; StorageAzureBlobConfiguration() = default;
StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other); StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other);
std::string getTypeName() const override { return type_name; }
std::string getEngineName() const override { return engine_name; }
Path getPath() const override { return blob_path; } Path getPath() const override { return blob_path; }
void setPath(const Path & path) override { blob_path = path; } void setPath(const Path & path) override { blob_path = path; }
@ -30,6 +36,7 @@ public:
String getDataSourceDescription() override { return fs::path(connection_url) / container; } String getDataSourceDescription() override { return fs::path(connection_url) / container; }
String getNamespace() const override { return container; } String getNamespace() const override { return container; }
StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override;
void check(ContextPtr context) const override; void check(ContextPtr context) const override;
ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT
@ -37,8 +44,8 @@ public:
void fromNamedCollection(const NamedCollection & collection) override; void fromNamedCollection(const NamedCollection & collection) override;
void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override;
static void addStructureAndFormatToArgs( void addStructureAndFormatToArgs(
ASTs & args, const String & structure_, const String & format_, ContextPtr context); ASTs & args, const String & structure_, const String & format_, ContextPtr context) override;
protected: protected:
using AzureClient = Azure::Storage::Blobs::BlobContainerClient; using AzureClient = Azure::Storage::Blobs::BlobContainerClient;

View File

@ -184,7 +184,7 @@ struct DeltaLakeMetadata::Impl
* *
* We need to get "version", which is the version of the checkpoint we need to read. * We need to get "version", which is the version of the checkpoint we need to read.
*/ */
size_t readLastCheckpointIfExists() size_t readLastCheckpointIfExists() const
{ {
const auto last_checkpoint_file = fs::path(configuration->getPath()) / deltalake_metadata_directory / "_last_checkpoint"; const auto last_checkpoint_file = fs::path(configuration->getPath()) / deltalake_metadata_directory / "_last_checkpoint";
if (!object_storage->exists(StoredObject(last_checkpoint_file))) if (!object_storage->exists(StoredObject(last_checkpoint_file)))

View File

@ -21,17 +21,16 @@ namespace DB
/// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) /// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/)
/// Right now it's implemented on top of StorageS3 and right now it doesn't support /// Right now it's implemented on top of StorageS3 and right now it doesn't support
/// many Iceberg features like schema evolution, partitioning, positional and equality deletes. /// many Iceberg features like schema evolution, partitioning, positional and equality deletes.
template <typename DataLakeMetadata, typename StorageSettings> template <typename DataLakeMetadata>
class IStorageDataLake final : public StorageObjectStorage<StorageSettings> class IStorageDataLake final : public StorageObjectStorage
{ {
public: public:
using Storage = StorageObjectStorage<StorageSettings>; using Storage = StorageObjectStorage;
using ConfigurationPtr = Storage::ConfigurationPtr; using ConfigurationPtr = Storage::ConfigurationPtr;
static StoragePtr create( static StoragePtr create(
ConfigurationPtr base_configuration, ConfigurationPtr base_configuration,
ContextPtr context, ContextPtr context,
const String & engine_name_,
const StorageID & table_id_, const StorageID & table_id_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, const ConstraintsDescription & constraints_,
@ -64,9 +63,9 @@ public:
tryLogCurrentException(__PRETTY_FUNCTION__); tryLogCurrentException(__PRETTY_FUNCTION__);
} }
return std::make_shared<IStorageDataLake<DataLakeMetadata, StorageSettings>>( return std::make_shared<IStorageDataLake<DataLakeMetadata>>(
base_configuration, std::move(metadata), configuration, object_storage, base_configuration, std::move(metadata), configuration, object_storage,
engine_name_, context, table_id_, context, table_id_,
columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_,
constraints_, comment_, format_settings_); constraints_, comment_, format_settings_);
} }
@ -133,9 +132,9 @@ private:
DataLakeMetadataPtr current_metadata; DataLakeMetadataPtr current_metadata;
}; };
using StorageIceberg = IStorageDataLake<IcebergMetadata, S3StorageSettings>; using StorageIceberg = IStorageDataLake<IcebergMetadata>;
using StorageDeltaLake = IStorageDataLake<DeltaLakeMetadata, S3StorageSettings>; using StorageDeltaLake = IStorageDataLake<DeltaLakeMetadata>;
using StorageHudi = IStorageDataLake<HudiMetadata, S3StorageSettings>; using StorageHudi = IStorageDataLake<HudiMetadata>;
} }

View File

@ -6,7 +6,6 @@
#include <Storages/ObjectStorage/DataLakes/IStorageDataLake.h> #include <Storages/ObjectStorage/DataLakes/IStorageDataLake.h>
#include <Storages/ObjectStorage/DataLakes/IcebergMetadata.h> #include <Storages/ObjectStorage/DataLakes/IcebergMetadata.h>
#include <Storages/ObjectStorage/S3/Configuration.h> #include <Storages/ObjectStorage/S3/Configuration.h>
#include <Storages/ObjectStorage/StorageObjectStorageQuerySettings.h>
namespace DB namespace DB
@ -24,7 +23,7 @@ void registerStorageIceberg(StorageFactory & factory)
StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false);
return StorageIceberg::create( return StorageIceberg::create(
configuration, args.getContext(), "Iceberg", args.table_id, args.columns, configuration, args.getContext(), args.table_id, args.columns,
args.constraints, args.comment, std::nullopt, args.mode); args.constraints, args.comment, std::nullopt, args.mode);
}, },
{ {
@ -47,7 +46,7 @@ void registerStorageDeltaLake(StorageFactory & factory)
StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false);
return StorageDeltaLake::create( return StorageDeltaLake::create(
configuration, args.getContext(), "DeltaLake", args.table_id, args.columns, configuration, args.getContext(), args.table_id, args.columns,
args.constraints, args.comment, std::nullopt, args.mode); args.constraints, args.comment, std::nullopt, args.mode);
}, },
{ {
@ -68,7 +67,7 @@ void registerStorageHudi(StorageFactory & factory)
StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false);
return StorageHudi::create( return StorageHudi::create(
configuration, args.getContext(), "Hudi", args.table_id, args.columns, configuration, args.getContext(), args.table_id, args.columns,
args.constraints, args.comment, std::nullopt, args.mode); args.constraints, args.comment, std::nullopt, args.mode);
}, },
{ {

View File

@ -60,6 +60,20 @@ std::string StorageHDFSConfiguration::getPathWithoutGlob() const
return "/"; return "/";
return path.substr(0, end_of_path_without_globs); return path.substr(0, end_of_path_without_globs);
} }
StorageObjectStorage::QuerySettings StorageHDFSConfiguration::getQuerySettings(const ContextPtr & context) const
{
const auto & settings = context->getSettingsRef();
return StorageObjectStorage::QuerySettings{
.truncate_on_insert = settings.hdfs_truncate_on_insert,
.create_new_file_on_insert = settings.hdfs_create_new_file_on_insert,
.schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs,
.schema_inference_mode = settings.schema_inference_mode,
.skip_empty_files = settings.hdfs_skip_empty_files, /// TODO: add setting for hdfs
.list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs
.throw_on_zero_files_match = settings.s3_throw_on_zero_files_match,
.ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist,
};
}
void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool with_structure) void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool with_structure)
{ {

View File

@ -13,9 +13,15 @@ namespace DB
class StorageHDFSConfiguration : public StorageObjectStorageConfiguration class StorageHDFSConfiguration : public StorageObjectStorageConfiguration
{ {
public: public:
static constexpr auto type_name = "hdfs";
static constexpr auto engine_name = "HDFS";
StorageHDFSConfiguration() = default; StorageHDFSConfiguration() = default;
StorageHDFSConfiguration(const StorageHDFSConfiguration & other); StorageHDFSConfiguration(const StorageHDFSConfiguration & other);
std::string getTypeName() const override { return type_name; }
std::string getEngineName() const override { return engine_name; }
Path getPath() const override { return path; } Path getPath() const override { return path; }
void setPath(const Path & path_) override { path = path_; } void setPath(const Path & path_) override { path = path_; }
@ -25,13 +31,14 @@ public:
String getNamespace() const override { return ""; } String getNamespace() const override { return ""; }
String getDataSourceDescription() override { return url; } String getDataSourceDescription() override { return url; }
StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override;
void check(ContextPtr context) const override; void check(ContextPtr context) const override;
ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT
StorageObjectStorageConfigurationPtr clone() override { return std::make_shared<StorageHDFSConfiguration>(*this); } StorageObjectStorageConfigurationPtr clone() override { return std::make_shared<StorageHDFSConfiguration>(*this); }
static void addStructureAndFormatToArgs( void addStructureAndFormatToArgs(
ASTs & args, const String & structure_, const String & format_, ContextPtr context); ASTs & args, const String & structure_, const String & format_, ContextPtr context) override;
std::string getPathWithoutGlob() const override; std::string getPathWithoutGlob() const override;

View File

@ -1,5 +1,4 @@
#include <Storages/ObjectStorage/ReadBufferIterator.h> #include <Storages/ObjectStorage/ReadBufferIterator.h>
#include <Storages/ObjectStorage/StorageObjectStorageQuerySettings.h>
#include <Storages/ObjectStorage/StorageObjectStorage.h> #include <Storages/ObjectStorage/StorageObjectStorage.h>
#include <IO/ReadBufferFromFileBase.h> #include <IO/ReadBufferFromFileBase.h>
@ -19,7 +18,6 @@ ReadBufferIterator::ReadBufferIterator(
ConfigurationPtr configuration_, ConfigurationPtr configuration_,
const FileIterator & file_iterator_, const FileIterator & file_iterator_,
const std::optional<FormatSettings> & format_settings_, const std::optional<FormatSettings> & format_settings_,
const StorageObjectStorageSettings & query_settings_,
SchemaCache & schema_cache_, SchemaCache & schema_cache_,
ObjectInfos & read_keys_, ObjectInfos & read_keys_,
const ContextPtr & context_) const ContextPtr & context_)
@ -28,7 +26,7 @@ ReadBufferIterator::ReadBufferIterator(
, configuration(configuration_) , configuration(configuration_)
, file_iterator(file_iterator_) , file_iterator(file_iterator_)
, format_settings(format_settings_) , format_settings(format_settings_)
, query_settings(query_settings_) , query_settings(configuration->getQuerySettings(context_))
, schema_cache(schema_cache_) , schema_cache(schema_cache_)
, read_keys(read_keys_) , read_keys(read_keys_)
, format(configuration->format == "auto" ? std::nullopt : std::optional<String>(configuration->format)) , format(configuration->format == "auto" ? std::nullopt : std::optional<String>(configuration->format))

View File

@ -2,7 +2,6 @@
#include <Interpreters/Context_fwd.h> #include <Interpreters/Context_fwd.h>
#include <Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h> #include <Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h>
#include <Storages/ObjectStorage/StorageObjectStorageSource.h> #include <Storages/ObjectStorage/StorageObjectStorageSource.h>
#include <Storages/ObjectStorage/StorageObjectStorageQuerySettings.h>
#include <Formats/ReadSchemaUtils.h> #include <Formats/ReadSchemaUtils.h>
@ -19,7 +18,6 @@ public:
ConfigurationPtr configuration_, ConfigurationPtr configuration_,
const FileIterator & file_iterator_, const FileIterator & file_iterator_,
const std::optional<FormatSettings> & format_settings_, const std::optional<FormatSettings> & format_settings_,
const StorageObjectStorageSettings & query_settings_,
SchemaCache & schema_cache_, SchemaCache & schema_cache_,
ObjectInfos & read_keys_, ObjectInfos & read_keys_,
const ContextPtr & context_); const ContextPtr & context_);
@ -50,7 +48,7 @@ private:
const ConfigurationPtr configuration; const ConfigurationPtr configuration;
const FileIterator file_iterator; const FileIterator file_iterator;
const std::optional<FormatSettings> & format_settings; const std::optional<FormatSettings> & format_settings;
const StorageObjectStorageSettings query_settings; const StorageObjectStorage::QuerySettings query_settings;
SchemaCache & schema_cache; SchemaCache & schema_cache;
ObjectInfos & read_keys; ObjectInfos & read_keys;
std::optional<String> format; std::optional<String> format;

View File

@ -1,11 +1,11 @@
#include <Storages/ObjectStorage/ReadFromStorageObjectStorage.h> #include <Storages/ObjectStorage/ReadFromObjectStorageStep.h>
#include <Processors/Sources/NullSource.h> #include <Processors/Sources/NullSource.h>
#include <QueryPipeline/QueryPipelineBuilder.h> #include <QueryPipeline/QueryPipelineBuilder.h>
namespace DB namespace DB
{ {
ReadFromStorageObejctStorage::ReadFromStorageObejctStorage( ReadFromObjectStorageStep::ReadFromObjectStorageStep(
ObjectStoragePtr object_storage_, ObjectStoragePtr object_storage_,
ConfigurationPtr configuration_, ConfigurationPtr configuration_,
const String & name_, const String & name_,
@ -14,49 +14,41 @@ ReadFromStorageObejctStorage::ReadFromStorageObejctStorage(
const SelectQueryInfo & query_info_, const SelectQueryInfo & query_info_,
const StorageSnapshotPtr & storage_snapshot_, const StorageSnapshotPtr & storage_snapshot_,
const std::optional<DB::FormatSettings> & format_settings_, const std::optional<DB::FormatSettings> & format_settings_,
const StorageObjectStorageSettings & query_settings_,
bool distributed_processing_, bool distributed_processing_,
ReadFromFormatInfo info_, ReadFromFormatInfo info_,
SchemaCache & schema_cache_, SchemaCache & schema_cache_,
const bool need_only_count_, const bool need_only_count_,
ContextPtr context_, ContextPtr context_,
size_t max_block_size_, size_t max_block_size_,
size_t num_streams_, size_t num_streams_)
CurrentMetrics::Metric metric_threads_count_,
CurrentMetrics::Metric metric_threads_active_,
CurrentMetrics::Metric metric_threads_scheduled_)
: SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_)
, object_storage(object_storage_) , object_storage(object_storage_)
, configuration(configuration_) , configuration(configuration_)
, info(std::move(info_)) , info(std::move(info_))
, virtual_columns(virtual_columns_) , virtual_columns(virtual_columns_)
, format_settings(format_settings_) , format_settings(format_settings_)
, query_settings(query_settings_) , query_settings(configuration->getQuerySettings(context_))
, schema_cache(schema_cache_) , schema_cache(schema_cache_)
, name(name_ + "Source") , name(name_ + "Source")
, need_only_count(need_only_count_) , need_only_count(need_only_count_)
, max_block_size(max_block_size_) , max_block_size(max_block_size_)
, num_streams(num_streams_) , num_streams(num_streams_)
, distributed_processing(distributed_processing_) , distributed_processing(distributed_processing_)
, metric_threads_count(metric_threads_count_)
, metric_threads_active(metric_threads_active_)
, metric_threads_scheduled(metric_threads_scheduled_)
{ {
} }
void ReadFromStorageObejctStorage::createIterator(const ActionsDAG::Node * predicate) void ReadFromObjectStorageStep::createIterator(const ActionsDAG::Node * predicate)
{ {
if (!iterator_wrapper) if (!iterator_wrapper)
{ {
auto context = getContext(); auto context = getContext();
iterator_wrapper = StorageObjectStorageSource::createFileIterator( iterator_wrapper = StorageObjectStorageSource::createFileIterator(
configuration, object_storage, query_settings, distributed_processing, configuration, object_storage, distributed_processing,
context, predicate, virtual_columns, nullptr, metric_threads_count, context, predicate, virtual_columns, nullptr, context->getFileProgressCallback());
metric_threads_active, metric_threads_scheduled, context->getFileProgressCallback());
} }
} }
void ReadFromStorageObejctStorage::applyFilters(ActionDAGNodes added_filter_nodes) void ReadFromObjectStorageStep::applyFilters(ActionDAGNodes added_filter_nodes)
{ {
filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes);
const ActionsDAG::Node * predicate = nullptr; const ActionsDAG::Node * predicate = nullptr;
@ -66,7 +58,7 @@ void ReadFromStorageObejctStorage::applyFilters(ActionDAGNodes added_filter_node
createIterator(predicate); createIterator(predicate);
} }
void ReadFromStorageObejctStorage::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) void ReadFromObjectStorageStep::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
{ {
createIterator(nullptr); createIterator(nullptr);
auto context = getContext(); auto context = getContext();
@ -74,13 +66,9 @@ void ReadFromStorageObejctStorage::initializePipeline(QueryPipelineBuilder & pip
Pipes pipes; Pipes pipes;
for (size_t i = 0; i < num_streams; ++i) for (size_t i = 0; i < num_streams; ++i)
{ {
auto threadpool = std::make_shared<ThreadPool>(
metric_threads_count, metric_threads_active, metric_threads_scheduled, /* max_threads */1);
auto source = std::make_shared<StorageObjectStorageSource>( auto source = std::make_shared<StorageObjectStorageSource>(
getName(), object_storage, configuration, info, format_settings, query_settings, getName(), object_storage, configuration, info, format_settings, query_settings,
context, max_block_size, iterator_wrapper, need_only_count, schema_cache, context, max_block_size, iterator_wrapper, need_only_count, schema_cache);
std::move(threadpool), metric_threads_count, metric_threads_active, metric_threads_scheduled);
source->setKeyCondition(filter_actions_dag, context); source->setKeyCondition(filter_actions_dag, context);
pipes.emplace_back(std::move(source)); pipes.emplace_back(std::move(source));

View File

@ -1,17 +1,16 @@
#pragma once #pragma once
#include <Storages/ObjectStorage/StorageObjectStorageSource.h>
#include <Storages/ObjectStorage/StorageObjectStorageQuerySettings.h>
#include <Processors/QueryPlan/SourceStepWithFilter.h> #include <Processors/QueryPlan/SourceStepWithFilter.h>
#include <Storages/ObjectStorage/StorageObjectStorageSource.h>
namespace DB namespace DB
{ {
class ReadFromStorageObejctStorage : public SourceStepWithFilter class ReadFromObjectStorageStep : public SourceStepWithFilter
{ {
public: public:
using ConfigurationPtr = StorageObjectStorageConfigurationPtr; using ConfigurationPtr = StorageObjectStorageConfigurationPtr;
ReadFromStorageObejctStorage( ReadFromObjectStorageStep(
ObjectStoragePtr object_storage_, ObjectStoragePtr object_storage_,
ConfigurationPtr configuration_, ConfigurationPtr configuration_,
const String & name_, const String & name_,
@ -20,17 +19,13 @@ public:
const SelectQueryInfo & query_info_, const SelectQueryInfo & query_info_,
const StorageSnapshotPtr & storage_snapshot_, const StorageSnapshotPtr & storage_snapshot_,
const std::optional<DB::FormatSettings> & format_settings_, const std::optional<DB::FormatSettings> & format_settings_,
const StorageObjectStorageSettings & query_settings_,
bool distributed_processing_, bool distributed_processing_,
ReadFromFormatInfo info_, ReadFromFormatInfo info_,
SchemaCache & schema_cache_, SchemaCache & schema_cache_,
bool need_only_count_, bool need_only_count_,
ContextPtr context_, ContextPtr context_,
size_t max_block_size_, size_t max_block_size_,
size_t num_streams_, size_t num_streams_);
CurrentMetrics::Metric metric_threads_count_,
CurrentMetrics::Metric metric_threads_active_,
CurrentMetrics::Metric metric_threads_scheduled_);
std::string getName() const override { return name; } std::string getName() const override { return name; }
@ -46,16 +41,13 @@ private:
const ReadFromFormatInfo info; const ReadFromFormatInfo info;
const NamesAndTypesList virtual_columns; const NamesAndTypesList virtual_columns;
const std::optional<DB::FormatSettings> format_settings; const std::optional<DB::FormatSettings> format_settings;
const StorageObjectStorageSettings query_settings; const StorageObjectStorage::QuerySettings query_settings;
SchemaCache & schema_cache; SchemaCache & schema_cache;
const String name; const String name;
const bool need_only_count; const bool need_only_count;
const size_t max_block_size; const size_t max_block_size;
const size_t num_streams; const size_t num_streams;
const bool distributed_processing; const bool distributed_processing;
const CurrentMetrics::Metric metric_threads_count;
const CurrentMetrics::Metric metric_threads_active;
const CurrentMetrics::Metric metric_threads_scheduled;
void createIterator(const ActionsDAG::Node * predicate); void createIterator(const ActionsDAG::Node * predicate);
}; };

View File

@ -70,6 +70,21 @@ StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & ot
keys = other.keys; keys = other.keys;
} }
StorageObjectStorage::QuerySettings StorageS3Configuration::getQuerySettings(const ContextPtr & context) const
{
const auto & settings = context->getSettingsRef();
return StorageObjectStorage::QuerySettings{
.truncate_on_insert = settings.s3_truncate_on_insert,
.create_new_file_on_insert = settings.s3_create_new_file_on_insert,
.schema_inference_use_cache = settings.schema_inference_use_cache_for_s3,
.schema_inference_mode = settings.schema_inference_mode,
.skip_empty_files = settings.s3_skip_empty_files,
.list_object_keys_size = settings.s3_list_object_keys_size,
.throw_on_zero_files_match = settings.s3_throw_on_zero_files_match,
.ignore_non_existent_file = settings.s3_ignore_file_doesnt_exist,
};
}
ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT
{ {
assertInitialized(); assertInitialized();

View File

@ -7,6 +7,7 @@
#include <IO/S3/getObjectInfo.h> #include <IO/S3/getObjectInfo.h>
#include <Storages/StorageS3Settings.h> #include <Storages/StorageS3Settings.h>
#include <Storages/ObjectStorage/StorageObjectStorageConfiguration.h> #include <Storages/ObjectStorage/StorageObjectStorageConfiguration.h>
#include <Common/CurrentMetrics.h>
namespace DB namespace DB
{ {
@ -14,9 +15,14 @@ namespace DB
class StorageS3Configuration : public StorageObjectStorageConfiguration class StorageS3Configuration : public StorageObjectStorageConfiguration
{ {
public: public:
static constexpr auto type_name = "s3";
StorageS3Configuration() = default; StorageS3Configuration() = default;
StorageS3Configuration(const StorageS3Configuration & other); StorageS3Configuration(const StorageS3Configuration & other);
std::string getTypeName() const override { return type_name; }
std::string getEngineName() const override { return url.storage_name; }
Path getPath() const override { return url.key; } Path getPath() const override { return url.key; }
void setPath(const Path & path) override { url.key = path; } void setPath(const Path & path) override { url.key = path; }
@ -26,6 +32,7 @@ public:
String getNamespace() const override { return url.bucket; } String getNamespace() const override { return url.bucket; }
String getDataSourceDescription() override; String getDataSourceDescription() override;
StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override;
void check(ContextPtr context) const override; void check(ContextPtr context) const override;
void validateNamespace(const String & name) const override; void validateNamespace(const String & name) const override;
@ -34,8 +41,8 @@ public:
bool isStaticConfiguration() const override { return static_configuration; } bool isStaticConfiguration() const override { return static_configuration; }
ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT
static void addStructureAndFormatToArgs( void addStructureAndFormatToArgs(
ASTs & args, const String & structure, const String & format, ContextPtr context); ASTs & args, const String & structure, const String & format, ContextPtr context) override;
private: private:
void fromNamedCollection(const NamedCollection & collection) override; void fromNamedCollection(const NamedCollection & collection) override;

View File

@ -11,10 +11,9 @@
#include <Storages/StorageFactory.h> #include <Storages/StorageFactory.h>
#include <Storages/VirtualColumnUtils.h> #include <Storages/VirtualColumnUtils.h>
#include <Storages/ObjectStorage/StorageObjectStorageConfiguration.h> #include <Storages/ObjectStorage/StorageObjectStorageConfiguration.h>
#include <Storages/ObjectStorage/StorageObjectStorageQuerySettings.h>
#include <Storages/ObjectStorage/StorageObjectStorageSink.h> #include <Storages/ObjectStorage/StorageObjectStorageSink.h>
#include <Storages/ObjectStorage/StorageObjectStorageSource.h> #include <Storages/ObjectStorage/StorageObjectStorageSource.h>
#include <Storages/ObjectStorage/ReadFromStorageObjectStorage.h> #include <Storages/ObjectStorage/ReadFromObjectStorageStep.h>
#include <Storages/ObjectStorage/ReadBufferIterator.h> #include <Storages/ObjectStorage/ReadBufferIterator.h>
#include <Storages/ObjectStorage/Utils.h> #include <Storages/ObjectStorage/Utils.h>
#include <Storages/Cache/SchemaCache.h> #include <Storages/Cache/SchemaCache.h>
@ -25,53 +24,13 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int BAD_ARGUMENTS;
extern const int DATABASE_ACCESS_DENIED; extern const int DATABASE_ACCESS_DENIED;
extern const int NOT_IMPLEMENTED; extern const int NOT_IMPLEMENTED;
} }
template <typename StorageSettings> StorageObjectStorage::StorageObjectStorage(
std::unique_ptr<StorageInMemoryMetadata> getStorageMetadata(
ObjectStoragePtr object_storage,
const StorageObjectStorageConfigurationPtr & configuration,
const ColumnsDescription & columns,
const ConstraintsDescription & constraints,
std::optional<FormatSettings> format_settings,
const String & comment,
const std::string & engine_name,
const ContextPtr & context)
{
using Storage = StorageObjectStorage<StorageSettings>;
auto storage_metadata = std::make_unique<StorageInMemoryMetadata>();
if (columns.empty())
{
auto fetched_columns = Storage::getTableStructureFromData(object_storage, configuration, format_settings, context);
storage_metadata->setColumns(fetched_columns);
}
else if (!columns.hasOnlyOrdinary())
{
/// We don't allow special columns.
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine {} doesn't support special columns "
"like MATERIALIZED, ALIAS or EPHEMERAL", engine_name);
}
else
{
if (configuration->format == "auto")
Storage::setFormatFromData(object_storage, configuration, format_settings, context);
storage_metadata->setColumns(columns);
}
storage_metadata->setConstraints(constraints);
storage_metadata->setComment(comment);
return storage_metadata;
}
template <typename StorageSettings>
StorageObjectStorage<StorageSettings>::StorageObjectStorage(
ConfigurationPtr configuration_, ConfigurationPtr configuration_,
ObjectStoragePtr object_storage_, ObjectStoragePtr object_storage_,
const String & engine_name_,
ContextPtr context, ContextPtr context,
const StorageID & table_id_, const StorageID & table_id_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
@ -80,16 +39,13 @@ StorageObjectStorage<StorageSettings>::StorageObjectStorage(
std::optional<FormatSettings> format_settings_, std::optional<FormatSettings> format_settings_,
bool distributed_processing_, bool distributed_processing_,
ASTPtr partition_by_) ASTPtr partition_by_)
: IStorage(table_id_, getStorageMetadata<StorageSettings>( : IStorage(table_id_)
object_storage_, configuration_, columns_, constraints_, format_settings_, , configuration(configuration_)
comment, engine_name, context)) , object_storage(object_storage_)
, engine_name(engine_name_)
, format_settings(format_settings_) , format_settings(format_settings_)
, partition_by(partition_by_) , partition_by(partition_by_)
, distributed_processing(distributed_processing_) , distributed_processing(distributed_processing_)
, log(getLogger("Storage" + engine_name_)) , log(getLogger(fmt::format("Storage{}({})", configuration->getEngineName(), table_id_.getFullTableName())))
, object_storage(object_storage_)
, configuration(configuration_)
{ {
FormatFactory::instance().checkFormatName(configuration->format); FormatFactory::instance().checkFormatName(configuration->format);
configuration->check(context); configuration->check(context);
@ -98,46 +54,41 @@ StorageObjectStorage<StorageSettings>::StorageObjectStorage(
for (const auto & key : configuration->getPaths()) for (const auto & key : configuration->getPaths())
objects.emplace_back(key); objects.emplace_back(key);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(getInMemoryMetadataPtr()->getColumns())); auto metadata = getStorageMetadata(
object_storage_, configuration_, columns_,
constraints_, format_settings_, comment, context);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns()));
setInMemoryMetadata(std::move(metadata));
} }
template <typename StorageSettings> String StorageObjectStorage::getName() const
bool StorageObjectStorage<StorageSettings>::prefersLargeBlocks() const {
return configuration->getEngineName();
}
bool StorageObjectStorage::prefersLargeBlocks() const
{ {
return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration->format); return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration->format);
} }
template <typename StorageSettings> bool StorageObjectStorage::parallelizeOutputAfterReading(ContextPtr context) const
bool StorageObjectStorage<StorageSettings>::parallelizeOutputAfterReading(ContextPtr context) const
{ {
return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration->format, context); return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration->format, context);
} }
template <typename StorageSettings> bool StorageObjectStorage::supportsSubsetOfColumns(const ContextPtr & context) const
bool StorageObjectStorage<StorageSettings>::supportsSubsetOfColumns(const ContextPtr & context) const
{ {
return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context, format_settings); return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context, format_settings);
} }
template <typename StorageSettings> void StorageObjectStorage::updateConfiguration(ContextPtr context)
void StorageObjectStorage<StorageSettings>::updateConfiguration(ContextPtr context)
{ {
if (!configuration->isStaticConfiguration()) if (!configuration->isStaticConfiguration())
object_storage->applyNewSettings(context->getConfigRef(), "s3.", context); object_storage->applyNewSettings(context->getConfigRef(), "s3.", context);
} }
template <typename StorageSettings> void StorageObjectStorage::read(
SchemaCache & StorageObjectStorage<StorageSettings>::getSchemaCache(const ContextPtr & context)
{
static SchemaCache schema_cache(
context->getConfigRef().getUInt(
StorageSettings::SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING,
DEFAULT_SCHEMA_CACHE_ELEMENTS));
return schema_cache;
}
template <typename StorageSettings>
void StorageObjectStorage<StorageSettings>::read(
QueryPlan & query_plan, QueryPlan & query_plan,
const Names & column_names, const Names & column_names,
const StorageSnapshotPtr & storage_snapshot, const StorageSnapshotPtr & storage_snapshot,
@ -155,13 +106,12 @@ void StorageObjectStorage<StorageSettings>::read(
getName()); getName());
} }
const auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); const auto read_from_format_info = prepareReadingFromFormat(
column_names, storage_snapshot, supportsSubsetOfColumns(local_context));
const bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) const bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
&& local_context->getSettingsRef().optimize_count_from_files; && local_context->getSettingsRef().optimize_count_from_files;
LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII SOURCE HEADER: {}", read_from_format_info.source_header.dumpStructure()); auto read_step = std::make_unique<ReadFromObjectStorageStep>(
LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII FORMAT HEADER: {}", read_from_format_info.format_header.dumpStructure());
auto read_step = std::make_unique<ReadFromStorageObejctStorage>(
object_storage, object_storage,
configuration, configuration,
getName(), getName(),
@ -170,23 +120,18 @@ void StorageObjectStorage<StorageSettings>::read(
query_info, query_info,
storage_snapshot, storage_snapshot,
format_settings, format_settings,
StorageSettings::create(local_context->getSettingsRef()),
distributed_processing, distributed_processing,
std::move(read_from_format_info), std::move(read_from_format_info),
getSchemaCache(local_context), getSchemaCache(local_context),
need_only_count, need_only_count,
local_context, local_context,
max_block_size, max_block_size,
num_streams, num_streams);
StorageSettings::ObjectStorageThreads(),
StorageSettings::ObjectStorageThreadsActive(),
StorageSettings::ObjectStorageThreadsScheduled());
query_plan.addStep(std::move(read_step)); query_plan.addStep(std::move(read_step));
} }
template <typename StorageSettings> SinkToStoragePtr StorageObjectStorage::write(
SinkToStoragePtr StorageObjectStorage<StorageSettings>::write(
const ASTPtr & query, const ASTPtr & query,
const StorageMetadataPtr & metadata_snapshot, const StorageMetadataPtr & metadata_snapshot,
ContextPtr local_context, ContextPtr local_context,
@ -194,7 +139,7 @@ SinkToStoragePtr StorageObjectStorage<StorageSettings>::write(
{ {
updateConfiguration(local_context); updateConfiguration(local_context);
const auto sample_block = metadata_snapshot->getSampleBlock(); const auto sample_block = metadata_snapshot->getSampleBlock();
const auto & query_settings = StorageSettings::create(local_context->getSettingsRef()); const auto & settings = configuration->getQuerySettings(local_context);
if (configuration->withWildcard()) if (configuration->withWildcard())
{ {
@ -209,23 +154,22 @@ SinkToStoragePtr StorageObjectStorage<StorageSettings>::write(
if (partition_by_ast) if (partition_by_ast)
{ {
LOG_TEST(log, "Using PartitionedSink for {}", configuration->getPath());
return std::make_shared<PartitionedStorageObjectStorageSink>( return std::make_shared<PartitionedStorageObjectStorageSink>(
object_storage, configuration, query_settings, object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast);
format_settings, sample_block, local_context, partition_by_ast);
} }
} }
if (configuration->withGlobs()) if (configuration->withGlobs())
{ {
throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, throw Exception(
ErrorCodes::DATABASE_ACCESS_DENIED,
"{} key '{}' contains globs, so the table is in readonly mode", "{} key '{}' contains globs, so the table is in readonly mode",
getName(), configuration->getPath()); getName(), configuration->getPath());
} }
auto & paths = configuration->getPaths(); auto & paths = configuration->getPaths();
if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(
*object_storage, *configuration, query_settings, paths.front(), paths.size())) *object_storage, *configuration, settings, paths.front(), paths.size()))
{ {
paths.push_back(*new_key); paths.push_back(*new_key);
} }
@ -238,9 +182,11 @@ SinkToStoragePtr StorageObjectStorage<StorageSettings>::write(
local_context); local_context);
} }
template <typename StorageSettings> void StorageObjectStorage::truncate(
void StorageObjectStorage<StorageSettings>::truncate( const ASTPtr &,
const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) const StorageMetadataPtr &,
ContextPtr,
TableExclusiveLockHolder &)
{ {
if (configuration->withGlobs()) if (configuration->withGlobs())
{ {
@ -257,34 +203,37 @@ void StorageObjectStorage<StorageSettings>::truncate(
object_storage->removeObjectsIfExist(objects); object_storage->removeObjectsIfExist(objects);
} }
template <typename StorageSettings> std::unique_ptr<ReadBufferIterator> StorageObjectStorage::createReadBufferIterator(
std::unique_ptr<ReadBufferIterator> StorageObjectStorage<StorageSettings>::createReadBufferIterator(
const ObjectStoragePtr & object_storage, const ObjectStoragePtr & object_storage,
const ConfigurationPtr & configuration, const ConfigurationPtr & configuration,
const std::optional<FormatSettings> & format_settings, const std::optional<FormatSettings> & format_settings,
ObjectInfos & read_keys, ObjectInfos & read_keys,
const ContextPtr & context) const ContextPtr & context)
{ {
const auto settings = StorageSettings::create(context->getSettingsRef());
auto file_iterator = StorageObjectStorageSource::createFileIterator( auto file_iterator = StorageObjectStorageSource::createFileIterator(
configuration, object_storage, settings, /* distributed_processing */false, configuration,
context, /* predicate */{}, /* virtual_columns */{}, &read_keys, object_storage,
StorageSettings::ObjectStorageThreads(), StorageSettings::ObjectStorageThreadsActive(), StorageSettings::ObjectStorageThreadsScheduled()); false/* distributed_processing */,
context,
{}/* predicate */,
{}/* virtual_columns */,
&read_keys);
return std::make_unique<ReadBufferIterator>( return std::make_unique<ReadBufferIterator>(
object_storage, configuration, file_iterator, object_storage, configuration, file_iterator,
format_settings, StorageSettings::create(context->getSettingsRef()), getSchemaCache(context), read_keys, context); format_settings, getSchemaCache(context, configuration->getTypeName()), read_keys, context);
} }
template <typename StorageSettings> ColumnsDescription StorageObjectStorage::getTableStructureFromData(
ColumnsDescription StorageObjectStorage<StorageSettings>::getTableStructureFromData(
const ObjectStoragePtr & object_storage, const ObjectStoragePtr & object_storage,
const ConfigurationPtr & configuration, const ConfigurationPtr & configuration,
const std::optional<FormatSettings> & format_settings, const std::optional<FormatSettings> & format_settings,
const ContextPtr & context) const ContextPtr & context)
{ {
ObjectInfos read_keys; ObjectInfos read_keys;
auto read_buffer_iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); auto read_buffer_iterator = createReadBufferIterator(
object_storage, configuration, format_settings, read_keys, context);
if (configuration->format == "auto") if (configuration->format == "auto")
{ {
auto [columns, format] = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context); auto [columns, format] = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context);
@ -297,20 +246,34 @@ ColumnsDescription StorageObjectStorage<StorageSettings>::getTableStructureFromD
} }
} }
template <typename StorageSettings> void StorageObjectStorage::setFormatFromData(
void StorageObjectStorage<StorageSettings>::setFormatFromData(
const ObjectStoragePtr & object_storage, const ObjectStoragePtr & object_storage,
const ConfigurationPtr & configuration, const ConfigurationPtr & configuration,
const std::optional<FormatSettings> & format_settings, const std::optional<FormatSettings> & format_settings,
const ContextPtr & context) const ContextPtr & context)
{ {
ObjectInfos read_keys; ObjectInfos read_keys;
auto read_buffer_iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); auto read_buffer_iterator = createReadBufferIterator(
object_storage, configuration, format_settings, read_keys, context);
configuration->format = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context).second; configuration->format = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context).second;
} }
template class StorageObjectStorage<S3StorageSettings>; SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context)
template class StorageObjectStorage<AzureStorageSettings>; {
template class StorageObjectStorage<HDFSStorageSettings>; static SchemaCache schema_cache(
context->getConfigRef().getUInt(
"schema_inference_cache_max_elements_for_" + configuration->getTypeName(),
DEFAULT_SCHEMA_CACHE_ELEMENTS));
return schema_cache;
}
SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context, const std::string & storage_type_name)
{
static SchemaCache schema_cache(
context->getConfigRef().getUInt(
"schema_inference_cache_max_elements_for_" + storage_type_name,
DEFAULT_SCHEMA_CACHE_ELEMENTS));
return schema_cache;
}
} }

View File

@ -1,31 +1,22 @@
#pragma once #pragma once
#include <Common/re2.h>
#include <Disks/ObjectStorages/IObjectStorage.h> #include <Disks/ObjectStorages/IObjectStorage.h>
#include <Common/threadPoolCallbackRunner.h> #include <Common/threadPoolCallbackRunner.h>
#include <Common/logger_useful.h>
#include <Storages/IStorage.h> #include <Storages/IStorage.h>
#include <Storages/prepareReadingFromFormat.h> #include <Storages/prepareReadingFromFormat.h>
#include <Processors/Formats/IInputFormat.h> #include <Processors/Formats/IInputFormat.h>
namespace DB namespace DB
{ {
struct SelectQueryInfo;
class StorageObjectStorageConfiguration; class StorageObjectStorageConfiguration;
struct S3StorageSettings;
struct HDFSStorageSettings;
struct AzureStorageSettings;
class PullingPipelineExecutor;
using ReadTaskCallback = std::function<String()>;
class IOutputFormat;
class IInputFormat;
class SchemaCache;
class ReadBufferIterator; class ReadBufferIterator;
class SchemaCache;
/**
template <typename StorageSettings> * A general class containing implementation for external table engines
* such as StorageS3, StorageAzure, StorageHDFS.
* Works with an object of IObjectStorage class.
*/
class StorageObjectStorage : public IStorage class StorageObjectStorage : public IStorage
{ {
public: public:
@ -35,10 +26,26 @@ public:
using ObjectInfoPtr = std::shared_ptr<ObjectInfo>; using ObjectInfoPtr = std::shared_ptr<ObjectInfo>;
using ObjectInfos = std::vector<ObjectInfoPtr>; using ObjectInfos = std::vector<ObjectInfoPtr>;
struct QuerySettings
{
/// Insert settings:
bool truncate_on_insert;
bool create_new_file_on_insert;
/// Schema inference settings:
bool schema_inference_use_cache;
SchemaInferenceMode schema_inference_mode;
/// List settings:
bool skip_empty_files;
size_t list_object_keys_size;
bool throw_on_zero_files_match;
bool ignore_non_existent_file;
};
StorageObjectStorage( StorageObjectStorage(
ConfigurationPtr configuration_, ConfigurationPtr configuration_,
ObjectStoragePtr object_storage_, ObjectStoragePtr object_storage_,
const String & engine_name_,
ContextPtr context_, ContextPtr context_,
const StorageID & table_id_, const StorageID & table_id_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
@ -48,17 +55,17 @@ public:
bool distributed_processing_ = false, bool distributed_processing_ = false,
ASTPtr partition_by_ = nullptr); ASTPtr partition_by_ = nullptr);
String getName() const override { return engine_name; } String getName() const override;
void read( void read(
QueryPlan & query_plan, QueryPlan & query_plan,
const Names &, const Names & column_names,
const StorageSnapshotPtr &, const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo &, SelectQueryInfo & query_info,
ContextPtr, ContextPtr local_context,
QueryProcessingStage::Enum, QueryProcessingStage::Enum processed_stage,
size_t, size_t max_block_size,
size_t) override; size_t num_streams) override;
SinkToStoragePtr write( SinkToStoragePtr write(
const ASTPtr & query, const ASTPtr & query,
@ -84,7 +91,9 @@ public:
bool parallelizeOutputAfterReading(ContextPtr context) const override; bool parallelizeOutputAfterReading(ContextPtr context) const override;
static SchemaCache & getSchemaCache(const ContextPtr & context); SchemaCache & getSchemaCache(const ContextPtr & context);
static SchemaCache & getSchemaCache(const ContextPtr & context, const std::string & storage_type_name);
static ColumnsDescription getTableStructureFromData( static ColumnsDescription getTableStructureFromData(
const ObjectStoragePtr & object_storage, const ObjectStoragePtr & object_storage,
@ -108,19 +117,15 @@ protected:
ObjectInfos & read_keys, ObjectInfos & read_keys,
const ContextPtr & context); const ContextPtr & context);
ConfigurationPtr configuration;
const ObjectStoragePtr object_storage;
const std::string engine_name; const std::string engine_name;
std::optional<FormatSettings> format_settings; const std::optional<FormatSettings> format_settings;
const ASTPtr partition_by; const ASTPtr partition_by;
const bool distributed_processing; const bool distributed_processing;
LoggerPtr log; LoggerPtr log;
ObjectStoragePtr object_storage;
ConfigurationPtr configuration;
std::mutex configuration_update_mutex; std::mutex configuration_update_mutex;
}; };
using StorageS3 = StorageObjectStorage<S3StorageSettings>;
using StorageAzureBlob = StorageObjectStorage<AzureStorageSettings>;
using StorageHDFS = StorageObjectStorage<HDFSStorageSettings>;
} }

View File

@ -15,6 +15,7 @@
#include <Storages/ObjectStorage/StorageObjectStorageConfiguration.h> #include <Storages/ObjectStorage/StorageObjectStorageConfiguration.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Parsers/queryToString.h> #include <Parsers/queryToString.h>
#include <Storages/ObjectStorage/Utils.h>
namespace DB namespace DB
{ {
@ -24,47 +25,34 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
} }
template <typename Definition, typename StorageSettings, typename Configuration> StorageObjectStorageCluster::StorageObjectStorageCluster(
StorageObjectStorageCluster<Definition, StorageSettings, Configuration>::StorageObjectStorageCluster(
const String & cluster_name_, const String & cluster_name_,
const Storage::ConfigurationPtr & configuration_, ConfigurationPtr configuration_,
ObjectStoragePtr object_storage_, ObjectStoragePtr object_storage_,
const String & engine_name_,
const StorageID & table_id_, const StorageID & table_id_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, const ConstraintsDescription & constraints_,
ContextPtr context_) ContextPtr context_)
: IStorageCluster(cluster_name_, : IStorageCluster(
table_id_, cluster_name_, table_id_, getLogger(fmt::format("{}({})", configuration_->getEngineName(), table_id_.table_name)))
getLogger(fmt::format("{}({})", engine_name_, table_id_.table_name)))
, engine_name(engine_name_)
, configuration{configuration_} , configuration{configuration_}
, object_storage(object_storage_) , object_storage(object_storage_)
{ {
configuration->check(context_); configuration->check(context_);
StorageInMemoryMetadata storage_metadata; auto metadata = getStorageMetadata(
object_storage, configuration, columns_, constraints_,
{}/* format_settings */, ""/* comment */, context_);
if (columns_.empty()) setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns()));
setInMemoryMetadata(std::move(metadata));
}
std::string StorageObjectStorageCluster::getName() const
{ {
ColumnsDescription columns = Storage::getTableStructureFromData(object_storage, configuration, /*format_settings=*/std::nullopt, context_); return configuration->getEngineName();
storage_metadata.setColumns(columns);
}
else
{
if (configuration->format == "auto")
StorageS3::setFormatFromData(object_storage, configuration, /*format_settings=*/std::nullopt, context_);
storage_metadata.setColumns(columns_);
} }
storage_metadata.setConstraints(constraints_); void StorageObjectStorageCluster::updateQueryToSendIfNeeded(
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()));
}
template <typename Definition, typename StorageSettings, typename Configuration>
void StorageObjectStorageCluster<Definition, StorageSettings, Configuration>::updateQueryToSendIfNeeded(
ASTPtr & query, ASTPtr & query,
const DB::StorageSnapshotPtr & storage_snapshot, const DB::StorageSnapshotPtr & storage_snapshot,
const ContextPtr & context) const ContextPtr & context)
@ -72,24 +60,32 @@ void StorageObjectStorageCluster<Definition, StorageSettings, Configuration>::up
ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query);
if (!expression_list) if (!expression_list)
{ {
throw Exception(ErrorCodes::LOGICAL_ERROR, throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Expected SELECT query from table function {}, got '{}'", "Expected SELECT query from table function {}, got '{}'",
engine_name, queryToString(query)); configuration->getEngineName(), queryToString(query));
} }
TableFunction::updateStructureAndFormatArgumentsIfNeeded( ASTs & args = expression_list->children;
expression_list->children, const auto & structure = storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription();
storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), if (args.empty())
configuration->format, {
context); throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Unexpected empty list of arguments for {}Cluster table function",
configuration->getEngineName());
} }
template <typename Definition, typename StorageSettings, typename Configuration> ASTPtr cluster_name_arg = args.front();
RemoteQueryExecutor::Extension args.erase(args.begin());
StorageObjectStorageCluster<Definition, StorageSettings, Configuration>::getTaskIteratorExtension( configuration->addStructureAndFormatToArgs(args, structure, configuration->format, context);
args.insert(args.begin(), cluster_name_arg);
}
RemoteQueryExecutor::Extension StorageObjectStorageCluster::getTaskIteratorExtension(
const ActionsDAG::Node * predicate, const ContextPtr & local_context) const const ActionsDAG::Node * predicate, const ContextPtr & local_context) const
{ {
const auto settings = StorageSettings::create(local_context->getSettingsRef()); const auto settings = configuration->getQuerySettings(local_context);
auto iterator = std::make_shared<StorageObjectStorageSource::GlobIterator>( auto iterator = std::make_shared<StorageObjectStorageSource::GlobIterator>(
object_storage, configuration, predicate, virtual_columns, local_context, object_storage, configuration, predicate, virtual_columns, local_context,
nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match, nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match,
@ -106,17 +102,4 @@ StorageObjectStorageCluster<Definition, StorageSettings, Configuration>::getTask
return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) };
} }
#if USE_AWS_S3
template class StorageObjectStorageCluster<S3ClusterDefinition, S3StorageSettings, StorageS3Configuration>;
#endif
#if USE_AZURE_BLOB_STORAGE
template class StorageObjectStorageCluster<AzureClusterDefinition, AzureStorageSettings, StorageAzureBlobConfiguration>;
#endif
#if USE_HDFS
template class StorageObjectStorageCluster<HDFSClusterDefinition, HDFSStorageSettings, StorageHDFSConfiguration>;
#endif
} }

View File

@ -11,32 +11,25 @@
namespace DB namespace DB
{ {
class StorageS3Settings;
class StorageAzureBlobSettings;
class Context; class Context;
template <typename Definition, typename StorageSettings, typename Configuration>
class StorageObjectStorageCluster : public IStorageCluster class StorageObjectStorageCluster : public IStorageCluster
{ {
public: public:
using Storage = StorageObjectStorage<StorageSettings>; using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr;
using TableFunction = TableFunctionObjectStorageCluster<Definition, StorageSettings, Configuration>;
StorageObjectStorageCluster( StorageObjectStorageCluster(
const String & cluster_name_, const String & cluster_name_,
const Storage::ConfigurationPtr & configuration_, ConfigurationPtr configuration_,
ObjectStoragePtr object_storage_, ObjectStoragePtr object_storage_,
const String & engine_name_,
const StorageID & table_id_, const StorageID & table_id_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, const ConstraintsDescription & constraints_,
ContextPtr context_); ContextPtr context_);
std::string getName() const override { return engine_name; } std::string getName() const override;
RemoteQueryExecutor::Extension RemoteQueryExecutor::Extension getTaskIteratorExtension(
getTaskIteratorExtension(
const ActionsDAG::Node * predicate, const ActionsDAG::Node * predicate,
const ContextPtr & context) const override; const ContextPtr & context) const override;
@ -53,20 +46,9 @@ private:
const ContextPtr & context) override; const ContextPtr & context) override;
const String engine_name; const String engine_name;
const Storage::ConfigurationPtr configuration; const StorageObjectStorage::ConfigurationPtr configuration;
const ObjectStoragePtr object_storage; const ObjectStoragePtr object_storage;
NamesAndTypesList virtual_columns; NamesAndTypesList virtual_columns;
}; };
#if USE_AWS_S3
using StorageS3Cluster = StorageObjectStorageCluster<S3ClusterDefinition, S3StorageSettings, StorageS3Configuration>;
#endif
#if USE_AZURE_BLOB_STORAGE
using StorageAzureBlobCluster = StorageObjectStorageCluster<AzureClusterDefinition, AzureStorageSettings, StorageAzureBlobConfiguration>;
#endif
#if USE_HDFS
using StorageHDFSCluster = StorageObjectStorageCluster<HDFSClusterDefinition, HDFSStorageSettings, StorageHDFSConfiguration>;
#endif
} }

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <Disks/ObjectStorages/IObjectStorage.h> #include <Disks/ObjectStorages/IObjectStorage.h>
#include <Storages/NamedCollectionsHelpers.h> #include <Storages/NamedCollectionsHelpers.h>
#include "StorageObjectStorage.h"
#include <filesystem> #include <filesystem>
namespace fs = std::filesystem; namespace fs = std::filesystem;
@ -27,6 +28,9 @@ public:
ContextPtr local_context, ContextPtr local_context,
bool with_table_structure); bool with_table_structure);
virtual std::string getTypeName() const = 0;
virtual std::string getEngineName() const = 0;
virtual Path getPath() const = 0; virtual Path getPath() const = 0;
virtual void setPath(const Path & path) = 0; virtual void setPath(const Path & path) = 0;
@ -36,6 +40,9 @@ public:
virtual String getDataSourceDescription() = 0; virtual String getDataSourceDescription() = 0;
virtual String getNamespace() const = 0; virtual String getNamespace() const = 0;
virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0;
virtual void addStructureAndFormatToArgs(
ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0;
bool withWildcard() const; bool withWildcard() const;
bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); }

View File

@ -1,102 +0,0 @@
#pragma once
#include <Interpreters/Context_fwd.h>
#include <Core/Settings.h>
#include <Common/CurrentMetrics.h>
namespace CurrentMetrics
{
extern const Metric ObjectStorageAzureThreads;
extern const Metric ObjectStorageAzureThreadsActive;
extern const Metric ObjectStorageAzureThreadsScheduled;
extern const Metric ObjectStorageS3Threads;
extern const Metric ObjectStorageS3ThreadsActive;
extern const Metric ObjectStorageS3ThreadsScheduled;
}
namespace DB
{
struct StorageObjectStorageSettings
{
bool truncate_on_insert;
bool create_new_file_on_insert;
bool schema_inference_use_cache;
SchemaInferenceMode schema_inference_mode;
bool skip_empty_files;
size_t list_object_keys_size;
bool throw_on_zero_files_match;
bool ignore_non_existent_file;
};
struct S3StorageSettings
{
static StorageObjectStorageSettings create(const Settings & settings)
{
return StorageObjectStorageSettings{
.truncate_on_insert = settings.s3_truncate_on_insert,
.create_new_file_on_insert = settings.s3_create_new_file_on_insert,
.schema_inference_use_cache = settings.schema_inference_use_cache_for_s3,
.schema_inference_mode = settings.schema_inference_mode,
.skip_empty_files = settings.s3_skip_empty_files,
.list_object_keys_size = settings.s3_list_object_keys_size,
.throw_on_zero_files_match = settings.s3_throw_on_zero_files_match,
.ignore_non_existent_file = settings.s3_ignore_file_doesnt_exist,
};
}
static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_s3";
static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageS3Threads; } /// NOLINT
static CurrentMetrics::Metric ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageS3ThreadsActive; } /// NOLINT
static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageS3ThreadsScheduled; } /// NOLINT
};
struct AzureStorageSettings
{
static StorageObjectStorageSettings create(const Settings & settings)
{
return StorageObjectStorageSettings{
.truncate_on_insert = settings.azure_truncate_on_insert,
.create_new_file_on_insert = settings.azure_create_new_file_on_insert,
.schema_inference_use_cache = settings.schema_inference_use_cache_for_azure,
.schema_inference_mode = settings.schema_inference_mode,
.skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure
.list_object_keys_size = settings.azure_list_object_keys_size,
.throw_on_zero_files_match = settings.s3_throw_on_zero_files_match,
.ignore_non_existent_file = settings.azure_ignore_file_doesnt_exist,
};
}
static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_azure";
static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageAzureThreads; } /// NOLINT
static CurrentMetrics::Metric ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageAzureThreadsActive; } /// NOLINT
static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageAzureThreadsScheduled; } /// NOLINT
};
struct HDFSStorageSettings
{
static StorageObjectStorageSettings create(const Settings & settings)
{
return StorageObjectStorageSettings{
.truncate_on_insert = settings.hdfs_truncate_on_insert,
.create_new_file_on_insert = settings.hdfs_create_new_file_on_insert,
.schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs,
.schema_inference_mode = settings.schema_inference_mode,
.skip_empty_files = settings.hdfs_skip_empty_files, /// TODO: add setting for hdfs
.list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs
.throw_on_zero_files_match = settings.s3_throw_on_zero_files_match,
.ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist,
};
}
static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_hdfs";
/// TODO: s3 -> hdfs
static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageS3Threads; } /// NOLINT
static CurrentMetrics::Metric ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageS3ThreadsActive; } /// NOLINT
static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageS3ThreadsScheduled; } /// NOLINT
};
}

View File

@ -103,7 +103,6 @@ void StorageObjectStorageSink::release()
PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink(
ObjectStoragePtr object_storage_, ObjectStoragePtr object_storage_,
StorageObjectStorageConfigurationPtr configuration_, StorageObjectStorageConfigurationPtr configuration_,
const StorageObjectStorageSettings & query_settings_,
std::optional<FormatSettings> format_settings_, std::optional<FormatSettings> format_settings_,
const Block & sample_block_, const Block & sample_block_,
ContextPtr context_, ContextPtr context_,
@ -111,7 +110,7 @@ PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink(
: PartitionedSink(partition_by, context_, sample_block_) : PartitionedSink(partition_by, context_, sample_block_)
, object_storage(object_storage_) , object_storage(object_storage_)
, configuration(configuration_) , configuration(configuration_)
, query_settings(query_settings_) , query_settings(configuration_->getQuerySettings(context_))
, format_settings(format_settings_) , format_settings(format_settings_)
, sample_block(sample_block_) , sample_block(sample_block_)
, context(context_) , context(context_)

View File

@ -1,7 +1,6 @@
#pragma once #pragma once
#include <Storages/PartitionedSink.h> #include <Storages/PartitionedSink.h>
#include <Storages/ObjectStorage/StorageObjectStorageConfiguration.h> #include <Storages/ObjectStorage/StorageObjectStorageConfiguration.h>
#include <Storages/ObjectStorage/StorageObjectStorageQuerySettings.h>
#include <Processors/Formats/IOutputFormat.h> #include <Processors/Formats/IOutputFormat.h>
#include <Disks/ObjectStorages/IObjectStorage_fwd.h> #include <Disks/ObjectStorages/IObjectStorage_fwd.h>
@ -47,7 +46,6 @@ public:
PartitionedStorageObjectStorageSink( PartitionedStorageObjectStorageSink(
ObjectStoragePtr object_storage_, ObjectStoragePtr object_storage_,
StorageObjectStorageConfigurationPtr configuration_, StorageObjectStorageConfigurationPtr configuration_,
const StorageObjectStorageSettings & query_settings_,
std::optional<FormatSettings> format_settings_, std::optional<FormatSettings> format_settings_,
const Block & sample_block_, const Block & sample_block_,
ContextPtr context_, ContextPtr context_,
@ -61,7 +59,7 @@ private:
ObjectStoragePtr object_storage; ObjectStoragePtr object_storage;
StorageObjectStorageConfigurationPtr configuration; StorageObjectStorageConfigurationPtr configuration;
const StorageObjectStorageSettings query_settings; const StorageObjectStorage::QuerySettings query_settings;
const std::optional<FormatSettings> format_settings; const std::optional<FormatSettings> format_settings;
const Block sample_block; const Block sample_block;
const ContextPtr context; const ContextPtr context;

View File

@ -10,7 +10,6 @@
#include <Formats/FormatFactory.h> #include <Formats/FormatFactory.h>
#include <Formats/ReadSchemaUtils.h> #include <Formats/ReadSchemaUtils.h>
#include <Storages/ObjectStorage/StorageObjectStorageConfiguration.h> #include <Storages/ObjectStorage/StorageObjectStorageConfiguration.h>
#include <Storages/ObjectStorage/StorageObjectStorageQuerySettings.h>
#include <Storages/Cache/SchemaCache.h> #include <Storages/Cache/SchemaCache.h>
#include <Common/parseGlobs.h> #include <Common/parseGlobs.h>
@ -20,6 +19,13 @@ namespace ProfileEvents
extern const Event EngineFileLikeReadFiles; extern const Event EngineFileLikeReadFiles;
} }
namespace CurrentMetrics
{
extern const Metric StorageObjectStorageThreads;
extern const Metric StorageObjectStorageThreadsActive;
extern const Metric StorageObjectStorageThreadsScheduled;
}
namespace DB namespace DB
{ {
@ -37,16 +43,12 @@ StorageObjectStorageSource::StorageObjectStorageSource(
ConfigurationPtr configuration_, ConfigurationPtr configuration_,
const ReadFromFormatInfo & info, const ReadFromFormatInfo & info,
std::optional<FormatSettings> format_settings_, std::optional<FormatSettings> format_settings_,
const StorageObjectStorageSettings & query_settings_, const StorageObjectStorage::QuerySettings & query_settings_,
ContextPtr context_, ContextPtr context_,
UInt64 max_block_size_, UInt64 max_block_size_,
std::shared_ptr<IIterator> file_iterator_, std::shared_ptr<IIterator> file_iterator_,
bool need_only_count_, bool need_only_count_,
SchemaCache & schema_cache_, SchemaCache & schema_cache_)
std::shared_ptr<ThreadPool> reader_pool_,
CurrentMetrics::Metric metric_threads_,
CurrentMetrics::Metric metric_threads_active_,
CurrentMetrics::Metric metric_threads_scheduled_)
: SourceWithKeyCondition(info.source_header, false) : SourceWithKeyCondition(info.source_header, false)
, WithContext(context_) , WithContext(context_)
, name(std::move(name_)) , name(std::move(name_))
@ -57,13 +59,14 @@ StorageObjectStorageSource::StorageObjectStorageSource(
, max_block_size(max_block_size_) , max_block_size(max_block_size_)
, need_only_count(need_only_count_) , need_only_count(need_only_count_)
, read_from_format_info(info) , read_from_format_info(info)
, create_reader_pool(reader_pool_) , create_reader_pool(std::make_shared<ThreadPool>(
CurrentMetrics::StorageObjectStorageThreads,
CurrentMetrics::StorageObjectStorageThreadsActive,
CurrentMetrics::StorageObjectStorageThreadsScheduled,
1/* max_threads */))
, columns_desc(info.columns_description) , columns_desc(info.columns_description)
, file_iterator(file_iterator_) , file_iterator(file_iterator_)
, schema_cache(schema_cache_) , schema_cache(schema_cache_)
, metric_threads(metric_threads_)
, metric_threads_active(metric_threads_active_)
, metric_threads_scheduled(metric_threads_scheduled_)
, create_reader_scheduler(threadPoolCallbackRunnerUnsafe<ReaderHolder>(*create_reader_pool, "Reader")) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe<ReaderHolder>(*create_reader_pool, "Reader"))
{ {
} }
@ -76,26 +79,23 @@ StorageObjectStorageSource::~StorageObjectStorageSource()
std::shared_ptr<StorageObjectStorageSource::IIterator> StorageObjectStorageSource::createFileIterator( std::shared_ptr<StorageObjectStorageSource::IIterator> StorageObjectStorageSource::createFileIterator(
ConfigurationPtr configuration, ConfigurationPtr configuration,
ObjectStoragePtr object_storage, ObjectStoragePtr object_storage,
const StorageObjectStorageSettings & settings,
bool distributed_processing, bool distributed_processing,
const ContextPtr & local_context, const ContextPtr & local_context,
const ActionsDAG::Node * predicate, const ActionsDAG::Node * predicate,
const NamesAndTypesList & virtual_columns, const NamesAndTypesList & virtual_columns,
ObjectInfos * read_keys, ObjectInfos * read_keys,
CurrentMetrics::Metric metric_threads_,
CurrentMetrics::Metric metric_threads_active_,
CurrentMetrics::Metric metric_threads_scheduled_,
std::function<void(FileProgress)> file_progress_callback) std::function<void(FileProgress)> file_progress_callback)
{ {
if (distributed_processing) if (distributed_processing)
return std::make_shared<ReadTaskIterator>( return std::make_shared<ReadTaskIterator>(
local_context->getReadTaskCallback(), local_context->getReadTaskCallback(),
local_context->getSettingsRef().max_threads, local_context->getSettingsRef().max_threads);
metric_threads_, metric_threads_active_, metric_threads_scheduled_);
if (configuration->isNamespaceWithGlobs()) if (configuration->isNamespaceWithGlobs())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name");
auto settings = configuration->getQuerySettings(local_context);
if (configuration->isPathWithGlobs()) if (configuration->isPathWithGlobs())
{ {
/// Iterate through disclosed globs and make a source for each file /// Iterate through disclosed globs and make a source for each file
@ -568,7 +568,8 @@ StorageObjectStorageSource::ReaderHolder::ReaderHolder(
{ {
} }
StorageObjectStorageSource::ReaderHolder & StorageObjectStorageSource::ReaderHolder::operator=(ReaderHolder && other) noexcept StorageObjectStorageSource::ReaderHolder &
StorageObjectStorageSource::ReaderHolder::operator=(ReaderHolder && other) noexcept
{ {
/// The order of destruction is important. /// The order of destruction is important.
/// reader uses pipeline, pipeline uses read_buf. /// reader uses pipeline, pipeline uses read_buf.
@ -581,15 +582,15 @@ StorageObjectStorageSource::ReaderHolder & StorageObjectStorageSource::ReaderHol
} }
StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator(
const ReadTaskCallback & callback_, const ReadTaskCallback & callback_, size_t max_threads_count)
size_t max_threads_count,
CurrentMetrics::Metric metric_threads_,
CurrentMetrics::Metric metric_threads_active_,
CurrentMetrics::Metric metric_threads_scheduled_)
: IIterator("ReadTaskIterator") : IIterator("ReadTaskIterator")
, callback(callback_) , callback(callback_)
{ {
ThreadPool pool(metric_threads_, metric_threads_active_, metric_threads_scheduled_, max_threads_count); ThreadPool pool(
CurrentMetrics::StorageObjectStorageThreads,
CurrentMetrics::StorageObjectStorageThreadsActive,
CurrentMetrics::StorageObjectStorageThreadsScheduled, max_threads_count);
auto pool_scheduler = threadPoolCallbackRunnerUnsafe<String>(pool, "ReadTaskIter"); auto pool_scheduler = threadPoolCallbackRunnerUnsafe<String>(pool, "ReadTaskIter");
std::vector<std::future<String>> keys; std::vector<std::future<String>> keys;

View File

@ -3,7 +3,6 @@
#include <Processors/Executors/PullingPipelineExecutor.h> #include <Processors/Executors/PullingPipelineExecutor.h>
#include <Interpreters/Context_fwd.h> #include <Interpreters/Context_fwd.h>
#include <Storages/ObjectStorage/StorageObjectStorage.h> #include <Storages/ObjectStorage/StorageObjectStorage.h>
#include <Storages/ObjectStorage/StorageObjectStorageQuerySettings.h>
#include <Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h> #include <Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h>
#include <Processors/Formats/IInputFormat.h> #include <Processors/Formats/IInputFormat.h>
@ -28,16 +27,12 @@ public:
ConfigurationPtr configuration, ConfigurationPtr configuration,
const ReadFromFormatInfo & info, const ReadFromFormatInfo & info,
std::optional<FormatSettings> format_settings_, std::optional<FormatSettings> format_settings_,
const StorageObjectStorageSettings & query_settings_, const StorageObjectStorage::QuerySettings & query_settings_,
ContextPtr context_, ContextPtr context_,
UInt64 max_block_size_, UInt64 max_block_size_,
std::shared_ptr<IIterator> file_iterator_, std::shared_ptr<IIterator> file_iterator_,
bool need_only_count_, bool need_only_count_,
SchemaCache & schema_cache_, SchemaCache & schema_cache_);
std::shared_ptr<ThreadPool> reader_pool_,
CurrentMetrics::Metric metric_threads_,
CurrentMetrics::Metric metric_threads_active_,
CurrentMetrics::Metric metric_threads_scheduled_);
~StorageObjectStorageSource() override; ~StorageObjectStorageSource() override;
@ -53,15 +48,11 @@ public:
static std::shared_ptr<IIterator> createFileIterator( static std::shared_ptr<IIterator> createFileIterator(
ConfigurationPtr configuration, ConfigurationPtr configuration,
ObjectStoragePtr object_storage, ObjectStoragePtr object_storage,
const StorageObjectStorageSettings & settings,
bool distributed_processing, bool distributed_processing,
const ContextPtr & local_context, const ContextPtr & local_context,
const ActionsDAG::Node * predicate, const ActionsDAG::Node * predicate,
const NamesAndTypesList & virtual_columns, const NamesAndTypesList & virtual_columns,
ObjectInfos * read_keys, ObjectInfos * read_keys,
CurrentMetrics::Metric metric_threads_,
CurrentMetrics::Metric metric_threads_active_,
CurrentMetrics::Metric metric_threads_scheduled_,
std::function<void(FileProgress)> file_progress_callback = {}); std::function<void(FileProgress)> file_progress_callback = {});
protected: protected:
@ -69,7 +60,7 @@ protected:
ObjectStoragePtr object_storage; ObjectStoragePtr object_storage;
const ConfigurationPtr configuration; const ConfigurationPtr configuration;
const std::optional<FormatSettings> format_settings; const std::optional<FormatSettings> format_settings;
const StorageObjectStorageSettings query_settings; const StorageObjectStorage::QuerySettings query_settings;
const UInt64 max_block_size; const UInt64 max_block_size;
const bool need_only_count; const bool need_only_count;
const ReadFromFormatInfo read_from_format_info; const ReadFromFormatInfo read_from_format_info;
@ -79,10 +70,6 @@ protected:
SchemaCache & schema_cache; SchemaCache & schema_cache;
bool initialized = false; bool initialized = false;
const CurrentMetrics::Metric metric_threads;
const CurrentMetrics::Metric metric_threads_active;
const CurrentMetrics::Metric metric_threads_scheduled;
size_t total_rows_in_file = 0; size_t total_rows_in_file = 0;
LoggerPtr log = getLogger("StorageObjectStorageSource"); LoggerPtr log = getLogger("StorageObjectStorageSource");
@ -149,12 +136,7 @@ protected:
class StorageObjectStorageSource::ReadTaskIterator : public IIterator class StorageObjectStorageSource::ReadTaskIterator : public IIterator
{ {
public: public:
ReadTaskIterator( ReadTaskIterator(const ReadTaskCallback & callback_, size_t max_threads_count);
const ReadTaskCallback & callback_,
size_t max_threads_count,
CurrentMetrics::Metric metric_threads_,
CurrentMetrics::Metric metric_threads_active_,
CurrentMetrics::Metric metric_threads_scheduled_);
size_t estimatedKeysCount() override { return buffer.size(); } size_t estimatedKeysCount() override { return buffer.size(); }

View File

@ -1,8 +1,6 @@
#include <Storages/ObjectStorage/Utils.h> #include <Storages/ObjectStorage/Utils.h>
#include <Disks/ObjectStorages/IObjectStorage.h> #include <Disks/ObjectStorages/IObjectStorage.h>
#include <Storages/ObjectStorage/StorageObjectStorageConfiguration.h> #include <Storages/ObjectStorage/StorageObjectStorageConfiguration.h>
#include <Storages/ObjectStorage/StorageObjectStorageQuerySettings.h>
namespace DB namespace DB
{ {
@ -15,15 +13,15 @@ namespace ErrorCodes
std::optional<String> checkAndGetNewFileOnInsertIfNeeded( std::optional<String> checkAndGetNewFileOnInsertIfNeeded(
const IObjectStorage & object_storage, const IObjectStorage & object_storage,
const StorageObjectStorageConfiguration & configuration, const StorageObjectStorageConfiguration & configuration,
const StorageObjectStorageSettings & query_settings, const StorageObjectStorage::QuerySettings & settings,
const String & key, const String & key,
size_t sequence_number) size_t sequence_number)
{ {
if (query_settings.truncate_on_insert if (settings.truncate_on_insert
|| !object_storage.exists(StoredObject(key))) || !object_storage.exists(StoredObject(key)))
return std::nullopt; return std::nullopt;
if (query_settings.create_new_file_on_insert) if (settings.create_new_file_on_insert)
{ {
auto pos = key.find_first_of('.'); auto pos = key.find_first_of('.');
String new_key; String new_key;
@ -45,4 +43,38 @@ std::optional<String> checkAndGetNewFileOnInsertIfNeeded(
configuration.getNamespace(), key); configuration.getNamespace(), key);
} }
StorageInMemoryMetadata getStorageMetadata(
ObjectStoragePtr object_storage,
const StorageObjectStorageConfigurationPtr & configuration,
const ColumnsDescription & columns,
const ConstraintsDescription & constraints,
std::optional<FormatSettings> format_settings,
const String & comment,
const ContextPtr & context)
{
StorageInMemoryMetadata storage_metadata;
if (columns.empty())
{
auto fetched_columns = StorageObjectStorage::getTableStructureFromData(object_storage, configuration, format_settings, context);
storage_metadata.setColumns(fetched_columns);
}
else if (!columns.hasOnlyOrdinary())
{
/// We don't allow special columns.
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Special columns are not supported for {} storage"
"like MATERIALIZED, ALIAS or EPHEMERAL", configuration->getTypeName());
}
else
{
if (configuration->format == "auto")
StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context);
storage_metadata.setColumns(columns);
}
storage_metadata.setConstraints(constraints);
storage_metadata.setComment(comment);
return storage_metadata;
}
} }

View File

@ -1,18 +1,30 @@
#pragma once #pragma once
#include <Core/Types.h> #include <Core/Types.h>
#include "StorageObjectStorage.h"
namespace DB namespace DB
{ {
class IObjectStorage; class IObjectStorage;
class StorageObjectStorageConfiguration; class StorageObjectStorageConfiguration;
using StorageObjectStorageConfigurationPtr = std::shared_ptr<StorageObjectStorageConfiguration>;
struct StorageObjectStorageSettings; struct StorageObjectStorageSettings;
std::optional<std::string> checkAndGetNewFileOnInsertIfNeeded( std::optional<std::string> checkAndGetNewFileOnInsertIfNeeded(
const IObjectStorage & object_storage, const IObjectStorage & object_storage,
const StorageObjectStorageConfiguration & configuration, const StorageObjectStorageConfiguration & configuration,
const StorageObjectStorageSettings & query_settings, const StorageObjectStorage::QuerySettings & settings,
const std::string & key, const std::string & key,
size_t sequence_number); size_t sequence_number);
StorageInMemoryMetadata getStorageMetadata(
ObjectStoragePtr object_storage,
const StorageObjectStorageConfigurationPtr & configuration,
const ColumnsDescription & columns,
const ConstraintsDescription & constraints,
std::optional<FormatSettings> format_settings,
const String & comment,
const ContextPtr & context);
} }

View File

@ -2,22 +2,23 @@
#include <Storages/ObjectStorage/S3/Configuration.h> #include <Storages/ObjectStorage/S3/Configuration.h>
#include <Storages/ObjectStorage/HDFS/Configuration.h> #include <Storages/ObjectStorage/HDFS/Configuration.h>
#include <Storages/ObjectStorage/StorageObjectStorage.h> #include <Storages/ObjectStorage/StorageObjectStorage.h>
#include <Storages/ObjectStorage/StorageObjectStorageConfiguration.h>
#include <Storages/StorageFactory.h> #include <Storages/StorageFactory.h>
#include <Formats/FormatFactory.h> #include <Formats/FormatFactory.h>
namespace DB namespace DB
{ {
#if USE_AWS_S3 || USE_AZURE_BLOB_STORAGE || USE_HDFS
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
} }
template <typename StorageSettings> static std::shared_ptr<StorageObjectStorage> createStorageObjectStorage(
static std::shared_ptr<StorageObjectStorage<StorageSettings>> createStorageObjectStorage(
const StorageFactory::Arguments & args, const StorageFactory::Arguments & args,
typename StorageObjectStorage<StorageSettings>::ConfigurationPtr configuration, typename StorageObjectStorage::ConfigurationPtr configuration,
const String & engine_name,
ContextPtr context) ContextPtr context)
{ {
auto & engine_args = args.engine_args; auto & engine_args = args.engine_args;
@ -54,10 +55,9 @@ static std::shared_ptr<StorageObjectStorage<StorageSettings>> createStorageObjec
if (args.storage_def->partition_by) if (args.storage_def->partition_by)
partition_by = args.storage_def->partition_by->clone(); partition_by = args.storage_def->partition_by->clone();
return std::make_shared<StorageObjectStorage<StorageSettings>>( return std::make_shared<StorageObjectStorage>(
configuration, configuration,
configuration->createObjectStorage(context), configuration->createObjectStorage(context),
engine_name,
args.getContext(), args.getContext(),
args.table_id, args.table_id,
args.columns, args.columns,
@ -68,6 +68,8 @@ static std::shared_ptr<StorageObjectStorage<StorageSettings>> createStorageObjec
partition_by); partition_by);
} }
#endif
#if USE_AZURE_BLOB_STORAGE #if USE_AZURE_BLOB_STORAGE
void registerStorageAzure(StorageFactory & factory) void registerStorageAzure(StorageFactory & factory)
{ {
@ -76,7 +78,7 @@ void registerStorageAzure(StorageFactory & factory)
auto context = args.getLocalContext(); auto context = args.getLocalContext();
auto configuration = std::make_shared<StorageAzureBlobConfiguration>(); auto configuration = std::make_shared<StorageAzureBlobConfiguration>();
StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false);
return createStorageObjectStorage<AzureStorageSettings>(args, configuration, "Azure", context); return createStorageObjectStorage(args, configuration, context);
}, },
{ {
.supports_settings = true, .supports_settings = true,
@ -95,7 +97,7 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory)
auto context = args.getLocalContext(); auto context = args.getLocalContext();
auto configuration = std::make_shared<StorageS3Configuration>(); auto configuration = std::make_shared<StorageS3Configuration>();
StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false);
return createStorageObjectStorage<S3StorageSettings>(args, configuration, name, context); return createStorageObjectStorage(args, configuration, context);
}, },
{ {
.supports_settings = true, .supports_settings = true,
@ -130,7 +132,7 @@ void registerStorageHDFS(StorageFactory & factory)
auto context = args.getLocalContext(); auto context = args.getLocalContext();
auto configuration = std::make_shared<StorageHDFSConfiguration>(); auto configuration = std::make_shared<StorageHDFSConfiguration>();
StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false);
return createStorageObjectStorage<HDFSStorageSettings>(args, configuration, "HDFS", context); return createStorageObjectStorage(args, configuration, context);
}, },
{ {
.supports_settings = true, .supports_settings = true,

View File

@ -7,7 +7,6 @@
#include <Storages/S3Queue/S3QueueFilesMetadata.h> #include <Storages/S3Queue/S3QueueFilesMetadata.h>
#include <Storages/ObjectStorage/StorageObjectStorage.h> #include <Storages/ObjectStorage/StorageObjectStorage.h>
#include <Storages/ObjectStorage/StorageObjectStorageSource.h> #include <Storages/ObjectStorage/StorageObjectStorageSource.h>
#include <Storages/ObjectStorage/StorageObjectStorageQuerySettings.h>
#include <Interpreters/S3QueueLog.h> #include <Interpreters/S3QueueLog.h>
@ -21,7 +20,7 @@ struct ObjectMetadata;
class StorageS3QueueSource : public ISource, WithContext class StorageS3QueueSource : public ISource, WithContext
{ {
public: public:
using Storage = StorageObjectStorage<S3StorageSettings>; using Storage = StorageObjectStorage;
using ConfigurationPtr = Storage::ConfigurationPtr; using ConfigurationPtr = Storage::ConfigurationPtr;
using GlobIterator = StorageObjectStorageSource::GlobIterator; using GlobIterator = StorageObjectStorageSource::GlobIterator;

View File

@ -37,13 +37,6 @@ namespace ProfileEvents
extern const Event S3ListObjects; extern const Event S3ListObjects;
} }
namespace CurrentMetrics
{
extern const Metric ObjectStorageS3Threads;
extern const Metric ObjectStorageS3ThreadsActive;
extern const Metric ObjectStorageS3ThreadsScheduled;
}
namespace DB namespace DB
{ {
@ -151,14 +144,14 @@ StorageS3Queue::StorageS3Queue(
StorageInMemoryMetadata storage_metadata; StorageInMemoryMetadata storage_metadata;
if (columns_.empty()) if (columns_.empty())
{ {
auto columns = Storage::getTableStructureFromData(object_storage, configuration, format_settings, context_); auto columns = StorageObjectStorage::getTableStructureFromData(object_storage, configuration, format_settings, context_);
storage_metadata.setColumns(columns); storage_metadata.setColumns(columns);
} }
else else
{ {
if (configuration->format == "auto") if (configuration->format == "auto")
{ {
StorageObjectStorage<S3StorageSettings>::setFormatFromData(object_storage, configuration, format_settings, context_); StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context_);
} }
storage_metadata.setColumns(columns_); storage_metadata.setColumns(columns_);
} }
@ -370,26 +363,18 @@ std::shared_ptr<StorageS3QueueSource> StorageS3Queue::createSource(
size_t max_block_size, size_t max_block_size,
ContextPtr local_context) ContextPtr local_context)
{ {
auto threadpool = std::make_shared<ThreadPool>(CurrentMetrics::ObjectStorageS3Threads,
CurrentMetrics::ObjectStorageS3ThreadsActive,
CurrentMetrics::ObjectStorageS3ThreadsScheduled,
/* max_threads */1);
auto internal_source = std::make_unique<StorageObjectStorageSource>( auto internal_source = std::make_unique<StorageObjectStorageSource>(
getName(), getName(),
object_storage, object_storage,
configuration, configuration,
info, info,
format_settings, format_settings,
S3StorageSettings::create(local_context->getSettingsRef()), configuration->getQuerySettings(local_context),
local_context, local_context,
max_block_size, max_block_size,
file_iterator, file_iterator,
false, false,
Storage::getSchemaCache(local_context), StorageObjectStorage::getSchemaCache(local_context, configuration->getTypeName()));
threadpool,
CurrentMetrics::ObjectStorageS3Threads,
CurrentMetrics::ObjectStorageS3ThreadsActive,
CurrentMetrics::ObjectStorageS3ThreadsScheduled);
auto file_deleter = [=, this](const std::string & path) mutable auto file_deleter = [=, this](const std::string & path) mutable
{ {
@ -596,7 +581,7 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const
std::shared_ptr<StorageS3Queue::FileIterator> StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) std::shared_ptr<StorageS3Queue::FileIterator> StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate)
{ {
auto settings = S3StorageSettings::create(local_context->getSettingsRef()); auto settings = configuration->getQuerySettings(local_context);
auto glob_iterator = std::make_unique<StorageObjectStorageSource::GlobIterator>( auto glob_iterator = std::make_unique<StorageObjectStorageSource::GlobIterator>(
object_storage, configuration, predicate, getVirtualsList(), local_context, nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match); object_storage, configuration, predicate, getVirtualsList(), local_context, nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match);

View File

@ -21,8 +21,7 @@ class S3QueueFilesMetadata;
class StorageS3Queue : public IStorage, WithContext class StorageS3Queue : public IStorage, WithContext
{ {
public: public:
using Storage = StorageObjectStorage<S3StorageSettings>; using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr;
using ConfigurationPtr = Storage::ConfigurationPtr;
StorageS3Queue( StorageS3Queue(
std::unique_ptr<S3QueueSettings> s3queue_settings_, std::unique_ptr<S3QueueSettings> s3queue_settings_,

View File

@ -9,6 +9,9 @@
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Formats/ReadSchemaUtils.h> #include <Formats/ReadSchemaUtils.h>
#include <Storages/ObjectStorage/S3/Configuration.h>
#include <Storages/ObjectStorage/HDFS/Configuration.h>
#include <Storages/ObjectStorage/AzureBlob/Configuration.h>
namespace DB namespace DB
{ {
@ -74,14 +77,14 @@ void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, C
{ {
fillDataImpl(res_columns, StorageFile::getSchemaCache(context), "File"); fillDataImpl(res_columns, StorageFile::getSchemaCache(context), "File");
#if USE_AWS_S3 #if USE_AWS_S3
fillDataImpl(res_columns, StorageS3::getSchemaCache(context), "S3"); fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageS3Configuration::type_name), "S3");
#endif #endif
#if USE_HDFS #if USE_HDFS
fillDataImpl(res_columns, StorageHDFS::getSchemaCache(context), "HDFS"); fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageHDFSConfiguration::type_name), "HDFS");
#endif #endif
fillDataImpl(res_columns, StorageURL::getSchemaCache(context), "URL"); fillDataImpl(res_columns, StorageURL::getSchemaCache(context), "URL");
#if USE_AZURE_BLOB_STORAGE #if USE_AZURE_BLOB_STORAGE
fillDataImpl(res_columns, StorageAzureBlob::getSchemaCache(context), "Azure"); /// FIXME fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageAzureBlobConfiguration::type_name), "Azure");
#endif #endif
} }

View File

@ -39,7 +39,7 @@ protected:
columns = cached_columns; columns = cached_columns;
StoragePtr storage = Storage::create( StoragePtr storage = Storage::create(
configuration, context, "", StorageID(TableFunction::getDatabaseName(), table_name), configuration, context, StorageID(TableFunction::getDatabaseName(), table_name),
columns, ConstraintsDescription{}, String{}, std::nullopt, LoadingStrictnessLevel::CREATE); columns, ConstraintsDescription{}, String{}, std::nullopt, LoadingStrictnessLevel::CREATE);
storage->startup(); storage->startup();

View File

@ -27,27 +27,27 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
} }
template <typename Definition, typename StorageSettings, typename Configuration> template <typename Definition, typename Configuration>
ObjectStoragePtr TableFunctionObjectStorage< ObjectStoragePtr TableFunctionObjectStorage<
Definition, StorageSettings, Configuration>::getObjectStorage(const ContextPtr & context, bool create_readonly) const Definition, Configuration>::getObjectStorage(const ContextPtr & context, bool create_readonly) const
{ {
if (!object_storage) if (!object_storage)
object_storage = configuration->createObjectStorage(context, create_readonly); object_storage = configuration->createObjectStorage(context, create_readonly);
return object_storage; return object_storage;
} }
template <typename Definition, typename StorageSettings, typename Configuration> template <typename Definition, typename Configuration>
StorageObjectStorageConfigurationPtr TableFunctionObjectStorage< StorageObjectStorageConfigurationPtr TableFunctionObjectStorage<
Definition, StorageSettings, Configuration>::getConfiguration() const Definition, Configuration>::getConfiguration() const
{ {
if (!configuration) if (!configuration)
configuration = std::make_shared<Configuration>(); configuration = std::make_shared<Configuration>();
return configuration; return configuration;
} }
template <typename Definition, typename StorageSettings, typename Configuration> template <typename Definition, typename Configuration>
std::vector<size_t> TableFunctionObjectStorage< std::vector<size_t> TableFunctionObjectStorage<
Definition, StorageSettings, Configuration>::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const Definition, Configuration>::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const
{ {
auto & table_function_node = query_node_table_function->as<TableFunctionNode &>(); auto & table_function_node = query_node_table_function->as<TableFunctionNode &>();
auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes();
@ -63,22 +63,21 @@ std::vector<size_t> TableFunctionObjectStorage<
return result; return result;
} }
template <typename Definition, typename StorageSettings, typename Configuration> template <typename Definition, typename Configuration>
void TableFunctionObjectStorage<Definition, StorageSettings, Configuration>::updateStructureAndFormatArgumentsIfNeeded( void TableFunctionObjectStorage<Definition, Configuration>::updateStructureAndFormatArgumentsIfNeeded(
ASTs & args, const String & structure, const String & format, const ContextPtr & context) ASTs & args, const String & structure, const String & format, const ContextPtr & context)
{ {
Configuration::addStructureAndFormatToArgs(args, structure, format, context); Configuration().addStructureAndFormatToArgs(args, structure, format, context);
} }
template <typename Definition, typename StorageSettings, typename Configuration> template <typename Definition, typename Configuration>
void TableFunctionObjectStorage< void TableFunctionObjectStorage<Definition, Configuration>::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context)
Definition, StorageSettings, Configuration>::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context)
{ {
StorageObjectStorageConfiguration::initialize(*getConfiguration(), engine_args, local_context, true); StorageObjectStorageConfiguration::initialize(*getConfiguration(), engine_args, local_context, true);
} }
template <typename Definition, typename StorageSettings, typename Configuration> template <typename Definition, typename Configuration>
void TableFunctionObjectStorage<Definition, StorageSettings, Configuration>::parseArguments(const ASTPtr & ast_function, ContextPtr context) void TableFunctionObjectStorage<Definition, Configuration>::parseArguments(const ASTPtr & ast_function, ContextPtr context)
{ {
/// Clone ast function, because we can modify its arguments like removing headers. /// Clone ast function, because we can modify its arguments like removing headers.
auto ast_copy = ast_function->clone(); auto ast_copy = ast_function->clone();
@ -90,38 +89,38 @@ void TableFunctionObjectStorage<Definition, StorageSettings, Configuration>::par
parseArgumentsImpl(args, context); parseArgumentsImpl(args, context);
} }
template <typename Definition, typename StorageSettings, typename Configuration> template <typename Definition, typename Configuration>
ColumnsDescription TableFunctionObjectStorage< ColumnsDescription TableFunctionObjectStorage<
Definition, StorageSettings, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const Definition, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const
{ {
chassert(configuration); chassert(configuration);
if (configuration->structure == "auto") if (configuration->structure == "auto")
{ {
context->checkAccess(getSourceAccessType()); context->checkAccess(getSourceAccessType());
auto storage = getObjectStorage(context, !is_insert_query); auto storage = getObjectStorage(context, !is_insert_query);
return StorageObjectStorage<StorageSettings>::getTableStructureFromData(storage, configuration, std::nullopt, context); return StorageObjectStorage::getTableStructureFromData(storage, configuration, std::nullopt, context);
} }
return parseColumnsListFromString(configuration->structure, context); return parseColumnsListFromString(configuration->structure, context);
} }
template <typename Definition, typename StorageSettings, typename Configuration> template <typename Definition, typename Configuration>
bool TableFunctionObjectStorage< bool TableFunctionObjectStorage<
Definition, StorageSettings, Configuration>::supportsReadingSubsetOfColumns(const ContextPtr & context) Definition, Configuration>::supportsReadingSubsetOfColumns(const ContextPtr & context)
{ {
chassert(configuration); chassert(configuration);
return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context);
} }
template <typename Definition, typename StorageSettings, typename Configuration> template <typename Definition, typename Configuration>
std::unordered_set<String> TableFunctionObjectStorage< std::unordered_set<String> TableFunctionObjectStorage<
Definition, StorageSettings, Configuration>::getVirtualsToCheckBeforeUsingStructureHint() const Definition, Configuration>::getVirtualsToCheckBeforeUsingStructureHint() const
{ {
return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); return VirtualColumnUtils::getVirtualNamesForFileLikeStorage();
} }
template <typename Definition, typename StorageSettings, typename Configuration> template <typename Definition, typename Configuration>
StoragePtr TableFunctionObjectStorage<Definition, StorageSettings, Configuration>::executeImpl( StoragePtr TableFunctionObjectStorage<Definition, Configuration>::executeImpl(
const ASTPtr & /* ast_function */, const ASTPtr & /* ast_function */,
ContextPtr context, ContextPtr context,
const std::string & table_name, const std::string & table_name,
@ -137,10 +136,9 @@ StoragePtr TableFunctionObjectStorage<Definition, StorageSettings, Configuration
else if (!cached_columns.empty()) else if (!cached_columns.empty())
columns = cached_columns; columns = cached_columns;
StoragePtr storage = std::make_shared<StorageObjectStorage<StorageSettings>>( StoragePtr storage = std::make_shared<StorageObjectStorage>(
configuration, configuration,
getObjectStorage(context, !is_insert_query), getObjectStorage(context, !is_insert_query),
Definition::storage_type_name,
context, context,
StorageID(getDatabaseName(), table_name), StorageID(getDatabaseName(), table_name),
columns, columns,
@ -159,7 +157,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory)
{ {
UNUSED(factory); UNUSED(factory);
#if USE_AWS_S3 #if USE_AWS_S3
factory.registerFunction<TableFunctionObjectStorage<S3Definition, S3StorageSettings, StorageS3Configuration>>( factory.registerFunction<TableFunctionObjectStorage<S3Definition, StorageS3Configuration>>(
{ {
.documentation = .documentation =
{ {
@ -170,7 +168,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory)
.allow_readonly = false .allow_readonly = false
}); });
factory.registerFunction<TableFunctionObjectStorage<GCSDefinition, S3StorageSettings, StorageS3Configuration>>( factory.registerFunction<TableFunctionObjectStorage<GCSDefinition, StorageS3Configuration>>(
{ {
.documentation = .documentation =
{ {
@ -181,7 +179,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory)
.allow_readonly = false .allow_readonly = false
}); });
factory.registerFunction<TableFunctionObjectStorage<COSNDefinition, S3StorageSettings, StorageS3Configuration>>( factory.registerFunction<TableFunctionObjectStorage<COSNDefinition, StorageS3Configuration>>(
{ {
.documentation = .documentation =
{ {
@ -191,7 +189,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory)
.categories{"DataLake"}}, .categories{"DataLake"}},
.allow_readonly = false .allow_readonly = false
}); });
factory.registerFunction<TableFunctionObjectStorage<OSSDefinition, S3StorageSettings, StorageS3Configuration>>( factory.registerFunction<TableFunctionObjectStorage<OSSDefinition, StorageS3Configuration>>(
{ {
.documentation = .documentation =
{ {
@ -204,7 +202,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory)
#endif #endif
#if USE_AZURE_BLOB_STORAGE #if USE_AZURE_BLOB_STORAGE
factory.registerFunction<TableFunctionObjectStorage<AzureDefinition, AzureStorageSettings, StorageAzureBlobConfiguration>>( factory.registerFunction<TableFunctionObjectStorage<AzureDefinition, StorageAzureBlobConfiguration>>(
{ {
.documentation = .documentation =
{ {
@ -220,7 +218,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory)
}); });
#endif #endif
#if USE_HDFS #if USE_HDFS
factory.registerFunction<TableFunctionObjectStorage<HDFSDefinition, HDFSStorageSettings, StorageHDFSConfiguration>>( factory.registerFunction<TableFunctionObjectStorage<HDFSDefinition, StorageHDFSConfiguration>>(
{ {
.allow_readonly = false .allow_readonly = false
}); });
@ -228,21 +226,21 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory)
} }
#if USE_AZURE_BLOB_STORAGE #if USE_AZURE_BLOB_STORAGE
template class TableFunctionObjectStorage<AzureDefinition, AzureStorageSettings, StorageAzureBlobConfiguration>; template class TableFunctionObjectStorage<AzureDefinition, StorageAzureBlobConfiguration>;
template class TableFunctionObjectStorage<AzureClusterDefinition, AzureStorageSettings, StorageAzureBlobConfiguration>; template class TableFunctionObjectStorage<AzureClusterDefinition, StorageAzureBlobConfiguration>;
#endif #endif
#if USE_AWS_S3 #if USE_AWS_S3
template class TableFunctionObjectStorage<S3Definition, S3StorageSettings, StorageS3Configuration>; template class TableFunctionObjectStorage<S3Definition, StorageS3Configuration>;
template class TableFunctionObjectStorage<S3ClusterDefinition, S3StorageSettings, StorageS3Configuration>; template class TableFunctionObjectStorage<S3ClusterDefinition, StorageS3Configuration>;
template class TableFunctionObjectStorage<GCSDefinition, S3StorageSettings, StorageS3Configuration>; template class TableFunctionObjectStorage<GCSDefinition, StorageS3Configuration>;
template class TableFunctionObjectStorage<COSNDefinition, S3StorageSettings, StorageS3Configuration>; template class TableFunctionObjectStorage<COSNDefinition, StorageS3Configuration>;
template class TableFunctionObjectStorage<OSSDefinition, S3StorageSettings, StorageS3Configuration>; template class TableFunctionObjectStorage<OSSDefinition, StorageS3Configuration>;
#endif #endif
#if USE_HDFS #if USE_HDFS
template class TableFunctionObjectStorage<HDFSDefinition, HDFSStorageSettings, StorageHDFSConfiguration>; template class TableFunctionObjectStorage<HDFSDefinition, StorageHDFSConfiguration>;
template class TableFunctionObjectStorage<HDFSClusterDefinition, HDFSStorageSettings, StorageHDFSConfiguration>; template class TableFunctionObjectStorage<HDFSClusterDefinition, StorageHDFSConfiguration>;
#endif #endif
} }

View File

@ -85,7 +85,7 @@ struct HDFSDefinition
" - uri, format, structure, compression_method\n"; " - uri, format, structure, compression_method\n";
}; };
template <typename Definition, typename StorageSettings, typename Configuration> template <typename Definition, typename Configuration>
class TableFunctionObjectStorage : public ITableFunction class TableFunctionObjectStorage : public ITableFunction
{ {
public: public:
@ -142,14 +142,14 @@ protected:
}; };
#if USE_AWS_S3 #if USE_AWS_S3
using TableFunctionS3 = TableFunctionObjectStorage<S3Definition, S3StorageSettings, StorageS3Configuration>; using TableFunctionS3 = TableFunctionObjectStorage<S3Definition, StorageS3Configuration>;
#endif #endif
#if USE_AZURE_BLOB_STORAGE #if USE_AZURE_BLOB_STORAGE
using TableFunctionAzureBlob = TableFunctionObjectStorage<AzureDefinition, AzureStorageSettings, StorageAzureBlobConfiguration>; using TableFunctionAzureBlob = TableFunctionObjectStorage<AzureDefinition, StorageAzureBlobConfiguration>;
#endif #endif
#if USE_HDFS #if USE_HDFS
using TableFunctionHDFS = TableFunctionObjectStorage<HDFSDefinition, HDFSStorageSettings, StorageHDFSConfiguration>; using TableFunctionHDFS = TableFunctionObjectStorage<HDFSDefinition, StorageHDFSConfiguration>;
#endif #endif
} }

View File

@ -14,8 +14,8 @@
namespace DB namespace DB
{ {
template <typename Definition, typename StorageSettings, typename Configuration> template <typename Definition, typename Configuration>
StoragePtr TableFunctionObjectStorageCluster<Definition, StorageSettings, Configuration>::executeImpl( StoragePtr TableFunctionObjectStorageCluster<Definition, Configuration>::executeImpl(
const ASTPtr & /*function*/, ContextPtr context, const ASTPtr & /*function*/, ContextPtr context,
const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const
{ {
@ -34,10 +34,9 @@ StoragePtr TableFunctionObjectStorageCluster<Definition, StorageSettings, Config
if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY)
{ {
/// On worker node this filename won't contains globs /// On worker node this filename won't contains globs
storage = std::make_shared<StorageObjectStorage<StorageSettings>>( storage = std::make_shared<StorageObjectStorage>(
configuration, configuration,
object_storage, object_storage,
Definition::storage_type_name,
context, context,
StorageID(Base::getDatabaseName(), table_name), StorageID(Base::getDatabaseName(), table_name),
columns, columns,
@ -49,11 +48,10 @@ StoragePtr TableFunctionObjectStorageCluster<Definition, StorageSettings, Config
} }
else else
{ {
storage = std::make_shared<StorageObjectStorageCluster<Definition, StorageSettings, Configuration>>( storage = std::make_shared<StorageObjectStorageCluster>(
ITableFunctionCluster<Base>::cluster_name, ITableFunctionCluster<Base>::cluster_name,
configuration, configuration,
object_storage, object_storage,
Definition::storage_type_name,
StorageID(Base::getDatabaseName(), table_name), StorageID(Base::getDatabaseName(), table_name),
columns, columns,
ConstraintsDescription{}, ConstraintsDescription{},
@ -107,14 +105,14 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory)
} }
#if USE_AWS_S3 #if USE_AWS_S3
template class TableFunctionObjectStorageCluster<S3ClusterDefinition, S3StorageSettings, StorageS3Configuration>; template class TableFunctionObjectStorageCluster<S3ClusterDefinition, StorageS3Configuration>;
#endif #endif
#if USE_AZURE_BLOB_STORAGE #if USE_AZURE_BLOB_STORAGE
template class TableFunctionObjectStorageCluster<AzureClusterDefinition, AzureStorageSettings, StorageAzureBlobConfiguration>; template class TableFunctionObjectStorageCluster<AzureClusterDefinition, StorageAzureBlobConfiguration>;
#endif #endif
#if USE_HDFS #if USE_HDFS
template class TableFunctionObjectStorageCluster<HDFSClusterDefinition, HDFSStorageSettings, StorageHDFSConfiguration>; template class TableFunctionObjectStorageCluster<HDFSClusterDefinition, StorageHDFSConfiguration>;
#endif #endif
} }

View File

@ -56,8 +56,8 @@ struct HDFSClusterDefinition
" - cluster_name, uri, format, structure, compression_method\n"; " - cluster_name, uri, format, structure, compression_method\n";
}; };
template <typename Definition, typename StorageSettings, typename Configuration> template <typename Definition, typename Configuration>
class TableFunctionObjectStorageCluster : public ITableFunctionCluster<TableFunctionObjectStorage<Definition, StorageSettings, Configuration>> class TableFunctionObjectStorageCluster : public ITableFunctionCluster<TableFunctionObjectStorage<Definition, Configuration>>
{ {
public: public:
static constexpr auto name = Definition::name; static constexpr auto name = Definition::name;
@ -67,7 +67,7 @@ public:
String getSignature() const override { return signature; } String getSignature() const override { return signature; }
protected: protected:
using Base = TableFunctionObjectStorage<Definition, StorageSettings, Configuration>; using Base = TableFunctionObjectStorage<Definition, Configuration>;
StoragePtr executeImpl( StoragePtr executeImpl(
const ASTPtr & ast_function, const ASTPtr & ast_function,
@ -86,14 +86,14 @@ protected:
}; };
#if USE_AWS_S3 #if USE_AWS_S3
using TableFunctionS3Cluster = TableFunctionObjectStorageCluster<S3ClusterDefinition, S3StorageSettings, StorageS3Configuration>; using TableFunctionS3Cluster = TableFunctionObjectStorageCluster<S3ClusterDefinition, StorageS3Configuration>;
#endif #endif
#if USE_AZURE_BLOB_STORAGE #if USE_AZURE_BLOB_STORAGE
using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster<AzureClusterDefinition, AzureStorageSettings, StorageAzureBlobConfiguration>; using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster<AzureClusterDefinition, StorageAzureBlobConfiguration>;
#endif #endif
#if USE_HDFS #if USE_HDFS
using TableFunctionHDFSCluster = TableFunctionObjectStorageCluster<HDFSClusterDefinition, HDFSStorageSettings, StorageHDFSConfiguration>; using TableFunctionHDFSCluster = TableFunctionObjectStorageCluster<HDFSClusterDefinition, StorageHDFSConfiguration>;
#endif #endif
} }