#include "config.h"

#if USE_AZURE_BLOB_STORAGE

#include <TableFunctions/TableFunctionAzureBlobStorageCluster.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
#include <Storages/StorageAzureBlob.h>

#include "registerTableFunctions.h"

#include <memory>


namespace DB
{
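
/// Build either the initiator-side cluster storage, which dispatches the query to the nodes
/// of the cluster, or the worker-side single-node storage used when this server received
/// a secondary query from the initiator.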
StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl(
    const ASTPtr & /*function*/, ContextPtr context,
    const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const
{
    StoragePtr storage;
    ColumnsDescription columns;
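
    /// Take the structure from the table function arguments if it is given explicitly,
    /// otherwise fall back to the structure hint, if the caller provided one.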
    if (configuration.structure != "auto")
    {
        columns = parseColumnsListFromString(configuration.structure, context);
    }
    else if (!structure_hint.empty())
    {
        columns = structure_hint;
    }

    auto client = StorageAzureBlob::createClient(configuration, !is_insert_query);
    auto settings = StorageAzureBlob::createSettings(context);
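
    /// A secondary query means this server is a worker: it should read the data itself
    /// (distributed_processing = true) rather than fan the query out again.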
    if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY)
    {
        /// On the worker node this filename won't contain globs.
        storage = std::make_shared<StorageAzureBlob>(
            configuration,
            std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container),
            context,
            StorageID(getDatabaseName(), table_name),
            columns,
            ConstraintsDescription{},
            /* comment */ String{},
            /* format_settings */ std::nullopt, /// No format_settings
            /* distributed_processing */ true,
            /* partition_by_ */ nullptr);
    }
    else
    {
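        /// On the initiator: create the cluster storage that distributes the query
        /// across the nodes of the cluster.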
        storage = std::make_shared<StorageAzureBlobCluster>(
            cluster_name,
            configuration,
            std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container),
            StorageID(getDatabaseName(), table_name),
            columns,
            ConstraintsDescription{},
            context);
    }

    storage->startup();

    return storage;
}


void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory)
{
    factory.registerFunction<TableFunctionAzureBlobStorageCluster>(
        {.documentation
            = {.description=R"(The table function can be used to read data stored on Azure Blob Storage in parallel across many nodes of a specified cluster.)",
            .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}},
            .allow_readonly = false}
        );
}
}

#endif