mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-18 21:51:57 +00:00
Better
This commit is contained in:
parent
66c9305668
commit
e82c4800f2
@ -12,9 +12,11 @@
|
|||||||
|
|
||||||
#include <QueryPipeline/Pipe.h>
|
#include <QueryPipeline/Pipe.h>
|
||||||
|
|
||||||
namespace DB {
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
namespace ErrorCodes {
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||||
extern const int S3_ERROR;
|
extern const int S3_ERROR;
|
||||||
}
|
}
|
||||||
@ -24,11 +26,11 @@ StorageHudi::StorageHudi(
|
|||||||
const String & access_key_,
|
const String & access_key_,
|
||||||
const String & secret_access_key_,
|
const String & secret_access_key_,
|
||||||
const StorageID & table_id_,
|
const StorageID & table_id_,
|
||||||
const ColumnsDescription & columns_,
|
ColumnsDescription columns_,
|
||||||
const ConstraintsDescription & constraints_,
|
const ConstraintsDescription & constraints_,
|
||||||
const String & comment,
|
const String & comment,
|
||||||
ContextPtr context_
|
ContextPtr context_)
|
||||||
) : IStorage(table_id_)
|
: IStorage(table_id_)
|
||||||
, base_configuration({uri_, access_key_, secret_access_key_, {}, {}, {}})
|
, base_configuration({uri_, access_key_, secret_access_key_, {}, {}, {}})
|
||||||
, log(&Poco::Logger::get("StorageHudi (" + table_id_.table_name + ")"))
|
, log(&Poco::Logger::get("StorageHudi (" + table_id_.table_name + ")"))
|
||||||
{
|
{
|
||||||
@ -42,21 +44,12 @@ StorageHudi::StorageHudi(
|
|||||||
LOG_DEBUG(log, "New uri: {}", new_uri);
|
LOG_DEBUG(log, "New uri: {}", new_uri);
|
||||||
|
|
||||||
auto s3_uri = S3::URI(Poco::URI(new_uri));
|
auto s3_uri = S3::URI(Poco::URI(new_uri));
|
||||||
// StorageS3::S3Configuration s3_configuration{s3_uri, access_key_, secret_access_key_, {}, {}, {}};
|
|
||||||
|
|
||||||
if (columns_.empty())
|
if (columns_.empty())
|
||||||
{
|
{
|
||||||
auto columns = StorageS3::getTableStructureFromData(
|
columns_
|
||||||
String("Parquet"),
|
= StorageS3::getTableStructureFromData(String("Parquet"), s3_uri, access_key_, secret_access_key_, "", false, {}, context_);
|
||||||
s3_uri,
|
storage_metadata.setColumns(columns_);
|
||||||
access_key_,
|
|
||||||
secret_access_key_,
|
|
||||||
"",
|
|
||||||
false,
|
|
||||||
{},
|
|
||||||
context_
|
|
||||||
);
|
|
||||||
storage_metadata.setColumns(columns);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
storage_metadata.setColumns(columns_);
|
storage_metadata.setColumns(columns_);
|
||||||
@ -76,8 +69,7 @@ StorageHudi::StorageHudi(
|
|||||||
constraints_,
|
constraints_,
|
||||||
comment,
|
comment,
|
||||||
context_,
|
context_,
|
||||||
std::nullopt
|
std::nullopt);
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Pipe StorageHudi::read(
|
Pipe StorageHudi::read(
|
||||||
@ -91,15 +83,7 @@ Pipe StorageHudi::read(
|
|||||||
{
|
{
|
||||||
updateS3Configuration(context, base_configuration);
|
updateS3Configuration(context, base_configuration);
|
||||||
|
|
||||||
//auto keys = getKeysFromS3();
|
return s3engine->read(column_names, storage_snapshot, query_info, context, processed_stage, max_block_size, num_streams);
|
||||||
|
|
||||||
//auto new_uri = base_configuration.uri.uri.toString() + "/" + generateQueryFromKeys(std::forward(keys));
|
|
||||||
//s3_configuration.uri = S3::URI(Poco::URI(new_uri));
|
|
||||||
|
|
||||||
return s3engine->read(column_names, storage_snapshot,
|
|
||||||
query_info, context, processed_stage,
|
|
||||||
max_block_size, num_streams);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void StorageHudi::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration & upd)
|
void StorageHudi::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration & upd)
|
||||||
@ -128,7 +112,8 @@ void StorageHudi::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configurati
|
|||||||
|
|
||||||
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
|
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
|
||||||
settings.auth_settings.region,
|
settings.auth_settings.region,
|
||||||
ctx->getRemoteHostFilter(), ctx->getGlobalContext()->getSettingsRef().s3_max_redirects,
|
ctx->getRemoteHostFilter(),
|
||||||
|
ctx->getGlobalContext()->getSettingsRef().s3_max_redirects,
|
||||||
ctx->getGlobalContext()->getSettingsRef().enable_s3_requests_logging);
|
ctx->getGlobalContext()->getSettingsRef().enable_s3_requests_logging);
|
||||||
|
|
||||||
client_configuration.endpointOverride = upd.uri.endpoint;
|
client_configuration.endpointOverride = upd.uri.endpoint;
|
||||||
@ -147,7 +132,8 @@ void StorageHudi::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configurati
|
|||||||
upd.auth_settings = std::move(settings.auth_settings);
|
upd.auth_settings = std::move(settings.auth_settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> StorageHudi::getKeysFromS3() {
|
std::vector<std::string> StorageHudi::getKeysFromS3()
|
||||||
|
{
|
||||||
std::vector<std::string> keys;
|
std::vector<std::string> keys;
|
||||||
|
|
||||||
const auto & client = base_configuration.client;
|
const auto & client = base_configuration.client;
|
||||||
@ -188,10 +174,15 @@ std::vector<std::string> StorageHudi::getKeysFromS3() {
|
|||||||
return keys;
|
return keys;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string StorageHudi::generateQueryFromKeys(std::vector<std::string>&& keys) {
|
std::string StorageHudi::generateQueryFromKeys(std::vector<std::string> && keys)
|
||||||
|
{
|
||||||
// filter only .parquet files
|
// filter only .parquet files
|
||||||
std::erase_if(keys, [](const std::string& s) {
|
std::erase_if(
|
||||||
if (s.size() >= 8) {
|
keys,
|
||||||
|
[](const std::string & s)
|
||||||
|
{
|
||||||
|
if (s.size() >= 8)
|
||||||
|
{
|
||||||
return s.substr(s.size() - 8) != ".parquet";
|
return s.substr(s.size() - 8) != ".parquet";
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -201,13 +192,16 @@ std::string StorageHudi::generateQueryFromKeys(std::vector<std::string>&& keys)
|
|||||||
|
|
||||||
std::unordered_map<std::string, std::pair<std::string, uint64_t>> latest_parquets;
|
std::unordered_map<std::string, std::pair<std::string, uint64_t>> latest_parquets;
|
||||||
|
|
||||||
for (const auto& key : keys) {
|
for (const auto & key : keys)
|
||||||
|
{
|
||||||
auto slash = key.find_last_of("/");
|
auto slash = key.find_last_of("/");
|
||||||
std::string path;
|
std::string path;
|
||||||
if (slash == std::string::npos) {
|
if (slash == std::string::npos)
|
||||||
|
{
|
||||||
path = "";
|
path = "";
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
path = key.substr(0, slash);
|
path = key.substr(0, slash);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -215,22 +209,29 @@ std::string StorageHudi::generateQueryFromKeys(std::vector<std::string>&& keys)
|
|||||||
|
|
||||||
auto it = latest_parquets.find(path);
|
auto it = latest_parquets.find(path);
|
||||||
|
|
||||||
if (it != latest_parquets.end()) {
|
if (it != latest_parquets.end())
|
||||||
if (it->second.second < timestamp) {
|
{
|
||||||
|
if (it->second.second < timestamp)
|
||||||
|
{
|
||||||
it->second = {key, timestamp};
|
it->second = {key, timestamp};
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
latest_parquets[path] = {key, timestamp};
|
latest_parquets[path] = {key, timestamp};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> filtered_keys;
|
std::vector<std::string> filtered_keys;
|
||||||
std::transform(latest_parquets.begin(), latest_parquets.end(), std::back_inserter(filtered_keys), [](const auto& kv){return kv.second.first;});
|
std::transform(
|
||||||
|
latest_parquets.begin(), latest_parquets.end(), std::back_inserter(filtered_keys), [](const auto & kv) { return kv.second.first; });
|
||||||
|
|
||||||
std::string new_query;
|
std::string new_query;
|
||||||
|
|
||||||
for (auto&& key : filtered_keys) {
|
for (auto && key : filtered_keys)
|
||||||
if (!new_query.empty()) {
|
{
|
||||||
|
if (!new_query.empty())
|
||||||
|
{
|
||||||
new_query += ",";
|
new_query += ",";
|
||||||
}
|
}
|
||||||
new_query += key;
|
new_query += key;
|
||||||
@ -243,7 +244,9 @@ std::string StorageHudi::generateQueryFromKeys(std::vector<std::string>&& keys)
|
|||||||
|
|
||||||
void registerStorageHudi(StorageFactory & factory)
|
void registerStorageHudi(StorageFactory & factory)
|
||||||
{
|
{
|
||||||
factory.registerStorage("Hudi", [](const StorageFactory::Arguments & args)
|
factory.registerStorage(
|
||||||
|
"Hudi",
|
||||||
|
[](const StorageFactory::Arguments & args)
|
||||||
{
|
{
|
||||||
auto & engine_args = args.engine_args;
|
auto & engine_args = args.engine_args;
|
||||||
if (engine_args.empty())
|
if (engine_args.empty())
|
||||||
@ -276,4 +279,3 @@ void registerStorageHudi(StorageFactory & factory)
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,7 +5,8 @@
|
|||||||
#include <Storages/IStorage.h>
|
#include <Storages/IStorage.h>
|
||||||
#include <Storages/StorageS3.h>
|
#include <Storages/StorageS3.h>
|
||||||
|
|
||||||
namespace Poco {
|
namespace Poco
|
||||||
|
{
|
||||||
class Logger;
|
class Logger;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -17,18 +18,18 @@ namespace Aws::S3
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
class StorageHudi : public IStorage {
|
class StorageHudi : public IStorage
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
StorageHudi(
|
StorageHudi(
|
||||||
const S3::URI & uri_,
|
const S3::URI & uri_,
|
||||||
const String & access_key_,
|
const String & access_key_,
|
||||||
const String & secret_access_key_,
|
const String & secret_access_key_,
|
||||||
const StorageID & table_id_,
|
const StorageID & table_id_,
|
||||||
const ColumnsDescription & columns_,
|
ColumnsDescription columns_,
|
||||||
const ConstraintsDescription & constraints_,
|
const ConstraintsDescription & constraints_,
|
||||||
const String & comment,
|
const String & comment,
|
||||||
ContextPtr context_
|
ContextPtr context_);
|
||||||
);
|
|
||||||
|
|
||||||
String getName() const override { return "Hudi"; }
|
String getName() const override { return "Hudi"; }
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user