ClickHouse/src/Storages/ExternalDataSourceConfiguration.h

136 lines
3.9 KiB
C++
Raw Normal View History

2021-09-01 17:59:11 +00:00
#pragma once
#include <Interpreters/Context.h>
#include <Poco/Util/AbstractConfiguration.h>
2022-04-03 22:33:59 +00:00
#include <Storages/StorageS3Settings.h>
2021-09-01 17:59:11 +00:00
namespace DB
{
2021-12-27 14:41:37 +00:00
/// Settings list macro with no entries: it expands to nothing for any `M`.
#define EMPTY_SETTINGS(M)
/// Declares `EmptySettingsTraits` from the empty list above.
/// DECLARE_SETTINGS_TRAITS itself is defined outside this file.
DECLARE_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS)
/// Settings type built on `EmptySettingsTraits`. `EmptySettingsTraits` is the default template
/// argument of the getExternalDataSourceConfiguration() overloads declared in this header.
struct EmptySettings : public BaseSettings<EmptySettingsTraits> {};
2021-09-01 17:59:11 +00:00
struct ExternalDataSourceConfiguration
{
String host;
2021-09-02 13:01:26 +00:00
UInt16 port = 0;
Improvements for `parallel_distributed_insert_select` (and related) (#34728) * Add a warning if parallel_distributed_insert_select was ignored Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> * Respect max_distributed_depth for parallel_distributed_insert_select Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> * Print warning for non applied parallel_distributed_insert_select only for initial query Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> * Remove Cluster::getHashOfAddresses() Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> * Forbid parallel_distributed_insert_select for remote()/cluster() with different addresses Before it uses empty cluster name (getClusterName()) which is not correct, compare all addresses instead. Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> * Fix max_distributed_depth check max_distributed_depth=1 must mean not more then one distributed query, not two, since max_distributed_depth=0 means no limit, and distribute_depth is 0 for the first query. Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> * Fix INSERT INTO remote()/cluster() with parallel_distributed_insert_select Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> * Add a test for parallel_distributed_insert_select with cluster()/remote() Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> * Return <remote> instead of empty cluster name in Distributed engine Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> * Make user with sharding_key and w/o in remote()/cluster() identical Before with sharding_key the user was "default", while w/o it it was empty. Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-03-08 14:24:39 +00:00
String username = "default";
2021-09-01 17:59:11 +00:00
String password;
String database;
String table;
2021-09-01 23:17:15 +00:00
String schema;
2021-09-01 17:59:11 +00:00
2021-09-07 11:17:25 +00:00
std::vector<std::pair<String, UInt16>> addresses; /// Failover replicas.
String addresses_expr;
2021-09-07 11:17:25 +00:00
2021-09-01 23:17:15 +00:00
String toString() const;
2021-09-15 22:45:43 +00:00
void set(const ExternalDataSourceConfiguration & conf);
2021-09-01 17:59:11 +00:00
};
2021-09-01 23:17:15 +00:00
2021-09-01 17:59:11 +00:00
/// PostgreSQL configuration: the common connection parameters plus `on_conflict`.
struct StoragePostgreSQLConfiguration : public ExternalDataSourceConfiguration
{
    /// NOTE(review): presumably the ON CONFLICT clause appended to inserts — confirm against the storage code.
    String on_conflict;
};
2021-09-02 13:01:26 +00:00
struct StorageMySQLConfiguration : ExternalDataSourceConfiguration
{
2021-09-04 18:46:09 +00:00
bool replace_query = false;
2021-09-02 13:01:26 +00:00
String on_duplicate_clause;
};
2021-09-03 11:16:32 +00:00
/// MongoDB configuration: the common connection parameters plus a free-form `options` string.
struct StorageMongoDBConfiguration : public ExternalDataSourceConfiguration
{
    /// NOTE(review): presumably extra connection options passed through to the driver — confirm against the storage code.
    String options;
};
2021-09-02 13:01:26 +00:00
/// List of (argument name, argument AST) pairs; used as the type of the `specific_args` members in this header.
using StorageSpecificArgs = std::vector<std::pair<String, ASTPtr>>;
2021-09-01 17:59:11 +00:00
2021-12-27 14:41:37 +00:00
struct ExternalDataSourceInfo
2021-09-22 15:10:25 +00:00
{
ExternalDataSourceConfiguration configuration;
StorageSpecificArgs specific_args;
2021-12-27 14:41:37 +00:00
SettingsChanges settings_changes;
2021-09-22 15:10:25 +00:00
};
/* If there is a storage engine's configuration specified in the named_collections,
* this function returns valid for usage ExternalDataSourceConfiguration struct
* otherwise std::nullopt is returned.
2021-09-01 17:59:11 +00:00
*
2021-09-22 15:10:25 +00:00
* If any configuration options are provided as key-value engine arguments, they will override
* configuration values, i.e. ENGINE = PostgreSQL(postgresql_configuration, database = 'postgres_database');
2021-09-01 17:59:11 +00:00
*
2021-09-22 15:10:25 +00:00
* Any key-value engine argument except common (`host`, `port`, `username`, `password`, `database`)
* is returned in EngineArgs struct.
2021-09-01 17:59:11 +00:00
*/
2021-12-27 14:41:37 +00:00
template <typename T = EmptySettingsTraits>
std::optional<ExternalDataSourceInfo> getExternalDataSourceConfiguration(
const ASTs & args, ContextPtr context, bool is_database_engine = false, bool throw_on_no_collection = true, const BaseSettings<T> & storage_settings = {});
2021-09-01 23:17:15 +00:00
2021-12-29 10:02:18 +00:00
/// Predicate over a key name, taking the key as `const String &` and returning bool.
/// NOTE(review): presumably answers "is this a recognised configuration key?" — confirm at the call sites.
using HasConfigKeyFunc = std::function<bool(const String &)>;
2022-01-10 11:00:03 +00:00
template <typename T = EmptySettingsTraits>
std::optional<ExternalDataSourceInfo> getExternalDataSourceConfiguration(
2021-12-29 10:02:18 +00:00
const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix,
2022-01-10 11:00:03 +00:00
ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings<T> & settings = {});
2021-09-02 13:01:26 +00:00
/// Configurations grouped by priority. The map key is the priority:
/// 0 is the highest, and the larger the key, the lower the priority.
using ExternalDataSourcesConfigurationByPriority = std::map<size_t, std::vector<ExternalDataSourceConfiguration>>;
/// Target object names together with the per-priority configurations;
/// this is the return type of getExternalDataSourceConfigurationByPriority().
struct ExternalDataSourcesByPriority
{
    String database;
    String table;
    String schema;

    /// Keyed by priority.
    ExternalDataSourcesConfigurationByPriority replicas_configurations;
};
/// Returns the configurations found in `dict_config` under `dict_config_prefix`, grouped by priority.
/// NOTE(review): `has_config_key` is presumably used to validate the keys found under the prefix — confirm in the definition.
ExternalDataSourcesByPriority
getExternalDataSourceConfigurationByPriority(const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context, HasConfigKeyFunc has_config_key);
2021-09-01 17:59:11 +00:00
2021-09-07 11:17:25 +00:00
struct URLBasedDataSourceConfiguration
{
String url;
String format = "auto";
2021-09-07 11:17:25 +00:00
String compression_method = "auto";
String structure = "auto";
2021-09-07 11:17:25 +00:00
std::vector<std::pair<String, Field>> headers;
2021-10-28 12:44:12 +00:00
String http_method;
2021-09-15 22:45:43 +00:00
void set(const URLBasedDataSourceConfiguration & conf);
2021-09-07 11:17:25 +00:00
};
/// S3 configuration: the URL-based parameters plus two S3Settings members
/// (the S3Settings types come from Storages/StorageS3Settings.h).
struct StorageS3Configuration : URLBasedDataSourceConfiguration
{
    S3Settings::AuthSettings auth_settings;
    S3Settings::ReadWriteSettings rw_settings;
};
/// S3 configuration extended with the name of a cluster.
struct StorageS3ClusterConfiguration : public StorageS3Configuration
{
    String cluster_name;
};
2021-09-22 15:10:25 +00:00
/// Result bundle of getURLBasedDataSourceConfiguration(): the URL-based parameters
/// together with a list of engine-specific arguments.
struct URLBasedDataSourceConfig
{
    URLBasedDataSourceConfiguration configuration;

    /// (argument name, argument AST) pairs.
    StorageSpecificArgs specific_args;
};
/// URL-based counterpart of getExternalDataSourceConfiguration(): takes the engine arguments and
/// returns an optional URLBasedDataSourceConfig.
/// NOTE(review): std::nullopt presumably means that `args` do not reference a named collection — confirm in the definition.
std::optional<URLBasedDataSourceConfig>
getURLBasedDataSourceConfiguration(const ASTs & args, ContextPtr context);
2021-09-07 11:17:25 +00:00
/// Overload that takes `settings` by mutable reference and reports a bool result.
/// NOTE(review): presumably fills `settings` from a named collection referenced in `args` and returns
/// whether one was found — confirm in the definition.
template <typename T>
bool getExternalDataSourceConfiguration(
    const ASTs & args,
    BaseSettings<T> & settings,
    ContextPtr context);
2021-09-01 17:59:11 +00:00
}