// ClickHouse/src/Interpreters/Cluster.h
#pragma once
#include <Client/ConnectionPool.h>
#include <Client/ConnectionPoolWithFailover.h>
#include <Common/Macros.h>
#include <Common/MultiVersion.h>
#include <Common/Priority.h>

#include <Poco/Net/SocketAddress.h>

#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <string>
#include <tuple>
#include <unordered_set>
#include <utility>
#include <vector>

/// Forward declaration only: this header refers to Poco::Util::AbstractConfiguration
/// exclusively through references and pointers, so the full Poco header is not needed here.
namespace Poco::Util
{
    class AbstractConfiguration;
}

namespace DB
{
/// Forward declaration: Settings is only passed around as `const Settings &` in this header.
struct Settings;

/// Error codes referenced by inline code in this header (see Cluster::getAnyShardInfo).
/// Only declared here; the definition lives elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}

/// Description of one replica, used to build a Cluster from a list of replicas
/// (see the Cluster constructor taking `std::vector<std::vector<DatabaseReplicaInfo>>`
/// and the Cluster::Address constructor taking a DatabaseReplicaInfo).
struct DatabaseReplicaInfo
{
    /// Host of the replica.
    /// NOTE(review): presumably ends up in Cluster::Address::host_name - confirm in Cluster.cpp.
    String hostname;

    /// Shard / replica names.
    /// NOTE(review): presumably end up in Cluster::Address::database_shard_name
    /// and Cluster::Address::database_replica_name - confirm in Cluster.cpp.
    String shard_name;
    String replica_name;
};
struct ClusterConnectionParameters
{
const String & username;
const String & password;
UInt16 clickhouse_port;
bool treat_local_as_remote;
bool treat_local_port_as_remote;
bool secure = false;
2023-06-07 16:25:52 +00:00
Priority priority{1};
2023-04-07 16:26:23 +00:00
String cluster_name;
String cluster_secret;
};
2017-06-02 21:37:28 +00:00
/// Cluster contains connection pools to each node
/// With the local nodes, the connection is not established, but the request is executed directly.
/// Therefore we store only the number of local nodes
/// In the config, the cluster includes nodes <node> or <shard>
class Cluster
2013-12-07 16:51:29 +00:00
{
public:
Cluster(const Poco::Util::AbstractConfiguration & config,
const Settings & settings,
const String & config_prefix_,
const String & cluster_name);
2013-12-07 16:51:29 +00:00
2017-12-01 17:13:14 +00:00
/// Construct a cluster by the names of shards and replicas.
/// Local are treated as well as remote ones if treat_local_as_remote is true.
/// Local are also treated as remote if treat_local_port_as_remote is set and the local address includes a port
2017-09-07 17:55:02 +00:00
/// 'clickhouse_port' - port that this server instance listen for queries.
/// This parameter is needed only to check that some address is local (points to ourself).
///
/// Used for remote() function.
Cluster(
const Settings & settings,
const std::vector<std::vector<String>> & names,
2023-04-07 16:26:23 +00:00
const ClusterConnectionParameters & params);
Cluster(
const Settings & settings,
const std::vector<std::vector<DatabaseReplicaInfo>> & infos,
const ClusterConnectionParameters & params);
Cluster(const Cluster &)= delete;
Cluster & operator=(const Cluster &) = delete;
2013-12-07 16:51:29 +00:00
2017-06-02 21:37:28 +00:00
/// is used to set a limit on the size of the timeout
2021-04-29 16:11:20 +00:00
static Poco::Timespan saturate(Poco::Timespan v, Poco::Timespan limit);
2015-04-30 12:43:16 +00:00
2021-03-07 19:07:25 +00:00
using SlotToShard = std::vector<UInt64>;
2013-12-07 16:51:29 +00:00
struct Address
{
/** In configuration file,
* addresses are located either in <node> elements:
2013-12-07 16:51:29 +00:00
* <node>
* <host>example01-01-1</host>
* <port>9000</port>
* <!-- <user>, <password>, <default_database>, <compression>, <priority>. <secure> if needed -->
2013-12-07 16:51:29 +00:00
* </node>
* ...
* or in <shard> and inside in <replica> elements:
2013-12-07 16:51:29 +00:00
* <shard>
* <replica>
* <host>example01-01-1</host>
* <port>9000</port>
* <!-- <user>, <password>, <default_database>, <compression>, <priority>. <secure> if needed -->
2013-12-10 09:35:30 +00:00
* </replica>
2013-12-07 16:51:29 +00:00
* </shard>
*/
String host_name;
2023-04-07 16:26:23 +00:00
String database_shard_name;
String database_replica_name;
2021-09-09 14:59:25 +00:00
UInt16 port{0};
2013-12-07 16:51:29 +00:00
String user;
String password;
2022-08-03 19:44:08 +00:00
String quota_key;
/// For inter-server authorization
String cluster;
String cluster_secret;
2020-02-16 04:13:54 +00:00
UInt32 shard_index{}; /// shard serial number in configuration file, starting from 1.
UInt32 replica_index{}; /// replica serial number in this shard, starting from 1; zero means no replicas.
2020-01-31 09:22:30 +00:00
/// This database is selected when no database is specified for Distributed table
String default_database;
/// The locality is determined at the initialization, and is not changed even if DNS is changed
bool is_local = false;
2018-12-28 17:11:52 +00:00
bool user_specified = false;
Protocol::Compression compression = Protocol::Compression::Enable;
Protocol::Secure secure = Protocol::Secure::Disable;
2023-06-07 16:25:52 +00:00
Priority priority{1};
Address() = default;
2021-03-29 00:39:10 +00:00
Address(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
const String & cluster_,
const String & cluster_secret_,
UInt32 shard_index_ = 0,
UInt32 replica_index_ = 0);
2021-03-29 00:39:10 +00:00
2023-04-07 16:26:23 +00:00
Address(
2023-04-11 14:32:40 +00:00
const DatabaseReplicaInfo & info,
2023-04-07 16:26:23 +00:00
const ClusterConnectionParameters & params,
UInt32 shard_index_,
UInt32 replica_index_);
2020-02-12 14:32:18 +00:00
2017-07-28 16:14:49 +00:00
/// Returns 'escaped_host_name:port'
String toString() const;
2017-07-28 16:14:49 +00:00
/// Returns 'host_name:port'
String readableString() const;
static String toString(const String & host_name, UInt16 port);
2019-01-21 19:45:26 +00:00
static std::pair<String, UInt16> fromString(const String & host_port_string);
/// Returns escaped shard{shard_index}_replica{replica_index} or escaped
/// user:password@resolved_host_address:resolved_host_port#default_database
/// depending on use_compact_format flag
String toFullString(bool use_compact_format) const;
2020-01-31 10:49:10 +00:00
/// Returns address with only shard index and replica index or full address without shard index and replica index
2019-01-21 19:45:26 +00:00
static Address fromFullString(const String & address_full_string);
2018-12-02 02:17:08 +00:00
/// Returns resolved address if it does resolve.
std::optional<Poco::Net::SocketAddress> getResolvedAddress() const;
2019-01-21 19:45:26 +00:00
auto tuple() const { return std::tie(host_name, port, secure, user, password, default_database); }
bool operator==(const Address & other) const { return tuple() == other.tuple(); }
2018-12-02 02:17:08 +00:00
private:
bool isLocal(UInt16 clickhouse_port) const;
2013-12-07 16:51:29 +00:00
};
using Addresses = std::vector<Address>;
using AddressesWithFailover = std::vector<Addresses>;
/// Name of directory for asynchronous write to StorageDistributed if has_internal_replication
///
/// Contains different path for permutations of:
/// - prefer_localhost_replica
/// Notes with prefer_localhost_replica==0 will contains local nodes.
/// - use_compact_format_in_distributed_parts_names
/// See toFullString()
///
/// This is cached to avoid looping by replicas in insertPathForInternalReplication().
struct ShardInfoInsertPathForInternalReplication
{
/// prefer_localhost_replica == 1 && use_compact_format_in_distributed_parts_names=0
std::string prefer_localhost_replica;
/// prefer_localhost_replica == 0 && use_compact_format_in_distributed_parts_names=0
std::string no_prefer_localhost_replica;
/// use_compact_format_in_distributed_parts_names=1
std::string compact;
};
struct ShardInfo
{
public:
bool isLocal() const { return !local_addresses.empty(); }
bool hasRemoteConnections() const { return local_addresses.size() != per_replica_pools.size(); }
size_t getLocalNodeCount() const { return local_addresses.size(); }
size_t getRemoteNodeCount() const { return per_replica_pools.size() - local_addresses.size(); }
size_t getAllNodeCount() const { return per_replica_pools.size(); }
bool hasInternalReplication() const { return has_internal_replication; }
/// Name of directory for asynchronous write to StorageDistributed if has_internal_replication
const std::string & insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const;
ShardInfoInsertPathForInternalReplication insert_path_for_internal_replication;
2017-04-25 15:21:03 +00:00
/// Number of the shard, the indexation begins with 1
2019-04-22 16:07:09 +00:00
UInt32 shard_num = 0;
UInt32 weight = 1;
Addresses local_addresses;
/// nullptr if there are no remote addresses
ConnectionPoolWithFailoverPtr pool;
/// Connection pool for each replica, contains nullptr for local replicas
ConnectionPoolPtrs per_replica_pools;
2018-09-01 20:28:46 +00:00
bool has_internal_replication = false;
};
using ShardsInfo = std::vector<ShardInfo>;
const ShardsInfo & getShardsInfo() const { return shards_info; }
const AddressesWithFailover & getShardsAddresses() const { return addresses_with_failover; }
/// Returns addresses of some replicas according to specified `only_shard_num` and `only_replica_num`.
/// `only_shard_num` is 1-based index of a shard, 0 means all shards.
/// `only_replica_num` is 1-based index of a replica, 0 means all replicas.
std::vector<const Address *> filterAddressesByShardOrReplica(size_t only_shard_num, size_t only_replica_num) const;
const ShardInfo & getAnyShardInfo() const
{
if (shards_info.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cluster is empty");
return shards_info.front();
}
2017-06-02 21:37:28 +00:00
/// The number of remote shards.
size_t getRemoteShardCount() const { return remote_shard_count; }
2017-06-02 21:37:28 +00:00
/// The number of clickhouse nodes located locally
/// we access the local nodes directly.
size_t getLocalShardCount() const { return local_shard_count; }
2017-06-02 21:37:28 +00:00
/// The number of all shards.
size_t getShardCount() const { return shards_info.size(); }
/// Returns an array of arrays of strings in the format 'escaped_host_name:port' for all replicas of all shards in the cluster.
std::vector<Strings> getHostIDs() const;
const String & getSecret() const { return secret; }
2017-06-02 21:37:28 +00:00
/// Get a subcluster consisting of one shard - index by count (from 0) of the shard of this cluster.
std::unique_ptr<Cluster> getClusterWithSingleShard(size_t index) const;
/// Get a subcluster consisting of one or multiple shards - indexes by count (from 0) of the shard of this cluster.
2018-11-21 04:04:24 +00:00
std::unique_ptr<Cluster> getClusterWithMultipleShards(const std::vector<size_t> & indices) const;
2020-01-10 17:44:34 +00:00
/// Get a new Cluster that contains all servers (all shards with all replicas) from existing cluster as independent shards.
2023-01-19 09:20:40 +00:00
std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard = 0) const;
/// Returns false if cluster configuration doesn't allow to use it for cross-replication.
/// NOTE: true does not mean, that it's actually a cross-replication cluster.
bool maybeCrossReplication() const;
/// Are distributed DDL Queries (ON CLUSTER Clause) allowed for this cluster
bool areDistributedDDLQueriesAllowed() const { return allow_distributed_ddl_queries; }
2023-08-15 17:03:32 +00:00
const String & getName() const { return name; }
private:
SlotToShard slot_to_shard;
public:
const SlotToShard & getSlotToShard() const { return slot_to_shard; }
2015-04-30 12:43:16 +00:00
private:
void initMisc();
/// For getClusterWithMultipleShards implementation.
2020-01-10 17:44:34 +00:00
struct SubclusterTag {};
Cluster(SubclusterTag, const Cluster & from, const std::vector<size_t> & indices);
/// For getClusterWithReplicasAsShards implementation
2020-01-10 17:44:34 +00:00
struct ReplicasAsShardsTag {};
2023-01-19 09:20:40 +00:00
Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard);
2023-12-05 12:31:24 +00:00
void addShard(
const Settings & settings,
Addresses addresses,
bool treat_local_as_remote,
UInt32 current_shard_num,
UInt32 weight = 1,
2023-12-05 18:07:51 +00:00
ShardInfoInsertPathForInternalReplication insert_paths = {},
2023-12-05 12:31:24 +00:00
bool internal_replication = false);
2023-04-07 16:26:23 +00:00
/// Inter-server secret
String secret;
2017-06-02 21:37:28 +00:00
/// Description of the cluster shards.
ShardsInfo shards_info;
2017-06-02 21:37:28 +00:00
/// Any remote shard.
ShardInfo * any_remote_shard_info = nullptr;
2017-06-02 21:37:28 +00:00
/// Non-empty is either addresses or addresses_with_failover.
/// The size and order of the elements in the corresponding array corresponds to shards_info.
2017-06-02 21:37:28 +00:00
/// An array of shards. For each shard, an array of replica addresses (servers that are considered identical).
2013-12-07 16:51:29 +00:00
AddressesWithFailover addresses_with_failover;
bool allow_distributed_ddl_queries = true;
size_t remote_shard_count = 0;
size_t local_shard_count = 0;
2021-07-01 10:18:29 +00:00
String name;
2013-12-07 16:51:29 +00:00
};
/// Shared-ownership handle to a Cluster; this is the type stored in and returned by Clusters.
using ClusterPtr = std::shared_ptr<Cluster>;

class Clusters
2013-12-07 16:51:29 +00:00
{
2016-03-01 17:47:53 +00:00
public:
Clusters(const Poco::Util::AbstractConfiguration & config, const Settings & settings, MultiVersion<Macros>::Version macros, const String & config_prefix = "remote_servers");
2016-03-01 17:47:53 +00:00
Clusters(const Clusters &) = delete;
Clusters & operator=(const Clusters &) = delete;
ClusterPtr getCluster(const std::string & cluster_name) const;
void setCluster(const String & cluster_name, const ClusterPtr & cluster);
void updateClusters(const Poco::Util::AbstractConfiguration & new_config, const Settings & settings, const String & config_prefix, Poco::Util::AbstractConfiguration * old_config = nullptr);
2016-03-01 17:47:53 +00:00
using Impl = std::map<String, ClusterPtr>;
Impl getContainer() const;
protected:
/// setup outside of this class, stored to prevent deleting from impl on config update
std::unordered_set<std::string> automatic_clusters;
MultiVersion<Macros>::Version macros_;
2016-03-01 17:47:53 +00:00
Impl impl;
mutable std::mutex mutex;
2013-12-07 16:51:29 +00:00
};
2013-12-07 16:51:29 +00:00
}