ClickHouse/dbms/src/Storages/StorageDistributed.h

144 lines
5.3 KiB
C++
Raw Normal View History

2012-05-21 20:38:34 +00:00
#pragma once
2017-06-06 17:18:32 +00:00
#include <ext/shared_ptr_helper.h>
#include <Storages/IStorage.h>
#include <Common/SimpleIncrement.h>
#include <Client/ConnectionPool.h>
#include <Client/ConnectionPoolWithFailover.h>
#include <Interpreters/Settings.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/ExpressionActions.h>
2016-03-25 11:48:45 +00:00
#include <common/logger_useful.h>
2012-05-21 20:38:34 +00:00
namespace DB
{
2016-12-08 02:49:04 +00:00
class Context;
2016-12-12 03:33:34 +00:00
class StorageDistributedDirectoryMonitor;
2016-12-08 02:49:04 +00:00
2017-04-16 15:00:33 +00:00
/** A distributed table that resides on multiple servers.
* Uses data from the specified database and tables on each server.
2012-05-30 04:45:49 +00:00
*
2017-04-16 15:00:33 +00:00
* You can pass one address, not several.
* In this case, the table can be considered remote, rather than distributed.
2012-05-21 20:38:34 +00:00
*/
class StorageDistributed : public ext::shared_ptr_helper<StorageDistributed>, public IStorage
2012-05-21 20:38:34 +00:00
{
friend class DistributedBlockOutputStream;
friend class StorageDistributedDirectoryMonitor;
2012-05-21 20:38:34 +00:00
public:
~StorageDistributed() override;
static StoragePtr createWithOwnCluster(
const std::string & table_name_,
const ColumnsDescription & columns_,
2017-04-16 15:00:33 +00:00
const String & remote_database_, /// database on remote servers.
const String & remote_table_, /// The name of the table on the remote servers.
ClusterPtr & owned_cluster_,
const Context & context_);
std::string getName() const override { return "Distributed"; }
std::string getTableName() const override { return table_name; }
bool supportsSampling() const override { return true; }
bool supportsFinal() const override { return true; }
bool supportsPrewhere() const override { return true; }
NameAndTypePair getColumn(const String & column_name) const override;
bool hasColumn(const String & column_name) const override;
bool isRemote() const override { return true; }
BlockInputStreams read(
const Names & column_names,
const SelectQueryInfo & query_info,
const Context & context,
QueryProcessingStage::Enum & processed_stage,
2017-06-02 15:54:39 +00:00
size_t max_block_size,
unsigned num_streams) override;
2017-05-21 22:25:25 +00:00
BlockOutputStreamPtr write(const ASTPtr & query, const Settings & settings) override;
void drop() override {}
void rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & new_table_name) override { table_name = new_table_name; }
2017-04-16 15:00:33 +00:00
/// in the sub-tables, you need to manually add and delete columns
/// the structure of the sub-table is not checked
void alter(const AlterCommands & params, const String & database_name, const String & table_name, const Context & context) override;
void startup() override;
void shutdown() override;
String getDataPath() const override { return path; }
2017-04-16 15:00:33 +00:00
/// From each replica, get a description of the corresponding local table.
BlockInputStreams describe(const Context & context, const Settings & settings);
const ExpressionActionsPtr & getShardingKeyExpr() const { return sharding_key_expr; }
const String & getShardingKeyColumnName() const { return sharding_key_column_name; }
size_t getShardCount() const;
const String & getPath() const { return path; }
std::string getRemoteDatabaseName() const { return remote_database; }
std::string getRemoteTableName() const { return remote_table; }
std::string getClusterName() const { return cluster_name; } /// Returns empty string if tables is used by TableFunctionRemote
/// create directory monitors for each existing subdirectory
void createDirectoryMonitors();
2017-08-10 04:37:35 +00:00
/// ensure directory monitor thread creation by subdirectory name
void requireDirectoryMonitor(const std::string & name);
/// ensure connection pool creation and return it
ConnectionPoolPtr requireConnectionPool(const std::string & name);
ClusterPtr getCluster() const;
String table_name;
String remote_database;
String remote_table;
2017-05-21 22:25:25 +00:00
const Context & context;
Logger * log = &Logger::get("StorageDistributed");
/// Used to implement TableFunctionRemote.
std::shared_ptr<Cluster> owned_cluster;
/// Is empty if this storage implements TableFunctionRemote.
const String cluster_name;
bool has_sharding_key;
ExpressionActionsPtr sharding_key_expr;
String sharding_key_column_name;
2017-04-16 15:00:33 +00:00
String path; /// Can be empty if data_path_ is empty. In this case, a directory for the data to be sent is not created.
struct ClusterNodeData
{
std::unique_ptr<StorageDistributedDirectoryMonitor> directory_monitor;
ConnectionPoolPtr conneciton_pool;
/// Creates connection_pool if not exists.
void requireConnectionPool(const std::string & name, const StorageDistributed & storage);
/// Creates directory_monitor if not exists.
void requireDirectoryMonitor(const std::string & name, StorageDistributed & storage);
};
std::unordered_map<std::string, ClusterNodeData> cluster_nodes_data;
std::mutex cluster_nodes_mutex;
/// Used for global monotonic ordering of files to send.
SimpleIncrement file_names_increment;
protected:
StorageDistributed(
const std::string & name_,
const ColumnsDescription & columns_,
const String & remote_database_,
const String & remote_table_,
const String & cluster_name_,
const Context & context_,
const ASTPtr & sharding_key_,
const String & data_path_);
2012-05-21 20:38:34 +00:00
};
}