ClickHouse/src/Storages/StorageDistributed.h

206 lines
7.9 KiB
C++
Raw Normal View History

2012-05-21 20:38:34 +00:00
#pragma once
2017-06-06 17:18:32 +00:00
#include <ext/shared_ptr_helper.h>
#include <Storages/IStorage.h>
#include <Storages/Distributed/DirectoryMonitor.h>
#include <Common/SimpleIncrement.h>
#include <Client/ConnectionPool.h>
#include <Client/ConnectionPoolWithFailover.h>
2018-07-25 12:31:47 +00:00
#include <Parsers/ASTFunction.h>
2016-03-25 11:48:45 +00:00
#include <common/logger_useful.h>
#include <Common/ActionBlocker.h>
2012-05-21 20:38:34 +00:00
namespace DB
{
/// Forward declarations and shared-pointer aliases.
/// The class declared below refers to these types only by reference or through smart pointers
/// (const Settings &, const Context &, std::unique_ptr<Context>, VolumePtr, ExpressionActionsPtr, ClusterPtr).
struct Settings;
2016-12-08 02:49:04 +00:00
class Context;
2020-07-22 17:21:08 +00:00
class IVolume;
using VolumePtr = std::shared_ptr<IVolume>;
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
2016-12-08 02:49:04 +00:00
class Cluster;
using ClusterPtr = std::shared_ptr<Cluster>;
2017-04-16 15:00:33 +00:00
/** A distributed table that resides on multiple servers.
* Uses data from the specified database and tables on each server.
2012-05-30 04:45:49 +00:00
*
2017-04-16 15:00:33 +00:00
* A single address may be specified instead of several.
* In that case the table can be considered a remote table rather than a distributed one.
2012-05-21 20:38:34 +00:00
*/
class StorageDistributed final : public ext::shared_ptr_helper<StorageDistributed>, public IStorage
2012-05-21 20:38:34 +00:00
{
2019-08-26 19:07:29 +00:00
/// The class is final and derives from both ext::shared_ptr_helper<StorageDistributed> and IStorage.
/// Its constructors are protected (see below); the friends listed here get access to the non-public members.
/// NOTE(review): presumably instances are created through ext::shared_ptr_helper — confirm against that helper.
friend struct ext::shared_ptr_helper<StorageDistributed>;
friend class DistributedBlockOutputStream;
friend class StorageDistributedDirectoryMonitor;
2012-05-21 20:38:34 +00:00
public:
~StorageDistributed() override;
/// Factory overload that identifies the remote table by database name and table name
/// and receives the cluster object explicitly (by value).
static StoragePtr createWithOwnCluster(
2019-12-04 16:06:55 +00:00
const StorageID & table_id_,
const ColumnsDescription & columns_,
const String & remote_database_, /// The name of the database on the remote servers.
const String & remote_table_, /// The name of the table on the remote servers.
ClusterPtr owned_cluster_,
const Context & context_);
2018-07-27 21:33:30 +00:00
/// Factory overload that identifies the remote side by a table function AST instead of database/table names.
/// Note that both the AST pointer and the cluster pointer are taken by non-const lvalue reference here.
static StoragePtr createWithOwnCluster(
2019-12-04 16:06:55 +00:00
const StorageID & table_id_,
const ColumnsDescription & columns_,
2018-07-25 12:31:47 +00:00
ASTPtr & remote_table_function_ptr_, /// Table function ptr.
ClusterPtr & owned_cluster_,
const Context & context_);
/// Engine name reported by this storage.
std::string getName() const override { return "Distributed"; }
/// Capability flags: each of the following is reported unconditionally as supported.
bool supportsSampling() const override { return true; }
bool supportsFinal() const override { return true; }
bool supportsPrewhere() const override { return true; }
StoragePolicyPtr getStoragePolicy() const override;
/// Always true for this engine.
bool isRemote() const override { return true; }
/// Only to_stage is named in this declaration; the Context and the query AST parameters are unnamed.
QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum to_stage, const ASTPtr &) const override;
2020-08-03 13:54:14 +00:00
/// Read entry point (IStorage override). The metadata snapshot parameter is left unnamed in this declaration.
Pipe read(
const Names & column_names,
const StorageMetadataPtr & /*metadata_snapshot*/,
const SelectQueryInfo & query_info,
const Context & context,
QueryProcessingStage::Enum processed_stage,
size_t max_block_size,
2017-06-02 15:54:39 +00:00
unsigned num_streams) override;
bool supportsParallelInsert() const override { return true; }
/// Write entry point (IStorage override). The metadata snapshot parameter is left unnamed in this declaration.
BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override;
2018-06-09 15:48:22 +00:00
/// Removes temporary data in local filesystem.
2020-06-18 16:10:47 +00:00
void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) override;
2019-08-27 20:43:08 +00:00
2020-04-07 14:05:51 +00:00
/// rename() is the IStorage override; renameOnDisk() is a non-virtual member that takes only the new data path.
/// NOTE(review): presumably rename() delegates the filesystem part to renameOnDisk() — confirm in the implementation.
void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override;
void renameOnDisk(const String & new_path_to_table_data);
2018-04-21 00:35:20 +00:00
2020-06-10 11:16:31 +00:00
/// The settings parameter is left unnamed in this declaration.
void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const override;
2019-12-26 18:17:05 +00:00
2017-04-16 15:00:33 +00:00
/// In the sub-tables, columns have to be added and deleted manually;
/// the structure of the sub-tables is not checked.
2020-06-18 16:10:47 +00:00
void alter(const AlterCommands & params, const Context & context, TableLockHolder & table_lock_holder) override;
/// IStorage overrides declared here and defined in the implementation file.
void startup() override;
void shutdown() override;
void drop() override;
Strings getDataPaths() const override;
/// Accessors returning references to the sharding-key members declared further below.
const ExpressionActionsPtr & getShardingKeyExpr() const { return sharding_key_expr; }
const String & getShardingKeyColumnName() const { return sharding_key_column_name; }
size_t getShardCount() const;
2020-07-23 14:10:48 +00:00
/// Returns a reference to the (protected) relative_data_path member.
const String & getRelativeDataPath() const { return relative_data_path; }
/// The three getters below return copies of the corresponding members.
std::string getRemoteDatabaseName() const { return remote_database; }
std::string getRemoteTableName() const { return remote_table; }
std::string getClusterName() const { return cluster_name; } /// Returns empty string if the table is used by TableFunctionRemote
/// Create directory monitors for each existing subdirectory.
void createDirectoryMonitors(const std::string & disk);
/// Ensure directory monitor thread and connection pool creation by disk and subdirectory name.
StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const std::string & disk, const std::string & name);
/// Return list of metrics for all created monitors
/// (note that monitors are created lazily, i.e. not until at least one INSERT has been executed)
std::vector<StorageDistributedDirectoryMonitor::Status> getDirectoryMonitorsStatuses() const;
void flushClusterNodesAllData();
ClusterPtr getCluster() const;
/// Static helper; the cluster pointer is passed by (const) value.
static IColumn::Selector createSelector(const ClusterPtr cluster, const ColumnWithTypeAndName & result);
/// Apply the following settings:
/// - optimize_skip_unused_shards
/// - force_optimize_skip_unused_shards
ClusterPtr getOptimizedCluster(const Context &, const StorageMetadataPtr & metadata_snapshot, const ASTPtr & query_ptr) const;
ClusterPtr skipUnusedShards(ClusterPtr cluster, const ASTPtr & query_ptr, const StorageMetadataPtr & metadata_snapshot, const Context & context) const;
ActionLock getActionLock(StorageActionBlockType type) override;
NamesAndTypesList getVirtuals() const override;
2020-04-27 13:55:30 +00:00
/// NOTE(review): the data members from here up to `protected:` are still in the public section.
String remote_database;
String remote_table;
ASTPtr remote_table_function_ptr;
/// Context object owned by this storage through std::unique_ptr.
std::unique_ptr<Context> global_context;
2020-05-30 21:57:37 +00:00
/// Raw logger pointer.
Poco::Logger * log;
/// Used to implement TableFunctionRemote.
std::shared_ptr<Cluster> owned_cluster;
/// Is empty if this storage implements TableFunctionRemote.
const String cluster_name;
/// NOTE(review): has_sharding_key has no in-class initializer (unlike sharding_key_is_deterministic);
/// presumably every constructor sets it — verify.
bool has_sharding_key;
bool sharding_key_is_deterministic = false;
ExpressionActionsPtr sharding_key_expr;
String sharding_key_column_name;
/// Used for global monotonic ordering of files to send.
SimpleIncrement file_names_increment;
/// NOTE(review): presumably the blocker that getActionLock() hands out locks for — TODO confirm.
ActionBlocker monitors_blocker;
protected:
/// Constructor for a storage that identifies the remote table by database name and table name.
/// NOTE(review): the last parameter is named attach_ here but attach in the overload below.
StorageDistributed(
2019-12-04 16:06:55 +00:00
const StorageID & id_,
const ColumnsDescription & columns_,
2019-08-24 21:20:20 +00:00
const ConstraintsDescription & constraints_,
const String & remote_database_,
const String & remote_table_,
const String & cluster_name_,
const Context & context_,
const ASTPtr & sharding_key_,
2020-07-23 14:10:48 +00:00
const String & storage_policy_name_,
2019-10-25 19:07:47 +00:00
const String & relative_data_path_,
2019-08-03 11:02:40 +00:00
bool attach_);
2018-07-27 21:33:30 +00:00
/// Constructor for a storage that identifies the remote side by a table function AST.
StorageDistributed(
2019-12-04 16:06:55 +00:00
const StorageID & id_,
const ColumnsDescription & columns_,
2019-08-24 21:20:20 +00:00
const ConstraintsDescription & constraints_,
ASTPtr remote_table_function_ptr_,
const String & cluster_name_,
const Context & context_,
const ASTPtr & sharding_key_,
2020-07-23 14:10:48 +00:00
const String & storage_policy_name_,
2019-10-25 19:07:47 +00:00
const String & relative_data_path_,
bool attach);
2018-12-05 15:48:06 +00:00
/// Exposed read-only through getRelativeDataPath().
String relative_data_path;
2020-07-23 14:10:48 +00:00
/// Can be empty if relative_data_path is empty. In this case, a directory for the data to be sent is not created.
2020-07-23 14:10:48 +00:00
StoragePolicyPtr storage_policy;
/// The main volume to store data.
/// Storage policy may have several configured volumes, but second and other volumes are used for parts movement in MergeTree engine.
/// For Distributed engine such configuration doesn't make sense and only the first (main) volume will be used to store data.
/// Other volumes will be ignored. It's needed to allow using the same multi-volume policy both for Distributed and other engines.
VolumePtr data_volume;
/// Per-node bundle: an owned directory monitor together with a connection pool.
struct ClusterNodeData
{
std::unique_ptr<StorageDistributedDirectoryMonitor> directory_monitor;
2020-06-03 00:10:39 +00:00
ConnectionPoolPtr connection_pool;
2020-04-22 06:22:14 +00:00
/// Both operations are const member functions; their behaviour is defined in the implementation file.
void flushAllData() const;
void shutdownAndDropAllData() const;
};
/// ClusterNodeData entries keyed by string.
/// NOTE(review): the key is presumably the subdirectory name passed to requireDirectoryMonitor() — verify.
std::unordered_map<std::string, ClusterNodeData> cluster_nodes_data;
/// Declared mutable, so it can be locked from const member functions.
/// NOTE(review): presumably guards cluster_nodes_data — confirm in the implementation.
mutable std::mutex cluster_nodes_mutex;
2012-05-21 20:38:34 +00:00
};
}