2012-05-21 20:38:34 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-06-06 17:18:32 +00:00
|
|
|
#include <ext/shared_ptr_helper.h>
|
2016-08-26 21:25:05 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/IStorage.h>
|
2017-05-10 06:49:19 +00:00
|
|
|
#include <Common/SimpleIncrement.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Client/ConnectionPool.h>
|
|
|
|
#include <Client/ConnectionPoolWithFailover.h>
|
2019-03-22 12:08:30 +00:00
|
|
|
#include <Core/Settings.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/Cluster.h>
|
2018-07-25 12:31:47 +00:00
|
|
|
#include <Parsers/ASTFunction.h>
|
2016-03-25 11:48:45 +00:00
|
|
|
#include <common/logger_useful.h>
|
2019-04-08 05:13:16 +00:00
|
|
|
#include <Common/ActionBlocker.h>
|
2012-05-21 20:38:34 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-12-08 02:49:04 +00:00
|
|
|
/// Forward declarations of project types that the declarations below refer to by name.
class Context;
class StorageDistributedDirectoryMonitor;
class Volume;
class ExpressionActions;

/// Shared-ownership handles for the forward-declared types above.
using VolumePtr = std::shared_ptr<Volume>;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
|
2016-12-08 02:49:04 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/** A distributed table that resides on multiple servers.
|
|
|
|
* Uses data from the specified database and tables on each server.
|
2012-05-30 04:45:49 +00:00
|
|
|
*
|
2017-04-16 15:00:33 +00:00
|
|
|
* You can pass one address, not several.
|
|
|
|
* In this case, the table can be considered remote, rather than distributed.
|
2012-05-21 20:38:34 +00:00
|
|
|
*/
|
2020-03-19 23:48:53 +00:00
|
|
|
class StorageDistributed final : public ext::shared_ptr_helper<StorageDistributed>, public IStorage
{
    /// The constructors are protected; presumably shared_ptr_helper is a friend so that it can call them.
    friend struct ext::shared_ptr_helper<StorageDistributed>;
    friend class DistributedBlockOutputStream;
    friend class StorageDistributedDirectoryMonitor;

public:
    ~StorageDistributed() override;

    /// Factory for a storage that is given its own cluster object (`owned_cluster_`);
    /// the remote side is addressed by database and table name.
    static StoragePtr createWithOwnCluster(
        const StorageID & table_id_,
        const ColumnsDescription & columns_,
        const String & remote_database_,       /// database on remote servers.
        const String & remote_table_,          /// The name of the table on the remote servers.
        ClusterPtr owned_cluster_,
        const Context & context_);

    /// Overload of the factory above where the remote side is given as a table function AST.
    static StoragePtr createWithOwnCluster(
        const StorageID & table_id_,
        const ColumnsDescription & columns_,
        ASTPtr & remote_table_function_ptr_,     /// Table function ptr.
        ClusterPtr & owned_cluster_,
        const Context & context_);

    /// Engine name.
    std::string getName() const override { return "Distributed"; }

    /// Capability flags reported for this engine.
    bool supportsSampling() const override { return true; }
    bool supportsFinal() const override { return true; }
    bool supportsPrewhere() const override { return true; }
    StoragePolicyPtr getStoragePolicy() const override;

    bool isRemote() const override { return true; }

    /// Return true if distributed_group_by_no_merge may be applied.
    bool canForceGroupByNoMerge(const Context &, QueryProcessingStage::Enum to_stage, const ASTPtr &) const;
    QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum to_stage, const ASTPtr &) const override;

    Pipes read(
        const Names & column_names,
        const SelectQueryInfo & query_info,
        const Context & context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        unsigned num_streams) override;

    BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override;

    /// Removes temporary data in local filesystem.
    void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override;

    void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override;
    void renameOnDisk(const String & new_path_to_table_data);

    void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) override;

    /// in the sub-tables, you need to manually add and delete columns
    /// the structure of the sub-table is not checked
    void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) override;

    void startup() override;
    void shutdown() override;

    Strings getDataPaths() const override;

    /// Accessors for the sharding key and the remote table coordinates stored in this object.
    const ExpressionActionsPtr & getShardingKeyExpr() const { return sharding_key_expr; }
    const String & getShardingKeyColumnName() const { return sharding_key_column_name; }
    size_t getShardCount() const;
    std::pair<const std::string &, const std::string &> getPath();
    std::string getRemoteDatabaseName() const { return remote_database; }
    std::string getRemoteTableName() const { return remote_table; }
    std::string getClusterName() const { return cluster_name; } /// Returns empty string if table is used by TableFunctionRemote

    /// create directory monitors for each existing subdirectory
    void createDirectoryMonitors(const std::string & disk);
    /// ensure directory monitor thread and connection pool creation by disk and subdirectory name
    StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const std::string & disk, const std::string & name);

    void flushClusterNodesAllData();

    ClusterPtr getCluster() const;

    /// Apply the following settings:
    /// - optimize_skip_unused_shards
    /// - force_optimize_skip_unused_shards
    ClusterPtr getOptimizedCluster(const Context &, const ASTPtr & query_ptr) const;
    ClusterPtr skipUnusedShards(ClusterPtr cluster, const ASTPtr & query_ptr, const Context & context) const;

    ActionLock getActionLock(StorageActionBlockType type) override;

    NamesAndTypesList getVirtuals() const override;

    /// NOTE: the data members below are declared in the public section.
    /// Their declaration order is kept as-is because it defines the initialization order.
    String remote_database;
    String remote_table;
    ASTPtr remote_table_function_ptr;

    Context global_context;
    Logger * log = &Logger::get("StorageDistributed");

    /// Used to implement TableFunctionRemote.
    std::shared_ptr<Cluster> owned_cluster;

    /// Is empty if this storage implements TableFunctionRemote.
    const String cluster_name;

    /// Default-initialized so the flag never holds an indeterminate value, whichever constructor runs.
    /// The constructors (defined outside this header) are expected to set the real value.
    bool has_sharding_key = false;
    ExpressionActionsPtr sharding_key_expr;
    String sharding_key_column_name;

    /// Used for global monotonic ordering of files to send.
    SimpleIncrement file_names_increment;

    ActionBlocker monitors_blocker;

protected:
    /// Constructor for a remote table addressed by database and table name.
    StorageDistributed(
        const StorageID & id_,
        const ColumnsDescription & columns_,
        const ConstraintsDescription & constraints_,
        const String & remote_database_,
        const String & remote_table_,
        const String & cluster_name_,
        const Context & context_,
        const ASTPtr & sharding_key_,
        const String & storage_policy_,
        const String & relative_data_path_,
        bool attach_);

    /// Constructor for a remote side given as a table function AST.
    StorageDistributed(
        const StorageID & id_,
        const ColumnsDescription & columns_,
        const ConstraintsDescription & constraints_,
        ASTPtr remote_table_function_ptr_,
        const String & cluster_name_,
        const Context & context_,
        const ASTPtr & sharding_key_,
        const String & storage_policy_,
        const String & relative_data_path_,
        bool attach);

    void createStorage();

    String storage_policy;
    String relative_data_path;
    /// Can be empty if relative_data_path is empty. In this case, a directory for the data to be sent is not created.
    VolumePtr volume;

    /// Per-node state: a directory monitor together with a connection pool.
    struct ClusterNodeData
    {
        std::unique_ptr<StorageDistributedDirectoryMonitor> directory_monitor;
        /// NOTE: the name is misspelled ("connection"); it is kept as-is because code outside this header may refer to it.
        ConnectionPoolPtr conneciton_pool;

        void flushAllData() const;
        void shutdownAndDropAllData() const;
    };

    /// NOTE(review): presumably keyed by directory name and guarded by cluster_nodes_mutex — confirm against the implementation.
    std::unordered_map<std::string, ClusterNodeData> cluster_nodes_data;
    std::mutex cluster_nodes_mutex;
};
|
|
|
|
|
|
|
|
}
|