2012-05-21 20:38:34 +00:00
|
|
|
#pragma once
|
|
|
|
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/shared_ptr_helper.h>
|
2016-08-26 21:25:05 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/IStorage.h>
|
2020-06-03 23:50:47 +00:00
|
|
|
#include <Storages/Distributed/DirectoryMonitor.h>
|
2021-01-07 14:14:41 +00:00
|
|
|
#include <Storages/Distributed/DistributedSettings.h>
|
2021-07-23 16:30:18 +00:00
|
|
|
#include <Storages/getStructureOfRemoteTable.h>
|
2017-05-10 06:49:19 +00:00
|
|
|
#include <Common/SimpleIncrement.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Client/ConnectionPool.h>
|
|
|
|
#include <Client/ConnectionPoolWithFailover.h>
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/logger_useful.h>
|
2019-04-08 05:13:16 +00:00
|
|
|
#include <Common/ActionBlocker.h>
|
2020-12-23 16:04:05 +00:00
|
|
|
#include <Interpreters/Cluster.h>
|
2012-05-21 20:38:34 +00:00
|
|
|
|
2020-12-23 16:04:05 +00:00
|
|
|
#include <pcg_random.hpp>
|
2012-05-21 20:38:34 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2020-06-13 16:31:28 +00:00
|
|
|
/// Forward declaration: this header only refers to Settings by const reference
/// (see totalBytes() and getOptimizedQueryProcessingStage()), so the full definition is not needed here.
struct Settings;
|
2016-12-08 02:49:04 +00:00
|
|
|
/// Forward declaration of the query/server context class.
/// NOTE(review): the bare name `Context` is not used in the visible part of this header
/// (only ContextPtr is) - confirm whether this declaration is still required.
class Context;
|
|
|
|
|
2020-07-22 17:21:08 +00:00
|
|
|
/// Forward declaration: this header only holds IVolume through a shared pointer.
class IVolume;
|
|
|
|
/// Shared-ownership handle to a volume (the type of StorageDistributed::data_volume).
using VolumePtr = std::shared_ptr<IVolume>;
|
2020-01-20 17:54:52 +00:00
|
|
|
|
2021-01-09 12:26:37 +00:00
|
|
|
/// Forward declaration: this header only passes IDisk around through a shared pointer.
class IDisk;
|
|
|
|
/// Shared-ownership handle to a disk (taken by createDirectoryMonitors() and requireDirectoryMonitor()).
using DiskPtr = std::shared_ptr<IDisk>;
|
|
|
|
|
2020-02-10 15:50:12 +00:00
|
|
|
/// Forward declaration: this header only holds ExpressionActions through a shared pointer.
class ExpressionActions;
|
|
|
|
/// Shared-ownership handle to expression actions (the type of StorageDistributed::sharding_key_expr).
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
|
2016-12-08 02:49:04 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/** A distributed table that resides on multiple servers.
|
|
|
|
* Uses data from the specified database and tables on each server.
|
2012-05-30 04:45:49 +00:00
|
|
|
*
|
2017-04-16 15:00:33 +00:00
|
|
|
* You can pass one address, not several.
|
|
|
|
* In this case, the table can be considered remote, rather than distributed.
|
2012-05-21 20:38:34 +00:00
|
|
|
*/
|
2021-06-15 19:55:21 +00:00
|
|
|
class StorageDistributed final : public shared_ptr_helper<StorageDistributed>, public IStorage, WithContext
|
2012-05-21 20:38:34 +00:00
|
|
|
{
|
2021-06-15 19:55:21 +00:00
|
|
|
friend struct shared_ptr_helper<StorageDistributed>;
|
2021-07-23 14:25:35 +00:00
|
|
|
friend class DistributedSink;
|
2017-04-01 07:20:54 +00:00
|
|
|
friend class StorageDistributedDirectoryMonitor;
|
2021-01-26 18:45:36 +00:00
|
|
|
friend class StorageSystemDistributionQueue;
|
2014-08-15 09:50:05 +00:00
|
|
|
|
2012-05-21 20:38:34 +00:00
|
|
|
public:
|
2022-03-10 15:23:49 +00:00
|
|
|
virtual ~StorageDistributed() override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
std::string getName() const override { return "Distributed"; }
|
2019-07-09 15:40:21 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
bool supportsSampling() const override { return true; }
|
|
|
|
bool supportsFinal() const override { return true; }
|
|
|
|
bool supportsPrewhere() const override { return true; }
|
2021-05-25 00:49:24 +00:00
|
|
|
bool supportsSubcolumns() const override { return true; }
|
2022-02-15 23:14:09 +00:00
|
|
|
bool supportsDynamicSubcolumns() const override { return true; }
|
2020-03-29 07:43:40 +00:00
|
|
|
StoragePolicyPtr getStoragePolicy() const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-12-29 01:33:39 +00:00
|
|
|
/// Do not apply moving to PREWHERE optimization for distributed tables,
|
|
|
|
/// because we can't be sure that underlying table supports PREWHERE.
|
|
|
|
bool canMoveConditionsToPrewhere() const override { return false; }
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
bool isRemote() const override { return true; }
|
2021-07-23 16:30:18 +00:00
|
|
|
|
2022-03-14 17:29:18 +00:00
|
|
|
/// Snapshot for StorageDistributed contains descriptions
|
|
|
|
/// of columns of type Object for each shard at the moment
|
|
|
|
/// of the start of query.
|
2021-07-23 16:30:18 +00:00
|
|
|
struct SnapshotData : public StorageSnapshot::Data
|
|
|
|
{
|
|
|
|
ColumnsDescriptionByShardNum objects_by_shard;
|
|
|
|
};
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-07-12 14:54:02 +00:00
|
|
|
StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot) const override;
|
2022-02-15 23:14:09 +00:00
|
|
|
StorageSnapshotPtr getStorageSnapshotForQuery(
|
|
|
|
const StorageMetadataPtr & metadata_snapshot, const ASTPtr & query) const override;
|
2021-07-12 14:54:02 +00:00
|
|
|
|
2021-04-22 13:32:17 +00:00
|
|
|
QueryProcessingStage::Enum
|
2021-07-09 03:15:41 +00:00
|
|
|
getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override;
|
2018-04-19 14:47:09 +00:00
|
|
|
|
2020-08-03 13:54:14 +00:00
|
|
|
Pipe read(
|
2017-04-01 07:20:54 +00:00
|
|
|
const Names & column_names,
|
2021-07-09 03:15:41 +00:00
|
|
|
const StorageSnapshotPtr & storage_snapshot,
|
2020-09-20 17:52:17 +00:00
|
|
|
SelectQueryInfo & query_info,
|
2021-04-10 23:33:54 +00:00
|
|
|
ContextPtr context,
|
2018-04-19 14:47:09 +00:00
|
|
|
QueryProcessingStage::Enum processed_stage,
|
2019-02-18 23:38:44 +00:00
|
|
|
size_t max_block_size,
|
2017-06-02 15:54:39 +00:00
|
|
|
unsigned num_streams) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2020-09-25 13:19:26 +00:00
|
|
|
void read(
|
2020-09-18 14:16:53 +00:00
|
|
|
QueryPlan & query_plan,
|
|
|
|
const Names & column_names,
|
2021-07-09 03:15:41 +00:00
|
|
|
const StorageSnapshotPtr & storage_snapshot,
|
2020-11-10 12:02:22 +00:00
|
|
|
SelectQueryInfo & query_info,
|
2021-04-10 23:33:54 +00:00
|
|
|
ContextPtr context,
|
2020-09-18 14:16:53 +00:00
|
|
|
QueryProcessingStage::Enum processed_stage,
|
2020-09-25 13:03:12 +00:00
|
|
|
size_t /*max_block_size*/,
|
|
|
|
unsigned /*num_streams*/) override;
|
2020-09-18 14:16:53 +00:00
|
|
|
|
2020-08-26 16:41:30 +00:00
|
|
|
bool supportsParallelInsert() const override { return true; }
|
2021-01-26 18:45:36 +00:00
|
|
|
std::optional<UInt64> totalBytes(const Settings &) const override;
|
2020-08-26 16:41:30 +00:00
|
|
|
|
2021-07-23 14:25:35 +00:00
|
|
|
SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-09-14 17:48:18 +00:00
|
|
|
QueryPipelineBuilderPtr distributedWrite(const ASTInsertQuery & query, ContextPtr context) override;
|
2021-04-04 10:27:45 +00:00
|
|
|
|
2018-06-09 15:48:22 +00:00
|
|
|
/// Removes temporary data in local filesystem.
|
2021-04-10 23:33:54 +00:00
|
|
|
void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) override;
|
2019-08-27 20:43:08 +00:00
|
|
|
|
2020-04-07 14:05:51 +00:00
|
|
|
void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override;
|
2018-04-21 00:35:20 +00:00
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override;
|
2019-12-26 18:17:05 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/// in the sub-tables, you need to manually add and delete columns
|
|
|
|
/// the structure of the sub-table is not checked
|
2021-10-25 17:49:49 +00:00
|
|
|
void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-06-06 17:06:14 +00:00
|
|
|
void startup() override;
|
2017-04-01 07:20:54 +00:00
|
|
|
void shutdown() override;
|
2021-07-01 13:21:38 +00:00
|
|
|
void flush() override;
|
2020-07-16 20:35:23 +00:00
|
|
|
void drop() override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-09-03 17:04:30 +00:00
|
|
|
bool storesDataOnDisk() const override { return data_volume != nullptr; }
|
2020-01-20 17:54:52 +00:00
|
|
|
Strings getDataPaths() const override;
|
2018-02-21 19:26:59 +00:00
|
|
|
|
2021-01-26 18:45:36 +00:00
|
|
|
ActionLock getActionLock(StorageActionBlockType type) override;
|
|
|
|
|
|
|
|
NamesAndTypesList getVirtuals() const override;
|
|
|
|
|
|
|
|
/// Used by InterpreterInsertQuery
|
|
|
|
std::string getRemoteDatabaseName() const { return remote_database; }
|
|
|
|
std::string getRemoteTableName() const { return remote_table; }
|
|
|
|
ClusterPtr getCluster() const;
|
|
|
|
|
|
|
|
/// Used by InterpreterSystemQuery
|
2021-04-10 23:33:54 +00:00
|
|
|
void flushClusterNodesAllData(ContextPtr context);
|
2021-01-26 18:45:36 +00:00
|
|
|
|
|
|
|
/// Used by ClusterCopier
|
|
|
|
size_t getShardCount() const;
|
|
|
|
|
|
|
|
private:
|
|
|
|
StorageDistributed(
|
|
|
|
const StorageID & id_,
|
|
|
|
const ColumnsDescription & columns_,
|
|
|
|
const ConstraintsDescription & constraints_,
|
2021-04-23 12:18:23 +00:00
|
|
|
const String & comment,
|
2021-01-26 18:45:36 +00:00
|
|
|
const String & remote_database_,
|
|
|
|
const String & remote_table_,
|
|
|
|
const String & cluster_name_,
|
2021-04-10 23:33:54 +00:00
|
|
|
ContextPtr context_,
|
2021-01-26 18:45:36 +00:00
|
|
|
const ASTPtr & sharding_key_,
|
|
|
|
const String & storage_policy_name_,
|
|
|
|
const String & relative_data_path_,
|
|
|
|
const DistributedSettings & distributed_settings_,
|
|
|
|
bool attach_,
|
2021-08-20 14:05:53 +00:00
|
|
|
ClusterPtr owned_cluster_ = {},
|
|
|
|
ASTPtr remote_table_function_ptr_ = {});
|
2021-01-26 18:45:36 +00:00
|
|
|
|
|
|
|
StorageDistributed(
|
|
|
|
const StorageID & id_,
|
|
|
|
const ColumnsDescription & columns_,
|
|
|
|
const ConstraintsDescription & constraints_,
|
|
|
|
ASTPtr remote_table_function_ptr_,
|
|
|
|
const String & cluster_name_,
|
2021-04-10 23:33:54 +00:00
|
|
|
ContextPtr context_,
|
2021-01-26 18:45:36 +00:00
|
|
|
const ASTPtr & sharding_key_,
|
|
|
|
const String & storage_policy_name_,
|
|
|
|
const String & relative_data_path_,
|
|
|
|
const DistributedSettings & distributed_settings_,
|
|
|
|
bool attach,
|
|
|
|
ClusterPtr owned_cluster_ = {});
|
|
|
|
|
|
|
|
void renameOnDisk(const String & new_path_to_table_data);
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
const ExpressionActionsPtr & getShardingKeyExpr() const { return sharding_key_expr; }
|
|
|
|
const String & getShardingKeyColumnName() const { return sharding_key_column_name; }
|
2020-07-23 14:10:48 +00:00
|
|
|
const String & getRelativeDataPath() const { return relative_data_path; }
|
2014-08-13 11:26:13 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// create directory monitors for each existing subdirectory
|
2021-01-09 12:26:37 +00:00
|
|
|
void createDirectoryMonitors(const DiskPtr & disk);
|
2020-01-20 17:54:52 +00:00
|
|
|
/// ensure directory monitor thread and connectoin pool creation by disk and subdirectory name
|
2021-06-24 07:07:31 +00:00
|
|
|
StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const DiskPtr & disk, const std::string & name, bool startup);
|
2021-01-26 18:45:36 +00:00
|
|
|
|
2020-06-03 23:50:47 +00:00
|
|
|
/// Return list of metrics for all created monitors
|
|
|
|
/// (note that monitors are created lazily, i.e. until at least one INSERT executed)
|
2021-01-26 18:45:36 +00:00
|
|
|
///
|
|
|
|
/// Used by StorageSystemDistributionQueue
|
2020-06-03 23:50:47 +00:00
|
|
|
std::vector<StorageDistributedDirectoryMonitor::Status> getDirectoryMonitorsStatuses() const;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
static IColumn::Selector createSelector(ClusterPtr cluster, const ColumnWithTypeAndName & result);
|
2020-03-24 07:51:54 +00:00
|
|
|
/// Apply the following settings:
|
|
|
|
/// - optimize_skip_unused_shards
|
|
|
|
/// - force_optimize_skip_unused_shards
|
2021-07-23 16:47:43 +00:00
|
|
|
ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const;
|
2021-07-24 01:52:18 +00:00
|
|
|
|
|
|
|
ClusterPtr skipUnusedShards(
|
2021-08-20 13:33:30 +00:00
|
|
|
ClusterPtr cluster, const ASTPtr & query_ptr, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const;
|
2020-03-24 07:51:54 +00:00
|
|
|
|
2021-07-17 13:31:06 +00:00
|
|
|
/// This method returns optimal query processing stage.
|
|
|
|
///
|
|
|
|
/// Here is the list of stages (from the less optimal to more optimal):
|
|
|
|
/// - WithMergeableState
|
|
|
|
/// - WithMergeableStateAfterAggregation
|
|
|
|
/// - WithMergeableStateAfterAggregationAndLimit
|
|
|
|
/// - Complete
|
|
|
|
///
|
|
|
|
/// Some simple queries w/o GROUP BY/DISTINCT can use more optimal stage.
|
|
|
|
///
|
|
|
|
/// Also in case of optimize_distributed_group_by_sharding_key=1 the queries
|
|
|
|
/// with GROUP BY/DISTINCT sharding_key can also use more optimal stage.
|
|
|
|
/// (see also optimize_skip_unused_shards/allow_nondeterministic_optimize_skip_unused_shards)
|
|
|
|
///
|
|
|
|
/// @return QueryProcessingStage or empty std::optoinal
|
|
|
|
/// (in this case regular WithMergeableState should be used)
|
|
|
|
std::optional<QueryProcessingStage::Enum> getOptimizedQueryProcessingStage(const SelectQueryInfo & query_info, const Settings & settings) const;
|
2020-03-24 07:51:54 +00:00
|
|
|
|
2020-12-23 16:04:05 +00:00
|
|
|
size_t getRandomShardIndex(const Cluster::ShardsInfo & shards);
|
2022-03-08 14:24:39 +00:00
|
|
|
std::string getClusterName() const { return cluster_name.empty() ? "<remote>" : cluster_name; }
|
2020-12-23 16:04:05 +00:00
|
|
|
|
2021-01-07 14:14:41 +00:00
|
|
|
const DistributedSettings & getDistributedSettingsRef() const { return distributed_settings; }
|
|
|
|
|
2021-01-27 18:43:41 +00:00
|
|
|
void delayInsertOrThrowIfNeeded() const;
|
2021-01-26 18:45:37 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
String remote_database;
|
|
|
|
String remote_table;
|
2018-07-24 13:10:34 +00:00
|
|
|
ASTPtr remote_table_function_ptr;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2020-05-30 21:57:37 +00:00
|
|
|
Poco::Logger * log;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
/// Used to implement TableFunctionRemote.
|
|
|
|
std::shared_ptr<Cluster> owned_cluster;
|
|
|
|
|
|
|
|
/// Is empty if this storage implements TableFunctionRemote.
|
|
|
|
const String cluster_name;
|
|
|
|
|
|
|
|
bool has_sharding_key;
|
2020-06-16 18:49:04 +00:00
|
|
|
bool sharding_key_is_deterministic = false;
|
2017-04-01 07:20:54 +00:00
|
|
|
ExpressionActionsPtr sharding_key_expr;
|
|
|
|
String sharding_key_column_name;
|
2017-05-10 06:39:37 +00:00
|
|
|
|
|
|
|
/// Used for global monotonic ordering of files to send.
|
|
|
|
SimpleIncrement file_names_increment;
|
2017-11-03 21:50:22 +00:00
|
|
|
|
2019-04-08 05:13:16 +00:00
|
|
|
ActionBlocker monitors_blocker;
|
|
|
|
|
2020-01-20 17:54:52 +00:00
|
|
|
String relative_data_path;
|
2020-07-23 14:10:48 +00:00
|
|
|
|
2020-01-20 17:54:52 +00:00
|
|
|
/// Can be empty if relative_data_path is empty. In this case, a directory for the data to be sent is not created.
|
2020-07-23 14:10:48 +00:00
|
|
|
StoragePolicyPtr storage_policy;
|
2020-09-15 09:26:56 +00:00
|
|
|
/// The main volume to store data.
|
|
|
|
/// Storage policy may have several configured volumes, but second and other volumes are used for parts movement in MergeTree engine.
|
|
|
|
/// For Distributed engine such configuration doesn't make sense and only the first (main) volume will be used to store data.
|
|
|
|
/// Other volumes will be ignored. It's needed to allow using the same multi-volume policy both for Distributed and other engines.
|
|
|
|
VolumePtr data_volume;
|
2020-01-20 17:54:52 +00:00
|
|
|
|
2021-01-07 14:14:41 +00:00
|
|
|
DistributedSettings distributed_settings;
|
|
|
|
|
2020-01-20 17:54:52 +00:00
|
|
|
struct ClusterNodeData
|
|
|
|
{
|
2021-02-08 19:07:30 +00:00
|
|
|
std::shared_ptr<StorageDistributedDirectoryMonitor> directory_monitor;
|
2020-06-03 00:10:39 +00:00
|
|
|
ConnectionPoolPtr connection_pool;
|
2020-01-20 17:54:52 +00:00
|
|
|
};
|
|
|
|
std::unordered_map<std::string, ClusterNodeData> cluster_nodes_data;
|
2020-06-03 23:50:47 +00:00
|
|
|
mutable std::mutex cluster_nodes_mutex;
|
2020-01-20 17:54:52 +00:00
|
|
|
|
2020-12-23 16:04:05 +00:00
|
|
|
// For random shard index generation
|
|
|
|
mutable std::mutex rng_mutex;
|
|
|
|
pcg64 rng;
|
2012-05-21 20:38:34 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|