ClickHouse/dbms/include/DB/DataStreams/RemoteBlockInputStream.h

174 lines
5.5 KiB
C++
Raw Normal View History

2012-05-17 19:15:53 +00:00
#pragma once
2015-09-29 19:19:54 +00:00
#include <common/logger_useful.h>
2012-05-17 19:15:53 +00:00
#include <DB/DataStreams/IProfilingBlockInputStream.h>
#include <DB/Common/Throttler.h>
#include <DB/Interpreters/Context.h>
#include <DB/Client/ConnectionPool.h>
2015-11-06 17:44:01 +00:00
#include <DB/Client/MultiplexedConnections.h>
#include <DB/Interpreters/Cluster.h>
2012-05-17 19:15:53 +00:00
namespace DB
{
2017-02-10 20:13:43 +00:00
/** This class allows one to launch queries on remote replicas of one shard and get the results
2012-05-17 19:15:53 +00:00
*/
class RemoteBlockInputStream : public IProfilingBlockInputStream
{
public:
2017-02-10 20:13:43 +00:00
/// Takes already set connection
2015-11-06 17:44:01 +00:00
RemoteBlockInputStream(Connection & connection_, const String & query_, const Settings * settings_,
ThrottlerPtr throttler_ = nullptr, const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete,
const Context & context_ = getDefaultContext());
2017-02-10 20:13:43 +00:00
/// Takes already set connection. Takes the ownership of a connection from a pool
2015-11-06 17:44:01 +00:00
RemoteBlockInputStream(ConnectionPool::Entry & pool_entry_, const String & query_, const Settings * settings_,
ThrottlerPtr throttler_ = nullptr, const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete,
const Context & context_ = getDefaultContext());
2017-02-10 20:13:43 +00:00
/// Takes a pool and gets one or several connections from it
RemoteBlockInputStream(ConnectionPoolPtr & pool_, const String & query_, const Settings * settings_,
2015-11-06 17:44:01 +00:00
ThrottlerPtr throttler_ = nullptr, const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete,
const Context & context_ = getDefaultContext());
2017-02-10 20:13:43 +00:00
/// Takes a pool for each shard and gets one or several connections from it
2015-11-06 17:44:01 +00:00
RemoteBlockInputStream(ConnectionPoolsPtr & pools_, const String & query_, const Settings * settings_,
ThrottlerPtr throttler_ = nullptr, const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete,
const Context & context_ = getDefaultContext());
2012-10-18 19:24:46 +00:00
2015-11-06 17:44:01 +00:00
~RemoteBlockInputStream() override;
2012-05-17 19:15:53 +00:00
/// Specify how we allocate connections on a shard.
2016-03-01 17:47:53 +00:00
void setPoolMode(PoolMode pool_mode_);
2012-05-17 19:15:53 +00:00
2017-02-10 20:13:43 +00:00
/// Besides blocks themself, get blocks' extra info
2015-11-06 17:44:01 +00:00
void appendExtraInfo();
2017-02-10 20:13:43 +00:00
/// Sends query (initiates calculation) before read()
2015-11-06 17:44:01 +00:00
void readPrefix() override;
2012-10-18 19:24:46 +00:00
2017-02-10 20:13:43 +00:00
/** Prevent default progress notification because progress' callback is
called by its own
2012-05-17 19:15:53 +00:00
*/
void progress(const Progress & value) override {}
2012-05-17 19:15:53 +00:00
2015-11-06 17:44:01 +00:00
void cancel() override;
2012-10-18 19:24:46 +00:00
2015-11-06 17:44:01 +00:00
String getName() const override { return "Remote"; }
2015-10-12 14:53:16 +00:00
2015-11-06 17:44:01 +00:00
String getID() const override
2015-10-12 14:53:16 +00:00
{
2015-11-06 17:44:01 +00:00
std::stringstream res;
res << this;
return res.str();
2015-10-12 14:53:16 +00:00
}
BlockExtraInfo getBlockExtraInfo() const override
{
2015-11-06 17:44:01 +00:00
return multiplexed_connections->getBlockExtraInfo();
2015-10-12 14:53:16 +00:00
}
2012-10-20 02:10:47 +00:00
protected:
2017-02-10 20:13:43 +00:00
/// Send all temporary tables to remote servers
2015-11-06 17:44:01 +00:00
void sendExternalTables();
2015-11-06 17:44:01 +00:00
Block readImpl() override;
2015-11-06 17:44:01 +00:00
void readSuffixImpl() override;
2013-09-13 20:33:09 +00:00
2017-02-10 20:13:43 +00:00
/// Creates an object to talk to one shard's replicas performing query
2015-11-06 17:44:01 +00:00
void createMultiplexedConnections();
2017-02-10 20:13:43 +00:00
/// Returns true if query was sent
2015-11-06 17:44:01 +00:00
bool isQueryPending() const;
2017-02-10 20:13:43 +00:00
/// Returns true if exception was thrown
2015-11-06 17:44:01 +00:00
bool hasThrownException() const;
private:
2015-11-06 17:44:01 +00:00
void init(const Settings * settings_);
2015-11-06 17:44:01 +00:00
void sendQuery();
2017-02-10 20:13:43 +00:00
/// If wasn't sent yet, send request to cancell all connections to replicas
2015-11-06 17:44:01 +00:00
void tryCancel(const char * reason);
/// ITable::read requires a Context, therefore we should create one if the user can't supply it
static Context & getDefaultContext()
{
static Context instance;
return instance;
}
2012-05-17 19:15:53 +00:00
private:
2017-02-10 20:13:43 +00:00
/// Already set connection
ConnectionPool::Entry pool_entry;
Connection * connection = nullptr;
2015-11-06 17:44:01 +00:00
2017-02-10 20:13:43 +00:00
/// One shard's connections pool
ConnectionPoolPtr pool = nullptr;
2015-11-06 17:44:01 +00:00
2017-02-10 20:13:43 +00:00
/// Connections pools of one or several shards
2015-11-06 17:44:01 +00:00
ConnectionPoolsPtr pools;
std::unique_ptr<MultiplexedConnections> multiplexed_connections;
2012-05-23 19:51:30 +00:00
const String query;
bool send_settings;
Settings settings;
2017-02-10 20:13:43 +00:00
/// If != nullptr, used to limit network trafic
ThrottlerPtr throttler;
2017-02-10 20:13:43 +00:00
/// Temporary tables needed to be sent to remote servers
Tables external_tables;
2012-05-17 19:15:53 +00:00
QueryProcessingStage::Enum stage;
Context context;
2012-05-17 19:15:53 +00:00
2017-02-10 20:13:43 +00:00
/// Threads for reading from temporary tables and following sending of data
/// to remote servers for GLOBAL-subqueries
std::vector<ExternalTablesData> external_tables_data;
std::mutex external_tables_mutex;
2017-02-10 20:13:43 +00:00
/// Connections to replicas are established, but no queries are sent yet
std::atomic<bool> established { false };
2017-02-10 20:13:43 +00:00
/// Query is sent (used before getting first block)
std::atomic<bool> sent_query { false };
2017-02-10 20:13:43 +00:00
/** All data from all replicas are received, before EndOfStream packet.
* To prevent desynchronization, if not all data is read before object
* destruction, it's required to send cancel query request to replicas and
* read all packets before EndOfStream
2012-10-18 19:00:49 +00:00
*/
std::atomic<bool> finished { false };
2017-02-10 20:13:43 +00:00
/** Cancel query request was sent to all replicas beacuse data is not needed anymore
* This behaviour may occur when:
* - data size is already satisfactory (when using LIMIT, for example)
* - an exception was thrown from client side
2012-10-18 19:00:49 +00:00
*/
std::atomic<bool> was_cancelled { false };
2017-02-10 20:13:43 +00:00
/** An exception from replica was received. No need in receiving more packets or
* requesting to cancel query execution
*/
std::atomic<bool> got_exception_from_replica { false };
2017-02-10 20:13:43 +00:00
/** Unkown packet was received from replica. No need in receiving more packets or
* requesting to cancel query execution
*/
std::atomic<bool> got_unknown_packet_from_replica { false };
2012-10-18 19:00:49 +00:00
2015-10-12 14:53:16 +00:00
bool append_extra_info = false;
2016-03-01 17:47:53 +00:00
PoolMode pool_mode = PoolMode::GET_MANY;
2015-10-12 14:53:16 +00:00
Logger * log = &Logger::get("RemoteBlockInputStream");
2012-05-17 19:15:53 +00:00
};
}