// ClickHouse/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h

#pragma once
#include <Parsers/formatAST.h>
#include <DataStreams/IBlockOutputStream.h>
#include <Core/Block.h>
#include <atomic>
#include <chrono>
#include <memory>
#include <string>
#include <vector>
/// Forward declaration only: this header stores just a `Poco::Logger *`,
/// so the full Poco logging header does not have to be included here.
namespace Poco { class Logger; }
2015-04-16 06:12:35 +00:00
namespace DB
{
2016-01-28 01:00:42 +00:00
class StorageDistributed;
class Cluster;
using ClusterPtr = std::shared_ptr<Cluster>;
2016-01-28 01:00:42 +00:00
/** The write is asynchronous - the data is first written to the local filesystem, and then sent to the remote servers.
2017-04-16 15:00:33 +00:00
* If the Distributed table uses more than one shard, then in order to support the write,
* when creating the table, an additional parameter must be specified for ENGINE - the sharding key.
* Sharding key is an arbitrary expression from the columns. For example, rand() or UserID.
* When writing, the data block is splitted by the remainder of the division of the sharding key by the total weight of the shards,
* and the resulting blocks are written in a compressed Native format in separate directories for sending.
* For each destination address (each directory with data to send), a separate thread is created in StorageDistributed,
* which monitors the directory and sends data. */
class DistributedBlockOutputStream : public IBlockOutputStream
{
public:
DistributedBlockOutputStream(StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_, bool insert_sync_, UInt64 insert_timeout_ = 0);
2014-08-15 09:56:22 +00:00
void write(const Block & block) override;
void writePrefix() override { deadline = std::chrono::system_clock::now() + std::chrono::seconds(insert_timeout); }
private:
IColumn::Selector createSelector(Block block);
2016-01-26 01:56:42 +00:00
void writeSplit(const Block & block);
2016-01-26 01:56:42 +00:00
void writeImpl(const Block & block, const size_t shard_id = 0);
2016-01-26 01:56:42 +00:00
void writeToLocal(const Block & block, const size_t repeats);
2016-01-26 01:56:42 +00:00
void writeToShard(const Block & block, const std::vector<std::string> & dir_names);
void writeToShardDirect(const Block & block, const std::vector<std::string> & dir_names, std::atomic<bool> & timeout_exceeded);
2016-01-28 01:00:42 +00:00
private:
StorageDistributed & storage;
ASTPtr query_ast;
ClusterPtr cluster;
bool insert_sync = true;
UInt64 insert_timeout = 1;
size_t blocks_inserted = 0;
std::chrono::system_clock::time_point deadline;
Poco::Logger * log;
};
}