// ClickHouse/dbms/Storages/Kafka/StorageKafka.h

#pragma once
#include <Core/BackgroundSchedulePool.h>
#include <Storages/IStorage.h>
#include <Storages/Kafka/Buffer_fwd.h>
#include <Interpreters/Context.h>
#include <Poco/Semaphore.h>
#include <ext/shared_ptr_helper.h>
#include <mutex>
#include <atomic>

namespace cppkafka
{
class Configuration;
}

namespace DB
{
/** Implements a Kafka queue table engine that can be used as a persistent queue / buffer,
  * or as a basic building block for creating pipelines with continuous insertion / ETL.
*/
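/** Example usage (a minimal sketch, not taken from this file: broker, topic, group and
  * column names are placeholders; the kafka_* setting names follow the engine's documented
  * settings):
  *
  *     CREATE TABLE queue (timestamp UInt64, message String)
  *     ENGINE = Kafka
  *     SETTINGS kafka_broker_list = 'localhost:9092',
  *              kafka_topic_list = 'topic1',
  *              kafka_group_name = 'group1',
  *              kafka_format = 'JSONEachRow';
  *
  * Data is typically consumed continuously by attaching a materialized view that selects
  * from this table.
  */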
class StorageKafka final : public ext::shared_ptr_helper<StorageKafka>, public IStorage
{
2019-08-26 19:07:29 +00:00
friend struct ext::shared_ptr_helper<StorageKafka>;
public:
std::string getName() const override { return "Kafka"; }
2019-08-07 15:21:45 +00:00
bool supportsSettings() const override { return true; }
bool noPushingToViews() const override { return true; }
void startup() override;
void shutdown() override;
Pipes read(
const Names & column_names,
const SelectQueryInfo & query_info,
const Context & context,
QueryProcessingStage::Enum processed_stage,
size_t max_block_size,
unsigned num_streams) override;
BlockOutputStreamPtr write(
const ASTPtr & query,
const Context & context) override;
/// Return a consumer buffer to the pool of available buffers.
void pushReadBuffer(ConsumerBufferPtr buf);
/// Take a consumer buffer from the pool, optionally waiting up to `timeout` for one to become available.
ConsumerBufferPtr popReadBuffer();
ConsumerBufferPtr popReadBuffer(std::chrono::milliseconds timeout);
/// Create a producer buffer for writing blocks with the given header to Kafka.
ProducerBufferPtr createWriteBuffer(const Block & header);
const auto & getTopics() const { return topics; }
const auto & getFormatName() const { return format_name; }
const auto & getSchemaName() const { return schema_name; }
const auto & skipBroken() const { return skip_broken; }
protected:
StorageKafka(
const StorageID & table_id_,
Context & context_,
const ColumnsDescription & columns_,
const String & brokers_,
const String & group_,
const Names & topics_,
const String & format_name_,
char row_delimiter_,
const String & schema_name_,
size_t num_consumers_,
UInt64 max_block_size_,
size_t skip_broken,
bool intermediate_commit_);
private:
// Configuration and state
Context global_context;
Context kafka_context;
Names topics;
const String brokers;
const String group;
const String format_name;
char row_delimiter; /// Optional row delimiter for generating a char-delimited stream, to keep various input stream parsers happy.
const String schema_name;
size_t num_consumers; /// total number of consumers
UInt64 max_block_size; /// maximum block size for insertion into this table
/// Can differ from num_consumers in case of exception in startup() (or if startup() hasn't been called).
/// In this case we still need to be able to shutdown() properly.
size_t num_created_consumers = 0; /// number of actually created consumers.
Poco::Logger * log;
// Consumer list
Poco::Semaphore semaphore;
std::mutex mutex;
std::vector<ConsumerBufferPtr> buffers; /// available buffers for Kafka consumers
size_t skip_broken;
bool intermediate_commit;
// Stream thread
BackgroundSchedulePool::TaskHolder task;
std::atomic<bool> stream_cancelled{false};
/// Create a Kafka consumer and wrap it in a read buffer.
ConsumerBufferPtr createReadBuffer();
// Update Kafka configuration with values from CH user configuration.
void updateConfiguration(cppkafka::Configuration & conf);
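// For illustration, a sketch of the server-config <kafka> section that this reads
// (per the documented configuration; librdkafka property names are written with
// underscores instead of dots, and the values below are placeholders):
//
//     <kafka>
//         <auto_offset_reset>smallest</auto_offset_reset>
//         <debug>cgrp</debug>
//     </kafka>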
/// Body of the background streaming task.
void threadFunc();
/// Read a batch of messages from the consumers and push it to the dependent views.
bool streamToViews();
/// Check (recursively) that the dependent views of `table_id` still exist before streaming to them.
bool checkDependencies(const StorageID & table_id);
};
}