2017-10-03 23:39:24 +00:00
|
|
|
#pragma once
|
2018-12-27 13:27:01 +00:00
|
|
|
|
2018-09-26 05:12:32 +00:00
|
|
|
#include <Core/BackgroundSchedulePool.h>
|
2018-12-27 13:27:01 +00:00
|
|
|
#include <Storages/IStorage.h>
|
2019-10-22 10:31:28 +00:00
|
|
|
#include <Storages/Kafka/Buffer_fwd.h>
|
2019-08-20 11:17:57 +00:00
|
|
|
|
2017-12-16 01:45:11 +00:00
|
|
|
#include <Poco/Semaphore.h>
|
2018-12-27 13:27:01 +00:00
|
|
|
#include <ext/shared_ptr_helper.h>
|
|
|
|
|
|
|
|
#include <mutex>
|
2019-08-27 23:47:30 +00:00
|
|
|
#include <atomic>
|
|
|
|
|
2019-10-22 10:31:28 +00:00
|
|
|
/// Forward declaration only: this header refers to cppkafka::Configuration
/// solely by reference, so the complete type is not required here.
namespace cppkafka
{
class Configuration;
}
|
2017-10-03 23:39:24 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/** Implements a Kafka queue table engine that can be used as a persistent queue / buffer,
|
|
|
|
* or as a basic building block for creating pipelines with a continuous insertion / ETL.
|
|
|
|
*/
|
|
|
|
class StorageKafka : public ext::shared_ptr_helper<StorageKafka>, public IStorage
|
|
|
|
{
|
2019-08-26 19:07:29 +00:00
|
|
|
friend struct ext::shared_ptr_helper<StorageKafka>;
|
2017-10-03 23:39:24 +00:00
|
|
|
public:
|
|
|
|
std::string getName() const override { return "Kafka"; }
|
|
|
|
std::string getTableName() const override { return table_name; }
|
2019-03-29 20:31:06 +00:00
|
|
|
std::string getDatabaseName() const override { return database_name; }
|
2019-10-22 10:31:28 +00:00
|
|
|
|
2019-08-07 15:21:45 +00:00
|
|
|
bool supportsSettings() const override { return true; }
|
2019-10-22 10:31:28 +00:00
|
|
|
bool noPushingToViews() const override { return true; }
|
2017-10-03 23:39:24 +00:00
|
|
|
|
|
|
|
void startup() override;
|
|
|
|
void shutdown() override;
|
|
|
|
|
|
|
|
BlockInputStreams read(
|
|
|
|
const Names & column_names,
|
|
|
|
const SelectQueryInfo & query_info,
|
|
|
|
const Context & context,
|
2018-04-19 14:47:09 +00:00
|
|
|
QueryProcessingStage::Enum processed_stage,
|
2019-02-18 23:38:44 +00:00
|
|
|
size_t max_block_size,
|
2017-10-03 23:39:24 +00:00
|
|
|
unsigned num_streams) override;
|
|
|
|
|
2019-08-20 11:17:57 +00:00
|
|
|
BlockOutputStreamPtr write(
|
|
|
|
const ASTPtr & query,
|
2019-08-27 23:47:30 +00:00
|
|
|
const Context & context) override;
|
2019-08-20 11:17:57 +00:00
|
|
|
|
2019-08-27 23:47:30 +00:00
|
|
|
void rename(const String & /* new_path_to_db */, const String & new_database_name, const String & new_table_name, TableStructureWriteLockHolder &) override;
|
2017-10-03 23:39:24 +00:00
|
|
|
|
|
|
|
void updateDependencies() override;
|
|
|
|
|
2019-08-20 11:17:57 +00:00
|
|
|
void pushReadBuffer(ConsumerBufferPtr buf);
|
|
|
|
ConsumerBufferPtr popReadBuffer();
|
|
|
|
ConsumerBufferPtr popReadBuffer(std::chrono::milliseconds timeout);
|
|
|
|
|
|
|
|
ProducerBufferPtr createWriteBuffer();
|
2019-05-22 19:38:43 +00:00
|
|
|
|
|
|
|
const auto & getTopics() const { return topics; }
|
|
|
|
const auto & getFormatName() const { return format_name; }
|
|
|
|
const auto & getSchemaName() const { return schema_name; }
|
|
|
|
const auto & skipBroken() const { return skip_broken; }
|
|
|
|
|
2019-08-31 21:15:40 +00:00
|
|
|
void checkSettingCanBeChanged(const String & setting_name) const override;
|
2019-08-27 09:48:20 +00:00
|
|
|
|
2019-05-22 19:38:43 +00:00
|
|
|
protected:
|
|
|
|
StorageKafka(
|
|
|
|
const std::string & table_name_,
|
|
|
|
const std::string & database_name_,
|
|
|
|
Context & context_,
|
|
|
|
const ColumnsDescription & columns_,
|
|
|
|
const String & brokers_, const String & group_, const Names & topics_,
|
|
|
|
const String & format_name_, char row_delimiter_, const String & schema_name_,
|
|
|
|
size_t num_consumers_, UInt64 max_block_size_, size_t skip_broken,
|
|
|
|
bool intermediate_commit_);
|
|
|
|
|
2017-10-03 23:39:24 +00:00
|
|
|
private:
|
2017-12-16 01:45:11 +00:00
|
|
|
// Configuration and state
|
2017-10-03 23:39:24 +00:00
|
|
|
String table_name;
|
|
|
|
String database_name;
|
2019-01-04 12:10:00 +00:00
|
|
|
Context global_context;
|
2019-12-13 01:21:40 +00:00
|
|
|
Context kafka_context;
|
2017-10-03 23:39:24 +00:00
|
|
|
Names topics;
|
2017-12-16 01:45:11 +00:00
|
|
|
const String brokers;
|
|
|
|
const String group;
|
2017-10-03 23:39:24 +00:00
|
|
|
const String format_name;
|
2019-05-21 11:24:32 +00:00
|
|
|
char row_delimiter; /// optional row delimiter for generating char delimited stream in order to make various input stream parsers happy.
|
2017-10-03 23:39:24 +00:00
|
|
|
const String schema_name;
|
2019-05-21 11:24:32 +00:00
|
|
|
size_t num_consumers; /// total number of consumers
|
|
|
|
UInt64 max_block_size; /// maximum block size for insertion into this table
|
|
|
|
|
2018-03-19 17:52:48 +00:00
|
|
|
/// Can differ from num_consumers in case of exception in startup() (or if startup() hasn't been called).
|
|
|
|
/// In this case we still need to be able to shutdown() properly.
|
2019-05-21 11:24:32 +00:00
|
|
|
size_t num_created_consumers = 0; /// number of actually created consumers.
|
|
|
|
|
2017-12-16 01:45:11 +00:00
|
|
|
Poco::Logger * log;
|
2017-10-03 23:39:24 +00:00
|
|
|
|
2017-12-16 01:45:11 +00:00
|
|
|
// Consumer list
|
|
|
|
Poco::Semaphore semaphore;
|
2017-10-03 23:39:24 +00:00
|
|
|
std::mutex mutex;
|
2019-08-20 11:17:57 +00:00
|
|
|
std::vector<ConsumerBufferPtr> buffers; /// available buffers for Kafka consumers
|
2017-12-16 01:45:11 +00:00
|
|
|
|
2019-01-18 12:48:38 +00:00
|
|
|
size_t skip_broken;
|
|
|
|
|
2019-05-16 15:20:30 +00:00
|
|
|
bool intermediate_commit;
|
|
|
|
|
2017-12-16 01:45:11 +00:00
|
|
|
// Stream thread
|
2018-09-26 05:12:32 +00:00
|
|
|
BackgroundSchedulePool::TaskHolder task;
|
2017-12-16 01:45:11 +00:00
|
|
|
std::atomic<bool> stream_cancelled{false};
|
|
|
|
|
2019-08-20 11:17:57 +00:00
|
|
|
ConsumerBufferPtr createReadBuffer();
|
|
|
|
|
|
|
|
// Update Kafka configuration with values from CH user configuration.
|
|
|
|
void updateConfiguration(cppkafka::Configuration & conf);
|
2017-10-03 23:39:24 +00:00
|
|
|
|
2019-08-20 11:17:57 +00:00
|
|
|
void threadFunc();
|
2018-09-26 05:12:32 +00:00
|
|
|
bool streamToViews();
|
2018-10-16 18:21:27 +00:00
|
|
|
bool checkDependencies(const String & database_name, const String & table_name);
|
2017-10-03 23:39:24 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|