2021-01-01 14:43:11 +00:00
|
|
|
#pragma once
|
|
|
|
|
2021-05-08 14:54:44 +00:00
|
|
|
#include <Core/PostgreSQL/Connection.h>
|
2021-04-08 12:43:02 +00:00
|
|
|
#include <Core/PostgreSQL/insertPostgreSQLValue.h>
|
2021-02-06 12:28:42 +00:00
|
|
|
|
|
|
|
#include <Core/BackgroundSchedulePool.h>
|
2021-02-19 10:40:59 +00:00
|
|
|
#include <Core/Names.h>
|
2021-01-31 19:03:03 +00:00
|
|
|
#include <Storages/IStorage.h>
|
2021-02-13 20:46:52 +00:00
|
|
|
#include <Parsers/ASTExpressionList.h>
|
2022-01-08 17:44:17 +00:00
|
|
|
#include <Databases/PostgreSQL/fetchPostgreSQLTableStructure.h>
|
2021-01-01 14:43:11 +00:00
|
|
|
|
|
|
|
|
2021-02-06 12:28:42 +00:00
|
|
|
namespace DB
|
2021-01-27 21:46:19 +00:00
|
|
|
{
|
2021-08-28 13:56:39 +00:00
|
|
|
struct SettingChange;
|
2021-01-27 21:46:19 +00:00
|
|
|
|
2022-01-08 17:44:17 +00:00
|
|
|
struct StorageInfo
|
|
|
|
{
|
|
|
|
StoragePtr storage;
|
|
|
|
PostgreSQLTableStructure::Attributes attributes;
|
|
|
|
|
|
|
|
StorageInfo(StoragePtr storage_, const PostgreSQLTableStructure::Attributes & attributes_)
|
|
|
|
: storage(storage_), attributes(attributes_) {}
|
2022-02-25 19:04:48 +00:00
|
|
|
|
|
|
|
StorageInfo(StoragePtr storage_, PostgreSQLTableStructure::Attributes && attributes_)
|
|
|
|
: storage(storage_), attributes(std::move(attributes_)) {}
|
2022-01-08 17:44:17 +00:00
|
|
|
};
|
2022-01-08 21:37:11 +00:00
|
|
|
using StorageInfos = std::unordered_map<String, StorageInfo>;
|
2022-01-08 17:44:17 +00:00
|
|
|
|
2021-06-27 19:09:17 +00:00
|
|
|
class MaterializedPostgreSQLConsumer
|
2021-01-01 14:43:11 +00:00
|
|
|
{
|
2022-01-08 21:37:11 +00:00
|
|
|
private:
|
|
|
|
struct StorageData
|
|
|
|
{
|
2023-12-06 10:39:04 +00:00
|
|
|
explicit StorageData(const StorageInfo & storage_info, Poco::Logger * log_);
|
|
|
|
|
|
|
|
/// Number of columns in the sample block of `table_description`.
size_t getColumnsNum() const
{
    const auto & header = table_description.sample_block;
    return header.columns();
}
|
|
|
|
|
|
|
|
/// Read-only access to the sample block stored inside `table_description`.
const Block & getSampleBlock() const
{
    const auto & description = table_description;
    return description.sample_block;
}
|
|
|
|
|
|
|
|
using ArrayInfo = std::unordered_map<size_t, PostgreSQLArrayInfo>;
|
|
|
|
|
|
|
|
const StoragePtr storage;
|
|
|
|
const ExternalResultDescription table_description;
|
|
|
|
const PostgreSQLTableStructure::Attributes columns_attributes;
|
2023-12-08 11:42:38 +00:00
|
|
|
const Names column_names;
|
2023-12-06 10:39:04 +00:00
|
|
|
const ArrayInfo array_info;
|
|
|
|
|
2023-12-14 13:01:27 +00:00
|
|
|
/// Non-copyable (privately inherits boost::noncopyable) aggregate of a sample block,
/// a set of mutable columns and a columns AST.
/// NOTE(review): presumably one batch of rows waiting to be written to the storage — confirm in the .cpp.
struct Buffer : private boost::noncopyable
|
2022-01-08 21:37:11 +00:00
|
|
|
{
|
2023-12-06 10:39:04 +00:00
|
|
|
/// NOTE(review): presumably the header (names and types) that `columns` conforms to — confirm.
Block sample_block;
|
2022-01-08 21:37:11 +00:00
|
|
|
/// Mutable column objects held by this buffer.
MutableColumns columns;
|
2023-12-06 10:39:04 +00:00
|
|
|
/// Expression-list AST kept next to the columns.
/// NOTE(review): presumably the column list used when building the insert — confirm.
ASTExpressionList columns_ast;
|
2022-01-08 21:37:11 +00:00
|
|
|
|
2023-12-06 10:39:04 +00:00
|
|
|
/// Built from a column list (taken by rvalue reference) and an ExternalResultDescription.
/// How the members are derived from these arguments is defined out of line.
explicit Buffer(ColumnsWithTypeAndName && columns_, const ExternalResultDescription & table_description_);
|
2022-01-08 21:37:11 +00:00
|
|
|
|
2023-12-06 10:39:04 +00:00
|
|
|
/// NOTE(review): judging by the name, checks that a value may be inserted into column `col_idx`;
/// what happens on failure is defined out of line — confirm.
void assertInsertIsPossible(size_t col_idx) const;
|
2022-01-08 21:37:11 +00:00
|
|
|
};
|
2023-12-14 13:01:27 +00:00
|
|
|
using BufferPtr = std::unique_ptr<Buffer>;
|
2022-01-08 21:37:11 +00:00
|
|
|
|
2023-12-14 13:01:27 +00:00
|
|
|
Buffer & getLastBuffer();
|
2022-01-08 21:37:11 +00:00
|
|
|
|
2023-12-14 13:01:27 +00:00
|
|
|
BufferPtr popBuffer();
|
|
|
|
|
|
|
|
void addBuffer(BufferPtr buffer);
|
|
|
|
|
|
|
|
void returnBuffer(BufferPtr buffer);
|
2023-12-06 10:39:04 +00:00
|
|
|
|
|
|
|
private:
|
2023-12-14 13:01:27 +00:00
|
|
|
std::deque<BufferPtr> buffers;
|
2022-01-08 21:37:11 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
using Storages = std::unordered_map<String, StorageData>;
|
2021-02-08 19:32:30 +00:00
|
|
|
|
2022-01-08 21:37:11 +00:00
|
|
|
public:
|
2021-06-27 19:09:17 +00:00
|
|
|
MaterializedPostgreSQLConsumer(
|
2021-04-11 07:44:40 +00:00
|
|
|
ContextPtr context_,
|
2021-05-08 14:55:53 +00:00
|
|
|
std::shared_ptr<postgres::Connection> connection_,
|
2021-05-10 09:10:02 +00:00
|
|
|
const String & replication_slot_name_,
|
|
|
|
const String & publication_name_,
|
|
|
|
const String & start_lsn,
|
2022-01-08 17:44:17 +00:00
|
|
|
size_t max_block_size_,
|
2021-09-12 12:33:54 +00:00
|
|
|
bool schema_as_a_part_of_table_name_,
|
2022-01-08 21:37:11 +00:00
|
|
|
StorageInfos storages_,
|
2021-08-27 06:30:21 +00:00
|
|
|
const String & name_for_logger);
|
2021-01-01 14:43:11 +00:00
|
|
|
|
2023-02-16 15:41:31 +00:00
|
|
|
bool consume();
|
2021-02-21 22:41:18 +00:00
|
|
|
|
2021-05-03 21:42:06 +00:00
|
|
|
/// Called from reloadFromSnapshot by replication handler. This method is needed to move a table back into synchronization
|
|
|
|
/// process if it was skipped due to schema changes.
|
2022-01-08 17:44:17 +00:00
|
|
|
void updateNested(const String & table_name, StorageInfo nested_storage_info, Int32 table_id, const String & table_start_lsn);
|
2021-01-01 14:43:11 +00:00
|
|
|
|
2022-01-08 17:44:17 +00:00
|
|
|
void addNested(const String & postgres_table_name, StorageInfo nested_storage_info, const String & table_start_lsn);
|
2021-08-27 06:30:21 +00:00
|
|
|
|
2021-08-28 13:42:36 +00:00
|
|
|
void removeNested(const String & postgres_table_name);
|
|
|
|
|
2021-08-28 13:56:39 +00:00
|
|
|
void setSetting(const SettingChange & setting);
|
|
|
|
|
2021-01-01 14:43:11 +00:00
|
|
|
private:
|
2021-06-29 23:11:46 +00:00
|
|
|
void syncTables();
|
2021-02-08 23:23:51 +00:00
|
|
|
|
2022-08-23 09:53:30 +00:00
|
|
|
void updateLsn();
|
|
|
|
|
2021-02-08 19:32:30 +00:00
|
|
|
String advanceLSN(std::shared_ptr<pqxx::nontransaction> ntx);
|
2021-01-27 21:46:19 +00:00
|
|
|
|
2021-02-08 23:23:51 +00:00
|
|
|
void processReplicationMessage(const char * replication_message, size_t size);
|
2021-02-03 16:13:18 +00:00
|
|
|
|
2021-08-27 06:30:21 +00:00
|
|
|
bool isSyncAllowed(Int32 relation_id, const String & relation_name);
|
2021-02-21 22:41:18 +00:00
|
|
|
|
2023-12-06 10:39:04 +00:00
|
|
|
static void insertDefaultValue(StorageData & storage_data, size_t column_idx);
|
|
|
|
void insertValue(StorageData & storage_data, const std::string & value, size_t column_idx);
|
2021-01-31 19:03:03 +00:00
|
|
|
|
2021-02-08 23:23:51 +00:00
|
|
|
/// Kind of row-level operation; passed to readTupleData() as its `type` argument.
enum class PostgreSQLQuery
|
|
|
|
{
|
|
|
|
INSERT,
|
|
|
|
UPDATE,
|
|
|
|
DELETE
|
|
|
|
};
|
|
|
|
|
2023-12-06 10:39:04 +00:00
|
|
|
void readTupleData(StorageData & storage_data, const char * message, size_t & pos, size_t size, PostgreSQLQuery type, bool old_value = false);
|
2021-02-08 23:23:51 +00:00
|
|
|
|
2021-03-20 15:12:46 +00:00
|
|
|
template<typename T>
|
|
|
|
static T unhexN(const char * message, size_t pos, size_t n);
|
2021-02-18 18:20:52 +00:00
|
|
|
static void readString(const char * message, size_t & pos, size_t size, String & result);
|
|
|
|
static Int64 readInt64(const char * message, size_t & pos, size_t size);
|
|
|
|
static Int32 readInt32(const char * message, size_t & pos, size_t size);
|
|
|
|
static Int16 readInt16(const char * message, size_t & pos, size_t size);
|
|
|
|
static Int8 readInt8(const char * message, size_t & pos, size_t size);
|
2021-01-27 15:29:28 +00:00
|
|
|
|
2021-02-19 10:40:59 +00:00
|
|
|
void markTableAsSkipped(Int32 relation_id, const String & relation_name);
|
2021-02-18 23:33:01 +00:00
|
|
|
|
2022-09-02 08:54:48 +00:00
|
|
|
/// lsn - log sequence number, like wal offset (64 bit).
/// Parses the textual form "<hex>/<hex>" (two 32-bit hexadecimal halves separated by '/')
/// into one 64-bit number: the first half occupies the high 32 bits, the second the low 32 bits.
/// A half that cannot be parsed counts as 0, so malformed input gives a deterministic
/// result instead of reading an uninitialized variable.
static Int64 getLSNValue(const std::string & lsn)
{
    /// Zero-initialized on purpose: sscanf leaves its outputs untouched when a conversion fails.
    UInt32 upper_half = 0;
    UInt32 lower_half = 0;
    std::sscanf(lsn.c_str(), "%X/%X", &upper_half, &lower_half); /// NOLINT

    /// Combine the halves in unsigned arithmetic so that a large upper half
    /// is never shifted into the sign bit of a signed integer.
    const UInt64 combined = (static_cast<UInt64>(upper_half) << 32) | lower_half;
    return static_cast<Int64>(combined);
}
|
|
|
|
|
2021-01-01 14:43:11 +00:00
|
|
|
Poco::Logger * log;
|
2021-04-11 07:44:40 +00:00
|
|
|
ContextPtr context;
|
2021-02-08 23:23:51 +00:00
|
|
|
const std::string replication_slot_name, publication_name;
|
2021-01-01 14:43:11 +00:00
|
|
|
|
2022-08-24 10:25:03 +00:00
|
|
|
bool committed = false;
|
2022-08-23 09:53:30 +00:00
|
|
|
|
2021-05-08 14:55:53 +00:00
|
|
|
std::shared_ptr<postgres::Connection> connection;
|
2021-01-01 14:43:11 +00:00
|
|
|
|
2021-02-06 12:28:42 +00:00
|
|
|
std::string current_lsn, final_lsn;
|
2021-05-10 09:10:02 +00:00
|
|
|
|
|
|
|
/// Numeric value of current_lsn, obtained via getLSNValue() (which returns Int64; it is stored here as UInt64).
|
|
|
|
UInt64 lsn_value;
|
|
|
|
|
2021-08-28 13:56:39 +00:00
|
|
|
size_t max_block_size;
|
2021-10-01 15:54:01 +00:00
|
|
|
|
2021-09-12 12:33:54 +00:00
|
|
|
bool schema_as_a_part_of_table_name;
|
2021-10-01 15:54:01 +00:00
|
|
|
|
2021-05-10 09:10:02 +00:00
|
|
|
String table_to_insert;
|
2021-02-21 22:41:18 +00:00
|
|
|
|
|
|
|
/// List of tables which need to be synced after last replication stream.
|
2021-08-27 06:30:21 +00:00
|
|
|
/// Holds `postgres_table_name` set.
|
2021-02-08 23:23:51 +00:00
|
|
|
std::unordered_set<std::string> tables_to_sync;
|
2021-02-08 19:32:30 +00:00
|
|
|
|
2022-01-08 21:37:11 +00:00
|
|
|
/// `postgres_table_name` -> StorageData.
|
2021-02-08 19:32:30 +00:00
|
|
|
Storages storages;
|
2021-02-18 23:33:01 +00:00
|
|
|
|
|
|
|
std::unordered_map<Int32, String> relation_id_to_name;
|
|
|
|
|
2021-08-27 06:30:21 +00:00
|
|
|
/// `postgres_relation_id` -> `start_lsn`
|
2021-02-21 23:13:58 +00:00
|
|
|
/// skip_list contains relation ids for tables on which ddl was performed, which can break synchronization.
|
2021-02-21 22:41:18 +00:00
|
|
|
/// These breaking changes are detected in the replication stream via the corresponding replication message, and the table is added to the skip list.
|
|
|
|
/// After it is finished, a temporary replication slot is created with 'export snapshot' option, and start_lsn is returned.
|
|
|
|
/// Skipped tables are reloaded from snapshot (nested tables are also updated). Afterwards, if a replication message is
|
|
|
|
/// related to a table in the skip_list, we compare the current lsn with the start_lsn that was returned with the corresponding snapshot.
|
|
|
|
/// If current_lsn >= table_start_lsn, we can safely remove table from skip list and continue its synchronization.
|
2021-02-22 12:35:53 +00:00
|
|
|
/// No required message related to the reloaded table will be missed, because messages are not consumed in the meantime,
|
|
|
|
/// i.e. we will not miss the first start_lsn position for reloaded table.
|
2021-02-21 22:41:18 +00:00
|
|
|
std::unordered_map<Int32, String> skip_list;
|
2021-08-27 06:30:21 +00:00
|
|
|
|
|
|
|
/// `postgres_table_name` -> `start_lsn`
|
|
|
|
/// For dynamically added tables. A new table is loaded via snapshot and we get a start lsn position.
|
|
|
|
/// Once consumer reaches this position, it starts applying replication messages to this table.
|
|
|
|
/// Inside replication handler we have to ensure that replication consumer does not read data from wal
|
|
|
|
/// while the process of adding a table to replication is not finished,
|
|
|
|
/// because we might go beyond this start lsn position before consumer knows that a new table was added.
|
|
|
|
std::unordered_map<String, String> waiting_list;
|
2021-08-28 13:42:36 +00:00
|
|
|
|
|
|
|
/// Since replication may be some time behind, we need to ensure that replication messages for deleted tables are ignored.
|
|
|
|
std::unordered_set<String> deleted_tables;
|
2021-01-01 14:43:11 +00:00
|
|
|
};
|
|
|
|
}
|