2021-01-01 14:43:11 +00:00
|
|
|
#pragma once
|
|
|
|
|
2021-02-18 06:06:37 +00:00
|
|
|
#if !defined(ARCADIA_BUILD)
|
|
|
|
#include "config_core.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if USE_LIBPQXX
|
2021-01-19 15:29:22 +00:00
|
|
|
#include "PostgreSQLConnection.h"
|
2021-02-03 16:13:18 +00:00
|
|
|
#include "PostgreSQLReplicaMetadata.h"
|
2021-02-18 06:06:37 +00:00
|
|
|
#include "insertPostgreSQLValue.h"
|
2021-02-06 12:28:42 +00:00
|
|
|
|
|
|
|
#include <Core/BackgroundSchedulePool.h>
|
2021-02-19 10:40:59 +00:00
|
|
|
#include <Core/Names.h>
|
2021-01-27 21:46:19 +00:00
|
|
|
#include <common/logger_useful.h>
|
2021-01-31 19:03:03 +00:00
|
|
|
#include <Storages/IStorage.h>
|
2021-02-08 19:32:30 +00:00
|
|
|
#include <DataStreams/OneBlockInputStream.h>
|
2021-02-13 20:46:52 +00:00
|
|
|
#include <Parsers/ASTExpressionList.h>
|
2021-02-18 06:06:37 +00:00
|
|
|
#include "pqxx/pqxx" // Y_IGNORE
|
2021-01-01 14:43:11 +00:00
|
|
|
|
|
|
|
|
2021-02-21 22:41:18 +00:00
|
|
|
/// TODO: There is ALTER PUBLICATION command to dynamically add and remove tables for replicating (the command is transactional).
|
2021-02-21 23:13:58 +00:00
|
|
|
/// This can also be supported. (Probably, if the replication stream brings a relation name which does not currently
|
2021-02-21 22:41:18 +00:00
|
|
|
/// exist in CH, it can be loaded from snapshot and handled the same way as some ddl by comparing lsn positions of wal,
|
|
|
|
/// but there is the case that a known table has been just renamed, then the previous version might be just dropped by user).
|
|
|
|
|
2021-02-06 12:28:42 +00:00
|
|
|
namespace DB
|
2021-01-27 21:46:19 +00:00
|
|
|
{
|
|
|
|
|
2021-01-01 14:43:11 +00:00
|
|
|
class PostgreSQLReplicaConsumer
|
|
|
|
{
|
|
|
|
public:
|
2021-02-08 19:32:30 +00:00
|
|
|
using Storages = std::unordered_map<String, StoragePtr>;
|
|
|
|
|
2021-01-01 14:43:11 +00:00
|
|
|
PostgreSQLReplicaConsumer(
|
2021-01-27 21:46:19 +00:00
|
|
|
std::shared_ptr<Context> context_,
|
2021-02-04 17:17:16 +00:00
|
|
|
PostgreSQLConnectionPtr connection_,
|
2021-01-01 14:43:11 +00:00
|
|
|
const std::string & replication_slot_name_,
|
|
|
|
const std::string & publication_name_,
|
2021-02-03 16:13:18 +00:00
|
|
|
const std::string & metadata_path,
|
2021-02-06 12:28:42 +00:00
|
|
|
const std::string & start_lsn,
|
2021-01-31 19:03:03 +00:00
|
|
|
const size_t max_block_size_,
|
2021-02-08 19:32:30 +00:00
|
|
|
Storages storages_);
|
2021-01-01 14:43:11 +00:00
|
|
|
|
2021-02-19 10:40:59 +00:00
|
|
|
void readMetadata();
|
2021-02-08 23:23:51 +00:00
|
|
|
|
2021-02-21 22:41:18 +00:00
|
|
|
bool consume(std::vector<std::pair<Int32, String>> & skipped_tables);
|
|
|
|
|
|
|
|
void updateNested(const String & table_name, StoragePtr nested_table);
|
|
|
|
|
|
|
|
void updateSkipList(const std::unordered_map<Int32, String> & tables_with_lsn);
|
2021-01-01 14:43:11 +00:00
|
|
|
|
|
|
|
private:
|
2021-02-08 23:23:51 +00:00
|
|
|
void synchronizationStream();
|
2021-02-03 16:13:18 +00:00
|
|
|
|
2021-01-27 21:46:19 +00:00
|
|
|
bool readFromReplicationSlot();
|
2021-02-08 23:23:51 +00:00
|
|
|
|
|
|
|
void syncTables(std::shared_ptr<pqxx::nontransaction> tx);
|
|
|
|
|
2021-02-08 19:32:30 +00:00
|
|
|
String advanceLSN(std::shared_ptr<pqxx::nontransaction> ntx);
|
2021-01-27 21:46:19 +00:00
|
|
|
|
2021-02-08 23:23:51 +00:00
|
|
|
void processReplicationMessage(const char * replication_message, size_t size);
|
2021-02-03 16:13:18 +00:00
|
|
|
|
2021-02-21 22:41:18 +00:00
|
|
|
bool isSyncAllowed(Int32 relation_id);
|
|
|
|
|
|
|
|
/// State built from one storage: the constructor takes a StoragePtr and immediately
/// delegates to fillBuffer() to populate the fields below.
/// Instances are kept in the `Buffers` map (String -> Buffer) and are passed by reference to
/// insertValue(), insertDefaultValue() and readTupleData().
struct Buffer
|
2021-02-08 19:32:30 +00:00
|
|
|
{
|
|
|
|
/// NOTE(review): presumably the sample block / column type description used when inserting values - confirm in fillBuffer().
ExternalResultDescription description;
|
|
|
|
/// Columns under construction. NOTE(review): presumably appended to by insertValue()/insertDefaultValue() - confirm in the .cpp.
MutableColumns columns;
|
2021-02-13 20:46:52 +00:00
|
|
|
/// NOTE(review): looks like the list of column identifiers as an AST expression list - confirm against fillBuffer().
std::shared_ptr<ASTExpressionList> columnsAST;
|
2021-02-08 19:32:30 +00:00
|
|
|
/// Needed for insertPostgreSQLValue() method to parse array
/// NOTE(review): the size_t key is presumably the column index - confirm.
|
|
|
|
std::unordered_map<size_t, PostgreSQLArrayInfo> array_info;
|
|
|
|
|
2021-02-21 22:41:18 +00:00
|
|
|
/// Builds the buffer for the given storage by calling fillBuffer(storage).
Buffer(StoragePtr storage) { fillBuffer(storage); }
|
|
|
|
/// Defined out of line; also invoked by the constructor above.
void fillBuffer(StoragePtr storage);
|
2021-02-08 19:32:30 +00:00
|
|
|
};
|
|
|
|
|
2021-02-21 22:41:18 +00:00
|
|
|
using Buffers = std::unordered_map<String, Buffer>;
|
2021-02-08 19:32:30 +00:00
|
|
|
|
2021-02-21 22:41:18 +00:00
|
|
|
static void insertDefaultValue(Buffer & buffer, size_t column_idx);
|
|
|
|
static void insertValue(Buffer & buffer, const std::string & value, size_t column_idx);
|
2021-01-31 19:03:03 +00:00
|
|
|
|
2021-02-08 23:23:51 +00:00
|
|
|
/// Kind of row-level statement; passed as the `type` argument of readTupleData().
enum class PostgreSQLQuery
|
|
|
|
{
|
|
|
|
INSERT,
|
|
|
|
UPDATE,
|
|
|
|
DELETE
|
|
|
|
};
|
|
|
|
|
2021-02-21 22:41:18 +00:00
|
|
|
void readTupleData(Buffer & buffer, const char * message, size_t & pos, size_t size, PostgreSQLQuery type, bool old_value = false);
|
2021-02-08 23:23:51 +00:00
|
|
|
|
2021-02-18 18:20:52 +00:00
|
|
|
static void readString(const char * message, size_t & pos, size_t size, String & result);
|
|
|
|
static Int64 readInt64(const char * message, size_t & pos, size_t size);
|
|
|
|
static Int32 readInt32(const char * message, size_t & pos, size_t size);
|
|
|
|
static Int16 readInt16(const char * message, size_t & pos, size_t size);
|
|
|
|
static Int8 readInt8(const char * message, size_t & pos, size_t size);
|
2021-01-27 15:29:28 +00:00
|
|
|
|
2021-02-19 10:40:59 +00:00
|
|
|
void markTableAsSkipped(Int32 relation_id, const String & relation_name);
|
2021-02-18 23:33:01 +00:00
|
|
|
|
2021-02-21 22:41:18 +00:00
|
|
|
/// LSN - log sequence number, an offset in the WAL (64 bit).
/// PostgreSQL prints it as two hexadecimal numbers of up to 8 digits each, separated by a slash
/// ("upper_half/lower_half"). This converts that text form into a single 64-bit value.
///
/// Each half is parsed as a 32-bit unsigned number. (Scanning with "%lX" directly into Int64
/// variables is a format/argument mismatch wherever Int64 is not `long`.)
/// Halves that cannot be parsed are treated as zero instead of being left uninitialized,
/// so malformed input yields a deterministic result.
Int64 getLSNValue(const std::string & lsn)
{
    static_assert(sizeof(unsigned int) >= 4, "each LSN half needs at least 32 bits");

    unsigned int upper_half = 0;
    unsigned int lower_half = 0;

    /// The field width of 8 keeps every conversion within 32 bits.
    std::sscanf(lsn.c_str(), "%8X/%8X", &upper_half, &lower_half);

    /// Combine in an unsigned 64-bit type so that the shift can never overflow a signed value.
    const unsigned long long combined = (static_cast<unsigned long long>(upper_half) << 32) | lower_half;
    return static_cast<Int64>(combined);
}
|
|
|
|
|
2021-01-01 14:43:11 +00:00
|
|
|
Poco::Logger * log;
|
2021-01-27 21:46:19 +00:00
|
|
|
std::shared_ptr<Context> context;
|
2021-02-08 23:23:51 +00:00
|
|
|
const std::string replication_slot_name, publication_name;
|
2021-01-01 14:43:11 +00:00
|
|
|
|
2021-02-08 23:23:51 +00:00
|
|
|
PostgreSQLReplicaMetadata metadata;
|
2021-02-04 17:17:16 +00:00
|
|
|
PostgreSQLConnectionPtr connection;
|
2021-01-01 14:43:11 +00:00
|
|
|
|
2021-02-06 12:28:42 +00:00
|
|
|
std::string current_lsn, final_lsn;
|
2021-02-08 19:32:30 +00:00
|
|
|
const size_t max_block_size;
|
2021-02-08 23:23:51 +00:00
|
|
|
|
2021-02-08 19:32:30 +00:00
|
|
|
std::string table_to_insert;
|
2021-02-21 22:41:18 +00:00
|
|
|
|
|
|
|
/// List of tables which need to be synced after last replication stream.
|
2021-02-08 23:23:51 +00:00
|
|
|
std::unordered_set<std::string> tables_to_sync;
|
2021-02-08 19:32:30 +00:00
|
|
|
|
|
|
|
Storages storages;
|
|
|
|
Buffers buffers;
|
2021-02-18 23:33:01 +00:00
|
|
|
|
|
|
|
std::unordered_map<Int32, String> relation_id_to_name;
|
|
|
|
|
|
|
|
/// Schema information remembered for one relation; stored by relation id in the
/// `schema_data` map (Int32 -> SchemaData) declared in this class.
struct SchemaData
|
|
|
|
{
|
|
|
|
/// Number of columns, set once by the constructor.
Int16 number_of_columns;
|
|
|
|
/// data_type_id and type_modifier
/// One (data_type_id, type_modifier) pair per entry; the constructor leaves the vector empty.
|
|
|
|
std::vector<std::pair<Int32, Int32>> column_identifiers;
|
|
|
|
|
|
|
|
/// Stores the column count only; column_identifiers is filled by the caller afterwards.
SchemaData(Int16 number_of_columns_) : number_of_columns(number_of_columns_) {}
|
|
|
|
};
|
|
|
|
|
2021-02-21 22:41:18 +00:00
|
|
|
/// Cache for table schema data to be able to detect schema changes, because ddl is not
|
|
|
|
/// replicated with postgresql logical replication protocol, but some table schema info
|
|
|
|
/// is received if it is the first time we received dml message for given relation in current session or
|
|
|
|
/// if relation definition has changed since the last relation definition message.
|
2021-02-18 23:33:01 +00:00
|
|
|
std::unordered_map<Int32, SchemaData> schema_data;
|
2021-02-21 22:41:18 +00:00
|
|
|
|
2021-02-21 23:13:58 +00:00
|
|
|
/// skip_list contains relation ids for tables on which ddl was performed, which can break synchronization.
|
2021-02-21 22:41:18 +00:00
|
|
|
/// Such breaking changes are detected in the replication stream from the corresponding replication message, and the table is added to the skip list.
|
|
|
|
/// After it is finished, a temporary replication slot is created with 'export snapshot' option, and start_lsn is returned.
|
|
|
|
/// Skipped tables are reloaded from snapshot (nested tables are also updated). Afterwards, if a replication message is
|
|
|
|
/// related to a table in the skip_list, we compare current lsn with start_lsn, which was returned with the corresponding snapshot.
|
|
|
|
/// If current_lsn >= table_start_lsn, we can safely remove table from skip list and continue its synchronization.
|
|
|
|
std::unordered_map<Int32, String> skip_list;
|
|
|
|
|
|
|
|
/// Mapping from table name which is currently in a skip_list to a table_start_lsn for future comparison with current_lsn.
|
|
|
|
//NameToNameMap start_lsn_for_skipped;
|
2021-01-01 14:43:11 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2021-02-18 06:06:37 +00:00
|
|
|
#endif
|