2021-01-27 15:29:28 +00:00
|
|
|
#pragma once
|
|
|
|
|
2022-09-28 13:29:29 +00:00
|
|
|
#include "config.h"
|
2021-01-27 15:29:28 +00:00
|
|
|
|
2021-02-18 06:06:37 +00:00
|
|
|
#if USE_LIBPQXX
|
2021-02-06 12:28:42 +00:00
|
|
|
#include "PostgreSQLReplicationHandler.h"
|
2021-06-27 19:09:17 +00:00
|
|
|
#include "MaterializedPostgreSQLSettings.h"
|
2021-02-06 12:28:42 +00:00
|
|
|
|
2021-01-27 21:46:19 +00:00
|
|
|
#include <Parsers/IAST.h>
|
|
|
|
#include <Parsers/ASTCreateQuery.h>
|
|
|
|
#include <Parsers/ASTColumnDeclaration.h>
|
|
|
|
#include <Interpreters/evaluateConstantExpression.h>
|
|
|
|
#include <Interpreters/InterpreterCreateQuery.h>
|
|
|
|
#include <Interpreters/ExpressionAnalyzer.h>
|
2021-05-01 11:49:45 +00:00
|
|
|
#include <memory>
|
2021-02-06 12:28:42 +00:00
|
|
|
|
2021-01-27 15:29:28 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-09-12 12:33:54 +00:00
|
|
|
/** TODO list:
|
|
|
|
* - Actually I think we can support ddl even though logical replication does not fully support it.
|
2021-10-02 12:49:20 +00:00
|
|
|
* But some basic ddl like adding/dropping columns, changing column type, column names -- is manageable.
|
2021-09-12 12:33:54 +00:00
|
|
|
*/
|
|
|
|
|
2021-06-27 19:09:17 +00:00
|
|
|
/** Case of single MaterializedPostgreSQL table engine.
|
2021-05-03 09:52:13 +00:00
|
|
|
*
|
2021-06-27 19:09:17 +00:00
|
|
|
* A user creates a table with engine MaterializedPostgreSQL. Order by expression must be specified (needed for
|
2021-05-03 09:52:13 +00:00
|
|
|
* nested ReplacingMergeTree table). This storage owns its own replication handler, which loads table data
|
|
|
|
* from PostgreSQL into nested ReplacingMergeTree table. If table is not created, but attached, replication handler
|
2021-05-04 10:43:21 +00:00
|
|
|
* will not start loading-from-snapshot procedure, instead it will continue from last committed lsn.
|
2021-05-03 09:52:13 +00:00
|
|
|
*
|
|
|
|
* Main point: Both tables exist on disk; database engine interacts only with the main table and main table takes
|
|
|
|
* total ownership of the nested table. Nested table has name `main_table_uuid` + NESTED_SUFFIX.
|
|
|
|
*
|
|
|
|
**/
|
|
|
|
|
|
|
|
|
2021-06-27 19:09:17 +00:00
|
|
|
/** Case of MaterializedPostgreSQL database engine.
|
2021-05-02 11:50:29 +00:00
|
|
|
*
|
2021-06-27 19:09:17 +00:00
|
|
|
* MaterializedPostgreSQL table exists only in memory and acts as a wrapper for nested table, i.e. only provides an
|
2021-05-03 09:52:13 +00:00
|
|
|
* interface to work with nested table. Both tables share the same StorageID.
|
|
|
|
*
|
2021-05-04 10:43:21 +00:00
|
|
|
* Main table is never created or dropped via database method. The only way database engine interacts with
|
2021-06-27 19:09:17 +00:00
|
|
|
* MaterializedPostgreSQL table - in tryGetTable() method, a MaterializedPostgreSQL table is returned in order to wrap
|
2021-05-03 09:52:13 +00:00
|
|
|
* and redirect read requests. Set of such wrapper-tables is cached inside database engine. All other methods in
|
|
|
|
* regard to MaterializedPostgreSQL table are handled by replication handler.
|
2021-05-02 11:50:29 +00:00
|
|
|
*
|
2021-05-03 09:52:13 +00:00
|
|
|
* All database methods, apart from tryGetTable(), are devoted only to nested table.
|
2021-06-27 19:09:17 +00:00
|
|
|
* NOTE: It makes sense to allow rename method for MaterializedPostgreSQL table via database method.
|
2021-05-03 09:52:13 +00:00
|
|
|
*
|
|
|
|
* Also main table has the same InMemoryMetadata as its nested table, so if metadata of nested table changes - main table also has
|
2021-06-27 19:09:17 +00:00
|
|
|
* to update its metadata, because all read requests are passed to MaterializedPostgreSQL table and then it redirects read
|
2021-05-02 11:50:29 +00:00
|
|
|
* into nested table.
|
|
|
|
*
|
2021-06-27 19:09:17 +00:00
|
|
|
* When there is a need to update table structure, there will be created a new MaterializedPostgreSQL table with its own nested table,
|
2021-05-03 09:52:13 +00:00
|
|
|
* it will have updated table schema and all data will be loaded from scratch in the background, while previous table with outdated table
|
2021-08-27 06:30:21 +00:00
|
|
|
* structure will still serve read requests. When data is loaded, nested tables will be swapped, metadata of materialized table will be
|
2021-05-03 09:52:13 +00:00
|
|
|
* updated according to nested table.
|
2021-05-02 11:50:29 +00:00
|
|
|
*
|
|
|
|
**/
|
|
|
|
|
2022-05-03 06:43:28 +00:00
|
|
|
class StorageMaterializedPostgreSQL final : public IStorage, WithContext
|
2021-01-27 15:29:28 +00:00
|
|
|
{
|
|
|
|
public:
|
2021-08-27 06:30:21 +00:00
|
|
|
StorageMaterializedPostgreSQL(const StorageID & table_id_, ContextPtr context_,
|
|
|
|
const String & postgres_database_name, const String & postgres_table_name);
|
2021-05-02 11:50:29 +00:00
|
|
|
|
2021-08-27 06:30:21 +00:00
|
|
|
StorageMaterializedPostgreSQL(StoragePtr nested_storage_, ContextPtr context_,
|
|
|
|
const String & postgres_database_name, const String & postgres_table_name);
|
2021-02-08 23:23:51 +00:00
|
|
|
|
2022-04-19 20:47:29 +00:00
|
|
|
StorageMaterializedPostgreSQL(
|
|
|
|
const StorageID & table_id_,
|
|
|
|
bool is_attach_,
|
|
|
|
const String & remote_database_name,
|
|
|
|
const String & remote_table_name,
|
|
|
|
const postgres::ConnectionInfo & connection_info,
|
|
|
|
const StorageInMemoryMetadata & storage_metadata,
|
|
|
|
ContextPtr context_,
|
|
|
|
std::unique_ptr<MaterializedPostgreSQLSettings> replication_settings);
|
|
|
|
|
2021-06-27 19:09:17 +00:00
|
|
|
String getName() const override { return "MaterializedPostgreSQL"; }
|
2021-01-27 15:29:28 +00:00
|
|
|
|
|
|
|
void shutdown() override;
|
|
|
|
|
2021-06-27 19:09:17 +00:00
|
|
|
/// Used only for single MaterializedPostgreSQL storage.
|
2022-06-23 07:59:13 +00:00
|
|
|
void dropInnerTableIfAny(bool sync, ContextPtr local_context) override;
|
2021-04-10 17:58:09 +00:00
|
|
|
|
2021-01-27 15:29:28 +00:00
|
|
|
NamesAndTypesList getVirtuals() const override;
|
|
|
|
|
2021-07-05 19:07:56 +00:00
|
|
|
bool needRewriteQueryWithFinal(const Names & column_names) const override;
|
|
|
|
|
2022-05-20 19:49:31 +00:00
|
|
|
void read(
|
|
|
|
QueryPlan & query_plan,
|
2021-01-27 15:29:28 +00:00
|
|
|
const Names & column_names,
|
2021-07-09 03:15:41 +00:00
|
|
|
const StorageSnapshotPtr & storage_snapshot,
|
2021-01-27 15:29:28 +00:00
|
|
|
SelectQueryInfo & query_info,
|
2021-04-11 07:44:40 +00:00
|
|
|
ContextPtr context_,
|
2021-01-27 15:29:28 +00:00
|
|
|
QueryProcessingStage::Enum processed_stage,
|
|
|
|
size_t max_block_size,
|
2022-10-07 10:46:45 +00:00
|
|
|
size_t num_streams) override;
|
2021-01-27 15:29:28 +00:00
|
|
|
|
2021-05-03 21:42:06 +00:00
|
|
|
/// This method is called only from MateriaizePostgreSQL database engine, because it needs to maintain
|
|
|
|
/// an invariant: a table exists only if its nested table exists. This atomic variable is set to _true_
|
|
|
|
/// only once - when nested table is successfully created and is never changed afterwards.
|
2021-05-03 09:52:13 +00:00
|
|
|
bool hasNested() { return has_nested.load(); }
|
2021-02-08 23:23:51 +00:00
|
|
|
|
2021-12-14 13:53:47 +00:00
|
|
|
void createNestedIfNeeded(PostgreSQLTableStructurePtr table_structure, const ASTTableOverride * table_override);
|
|
|
|
|
|
|
|
ASTPtr getCreateNestedTableQuery(PostgreSQLTableStructurePtr table_structure, const ASTTableOverride * table_override);
|
|
|
|
|
|
|
|
std::shared_ptr<ASTExpressionList> getColumnsExpressionList(const NamesAndTypesList & columns) const;
|
2021-05-01 11:49:45 +00:00
|
|
|
|
2021-04-10 17:58:09 +00:00
|
|
|
StoragePtr getNested() const;
|
2021-02-08 23:23:51 +00:00
|
|
|
|
2021-04-10 17:58:09 +00:00
|
|
|
StoragePtr tryGetNested() const;
|
2021-02-08 23:23:51 +00:00
|
|
|
|
2021-06-27 19:09:17 +00:00
|
|
|
/// Create a temporary MaterializedPostgreSQL table with current_table_name + TMP_SUFFIX.
|
2021-05-03 21:42:06 +00:00
|
|
|
/// An empty wrapper is returned - it does not have inMemory metadata, just acts as an empty wrapper over
|
|
|
|
/// temporary nested, which will be created shortly after.
|
2021-05-03 09:52:13 +00:00
|
|
|
StoragePtr createTemporary() const;
|
2021-02-08 23:23:51 +00:00
|
|
|
|
2021-05-03 09:52:13 +00:00
|
|
|
ContextPtr getNestedTableContext() const { return nested_context; }
|
2021-04-08 20:39:56 +00:00
|
|
|
|
2021-05-03 17:28:54 +00:00
|
|
|
StorageID getNestedStorageID() const;
|
|
|
|
|
2021-11-05 14:25:02 +00:00
|
|
|
void set(StoragePtr nested_storage);
|
2021-05-02 11:50:29 +00:00
|
|
|
|
|
|
|
static std::shared_ptr<Context> makeNestedTableContext(ContextPtr from_context);
|
|
|
|
|
2021-07-05 19:07:56 +00:00
|
|
|
bool supportsFinal() const override { return true; }
|
|
|
|
|
2021-01-27 15:29:28 +00:00
|
|
|
private:
|
2021-02-18 18:20:52 +00:00
|
|
|
static std::shared_ptr<ASTColumnDeclaration> getMaterializedColumnsDeclaration(
|
2022-03-13 12:23:51 +00:00
|
|
|
String name, String type, UInt64 default_value);
|
2021-02-08 23:23:51 +00:00
|
|
|
|
|
|
|
ASTPtr getColumnDeclaration(const DataTypePtr & data_type) const;
|
|
|
|
|
2021-05-02 11:50:29 +00:00
|
|
|
String getNestedTableName() const;
|
2021-01-27 15:29:28 +00:00
|
|
|
|
2021-08-27 06:30:21 +00:00
|
|
|
Poco::Logger * log;
|
|
|
|
|
2021-06-27 19:09:17 +00:00
|
|
|
/// Not nullptr only for single MaterializedPostgreSQL storage, because for MaterializedPostgreSQL
|
2021-05-03 09:52:13 +00:00
|
|
|
/// database engine there is one replication handler for all tables.
|
2021-01-27 15:29:28 +00:00
|
|
|
std::unique_ptr<PostgreSQLReplicationHandler> replication_handler;
|
2021-05-03 09:52:13 +00:00
|
|
|
|
2021-06-27 19:09:17 +00:00
|
|
|
/// Distinguish between single MaterilizePostgreSQL table engine and MaterializedPostgreSQL database engine,
|
2021-05-03 09:52:13 +00:00
|
|
|
/// because table with engine MaterilizePostgreSQL acts differently in each case.
|
2021-06-27 19:09:17 +00:00
|
|
|
bool is_materialized_postgresql_database = false;
|
2021-05-03 09:52:13 +00:00
|
|
|
|
|
|
|
/// Will be set to `true` only once - when nested table was loaded by replication thread.
|
2021-06-27 19:09:17 +00:00
|
|
|
/// After that, it will never be changed. Needed for MaterializedPostgreSQL database engine
|
2021-05-03 09:52:13 +00:00
|
|
|
/// because there is an invariant - table exists only if its nested table exists, but nested
|
|
|
|
/// table is not loaded immediately. It is made atomic, because it is accessed only by database engine,
|
|
|
|
/// and updated by replication handler (only once).
|
|
|
|
std::atomic<bool> has_nested = false;
|
|
|
|
|
2021-06-27 19:09:17 +00:00
|
|
|
/// Nested table context is a copy of global context, but modified to answer isInternalQuery() == true.
|
|
|
|
/// This is needed to let database engine know whether to access nested table or a wrapper over nested (materialized table).
|
2021-06-01 17:43:15 +00:00
|
|
|
ContextMutablePtr nested_context;
|
2021-05-03 09:52:13 +00:00
|
|
|
|
2021-05-03 21:42:06 +00:00
|
|
|
/// Save nested storageID to be able to fetch it. It is set once nested is created and will be
|
|
|
|
/// updated only when nested is reloaded or renamed.
|
2021-05-03 17:28:54 +00:00
|
|
|
std::optional<StorageID> nested_table_id;
|
2021-05-03 18:38:44 +00:00
|
|
|
|
2021-06-27 19:09:17 +00:00
|
|
|
/// Needed only for the case of single MaterializedPostgreSQL storage - in order to make
|
2021-05-03 18:38:44 +00:00
|
|
|
/// delayed storage forwarding into replication handler.
|
|
|
|
String remote_table_name;
|
|
|
|
|
2021-06-27 19:09:17 +00:00
|
|
|
/// Needed only for the case of single MaterializedPostgreSQL storage, because in case of create
|
2021-05-04 10:43:21 +00:00
|
|
|
/// query (not attach) initial setup will be done immediately and error message is thrown at once.
|
2021-06-27 19:09:17 +00:00
|
|
|
/// It results in the fact: single MaterializedPostgreSQL storage is created only if its nested table is created.
|
2021-05-03 21:42:06 +00:00
|
|
|
/// In case of attach - this setup will be done in a separate thread in the background. It will also
|
|
|
|
/// be checked for nested table and attempted to load it if it does not exist for some reason.
|
2021-05-08 14:55:53 +00:00
|
|
|
bool is_attach = true;
|
2021-01-27 15:29:28 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2021-02-18 06:06:37 +00:00
|
|
|
#endif
|