mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
More correct startup on create query
This commit is contained in:
parent
8351f1db99
commit
32b7d7b750
@ -23,6 +23,7 @@ void Connection::execWithRetry(const std::function<void(pqxx::nontransaction &)>
|
|||||||
{
|
{
|
||||||
pqxx::nontransaction tx(getRef());
|
pqxx::nontransaction tx(getRef());
|
||||||
exec(tx);
|
exec(tx);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
catch (const pqxx::broken_connection & e)
|
catch (const pqxx::broken_connection & e)
|
||||||
{
|
{
|
||||||
|
@ -293,7 +293,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
|
|||||||
postgresql_replica_settings->loadFromQuery(*engine_define);
|
postgresql_replica_settings->loadFromQuery(*engine_define);
|
||||||
|
|
||||||
return std::make_shared<DatabaseMaterializedPostgreSQL>(
|
return std::make_shared<DatabaseMaterializedPostgreSQL>(
|
||||||
context, metadata_path, uuid, engine_define,
|
context, metadata_path, uuid, engine_define, create.attach,
|
||||||
database_name, postgres_database_name, connection_info,
|
database_name, postgres_database_name, connection_info,
|
||||||
std::move(postgresql_replica_settings));
|
std::move(postgresql_replica_settings));
|
||||||
}
|
}
|
||||||
|
@ -37,12 +37,14 @@ DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL(
|
|||||||
const String & metadata_path_,
|
const String & metadata_path_,
|
||||||
UUID uuid_,
|
UUID uuid_,
|
||||||
const ASTStorage * database_engine_define_,
|
const ASTStorage * database_engine_define_,
|
||||||
|
bool is_attach_,
|
||||||
const String & database_name_,
|
const String & database_name_,
|
||||||
const String & postgres_database_name,
|
const String & postgres_database_name,
|
||||||
const postgres::ConnectionInfo & connection_info_,
|
const postgres::ConnectionInfo & connection_info_,
|
||||||
std::unique_ptr<MaterializedPostgreSQLSettings> settings_)
|
std::unique_ptr<MaterializedPostgreSQLSettings> settings_)
|
||||||
: DatabaseAtomic(database_name_, metadata_path_, uuid_, "DatabaseMaterializedPostgreSQL (" + database_name_ + ")", context_)
|
: DatabaseAtomic(database_name_, metadata_path_, uuid_, "DatabaseMaterializedPostgreSQL (" + database_name_ + ")", context_)
|
||||||
, database_engine_define(database_engine_define_->clone())
|
, database_engine_define(database_engine_define_->clone())
|
||||||
|
, is_attach(is_attach_)
|
||||||
, remote_database_name(postgres_database_name)
|
, remote_database_name(postgres_database_name)
|
||||||
, connection_info(connection_info_)
|
, connection_info(connection_info_)
|
||||||
, settings(std::move(settings_))
|
, settings(std::move(settings_))
|
||||||
@ -58,6 +60,7 @@ void DatabaseMaterializedPostgreSQL::startSynchronization()
|
|||||||
database_name,
|
database_name,
|
||||||
connection_info,
|
connection_info,
|
||||||
getContext(),
|
getContext(),
|
||||||
|
is_attach,
|
||||||
settings->materialized_postgresql_max_block_size.value,
|
settings->materialized_postgresql_max_block_size.value,
|
||||||
settings->materialized_postgresql_allow_automatic_update,
|
settings->materialized_postgresql_allow_automatic_update,
|
||||||
/* is_materialized_postgresql_database = */ true,
|
/* is_materialized_postgresql_database = */ true,
|
||||||
|
@ -33,6 +33,7 @@ public:
|
|||||||
const String & metadata_path_,
|
const String & metadata_path_,
|
||||||
UUID uuid_,
|
UUID uuid_,
|
||||||
const ASTStorage * database_engine_define_,
|
const ASTStorage * database_engine_define_,
|
||||||
|
bool is_attach_,
|
||||||
const String & database_name_,
|
const String & database_name_,
|
||||||
const String & postgres_database_name,
|
const String & postgres_database_name,
|
||||||
const postgres::ConnectionInfo & connection_info,
|
const postgres::ConnectionInfo & connection_info,
|
||||||
@ -63,6 +64,7 @@ private:
|
|||||||
void startSynchronization();
|
void startSynchronization();
|
||||||
|
|
||||||
ASTPtr database_engine_define;
|
ASTPtr database_engine_define;
|
||||||
|
bool is_attach;
|
||||||
String remote_database_name;
|
String remote_database_name;
|
||||||
postgres::ConnectionInfo connection_info;
|
postgres::ConnectionInfo connection_info;
|
||||||
std::unique_ptr<MaterializedPostgreSQLSettings> settings;
|
std::unique_ptr<MaterializedPostgreSQLSettings> settings;
|
||||||
|
@ -29,12 +29,14 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler(
|
|||||||
const String & current_database_name_,
|
const String & current_database_name_,
|
||||||
const postgres::ConnectionInfo & connection_info_,
|
const postgres::ConnectionInfo & connection_info_,
|
||||||
ContextPtr context_,
|
ContextPtr context_,
|
||||||
|
bool is_attach_,
|
||||||
const size_t max_block_size_,
|
const size_t max_block_size_,
|
||||||
bool allow_automatic_update_,
|
bool allow_automatic_update_,
|
||||||
bool is_materialized_postgresql_database_,
|
bool is_materialized_postgresql_database_,
|
||||||
const String tables_list_)
|
const String tables_list_)
|
||||||
: log(&Poco::Logger::get("PostgreSQLReplicationHandler"))
|
: log(&Poco::Logger::get("PostgreSQLReplicationHandler"))
|
||||||
, context(context_)
|
, context(context_)
|
||||||
|
, is_attach(is_attach_)
|
||||||
, remote_database_name(remote_database_name_)
|
, remote_database_name(remote_database_name_)
|
||||||
, current_database_name(current_database_name_)
|
, current_database_name(current_database_name_)
|
||||||
, connection_info(connection_info_)
|
, connection_info(connection_info_)
|
||||||
@ -145,10 +147,8 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error)
|
|||||||
{
|
{
|
||||||
initial_sync();
|
initial_sync();
|
||||||
}
|
}
|
||||||
/// Replication slot depends on publication, so if replication slot exists and new
|
/// Always drop replication slot if it is CREATE query and not ATTACH.
|
||||||
/// publication was just created - drop that replication slot and start from scratch.
|
else if (!is_attach || new_publication)
|
||||||
/// TODO: tests
|
|
||||||
else if (new_publication_created)
|
|
||||||
{
|
{
|
||||||
dropReplicationSlot(tx);
|
dropReplicationSlot(tx);
|
||||||
initial_sync();
|
initial_sync();
|
||||||
@ -285,22 +285,25 @@ bool PostgreSQLReplicationHandler::isPublicationExist(pqxx::work & tx)
|
|||||||
std::string query_str = fmt::format("SELECT exists (SELECT 1 FROM pg_publication WHERE pubname = '{}')", publication_name);
|
std::string query_str = fmt::format("SELECT exists (SELECT 1 FROM pg_publication WHERE pubname = '{}')", publication_name);
|
||||||
pqxx::result result{tx.exec(query_str)};
|
pqxx::result result{tx.exec(query_str)};
|
||||||
assert(!result.empty());
|
assert(!result.empty());
|
||||||
bool publication_exists = (result[0][0].as<std::string>() == "t");
|
return result[0][0].as<std::string>() == "t";
|
||||||
|
|
||||||
if (publication_exists)
|
|
||||||
LOG_INFO(log, "Publication {} already exists. Using existing version", publication_name);
|
|
||||||
|
|
||||||
return publication_exists;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void PostgreSQLReplicationHandler::createPublicationIfNeeded(pqxx::work & tx, bool create_without_check)
|
void PostgreSQLReplicationHandler::createPublicationIfNeeded(pqxx::work & tx)
|
||||||
{
|
{
|
||||||
/// For database engine a publication can be created earlier than in startReplication().
|
auto publication_exists = isPublicationExist(tx);
|
||||||
if (new_publication_created)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (create_without_check || !isPublicationExist(tx))
|
if (!is_attach && publication_exists)
|
||||||
|
{
|
||||||
|
/// This is a case for single Materialized storage. In case of database engine this check is done in advance.
|
||||||
|
LOG_WARNING(log,
|
||||||
|
"Publication {} already exists, but it is a CREATE query, not ATTACH. Publication will be dropped",
|
||||||
|
publication_name);
|
||||||
|
|
||||||
|
connection->execWithRetry([&](pqxx::nontransaction & tx_){ dropPublication(tx_); });
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!is_attach || !publication_exists)
|
||||||
{
|
{
|
||||||
if (tables_list.empty())
|
if (tables_list.empty())
|
||||||
{
|
{
|
||||||
@ -320,8 +323,8 @@ void PostgreSQLReplicationHandler::createPublicationIfNeeded(pqxx::work & tx, bo
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
tx.exec(query_str);
|
tx.exec(query_str);
|
||||||
new_publication_created = true;
|
|
||||||
LOG_TRACE(log, "Created publication {} with tables list: {}", publication_name, tables_list);
|
LOG_TRACE(log, "Created publication {} with tables list: {}", publication_name, tables_list);
|
||||||
|
new_publication = true;
|
||||||
}
|
}
|
||||||
catch (Exception & e)
|
catch (Exception & e)
|
||||||
{
|
{
|
||||||
@ -329,6 +332,10 @@ void PostgreSQLReplicationHandler::createPublicationIfNeeded(pqxx::work & tx, bo
|
|||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
LOG_TRACE(log, "Using existing publication ({}) version", publication_name);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -401,6 +408,7 @@ void PostgreSQLReplicationHandler::dropPublication(pqxx::nontransaction & tx)
|
|||||||
{
|
{
|
||||||
std::string query_str = fmt::format("DROP PUBLICATION IF EXISTS {}", publication_name);
|
std::string query_str = fmt::format("DROP PUBLICATION IF EXISTS {}", publication_name);
|
||||||
tx.exec(query_str);
|
tx.exec(query_str);
|
||||||
|
LOG_TRACE(log, "Dropped publication: {}", publication_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -438,9 +446,11 @@ void PostgreSQLReplicationHandler::shutdownFinal()
|
|||||||
NameSet PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & connection_)
|
NameSet PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & connection_)
|
||||||
{
|
{
|
||||||
pqxx::work tx(connection_.getRef());
|
pqxx::work tx(connection_.getRef());
|
||||||
bool publication_exists_before_startup = isPublicationExist(tx);
|
|
||||||
NameSet result_tables;
|
NameSet result_tables;
|
||||||
|
|
||||||
|
bool publication_exists_before_startup = isPublicationExist(tx);
|
||||||
|
LOG_DEBUG(log, "Publication exists: {}, is attach: {}", publication_exists_before_startup, is_attach);
|
||||||
|
|
||||||
Strings expected_tables;
|
Strings expected_tables;
|
||||||
if (!tables_list.empty())
|
if (!tables_list.empty())
|
||||||
{
|
{
|
||||||
@ -453,49 +463,58 @@ NameSet PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection &
|
|||||||
|
|
||||||
if (publication_exists_before_startup)
|
if (publication_exists_before_startup)
|
||||||
{
|
{
|
||||||
if (tables_list.empty())
|
if (!is_attach)
|
||||||
{
|
{
|
||||||
/// There is no tables list, but publication already exists, then the expected behaviour
|
|
||||||
/// is to replicate the whole database. But it could be a server restart, so we can't drop it.
|
|
||||||
LOG_WARNING(log,
|
LOG_WARNING(log,
|
||||||
"Publication {} already exists and tables list is empty. Assuming publication is correct",
|
"Publication {} already exists, but it is a CREATE query, not ATTACH. Publication will be dropped",
|
||||||
publication_name);
|
publication_name);
|
||||||
|
|
||||||
result_tables = fetchPostgreSQLTablesList(tx);
|
connection->execWithRetry([&](pqxx::nontransaction & tx_){ dropPublication(tx_); });
|
||||||
}
|
}
|
||||||
/// Check tables list from publication is the same as expected tables list.
|
|
||||||
/// If not - drop publication and return expected tables list.
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
result_tables = fetchTablesFromPublication(tx);
|
if (tables_list.empty())
|
||||||
NameSet diff;
|
|
||||||
std::set_symmetric_difference(expected_tables.begin(), expected_tables.end(),
|
|
||||||
result_tables.begin(), result_tables.end(),
|
|
||||||
std::inserter(diff, diff.begin()));
|
|
||||||
if (!diff.empty())
|
|
||||||
{
|
{
|
||||||
String diff_tables;
|
|
||||||
for (const auto & table_name : diff)
|
|
||||||
{
|
|
||||||
if (!diff_tables.empty())
|
|
||||||
diff_tables += ", ";
|
|
||||||
diff_tables += table_name;
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG_WARNING(log,
|
LOG_WARNING(log,
|
||||||
"Publication {} already exists, but specified tables list differs from publication tables list in tables: {}",
|
"Publication {} already exists and tables list is empty. Assuming publication is correct.",
|
||||||
publication_name, diff_tables);
|
publication_name);
|
||||||
|
|
||||||
connection->execWithRetry([&](pqxx::nontransaction & tx_){ dropPublication(tx_); });
|
result_tables = fetchPostgreSQLTablesList(tx);
|
||||||
|
}
|
||||||
|
/// Check tables list from publication is the same as expected tables list.
|
||||||
|
/// If not - drop publication and return expected tables list.
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result_tables = fetchTablesFromPublication(tx);
|
||||||
|
NameSet diff;
|
||||||
|
std::set_symmetric_difference(expected_tables.begin(), expected_tables.end(),
|
||||||
|
result_tables.begin(), result_tables.end(),
|
||||||
|
std::inserter(diff, diff.begin()));
|
||||||
|
if (!diff.empty())
|
||||||
|
{
|
||||||
|
String diff_tables;
|
||||||
|
for (const auto & table_name : diff)
|
||||||
|
{
|
||||||
|
if (!diff_tables.empty())
|
||||||
|
diff_tables += ", ";
|
||||||
|
diff_tables += table_name;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_WARNING(log,
|
||||||
|
"Publication {} already exists, but specified tables list differs from publication tables list in tables: {}.",
|
||||||
|
publication_name, diff_tables);
|
||||||
|
|
||||||
|
connection->execWithRetry([&](pqxx::nontransaction & tx_){ dropPublication(tx_); });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
if (result_tables.empty())
|
||||||
{
|
{
|
||||||
if (!tables_list.empty())
|
if (!tables_list.empty())
|
||||||
{
|
{
|
||||||
tx.commit();
|
result_tables = NameSet(expected_tables.begin(), expected_tables.end());
|
||||||
return NameSet(expected_tables.begin(), expected_tables.end());
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -24,6 +24,7 @@ public:
|
|||||||
const String & current_database_name_,
|
const String & current_database_name_,
|
||||||
const postgres::ConnectionInfo & connection_info_,
|
const postgres::ConnectionInfo & connection_info_,
|
||||||
ContextPtr context_,
|
ContextPtr context_,
|
||||||
|
bool is_attach_,
|
||||||
const size_t max_block_size_,
|
const size_t max_block_size_,
|
||||||
bool allow_automatic_update_,
|
bool allow_automatic_update_,
|
||||||
bool is_materialized_postgresql_database_,
|
bool is_materialized_postgresql_database_,
|
||||||
@ -54,7 +55,7 @@ private:
|
|||||||
|
|
||||||
bool isPublicationExist(pqxx::work & tx);
|
bool isPublicationExist(pqxx::work & tx);
|
||||||
|
|
||||||
void createPublicationIfNeeded(pqxx::work & tx, bool create_without_check = false);
|
void createPublicationIfNeeded(pqxx::work & tx);
|
||||||
|
|
||||||
NameSet fetchTablesFromPublication(pqxx::work & tx);
|
NameSet fetchTablesFromPublication(pqxx::work & tx);
|
||||||
|
|
||||||
@ -83,6 +84,12 @@ private:
|
|||||||
Poco::Logger * log;
|
Poco::Logger * log;
|
||||||
ContextPtr context;
|
ContextPtr context;
|
||||||
|
|
||||||
|
/// If it is not attach, i.e. a create query, then if publication already exists - always drop it.
|
||||||
|
bool is_attach;
|
||||||
|
|
||||||
|
/// If new publication is created at start up - always drop replication slot if it exists.
|
||||||
|
bool new_publication = false;
|
||||||
|
|
||||||
const String remote_database_name, current_database_name;
|
const String remote_database_name, current_database_name;
|
||||||
|
|
||||||
/// Connection string and address for logs.
|
/// Connection string and address for logs.
|
||||||
@ -113,11 +120,6 @@ private:
|
|||||||
|
|
||||||
std::atomic<bool> stop_synchronization = false;
|
std::atomic<bool> stop_synchronization = false;
|
||||||
|
|
||||||
/// For database engine there are 2 places where it is checked for publication:
|
|
||||||
/// 1. to fetch tables list from already created publication when database is loaded
|
|
||||||
/// 2. at replication startup
|
|
||||||
bool new_publication_created = false;
|
|
||||||
|
|
||||||
/// MaterializedPostgreSQL tables. Used for managing all operations with its internal nested tables.
|
/// MaterializedPostgreSQL tables. Used for managing all operations with its internal nested tables.
|
||||||
MaterializedStorages materialized_storages;
|
MaterializedStorages materialized_storages;
|
||||||
|
|
||||||
|
@ -70,6 +70,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL(
|
|||||||
table_id_.database_name,
|
table_id_.database_name,
|
||||||
connection_info,
|
connection_info,
|
||||||
getContext(),
|
getContext(),
|
||||||
|
is_attach,
|
||||||
replication_settings->materialized_postgresql_max_block_size.value,
|
replication_settings->materialized_postgresql_max_block_size.value,
|
||||||
/* allow_automatic_update */ false, /* is_materialized_postgresql_database */false);
|
/* allow_automatic_update */ false, /* is_materialized_postgresql_database */false);
|
||||||
}
|
}
|
||||||
|
@ -1068,7 +1068,7 @@ class ClickHouseCluster:
|
|||||||
logging.error("Can't connect to MySQL:{}".format(errors))
|
logging.error("Can't connect to MySQL:{}".format(errors))
|
||||||
raise Exception("Cannot wait MySQL container")
|
raise Exception("Cannot wait MySQL container")
|
||||||
|
|
||||||
def wait_postgres_to_start(self, timeout=180):
|
def wait_postgres_to_start(self, timeout=260):
|
||||||
self.postgres_ip = self.get_instance_ip(self.postgres_host)
|
self.postgres_ip = self.get_instance_ip(self.postgres_host)
|
||||||
start = time.time()
|
start = time.time()
|
||||||
while time.time() - start < timeout:
|
while time.time() - start < timeout:
|
||||||
|
Loading…
Reference in New Issue
Block a user