#include <filesystem>

#include <Interpreters/DDLWorker.h>
#include <Interpreters/DDLTask.h>
#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTOptimizeQuery.h>
#include <Parsers/ASTQueryWithOnCluster.h>
#include <Parsers/ASTQueryWithTableAndOutput.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <Storages/IStorage.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <Common/setThreadName.h>
#include <Common/randomSeed.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/isLocalAddress.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Poco/Timestamp.h>
#include <common/sleep.h>
#include <common/getFQDNOrHostName.h>
#include <common/logger_useful.h>
#include <random>
#include <pcg_random.hpp>

namespace fs = std::filesystem;

namespace CurrentMetrics
{
    extern const Metric MaxDDLEntryID;
}

namespace DB
{

namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
    extern const int LOGICAL_ERROR;
    extern const int TIMEOUT_EXCEEDED;
    extern const int UNFINISHED;
    extern const int NOT_A_LEADER;
    extern const int KEEPER_EXCEPTION;
    extern const int CANNOT_ASSIGN_ALTER;
    extern const int CANNOT_ALLOCATE_MEMORY;
    extern const int MEMORY_LIMIT_EXCEEDED;
}

namespace
{

/** Caveats: usage of locks in ZooKeeper is incorrect in 99% of cases,
  * and highlights your poor understanding of distributed systems.
  *
  * It's only correct if all the operations that are performed under lock
  * are atomically checking that the lock still holds,
  * or if we ensure that these operations will be undone if the lock is lost
  * (due to ZooKeeper session loss), which is very difficult to achieve.
  *
  * It's Ok if every operation that we perform under lock is actually an operation in ZooKeeper.
  *
  * In the 1% of cases when you can correctly use a lock, the logic is complex enough, so you don't need this class.
  *
  * TLDR: Don't use this code.
  * We only have a few cases of its usage and it will be removed.
  */
class ZooKeeperLock
{
public:
    /// lock_prefix - path where the ephemeral lock node will be created
    /// lock_name - the name of the ephemeral lock node
    ZooKeeperLock(
        const zkutil::ZooKeeperPtr & zookeeper_,
        const std::string & lock_prefix_,
        const std::string & lock_name_,
        const std::string & lock_message_ = "")
    :
        zookeeper(zookeeper_),
        lock_path(fs::path(lock_prefix_) / lock_name_),
        lock_message(lock_message_),
        log(&Poco::Logger::get("zkutil::Lock"))
    {
        zookeeper->createIfNotExists(lock_prefix_, "");
    }

    ~ZooKeeperLock()
    {
        try
        {
            unlock();
        }
        catch (...)
        {
            DB::tryLogCurrentException(__PRETTY_FUNCTION__);
        }
    }

    void unlock()
    {
        Coordination::Stat stat;
        std::string dummy;
        bool result = zookeeper->tryGet(lock_path, dummy, &stat);

        if (result && stat.ephemeralOwner == zookeeper->getClientID())
            zookeeper->remove(lock_path, -1);
        else
            LOG_WARNING(log, "Lock is lost. It is normal if session was expired. Path: {}/{}", lock_path, lock_message);
    }

    bool tryLock()
    {
        std::string dummy;
        Coordination::Error code = zookeeper->tryCreate(lock_path, lock_message, zkutil::CreateMode::Ephemeral, dummy);

        if (code == Coordination::Error::ZNODEEXISTS)
        {
            return false;
        }
        else if (code == Coordination::Error::ZOK)
        {
            return true;
        }
        else
        {
            throw Coordination::Exception(code);
        }
    }

private:
    zkutil::ZooKeeperPtr zookeeper;

    std::string lock_path;
    std::string lock_message;
    Poco::Logger * log;
};

std::unique_ptr<ZooKeeperLock> createSimpleZooKeeperLock(
    const zkutil::ZooKeeperPtr & zookeeper, const String & lock_prefix, const String & lock_name, const String & lock_message)
{
    return std::make_unique<ZooKeeperLock>(zookeeper, lock_prefix, lock_name, lock_message);
}

}
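
/// A minimal usage sketch of the helper above (illustrative only; the entry path is hypothetical):
///
///     auto lock = createSimpleZooKeeperLock(zookeeper, "/clickhouse/task_queue/ddl/query-0000000001", "lock", host_fqdn_id);
///     if (lock->tryLock())
///     {
///         /// ... only ZooKeeper operations should happen here, see the caveats above ...
///         lock->unlock();
///     }
///     /// If tryLock() returns false, some other host holds the ephemeral "lock" node.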

DDLWorker::DDLWorker(int pool_size_, const std::string & zk_root_dir, const Context & context_, const Poco::Util::AbstractConfiguration * config, const String & prefix,
                     const String & logger_name)
    : context(context_)
    , log(&Poco::Logger::get(logger_name))
    , pool_size(pool_size_)
{
    CurrentMetrics::set(CurrentMetrics::MaxDDLEntryID, 0);

    if (1 < pool_size)
    {
        LOG_WARNING(log, "DDLWorker is configured to use multiple threads. "
                         "It's not recommended because queries can be reordered. Also it may cause some unknown issues to appear.");
        worker_pool = std::make_unique<ThreadPool>(pool_size);
    }

    queue_dir = zk_root_dir;
    if (queue_dir.back() == '/')
        queue_dir.resize(queue_dir.size() - 1);

    if (config)
    {
        task_max_lifetime = config->getUInt64(prefix + ".task_max_lifetime", static_cast<UInt64>(task_max_lifetime));
        cleanup_delay_period = config->getUInt64(prefix + ".cleanup_delay_period", static_cast<UInt64>(cleanup_delay_period));
        max_tasks_in_queue = std::max<UInt64>(1, config->getUInt64(prefix + ".max_tasks_in_queue", max_tasks_in_queue));

        if (config->has(prefix + ".profile"))
            context.setSetting("profile", config->getString(prefix + ".profile"));
    }

    if (context.getSettingsRef().readonly)
    {
        LOG_WARNING(log, "Distributed DDL worker is run with readonly settings, it will not be able to execute DDL queries. Set appropriate system_profile or distributed_ddl.profile to fix this.");
    }

    host_fqdn = getFQDNOrHostName();
    host_fqdn_id = Cluster::Address::toString(host_fqdn, context.getTCPPort());
}
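
/// For reference, a sketch of the server config section that feeds the constructor above
/// (the key names match the lookups in the code; the values are illustrative assumptions,
/// not mandated defaults):
///
///     <distributed_ddl>
///         <path>/clickhouse/task_queue/ddl</path>  <!-- passed in as zk_root_dir -->
///         <profile>default</profile>
///         <task_max_lifetime>604800</task_max_lifetime>
///         <cleanup_delay_period>60</cleanup_delay_period>
///         <max_tasks_in_queue>1000</max_tasks_in_queue>
///     </distributed_ddl>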

void DDLWorker::startup()
{
    main_thread = ThreadFromGlobalPool(&DDLWorker::runMainThread, this);
    cleanup_thread = ThreadFromGlobalPool(&DDLWorker::runCleanupThread, this);
}

void DDLWorker::shutdown()
{
    stop_flag = true;
    queue_updated_event->set();
    cleanup_event->set();

    if (main_thread.joinable())
        main_thread.join();
    if (cleanup_thread.joinable())
        cleanup_thread.join();

    worker_pool.reset();
}

DDLWorker::~DDLWorker()
{
    shutdown();
}

ZooKeeperPtr DDLWorker::tryGetZooKeeper() const
{
    std::lock_guard lock(zookeeper_mutex);
    return current_zookeeper;
}

ZooKeeperPtr DDLWorker::getAndSetZooKeeper()
{
    std::lock_guard lock(zookeeper_mutex);

    if (!current_zookeeper || current_zookeeper->expired())
        current_zookeeper = context.getZooKeeper();

    return current_zookeeper;
}

DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper)
{
    String node_data;
    String entry_path = fs::path(queue_dir) / entry_name;

    auto task = std::make_unique<DDLTask>(entry_name, entry_path);

    if (!zookeeper->tryGet(entry_path, node_data))
    {
        /// It is Ok that the node could be deleted just now. It means that there is no current host in the node's host list.
        out_reason = "The task was deleted";
        return {};
    }

    auto write_error_status = [&](const String & host_id, const String & error_message, const String & reason)
    {
        LOG_ERROR(log, "Cannot parse DDL task {}: {}. Will try to send error status: {}", entry_name, reason, error_message);
        createStatusDirs(entry_path, zookeeper);
        zookeeper->tryCreate(fs::path(entry_path) / "finished" / host_id, error_message, zkutil::CreateMode::Persistent);
    };

    try
    {
        /// Stage 1: parse entry
        task->entry.parse(node_data);
    }
    catch (...)
    {
        /// What should we do if we even cannot parse host name and therefore cannot properly submit execution status?
        /// We can try to create the failure node using FQDN: if it equals the host name in the cluster config, the attempt will be successful.
        /// Otherwise, that node will be ignored by DDLQueryStatusInputStream.
        out_reason = "Incorrect task format";
        write_error_status(host_fqdn_id, ExecutionStatus::fromCurrentException().serializeText(), out_reason);
        return {};
    }

    /// Stage 2: resolve host_id and check if we should execute query or not
    if (!task->findCurrentHostID(context, log))
    {
        out_reason = "There is no local address in host list";
        return {};
    }

    try
    {
        /// Stage 3.1: parse query
        task->parseQueryFromEntry(context);
        /// Stage 3.2: check cluster and find the host in cluster
        task->setClusterInfo(context, log);
    }
    catch (...)
    {
        out_reason = "Cannot parse query or obtain cluster info";
        write_error_status(task->host_id_str, ExecutionStatus::fromCurrentException().serializeText(), out_reason);
        return {};
    }

    if (zookeeper->exists(task->getFinishedNodePath()))
    {
        out_reason = "Task has been already processed";
        return {};
    }

    /// Now task is ready for execution
    return task;
}

static void filterAndSortQueueNodes(Strings & all_nodes)
{
    all_nodes.erase(std::remove_if(all_nodes.begin(), all_nodes.end(), [] (const String & s) { return !startsWith(s, "query-"); }), all_nodes.end());
    std::sort(all_nodes.begin(), all_nodes.end());
}
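
/// E.g. children {"query-0000000002", "lock", "query-0000000001"} become {"query-0000000001", "query-0000000002"}.
/// Entries are created in PersistentSequential mode (see enqueueQuery below), so after filtering,
/// lexicographic order coincides with creation order.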

void DDLWorker::scheduleTasks()
{
    LOG_DEBUG(log, "Scheduling tasks");
    auto zookeeper = tryGetZooKeeper();

    for (auto & task : current_tasks)
    {
        /// Main thread of DDLWorker was restarted, probably due to lost connection with ZooKeeper.
        /// We have some unfinished tasks. To avoid duplication of some queries, try to write execution status.
        bool status_written = task->ops.empty();
        bool task_still_exists = zookeeper->exists(task->entry_path);
        if (task->was_executed && !status_written && task_still_exists)
        {
            assert(!zookeeper->exists(task->getFinishedNodePath()));
            processTask(*task);
        }
    }

    Strings queue_nodes = zookeeper->getChildren(queue_dir, nullptr, queue_updated_event);
    filterAndSortQueueNodes(queue_nodes);
    if (queue_nodes.empty())
    {
        LOG_TRACE(log, "No tasks to schedule");
        return;
    }

    bool server_startup = current_tasks.empty();
    auto begin_node = queue_nodes.begin();

    if (!server_startup)
    {
        /// We will recheck status of last executed tasks. It's useful if main thread was just restarted.
        auto & min_task = *std::min_element(current_tasks.begin(), current_tasks.end());
        begin_node = std::upper_bound(queue_nodes.begin(), queue_nodes.end(), min_task->entry_name);
        current_tasks.clear();
    }

    assert(current_tasks.empty());

    for (auto it = begin_node; it != queue_nodes.end() && !stop_flag; ++it)
    {
        String entry_name = *it;
        LOG_TRACE(log, "Checking task {}", entry_name);

        String reason;
        auto task = initAndCheckTask(entry_name, reason, zookeeper);
        if (!task)
        {
            LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason);
            continue;
        }

        auto & saved_task = saveTask(std::move(task));

        if (worker_pool)
        {
            worker_pool->scheduleOrThrowOnError([this, &saved_task]()
            {
                setThreadName("DDLWorkerExec");
                processTask(saved_task);
            });
        }
        else
        {
            processTask(saved_task);
        }
    }
}

DDLTaskBase & DDLWorker::saveTask(DDLTaskPtr && task)
{
    current_tasks.remove_if([](const DDLTaskPtr & t) { return t->completely_processed.load(); });
    assert(current_tasks.size() <= pool_size);
    current_tasks.emplace_back(std::move(task));
    return *current_tasks.back();
}

bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task)
{
    /// Add special comment at the start of query to easily identify DDL-produced queries in query_log
    String query_prefix = "/* ddl_entry=" + task.entry_name + " */ ";
    String query_to_execute = query_prefix + query;

    ReadBufferFromString istr(query_to_execute);
    String dummy_string;
    WriteBufferFromString ostr(dummy_string);
    std::optional<CurrentThread::QueryScope> query_scope;

    try
    {
        auto query_context = task.makeQueryContext(context);
        query_scope.emplace(*query_context);
        executeQuery(istr, ostr, false, *query_context, {});

        if (auto txn = query_context->getMetadataTransaction())
        {
            if (txn->state == MetadataTransaction::CREATED)
                txn->commit();
        }
    }
    catch (const DB::Exception & e)
    {
        task.execution_status = ExecutionStatus::fromCurrentException();
        tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");

        /// We use return value of tryExecuteQuery(...) in tryExecuteQueryOnLeaderReplica(...) to determine
        /// if replica has stopped being leader and we should retry query.
        /// However, for the majority of exceptions there is no sense to retry, because most likely we will just
        /// get the same exception again. So we return false only for several special exception codes,
        /// and consider query as executed with status "failed" and return true in other cases.
        bool no_sense_to_retry = e.code() != ErrorCodes::KEEPER_EXCEPTION &&
                                 e.code() != ErrorCodes::NOT_A_LEADER &&
                                 e.code() != ErrorCodes::CANNOT_ASSIGN_ALTER &&
                                 e.code() != ErrorCodes::CANNOT_ALLOCATE_MEMORY &&
                                 e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED;
        return no_sense_to_retry;
    }
    catch (...)
    {
        task.execution_status = ExecutionStatus::fromCurrentException();
        tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");

        /// We don't know what exactly happened, but maybe it's Poco::NetException or std::bad_alloc,
        /// so we consider unknown exception as retryable error.
        return false;
    }

    task.execution_status = ExecutionStatus(0);
    LOG_DEBUG(log, "Executed query: {}", query);

    return true;
}
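
/// The "/* ddl_entry=... */" prefix makes these queries easy to find afterwards, e.g. with an
/// ad-hoc query like the following (illustrative, not part of this file):
///     SELECT query FROM system.query_log WHERE query LIKE '/* ddl_entry=%';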

void DDLWorker::processTask(DDLTaskBase & task)
{
    auto zookeeper = tryGetZooKeeper();

    LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query);

    String active_node_path = task.getActiveNodePath();
    String finished_node_path = task.getFinishedNodePath();

    String dummy;
    zookeeper->createAncestors(active_node_path);
    auto active_node = zkutil::EphemeralNodeHolder::create(active_node_path, *zookeeper, "");

    if (!task.was_executed)
    {
        /// If the table or database engine supports it, it will execute task.ops on its own in a single transaction
        /// with other zk operations (such as appending something to ReplicatedMergeTree log, or
        /// updating metadata in Replicated database), so we make a create request for finished_node_path with status "0",
        /// which means that the query executed successfully.
        task.ops.emplace_back(zkutil::makeRemoveRequest(active_node_path, -1));
        task.ops.emplace_back(zkutil::makeCreateRequest(finished_node_path, "0", zkutil::CreateMode::Persistent));

        try
        {
            String rewritten_query = queryToString(task.query);
            LOG_DEBUG(log, "Executing query: {}", rewritten_query);

            StoragePtr storage;
            if (auto * query_with_table = dynamic_cast<ASTQueryWithTableAndOutput *>(task.query.get()); query_with_table)
            {
                if (!query_with_table->table.empty())
                {
                    /// It's not CREATE DATABASE
                    auto table_id = context.tryResolveStorageID(*query_with_table, Context::ResolveOrdinary);
                    storage = DatabaseCatalog::instance().tryGetTable(table_id, context);
                }

                task.execute_on_leader = storage && taskShouldBeExecutedOnLeader(task.query, storage) && !task.is_circular_replicated;
            }

            if (task.execute_on_leader)
            {
                tryExecuteQueryOnLeaderReplica(task, storage, rewritten_query, task.entry_path, zookeeper);
            }
            else
            {
                storage.reset();
                tryExecuteQuery(rewritten_query, task);
            }
        }
        catch (const Coordination::Exception &)
        {
            throw;
        }
        catch (...)
        {
            tryLogCurrentException(log, "An error occurred before execution of DDL task: ");
            task.execution_status = ExecutionStatus::fromCurrentException("An error occurred before execution");
        }

        if (task.execution_status.code != 0)
        {
            bool status_written_by_table_or_db = task.ops.empty();
            if (status_written_by_table_or_db)
            {
                throw Exception(ErrorCodes::UNFINISHED, "Unexpected error: {}", task.execution_status.serializeText());
            }
            else
            {
                /// task.ops were not executed by the table or database engine, so DDLWorker is responsible for
                /// writing query execution status into ZooKeeper.
                task.ops.emplace_back(zkutil::makeSetRequest(finished_node_path, task.execution_status.serializeText(), -1));
            }
        }

        /// We need to distinguish ZK errors that occurred before and after query execution.
        task.was_executed = true;
    }

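    /// Update the MaxDDLEntryID metric. Entry names are sequential ("query-" + number), so we parse
    /// the numeric suffix and advance max_id with a CAS loop: with pool_size > 1 several worker
    /// threads may finish tasks concurrently, and an unconditional store could move the maximum backwards.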
    {
        DB::ReadBufferFromString in(task.entry_name);
        DB::assertString("query-", in);
        UInt64 id;
        readText(id, in);
        auto prev_id = max_id.load(std::memory_order_relaxed);
        while (prev_id < id)
        {
            if (max_id.compare_exchange_weak(prev_id, id))
            {
                CurrentMetrics::set(CurrentMetrics::MaxDDLEntryID, id);
                break;
            }
        }
    }

    /// FIXME: if server fails right here, the task will be executed twice. We need WAL here.
    /// If ZooKeeper connection is lost here, we will try again to write query status.

    bool status_written = task.ops.empty();
    if (!status_written)
    {
        zookeeper->multi(task.ops);
        active_node->reset();
        task.ops.clear();
    }

    task.completely_processed = true;
}

bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr ast_ddl, const StoragePtr storage)
{
    /// Pure DROP queries have to be executed on each node separately
    if (auto * query = ast_ddl->as<ASTDropQuery>(); query && query->kind != ASTDropQuery::Kind::Truncate)
        return false;

    if (!ast_ddl->as<ASTAlterQuery>() && !ast_ddl->as<ASTOptimizeQuery>() && !ast_ddl->as<ASTDropQuery>())
        return false;

    if (auto * alter = ast_ddl->as<ASTAlterQuery>())
    {
        // Setting alters should be executed on all replicas
        if (alter->isSettingsAlter())
            return false;

        if (alter->isFreezeAlter())
            return false;
    }

    return storage->supportsReplication();
}
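
/// For example (illustrative queries): "ALTER TABLE t UPDATE ..." on a ReplicatedMergeTree table is
/// replicated by the table itself, so a single leader per shard should execute it, while
/// "DROP TABLE t" only affects local state and therefore must run on every replica separately.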

bool DDLWorker::tryExecuteQueryOnLeaderReplica(
    DDLTaskBase & task,
    StoragePtr storage,
    const String & rewritten_query,
    const String & /*node_path*/,
    const ZooKeeperPtr & zookeeper)
{
    StorageReplicatedMergeTree * replicated_storage = dynamic_cast<StorageReplicatedMergeTree *>(storage.get());

    /// If we ever develop a new replicated storage
    if (!replicated_storage)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Storage type '{}' is not supported by distributed DDL", storage->getName());

    String shard_path = task.getShardNodePath();
    String is_executed_path = fs::path(shard_path) / "executed";
    String tries_to_execute_path = fs::path(shard_path) / "tries_to_execute";
    zookeeper->createAncestors(fs::path(shard_path) / ""); /* appends "/" at the end of shard_path */

    /// Leader replica creates is_executed_path node on successful query execution.
    /// We will remove create_shard_flag from zk operations list, if current replica is just waiting for leader to execute the query.
    auto create_shard_flag = zkutil::makeCreateRequest(is_executed_path, task.host_id_str, zkutil::CreateMode::Persistent);

    /// The node either already exists, or we will create it, or we will get an exception.
    zookeeper->tryCreate(tries_to_execute_path, "0", zkutil::CreateMode::Persistent);

    static constexpr int MAX_TRIES_TO_EXECUTE = 3;
    static constexpr int MAX_EXECUTION_TIMEOUT_SEC = 3600;

    String executed_by;

    zkutil::EventPtr event = std::make_shared<Poco::Event>();
    /// We must use exists request instead of get, because zookeeper will not set up the event
    /// for a nonexistent node after a get request
    if (zookeeper->exists(is_executed_path, nullptr, event))
    {
        LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path));
        return true;
    }

    pcg64 rng(randomSeed());

    auto lock = createSimpleZooKeeperLock(zookeeper, shard_path, "lock", task.host_id_str);

    Stopwatch stopwatch;

    bool executed_by_us = false;
    bool executed_by_other_leader = false;

    /// Defensive programming. One hour is more than enough to execute almost all DDL queries.
    /// If it is a very long query like ALTER DELETE for a huge table, it will still be executed,
    /// but the DDL worker can continue processing other queries.
    while (stopwatch.elapsedSeconds() <= MAX_EXECUTION_TIMEOUT_SEC)
    {
        StorageReplicatedMergeTree::Status status;
        replicated_storage->getStatus(status);

        /// Any replica which is leader tries to take the lock
        if (status.is_leader && lock->tryLock())
        {
            /// In replicated merge tree we can have multiple leaders. So we can
            /// be "leader" and have taken the lock, but another "leader" replica may have
            /// already executed this task.
            if (zookeeper->tryGet(is_executed_path, executed_by))
            {
                LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, executed_by);
                executed_by_other_leader = true;
                break;
            }

            /// Checking and incrementing counter exclusively.
            size_t counter = parse<int>(zookeeper->get(tries_to_execute_path));
            if (counter > MAX_TRIES_TO_EXECUTE)
                break;

            zookeeper->set(tries_to_execute_path, toString(counter + 1));

            task.ops.push_back(create_shard_flag);
            SCOPE_EXIT({ if (!executed_by_us && !task.ops.empty()) task.ops.pop_back(); });

            /// If the leader unexpectedly changes, this method will return false
            /// and on the next iteration the new leader will take the lock
            if (tryExecuteQuery(rewritten_query, task))
            {
                executed_by_us = true;
                break;
            }

            lock->unlock();
        }

        /// Waiting for someone who will execute query and change is_executed_path node
        if (event->tryWait(std::uniform_int_distribution<int>(0, 1000)(rng)))
        {
            LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path));
            executed_by_other_leader = true;
            break;
        }
        else
        {
            String tries_count;
            zookeeper->tryGet(tries_to_execute_path, tries_count);
            if (parse<int>(tries_count) > MAX_TRIES_TO_EXECUTE)
            {
                /// Nobody will try to execute query again
                LOG_WARNING(log, "Maximum retries count for task {} exceeded, cannot execute replicated DDL query", task.entry_name);
                break;
            }
            else
            {
                /// Will try to wait or execute
                LOG_TRACE(log, "Task {} still not executed, will try to wait for it or execute ourselves, tries count {}", task.entry_name, tries_count);
            }
        }
    }

    assert(!(executed_by_us && executed_by_other_leader));

    /// Not executed by leader so was not executed at all
    if (!executed_by_us && !executed_by_other_leader)
    {
        /// If we failed with timeout
        if (stopwatch.elapsedSeconds() >= MAX_EXECUTION_TIMEOUT_SEC)
        {
            LOG_WARNING(log, "Task {} was not executed by anyone, maximum timeout {} seconds exceeded", task.entry_name, MAX_EXECUTION_TIMEOUT_SEC);
            task.execution_status = ExecutionStatus(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot execute replicated DDL query, timeout exceeded");
        }
        else /// If we exceeded the number of tries
        {
            LOG_WARNING(log, "Task {} was not executed by anyone, maximum number of retries exceeded", task.entry_name);
            task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, maximum retries exceeded");
        }
        return false;
    }

    if (executed_by_us)
        LOG_DEBUG(log, "Task {} executed by current replica", task.entry_name);
    else // if (executed_by_other_leader)
        LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path));

    return true;
}
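
/// Sketch of the per-shard znodes used above (the actual prefix comes from task.getShardNodePath()):
///     <shard_path>/executed          -- created by/for the replica that executed the query
///     <shard_path>/tries_to_execute  -- attempt counter, capped by MAX_TRIES_TO_EXECUTE
///     <shard_path>/lock              -- ephemeral lock of the leader currently trying to execute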

void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper)
{
    LOG_DEBUG(log, "Cleaning queue");

    Strings queue_nodes = zookeeper->getChildren(queue_dir);
    filterAndSortQueueNodes(queue_nodes);

    size_t num_outdated_nodes = (queue_nodes.size() > max_tasks_in_queue) ? queue_nodes.size() - max_tasks_in_queue : 0;
    auto first_non_outdated_node = queue_nodes.begin() + num_outdated_nodes;

    for (auto it = queue_nodes.cbegin(); it < queue_nodes.cend(); ++it)
    {
        if (stop_flag)
            return;

        String node_name = *it;
        String node_path = fs::path(queue_dir) / node_name;
        String lock_path = fs::path(node_path) / "lock";

        Coordination::Stat stat;
        String dummy;

        try
        {
            /// Already deleted
            if (!zookeeper->exists(node_path, &stat))
                continue;

            /// Delete node if its lifetime is expired (according to task_max_lifetime parameter)
            constexpr UInt64 zookeeper_time_resolution = 1000;
            Int64 zookeeper_time_seconds = stat.ctime / zookeeper_time_resolution;
            bool node_lifetime_is_expired = zookeeper_time_seconds + task_max_lifetime < current_time_seconds;

            /// If there are too many nodes in the task queue (> max_tasks_in_queue), delete the oldest ones
            bool node_is_outside_max_window = it < first_non_outdated_node;

            if (!node_lifetime_is_expired && !node_is_outside_max_window)
                continue;

            /// Skip if there are active nodes (it is a weak guard)
            if (zookeeper->exists(fs::path(node_path) / "active", &stat) && stat.numChildren > 0)
            {
                LOG_INFO(log, "Task {} should be deleted, but there are active workers. Skipping it.", node_name);
                continue;
            }

            /// Usage of the lock is not necessary now (tryRemoveRecursive correctly removes the node in the presence of concurrent cleaners)
            /// But the lock will be required to implement system.distributed_ddl_queue table
            auto lock = createSimpleZooKeeperLock(zookeeper, node_path, "lock", host_fqdn_id);
            if (!lock->tryLock())
            {
                LOG_INFO(log, "Task {} should be deleted, but it is locked. Skipping it.", node_name);
                continue;
            }

            if (node_lifetime_is_expired)
                LOG_INFO(log, "Lifetime of task {} is expired, deleting it", node_name);
            else if (node_is_outside_max_window)
                LOG_INFO(log, "Task {} is outdated, deleting it", node_name);

            /// Deleting
            {
                Strings children = zookeeper->getChildren(node_path);
                for (const String & child : children)
                {
                    if (child != "lock")
                        zookeeper->tryRemoveRecursive(fs::path(node_path) / child);
                }

                /// Remove the lock node and its parent atomically
                Coordination::Requests ops;
                ops.emplace_back(zkutil::makeRemoveRequest(lock_path, -1));
                ops.emplace_back(zkutil::makeRemoveRequest(node_path, -1));
                zookeeper->multi(ops);
            }
        }
        catch (...)
        {
            LOG_INFO(log, "An error occurred while checking and cleaning task {} from queue: {}", node_name, getCurrentExceptionMessage(false));
        }
    }
}

/// Try to create nonexistent "status" dirs for a node
void DDLWorker::createStatusDirs(const std::string & node_path, const ZooKeeperPtr & zookeeper)
{
    Coordination::Requests ops;
    {
        Coordination::CreateRequest request;
        request.path = fs::path(node_path) / "active";
        ops.emplace_back(std::make_shared<Coordination::CreateRequest>(std::move(request)));
    }
    {
        Coordination::CreateRequest request;
        request.path = fs::path(node_path) / "finished";
        ops.emplace_back(std::make_shared<Coordination::CreateRequest>(std::move(request)));
    }

    Coordination::Responses responses;
    Coordination::Error code = zookeeper->tryMulti(ops, responses);
    if (code != Coordination::Error::ZOK
        && code != Coordination::Error::ZNODEEXISTS)
        throw Coordination::Exception(code);
}
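
/// After a successful call the task entry has the following children:
///     <node_path>/active    -- ephemeral nodes of hosts that are currently executing the task
///     <node_path>/finished  -- persistent per-host nodes holding the execution status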

String DDLWorker::enqueueQuery(DDLLogEntry & entry)
{
    if (entry.hosts.empty())
        throw Exception("Empty host list in a distributed DDL task", ErrorCodes::LOGICAL_ERROR);

    auto zookeeper = getAndSetZooKeeper();

    String query_path_prefix = fs::path(queue_dir) / "query-";
    zookeeper->createAncestors(query_path_prefix);

    String node_path = zookeeper->create(query_path_prefix, entry.toString(), zkutil::CreateMode::PersistentSequential);

    /// We cannot create status dirs in a single transaction with the previous request,
    /// because we don't know node_path until the previous request is executed.
    /// So we try to create status dirs here, or later when we execute the entry.
    try
    {
        createStatusDirs(node_path, zookeeper);
    }
    catch (...)
    {
        LOG_INFO(log, "An error occurred while creating auxiliary ZooKeeper directories in {}. They will be created later. Error: {}", node_path, getCurrentExceptionMessage(true));
    }

    return node_path;
}
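
/// Resulting queue layout in ZooKeeper (the sequential suffix is assigned by ZooKeeper;
/// the number shown is illustrative):
///     <queue_dir>/query-0000000123           -- serialized DDLLogEntry
///     <queue_dir>/query-0000000123/active
///     <queue_dir>/query-0000000123/finished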

void DDLWorker::initializeMainThread()
{
    assert(!initialized);
    assert(max_id == 0);
    assert(current_tasks.empty());
    setThreadName("DDLWorker");
    LOG_DEBUG(log, "Started DDLWorker thread");

    while (!stop_flag)
    {
        try
        {
            auto zookeeper = getAndSetZooKeeper();
            zookeeper->createAncestors(fs::path(queue_dir) / "");
            initialized = true;
            return;
        }
        catch (const Coordination::Exception & e)
        {
            if (!Coordination::isHardwareError(e.code))
            {
                /// A logical error.
                LOG_ERROR(log, "ZooKeeper error: {}. Failed to start DDLWorker.", getCurrentExceptionMessage(true));
                assert(false);  /// Catch such failures in tests with debug build
            }

            tryLogCurrentException(__PRETTY_FUNCTION__);
        }
        catch (...)
        {
            tryLogCurrentException(log, "Cannot initialize DDL queue.");
        }

        /// Avoid busy loop when ZooKeeper is not available.
        sleepForSeconds(5);
    }
}

void DDLWorker::runMainThread()
{
    auto reset_state = [&]()
    {
        initialized = false;
        /// It will wait for all threads in pool to finish and will not rethrow exceptions (if any).
        /// We create a new thread pool to forget previous exceptions.
        if (1 < pool_size)
            worker_pool = std::make_unique<ThreadPool>(pool_size);
        /// Clear other in-memory state, as if the server has just started.
        current_tasks.clear();
        max_id = 0;
    };

    setThreadName("DDLWorker");
    LOG_DEBUG(log, "Starting DDLWorker thread");

    while (!stop_flag)
    {
        try
        {
            /// Reinitialize DDLWorker state (including ZooKeeper connection) if required
            if (!initialized)
            {
                initializeMainThread();
                LOG_DEBUG(log, "Initialized DDLWorker thread");
            }

            cleanup_event->set();
            scheduleTasks();

            LOG_DEBUG(log, "Waiting for queue updates");
            queue_updated_event->wait();
        }
        catch (const Coordination::Exception & e)
        {
            if (Coordination::isHardwareError(e.code))
            {
                initialized = false;
                LOG_INFO(log, "Lost ZooKeeper connection, will try to connect again: {}", getCurrentExceptionMessage(true));
            }
            else if (e.code == Coordination::Error::ZNONODE)
            {
                // TODO add comment: when does it happen and why is it expected?
                // Maybe because the cleanup thread may remove nodes inside a queue entry that is currently being processed.
                LOG_ERROR(log, "ZooKeeper error: {}", getCurrentExceptionMessage(true));
            }
            else
            {
                LOG_ERROR(log, "Unexpected ZooKeeper error, will try to restart main thread: {}", getCurrentExceptionMessage(true));
                reset_state();
            }
            sleepForSeconds(1);
        }
        catch (...)
        {
            tryLogCurrentException(log, "Unexpected error, will try to restart main thread:");
            reset_state();
            sleepForSeconds(5);
        }
    }
}

void DDLWorker::runCleanupThread()
{
    setThreadName("DDLWorkerClnr");
    LOG_DEBUG(log, "Started DDLWorker cleanup thread");

    Int64 last_cleanup_time_seconds = 0;
    while (!stop_flag)
    {
        try
        {
            cleanup_event->wait();
            if (stop_flag)
                break;

            Int64 current_time_seconds = Poco::Timestamp().epochTime();
            if (last_cleanup_time_seconds && current_time_seconds < last_cleanup_time_seconds + cleanup_delay_period)
            {
                LOG_TRACE(log, "Too early to clean queue, will do it later.");
                continue;
            }

            /// ZooKeeper connection is recovered by main thread. We will wait for it on cleanup_event.
            auto zookeeper = tryGetZooKeeper();
            if (zookeeper->expired())
                continue;

            cleanupQueue(current_time_seconds, zookeeper);
            last_cleanup_time_seconds = current_time_seconds;
        }
        catch (...)
        {
            tryLogCurrentException(log, __PRETTY_FUNCTION__);
        }
    }
}

}