Fix a race between Distributed table creation and INSERT into it

Initializing the queues for pending on-disk files for async INSERT cannot
be done after the table has been attached and become visible to the user,
since it initializes the per-table counter that is used during INSERT.

Without this fix there is a window during which this counter is not yet
initialized, so it starts from the beginning, which can lead to a
CANNOT_LINK error:

    Destination file /data/clickhouse/data/urls_v1/urls_in/shard6_replica1/13129817.bin is already exist and have different inode

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2023-01-23 09:51:32 +01:00
parent c3d545b64a
commit 51019bc9f3
2 changed files with 5 additions and 3 deletions

View File

@ -396,6 +396,8 @@ StorageDistributed::StorageDistributed(
if (num_local_shards && (remote_database.empty() || remote_database == id_.database_name) && remote_table == id_.table_name)
throw Exception("Distributed table " + id_.table_name + " looks at itself", ErrorCodes::INFINITE_LOOP);
}
initializeFromDisk();
}
@ -1084,8 +1086,7 @@ void StorageDistributed::alter(const AlterCommands & params, ContextPtr local_co
setInMemoryMetadata(new_metadata);
}
void StorageDistributed::startup()
void StorageDistributed::initializeFromDisk()
{
if (!storage_policy)
return;
@ -1134,6 +1135,7 @@ void StorageDistributed::shutdown()
cluster_nodes_data.clear();
LOG_DEBUG(log, "Background threads for async INSERT joined");
}
void StorageDistributed::drop()
{
// Some INSERT in-between shutdown() and drop() can call

View File

@ -133,7 +133,7 @@ public:
/// the structure of the sub-table is not checked
void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override;
void startup() override;
void initializeFromDisk();
void shutdown() override;
void flush() override;
void drop() override;