Merge pull request #73578 from ClickHouse/backport/24.10/73517

Backport #73517 to 24.10: Fix race in `MergeTreeIndexVectorSimilarity`
This commit is contained in:
Antonio Andelic 2024-12-19 11:05:49 +01:00 committed by GitHub
commit a89bdc375c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -7,6 +7,7 @@
#include <Common/formatReadable.h>
#include <Common/getNumberOfCPUCoresToUse.h>
#include <Common/logger_useful.h>
#include <Common/threadPoolCallbackRunner.h>
#include <Common/typeid_cast.h>
#include <Core/Field.h>
#include <Core/ServerSettings.h>
@ -292,16 +293,9 @@ void updateImpl(const ColumnArray * column_array, const ColumnArray::Offsets & c
/// indexes are built simultaneously (e.g. multiple merges run at the same time).
auto & thread_pool = Context::getGlobalContextInstance()->getBuildVectorSimilarityIndexThreadPool();
auto add_vector_to_index = [&](USearchIndex::vector_key_t key, size_t row, ThreadGroupPtr thread_group)
ThreadPoolCallbackRunnerLocal<void> runner(thread_pool, "VectorSimIndex");
auto add_vector_to_index = [&](USearchIndex::vector_key_t key, size_t row)
{
SCOPE_EXIT_SAFE(
if (thread_group)
CurrentThread::detachFromGroupIfNotDetached();
);
if (thread_group)
CurrentThread::attachToGroupIfDetached(thread_group);
/// add is thread-safe
auto result = index->add(key, &column_array_data_float_data[column_array_offsets[row - 1]]);
if (!result)
@ -319,11 +313,10 @@ void updateImpl(const ColumnArray * column_array, const ColumnArray::Offsets & c
for (size_t row = 0; row < rows; ++row)
{
auto key = static_cast<USearchIndex::vector_key_t>(index_size + row);
auto task = [group = CurrentThread::getGroup(), &add_vector_to_index, key, row] { add_vector_to_index(key, row, group); };
thread_pool.scheduleOrThrowOnError(task);
runner([&add_vector_to_index, key, row] { add_vector_to_index(key, row); });
}
thread_pool.wait();
runner.waitForAllToFinishAndRethrowFirstError();
}
}