mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Speedup insert data with vector similarity index by add data to index parallel
This commit is contained in:
parent
26f21a0d94
commit
cf12e3924f
@ -178,6 +178,9 @@
|
||||
M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \
|
||||
M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \
|
||||
M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \
|
||||
M(UsearchUpdateThreads, "Number of threads in the Aggregator thread pool.") \
|
||||
M(UsearchUpdateThreadsActive, "Number of threads in the Aggregator thread pool running a task.") \
|
||||
M(UsearchUpdateThreadsScheduled, "Number of queued or active jobs in the Aggregator thread pool.") \
|
||||
\
|
||||
M(DiskPlainRewritableAzureDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for AzureObjectStorage.") \
|
||||
M(DiskPlainRewritableLocalDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for LocalObjectStorage.") \
|
||||
|
@ -4,16 +4,20 @@
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Common/BitHelpers.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event USearchAddCount;
|
||||
@ -24,6 +28,13 @@ namespace ProfileEvents
|
||||
extern const Event USearchSearchComputedDistances;
|
||||
}
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric UsearchUpdateThreads;
|
||||
extern const Metric UsearchUpdateThreadsActive;
|
||||
extern const Metric UsearchUpdateThreadsScheduled;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -273,17 +284,42 @@ void updateImpl(const ColumnArray * column_array, const ColumnArray::Offsets & c
|
||||
if (!index->try_reserve(roundUpToPowerOfTwoOrZero(index->size() + rows)))
|
||||
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for vector similarity index");
|
||||
|
||||
for (size_t row = 0; row < rows; ++row)
|
||||
size_t max_threads = Context::getGlobalContextInstance()->getSettingsRef().max_threads;
|
||||
max_threads = max_threads > 0 ? max_threads : getNumberOfPhysicalCPUCores();
|
||||
|
||||
auto thread_pool = std::make_unique<ThreadPool>(
|
||||
CurrentMetrics::UsearchUpdateThreads,
|
||||
CurrentMetrics::UsearchUpdateThreadsActive,
|
||||
CurrentMetrics::UsearchUpdateThreadsScheduled,
|
||||
max_threads);
|
||||
|
||||
auto add_vector_to_index = [&](USearchIndex::vector_key_t key, size_t row, ThreadGroupPtr thread_group)
|
||||
{
|
||||
if (auto result = index->add(static_cast<USearchIndex::vector_key_t>(index->size()), &column_array_data_float_data[column_array_offsets[row - 1]]); !result)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Could not add data to vector similarity index. Error: {}", String(result.error.release()));
|
||||
SCOPE_EXIT_SAFE(if (thread_group) CurrentThread::detachFromGroupIfNotDetached(););
|
||||
|
||||
if (thread_group)
|
||||
CurrentThread::attachToGroupIfDetached(thread_group);
|
||||
|
||||
if (auto result = index->add(key, &column_array_data_float_data[column_array_offsets[row - 1]]); !result)
|
||||
throw Exception(
|
||||
ErrorCodes::INCORRECT_DATA, "Could not add data to vector similarity index. Error: {}", String(result.error.release()));
|
||||
else
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::USearchAddCount);
|
||||
ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, result.visited_members);
|
||||
ProfileEvents::increment(ProfileEvents::USearchAddComputedDistances, result.computed_distances);
|
||||
}
|
||||
};
|
||||
|
||||
size_t current_index_size = index->size();
|
||||
|
||||
for (size_t row = 0; row < rows; ++row)
|
||||
{
|
||||
auto key = static_cast<USearchIndex::vector_key_t>(current_index_size + row);
|
||||
auto task = [group = CurrentThread::getGroup(), &add_vector_to_index, key, row] { add_vector_to_index(key, row, group); };
|
||||
thread_pool->scheduleOrThrowOnError(task);
|
||||
}
|
||||
thread_pool->wait();
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user