Merge a2f9329e18 into 733c57dae7

2024-09-19 16:20:50 +00:00 · 2024-09-16 06:05:55 +00:00
32 changed files with 51 additions and 328 deletions
--- a/.github/actions/debug/action.yml
+++ b/.github/actions/debug/action.yml
@ -4,31 +4,15 @@ description: Prints workflow debug info
 runs:
  using: "composite"
  steps:
-    - name: Envs, event.json and contexts
+    - name: Print envs
      shell: bash
      run: |
-          echo '::group::Environment variables'
-          env | sort
-          echo '::endgroup::'
-
-          echo '::group::event.json'
+          echo "::group::Envs"
+          env
+          echo "::endgroup::"
+    - name: Print Event.json
+      shell: bash
+      run: |
+          echo "::group::Event.json"
          python3 -m json.tool "$GITHUB_EVENT_PATH"
-          echo '::endgroup::'
-
-          cat << 'EOF'
-          ::group::github context
-          ${{ toJSON(github) }}
-          ::endgroup::
-
-          ::group::env context
-          ${{ toJSON(env) }}
-          ::endgroup::
-
-          ::group::runner context
-          ${{ toJSON(runner) }}
-          ::endgroup::
-
-          ::group::job context
-          ${{ toJSON(job) }}
-          ::endgroup::
-          EOF
+          echo "::endgroup::"
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@ -27,8 +27,6 @@ jobs:
          clear-repository: true # to ensure correct digests
          fetch-depth: 0 # to get version
          filter: tree:0
-      - name: Debug Info
-        uses: ./.github/actions/debug
      - name: Labels check
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
--- a/.github/workflows/cherry_pick.yml
+++ b/.github/workflows/cherry_pick.yml
@ -33,8 +33,6 @@ jobs:
          clear-repository: true
          token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
          fetch-depth: 0
-      - name: Debug Info
-        uses: ./.github/actions/debug
      - name: Cherry pick
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
--- a/.github/workflows/create_release.yml
+++ b/.github/workflows/create_release.yml
@ -56,13 +56,13 @@ jobs:
      GH_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
    runs-on: [self-hosted, release-maker]
    steps:
+      - name: DebugInfo
+        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
          fetch-depth: 0
-      - name: Debug Info
-        uses: ./.github/actions/debug
      - name: Prepare Release Info
        shell: bash
        run: |
--- a/.github/workflows/docker_test_images.yml
+++ b/.github/workflows/docker_test_images.yml
@ -11,7 +11,6 @@ name: Build docker images
        required: false
        type: boolean
        default: false
-
 jobs:
  DockerBuildAarch64:
    runs-on: [self-hosted, style-checker-aarch64]
--- a/.github/workflows/jepsen.yml
+++ b/.github/workflows/jepsen.yml
@ -8,28 +8,27 @@ on: # yamllint disable-line rule:truthy
  schedule:
    - cron: '0 */6 * * *'
  workflow_dispatch:
-
 jobs:
  RunConfig:
    runs-on: [self-hosted, style-checker-aarch64]
    outputs:
      data: ${{ steps.runconfig.outputs.CI_DATA }}
    steps:
+      - name: DebugInfo
+        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true # to ensure correct digests
          fetch-depth: 0 # to get version
          filter: tree:0
-      - name: Debug Info
-        uses: ./.github/actions/debug
      - name: PrepareRunConfig
        id: runconfig
        run: |
          echo "::group::configure CI run"
          python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --workflow "$GITHUB_WORKFLOW" --outfile ${{ runner.temp }}/ci_run_data.json
          echo "::endgroup::"
-
+          
          echo "::group::CI run configure results"
          python3 -m json.tool ${{ runner.temp }}/ci_run_data.json
          echo "::endgroup::"
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@ -15,14 +15,14 @@ jobs:
    outputs:
      data: ${{ steps.runconfig.outputs.CI_DATA }}
    steps:
+      - name: DebugInfo
+        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true # to ensure correct digests
          fetch-depth: 0 # to get version
          filter: tree:0
-      - name: Debug Info
-        uses: ./.github/actions/debug
      - name: Merge sync PR
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
--- a/.github/workflows/merge_queue.yml
+++ b/.github/workflows/merge_queue.yml
@ -14,14 +14,14 @@ jobs:
    outputs:
      data: ${{ steps.runconfig.outputs.CI_DATA }}
    steps:
+      - name: DebugInfo
+        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true # to ensure correct digests
          fetch-depth: 0 # to get a version
          filter: tree:0
-      - name: Debug Info
-        uses: ./.github/actions/debug
      - name: Cancel PR workflow
        run: |
          python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@ -15,14 +15,14 @@ jobs:
    outputs:
      data: ${{ steps.runconfig.outputs.CI_DATA }}
    steps:
+      - name: DebugInfo
+        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true # to ensure correct digests
          fetch-depth: 0 # to get version
          filter: tree:0
-      - name: Debug Info
-        uses: ./.github/actions/debug
      - name: PrepareRunConfig
        id: runconfig
        run: |
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@ -25,14 +25,14 @@ jobs:
    outputs:
      data: ${{ steps.runconfig.outputs.CI_DATA }}
    steps:
+      - name: DebugInfo
+        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true # to ensure correct digests
          fetch-depth: 0 # to get a version
          filter: tree:0
-      - name: Debug Info
-        uses: ./.github/actions/debug
      - name: Cancel previous Sync PR workflow
        run: |
          python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@ -24,8 +24,6 @@ jobs:
          clear-repository: true # to ensure correct digests
          fetch-depth: 0 # to get version
          filter: tree:0
-      - name: Debug Info
-        uses: ./.github/actions/debug
      - name: Labels check
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
--- a/.github/workflows/reusable_simple_job.yml
+++ b/.github/workflows/reusable_simple_job.yml
@ -62,6 +62,8 @@ jobs:
    env:
      GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}
    steps:
+      - name: DebugInfo
+        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
@ -70,8 +72,6 @@ jobs:
          submodules: ${{inputs.submodules}}
          fetch-depth: ${{inputs.checkout_depth}}
          filter: tree:0
-      - name: Debug Info
-        uses: ./.github/actions/debug
      - name: Set build envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
--- a/docs/en/engines/database-engines/index.md
+++ b/docs/en/engines/database-engines/index.md
@ -13,17 +13,16 @@ Here is a complete list of available database engines. Follow the links for more

 - [Atomic](../../engines/database-engines/atomic.md)

- [Lazy](../../engines/database-engines/lazy.md)
-
- [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md)
+- [MySQL](../../engines/database-engines/mysql.md)

 - [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md)

- [MySQL](../../engines/database-engines/mysql.md)
+- [Lazy](../../engines/database-engines/lazy.md)

 - [PostgreSQL](../../engines/database-engines/postgresql.md)

+- [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md)
+
 - [Replicated](../../engines/database-engines/replicated.md)

 - [SQLite](../../engines/database-engines/sqlite.md)
-
--- a/docs/en/engines/table-engines/mergetree-family/annindexes.md
+++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md
@ -107,10 +107,6 @@ The vector similarity index currently does not work with per-table, non-default
 [here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
 :::

-Vector index creation is known to be slow. To speed the process up, index creation can be parallelized. The maximum number of threads can be
-configured using server configuration
-setting [max_build_vector_similarity_index_thread_pool_size](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters_max_build_vector_similarity_index_thread_pool_size).
-
 ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary
 tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively when are far more read requests than write
 requests.
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@ -491,14 +491,6 @@ Type: Double

 Default: 0.9

-## max_build_vector_similarity_index_thread_pool_size {#server_configuration_parameters_max_build_vector_similarity_index_thread_pool_size}
-
-The maximum number of threads to use for building vector indexes. 0 means all cores.
-
-Type: UInt64
-
-Default: 16
-
 ## cgroups_memory_usage_observer_wait_time

 Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups. (see
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@ -178,9 +178,6 @@
    M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \
    M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \
    M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \
-    M(BuildVectorSimilarityIndexThreads, "Number of threads in the build vector similarity index thread pool.") \
-    M(BuildVectorSimilarityIndexThreadsActive, "Number of threads in the build vector similarity index thread pool running a task.") \
-    M(BuildVectorSimilarityIndexThreadsScheduled, "Number of queued or active jobs in the build vector similarity index thread pool.") \
    \
    M(DiskPlainRewritableAzureDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for AzureObjectStorage.") \
    M(DiskPlainRewritableLocalDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for LocalObjectStorage.") \
--- a/src/Common/FailPoint.cpp
+++ b/src/Common/FailPoint.cpp
@ -63,7 +63,6 @@ static struct InitFiu
    REGULAR(keepermap_fail_drop_data) \
    REGULAR(lazy_pipe_fds_fail_close) \
    PAUSEABLE(infinite_sleep) \
-    PAUSEABLE(stop_moving_part_before_swap_with_active) \


 namespace FailPoints
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@ -50,7 +50,7 @@ namespace DB
    M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
    M(String, default_database, "default", "Default database name.", 0) \
    M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
-    M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting.", 0) \
+    M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0) \
    M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \
    M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \
    M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \
@ -65,7 +65,6 @@ namespace DB
    M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
    M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \
    M(Bool, ignore_empty_sql_security_in_create_view_query, true, "If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. This setting is only necessary for the migration period and will become obsolete in 24.4", 0)  \
-    M(UInt64, max_build_vector_similarity_index_thread_pool_size, 16, "The maximum number of threads to use to build vector similarity indexes. 0 means all cores.", 0) \
    \
    /* Database Catalog */ \
    M(UInt64, database_atomic_delay_before_drop_table_sec, 8 * 60, "The delay during which a dropped table can be restored using the UNDROP statement. If DROP TABLE ran with a SYNC modifier, the setting is ignored.", 0) \
--- a/src/Functions/printf.cpp
+++ b/src/Functions/printf.cpp
@ -50,6 +50,13 @@ private:
                return executeNonconstant(input);
        }

+        [[maybe_unused]] String toString() const
+        {
+            WriteBufferFromOwnString buf;
+            buf << "format:" << format << ", rows:" << rows << ", is_literal:" << is_literal << ", input:" << input.dumpStructure() << "\n";
+            return buf.str();
+        }
+
    private:
        ColumnWithTypeAndName executeLiteral(std::string_view literal) const
        {
@ -224,7 +231,9 @@ public:
            const auto & instruction = instructions[i];
            try
            {
+                // std::cout << "instruction[" << i << "]:" << instructions[i].toString() << std::endl;
                concat_args[i] = instruction.execute();
+                // std::cout << "concat_args[" << i << "]:" << concat_args[i].dumpStructure() << std::endl;
            }
            catch (const fmt::v9::format_error & e)
            {
@ -349,14 +358,7 @@ private:

 REGISTER_FUNCTION(Printf)
 {
-    factory.registerFunction<FunctionPrintf>(
-        FunctionDocumentation{.description=R"(
-The `printf` function formats the given string with the values (strings, integers, floating-points etc.) listed in the arguments, similar to printf function in C++.
-The format string can contain format specifiers starting with `%` character.
-Anything not contained in `%` and the following format specifier is considered literal text and copied verbatim into the output.
-Literal `%` character can be escaped by `%%`.)", .examples{{"sum", "select printf('%%%s %s %d', 'Hello', 'World', 2024);", "%Hello World 2024"}}, .categories{"String"}
-});
-
+    factory.registerFunction<FunctionPrintf>();
 }

 }
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@ -10,7 +10,6 @@
 #include <Common/SensitiveDataMasker.h>
 #include <Common/Macros.h>
 #include <Common/EventNotifier.h>
-#include <Common/getNumberOfPhysicalCPUCores.h>
 #include <Common/Stopwatch.h>
 #include <Common/formatReadable.h>
 #include <Common/Throttler.h>
@ -122,6 +121,7 @@
 #include <Interpreters/InterpreterSelectWithUnionQuery.h>
 #include <base/defines.h>

+
 namespace fs = std::filesystem;

 namespace ProfileEvents
@ -164,9 +164,6 @@ namespace CurrentMetrics
    extern const Metric TablesLoaderForegroundThreadsActive;
    extern const Metric TablesLoaderForegroundThreadsScheduled;
    extern const Metric IOWriterThreadsScheduled;
-    extern const Metric BuildVectorSimilarityIndexThreads;
-    extern const Metric BuildVectorSimilarityIndexThreadsActive;
-    extern const Metric BuildVectorSimilarityIndexThreadsScheduled;
    extern const Metric AttachedTable;
    extern const Metric AttachedView;
    extern const Metric AttachedDictionary;
@ -300,8 +297,6 @@ struct ContextSharedPart : boost::noncopyable
    mutable std::unique_ptr<ThreadPool> load_marks_threadpool;  /// Threadpool for loading marks cache.
    mutable OnceFlag prefetch_threadpool_initialized;
    mutable std::unique_ptr<ThreadPool> prefetch_threadpool;    /// Threadpool for loading marks cache.
-    mutable OnceFlag build_vector_similarity_index_threadpool_initialized;
-    mutable std::unique_ptr<ThreadPool> build_vector_similarity_index_threadpool; /// Threadpool for vector-similarity index creation.
    mutable UncompressedCachePtr index_uncompressed_cache TSA_GUARDED_BY(mutex);      /// The cache of decompressed blocks for MergeTree indices.
    mutable QueryCachePtr query_cache TSA_GUARDED_BY(mutex);                          /// Cache of query results.
    mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex);                      /// Cache of marks in compressed files of MergeTree indices.
@ -3302,21 +3297,6 @@ size_t Context::getPrefetchThreadpoolSize() const
    return config.getUInt(".prefetch_threadpool_pool_size", 100);
 }

-ThreadPool & Context::getBuildVectorSimilarityIndexThreadPool() const
-{
-    callOnce(shared->build_vector_similarity_index_threadpool_initialized, [&] {
-            size_t pool_size = shared->server_settings.max_build_vector_similarity_index_thread_pool_size > 0
-                ? shared->server_settings.max_build_vector_similarity_index_thread_pool_size
-                : getNumberOfPhysicalCPUCores();
-            shared->build_vector_similarity_index_threadpool = std::make_unique<ThreadPool>(
-                CurrentMetrics::BuildVectorSimilarityIndexThreads,
-                CurrentMetrics::BuildVectorSimilarityIndexThreadsActive,
-                CurrentMetrics::BuildVectorSimilarityIndexThreadsScheduled,
-                pool_size);
-        });
-    return *shared->build_vector_similarity_index_threadpool;
-}
-
 BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const
 {
    callOnce(shared->buffer_flush_schedule_pool_initialized, [&] {
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@ -1097,8 +1097,6 @@ public:
    /// and make a prefetch by putting a read task to threadpoolReader.
    size_t getPrefetchThreadpoolSize() const;

-    ThreadPool & getBuildVectorSimilarityIndexThreadPool() const;
-
    /// Settings for MergeTree background tasks stored in config.xml
    BackgroundTaskSchedulingSettings getBackgroundProcessingTaskSchedulingSettings() const;
    BackgroundTaskSchedulingSettings getBackgroundMoveTaskSchedulingSettings() const;
--- a/src/Parsers/FunctionSecretArgumentsFinderAST.h
+++ b/src/Parsers/FunctionSecretArgumentsFinderAST.h
@ -74,8 +74,7 @@ private:
            findMySQLFunctionSecretArguments();
        }
        else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") ||
-                 (function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg") ||
-                 (function.name == "gcs"))
+                    (function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg"))
        {
            /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
            findS3FunctionSecretArguments(/* is_cluster_function= */ false);
--- a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp
@ -5,11 +5,9 @@
 #include <Columns/ColumnArray.h>
 #include <Common/BitHelpers.h>
 #include <Common/formatReadable.h>
-#include <Common/getNumberOfPhysicalCPUCores.h>
 #include <Common/logger_useful.h>
 #include <Common/typeid_cast.h>
 #include <Core/Field.h>
-#include <Core/ServerSettings.h>
 #include <DataTypes/DataTypeArray.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
@ -31,6 +29,7 @@ namespace DB

 namespace ErrorCodes
 {
+    extern const int CANNOT_ALLOCATE_MEMORY;
    extern const int FORMAT_VERSION_TOO_OLD;
    extern const int ILLEGAL_COLUMN;
    extern const int INCORRECT_DATA;
@ -132,7 +131,8 @@ void USearchIndexWithSerialization::deserialize(ReadBuffer & istr)
        /// See the comment in MergeTreeIndexGranuleVectorSimilarity::deserializeBinary why we throw here
        throw Exception(ErrorCodes::INCORRECT_DATA, "Could not load vector similarity index. Please drop the index and create it again. Error: {}", String(result.error.release()));

-    try_reserve(limits());
+    if (!try_reserve(limits()))
+        throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for usearch index");
 }

 USearchIndexWithSerialization::Statistics USearchIndexWithSerialization::getStatistics() const
@ -270,49 +270,20 @@ void updateImpl(const ColumnArray * column_array, const ColumnArray::Offsets & c
            throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column with vector similarity index must have equal length");

    /// Reserving space is mandatory
-    size_t max_thread_pool_size = Context::getGlobalContextInstance()->getServerSettings().max_build_vector_similarity_index_thread_pool_size;
-    if (max_thread_pool_size == 0)
-        max_thread_pool_size = getNumberOfPhysicalCPUCores();
-    unum::usearch::index_limits_t limits(roundUpToPowerOfTwoOrZero(index->size() + rows), max_thread_pool_size);
-    index->reserve(limits);
+    if (!index->try_reserve(roundUpToPowerOfTwoOrZero(index->size() + rows)))
+        throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for vector similarity index");

-    /// Vector index creation is slooooow. Add the new rows in parallel. The threadpool is global to avoid oversubscription when multiple
-    /// indexes are build simultaneously (e.g. multiple merges run at the same time).
-    auto & thread_pool = Context::getGlobalContextInstance()->getBuildVectorSimilarityIndexThreadPool();
-
-    auto add_vector_to_index = [&](USearchIndex::vector_key_t key, size_t row, ThreadGroupPtr thread_group)
+    for (size_t row = 0; row < rows; ++row)
    {
-        SCOPE_EXIT_SAFE(
-            if (thread_group)
-                CurrentThread::detachFromGroupIfNotDetached();
-        );
-
-        if (thread_group)
-            CurrentThread::attachToGroupIfDetached(thread_group);
-
-        /// add is thread-safe
-        if (auto result = index->add(key, &column_array_data_float_data[column_array_offsets[row - 1]]); !result)
-        {
+        if (auto result = index->add(static_cast<USearchIndex::vector_key_t>(index->size()), &column_array_data_float_data[column_array_offsets[row - 1]]); !result)
            throw Exception(ErrorCodes::INCORRECT_DATA, "Could not add data to vector similarity index. Error: {}", String(result.error.release()));
-        }
        else
        {
            ProfileEvents::increment(ProfileEvents::USearchAddCount);
            ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, result.visited_members);
            ProfileEvents::increment(ProfileEvents::USearchAddComputedDistances, result.computed_distances);
        }
-    };
-
-    size_t index_size = index->size();
-
-    for (size_t row = 0; row < rows; ++row)
-    {
-        auto key = static_cast<USearchIndex::vector_key_t>(index_size + row);
-        auto task = [group = CurrentThread::getGroup(), &add_vector_to_index, key, row] { add_vector_to_index(key, row, group); };
-        thread_pool.scheduleOrThrowOnError(task);
    }
-
-    thread_pool.wait();
 }

 }
--- a/src/Storages/MergeTree/MergeTreePartsMover.cpp
+++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp
@ -1,7 +1,6 @@
 #include <Storages/MergeTree/MergeTreeData.h>
 #include <Storages/MergeTree/MergeTreePartsMover.h>
 #include <Storages/MergeTree/MergeTreeSettings.h>
-#include <Common/FailPoint.h>
 #include <Common/logger_useful.h>

 #include <set>
@ -16,11 +15,6 @@ namespace ErrorCodes
    extern const int DIRECTORY_ALREADY_EXISTS;
 }

-namespace FailPoints
-{
-    extern const char stop_moving_part_before_swap_with_active[];
-}
-
 namespace
 {

@ -232,7 +226,6 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
    cloned_part.temporary_directory_lock = data->getTemporaryPartDirectoryHolder(part->name);

    MutableDataPartStoragePtr cloned_part_storage;
-    bool preserve_blobs = false;
    if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication)
    {
        /// Try zero-copy replication and fallback to default copy if it's not possible
@ -260,7 +253,6 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
        if (zero_copy_part)
        {
            /// FIXME for some reason we cannot just use this part, we have to re-create it through MergeTreeDataPartBuilder
-            preserve_blobs = true;
            zero_copy_part->is_temp = false;    /// Do not remove it in dtor
            cloned_part_storage = zero_copy_part->getDataPartStoragePtr();
        }
@ -280,17 +272,7 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
    cloned_part.part = std::move(builder).withPartFormatFromDisk().build();
    LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part.part->getDataPartStorage().getFullPath());

-    cloned_part.part->is_temp = false;
-    if (data->allowRemoveStaleMovingParts())
-    {
-        cloned_part.part->is_temp = true;
-        /// Setting it in case connection to zookeeper is lost while moving
-        /// Otherwise part might be stuck in the moving directory due to the KEEPER_EXCEPTION in part's destructor
-        if (preserve_blobs)
-            cloned_part.part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::PRESERVE_BLOBS;
-        else
-            cloned_part.part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS;
-    }
+    cloned_part.part->is_temp = data->allowRemoveStaleMovingParts();
    cloned_part.part->loadColumnsChecksumsIndexes(true, true);
    cloned_part.part->loadVersionMetadata();
    cloned_part.part->modification_time = cloned_part.part->getDataPartStorage().getLastModified().epochTime();
@ -300,8 +282,6 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me

 void MergeTreePartsMover::swapClonedPart(TemporaryClonedPart & cloned_part) const
 {
-    /// Used to get some stuck parts in the moving directory by stopping moves while pause is active
-    FailPointInjection::pauseFailPoint(FailPoints::stop_moving_part_before_swap_with_active);
    if (moves_blocker.isCancelled())
        throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts.");

--- a/tests/config/config.d/keeper_port.xml
+++ b/tests/config/config.d/keeper_port.xml
@ -42,7 +42,6 @@
            <multi_read>1</multi_read>
            <check_not_exists>1</check_not_exists>
            <create_if_not_exists>1</create_if_not_exists>
-            <remove_recursive>1</remove_recursive>
        </feature_flags>
    </keeper_server>
 </clickhouse>
--- a/tests/docker_scripts/stress_tests.lib
+++ b/tests/docker_scripts/stress_tests.lib
@ -64,7 +64,6 @@ function configure()
        randomize_config_boolean_value multi_read keeper_port
        randomize_config_boolean_value check_not_exists keeper_port
        randomize_config_boolean_value create_if_not_exists keeper_port
-        randomize_config_boolean_value remove_recursive keeper_port
    fi

    sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
--- a/tests/integration/test_mask_sensitive_info/test.py
+++ b/tests/integration/test_mask_sensitive_info/test.py
@ -393,7 +393,6 @@ def test_table_functions():
        f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')",
        f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '{azure_account_key}')",
        f"iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
-        f"gcs('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
    ]

    def make_test_case(i):
--- a/tests/integration/test_remove_stale_moving_parts/init.py
+++ b/tests/integration/test_remove_stale_moving_parts/init.py
--- a/tests/integration/test_remove_stale_moving_parts/config.xml
+++ b/tests/integration/test_remove_stale_moving_parts/config.xml
@ -1,46 +0,0 @@
-<clickhouse>
-    <remote_servers>
-        <cluster>
-            <shard>
-                <replica>
-                    <host>ch1</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </cluster>
-    </remote_servers>
-    <macros>
-        <shard>01</shard>
-    </macros>
-    <storage_configuration>
-        <disks>
-            <s3>
-                <type>s3</type>
-                <endpoint>http://minio1:9001/root/data/</endpoint>
-                <access_key_id>minio</access_key_id>
-                <secret_access_key>minio123</secret_access_key>
-            </s3>
-        </disks>
-        <policies>
-            <s3>
-                <volumes>
-                    <default>
-                        <disk>default</disk>
-                        <perform_ttl_move_on_insert>False</perform_ttl_move_on_insert>
-                    </default>
-                    <s3>
-                        <disk>s3</disk>
-                        <perform_ttl_move_on_insert>False</perform_ttl_move_on_insert>
-                    </s3>
-                </volumes>
-                <move_factor>0.0</move_factor>
-            </s3>
-        </policies>
-    </storage_configuration>
-
-    <merge_tree>
-        <allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
-        <storage_policy>s3</storage_policy>
-    </merge_tree>
-    <allow_remove_stale_moving_parts>true</allow_remove_stale_moving_parts>
-</clickhouse>
--- a/tests/integration/test_remove_stale_moving_parts/test.py
+++ b/tests/integration/test_remove_stale_moving_parts/test.py
@ -1,117 +0,0 @@
-from pathlib import Path
-import time
-import pytest
-from helpers.cluster import ClickHouseCluster
-
-cluster = ClickHouseCluster(__file__)
-ch1 = cluster.add_instance(
-    "ch1",
-    main_configs=[
-        "config.xml",
-    ],
-    macros={"replica": "node1"},
-    with_zookeeper=True,
-    with_minio=True,
-)
-
-DATABASE_NAME = "stale_moving_parts"
-
-
-@pytest.fixture(scope="module")
-def started_cluster():
-    try:
-        cluster.start()
-        yield cluster
-
-    finally:
-        cluster.shutdown()
-
-
-def q(node, query):
-    return node.query(database=DATABASE_NAME, sql=query)
-
-
-# .../disks/s3/store/
-def get_table_path(node, table):
-    return (
-        node.query(
-            sql=f"SELECT data_paths FROM system.tables WHERE table = '{table}' and database = '{DATABASE_NAME}' LIMIT 1"
-        )
-        .strip('"\n[]')
-        .split(",")[1]
-        .strip("'")
-    )
-
-
-def exec(node, cmd, path):
-    return node.exec_in_container(
-        [
-            "bash",
-            "-c",
-            f"{cmd} {path}",
-        ]
-    )
-
-
-def wait_part_is_stuck(node, table_moving_path, moving_part):
-    num_tries = 5
-    while q(node, "SELECT part_name FROM system.moves").strip() != moving_part:
-        if num_tries == 0:
-            raise Exception("Part has not started to move")
-        num_tries -= 1
-        time.sleep(1)
-    num_tries = 5
-    while exec(node, "ls", table_moving_path).strip() != moving_part:
-        if num_tries == 0:
-            raise Exception("Part is not stuck in the moving directory")
-        num_tries -= 1
-        time.sleep(1)
-
-
-def wait_zookeeper_node_to_start(zk_nodes, timeout=60):
-    start = time.time()
-    while time.time() - start < timeout:
-        try:
-            for instance in zk_nodes:
-                conn = cluster.get_kazoo_client(instance)
-                conn.get_children("/")
-            print("All instances of ZooKeeper started")
-            return
-        except Exception as ex:
-            print(("Can't connect to ZooKeeper " + str(ex)))
-            time.sleep(0.5)
-
-
-def test_remove_stale_moving_parts_without_zookeeper(started_cluster):
-    ch1.query(f"CREATE DATABASE IF NOT EXISTS {DATABASE_NAME}")
-
-    q(
-        ch1,
-        "CREATE TABLE test_remove ON CLUSTER cluster ( id UInt32 ) ENGINE ReplicatedMergeTree() ORDER BY id;",
-    )
-
-    table_moving_path = Path(get_table_path(ch1, "test_remove")) / "moving"
-
-    q(ch1, "SYSTEM ENABLE FAILPOINT stop_moving_part_before_swap_with_active")
-    q(ch1, "INSERT INTO test_remove SELECT number FROM numbers(100);")
-    moving_part = "all_0_0_0"
-    move_response = ch1.get_query_request(
-        sql=f"ALTER TABLE test_remove MOVE PART '{moving_part}' TO DISK 's3'",
-        database=DATABASE_NAME,
-    )
-
-    wait_part_is_stuck(ch1, table_moving_path, moving_part)
-
-    cluster.stop_zookeeper_nodes(["zoo1", "zoo2", "zoo3"])
-    # Stop moves in case table is not read-only yet
-    q(ch1, "SYSTEM STOP MOVES")
-    q(ch1, "SYSTEM DISABLE FAILPOINT stop_moving_part_before_swap_with_active")
-
-    assert "Cancelled moving parts" in move_response.get_error()
-    assert exec(ch1, "ls", table_moving_path).strip() == ""
-
-    cluster.start_zookeeper_nodes(["zoo1", "zoo2", "zoo3"])
-    wait_zookeeper_node_to_start(["zoo1", "zoo2", "zoo3"])
-    q(ch1, "SYSTEM START MOVES")
-
-    q(ch1, f"DROP TABLE test_remove")
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
@ -560,6 +560,7 @@ positionCaseInsensitive
 positionCaseInsensitiveUTF8
 positionUTF8
 pow
+printf
 proportionsZTest
 protocol
 queryID
--- a/tests/queries/0_stateless/02735_system_zookeeper_connection.reference
+++ b/tests/queries/0_stateless/02735_system_zookeeper_connection.reference
@ -1,2 +1,2 @@
-default	127.0.0.1	9181	0	0	0	1	1	['FILTERED_LIST','MULTI_READ','CHECK_NOT_EXISTS','CREATE_IF_NOT_EXISTS','REMOVE_RECURSIVE']
+default	127.0.0.1	9181	0	0	0	1	1	['FILTERED_LIST','MULTI_READ','CHECK_NOT_EXISTS','CREATE_IF_NOT_EXISTS']
 zookeeper2	localhost	9181	0	0	0	1