Merge branch 'master' into format-settings-parsing

2024-09-19 16:20:50 +00:00 · 2024-08-16 04:15:48 +02:00 · 2024-08-16 04:15:48 +02:00 · 77c8bbda43
commit 77c8bbda43
parent 98e246d99c a4ade2347c
34 changed files with 440 additions and 159 deletions
--- a/contrib/usearch-cmake/CMakeLists.txt
+++ b/contrib/usearch-cmake/CMakeLists.txt
@ -9,4 +9,14 @@ target_include_directories(_usearch SYSTEM INTERFACE
    ${SIMSIMD_PROJECT_DIR}/include
    ${USEARCH_PROJECT_DIR}/include)

+target_compile_definitions(_usearch INTERFACE USEARCH_USE_FP16LIB)
+
+# target_compile_definitions(_usearch INTERFACE USEARCH_USE_SIMSIMD)
+# ^^ simsimd is not enabled at the moment. Reasons:
+# - Vectorization is important for raw scans but not so much for HNSW. We use usearch only for HNSW.
+# - Simsimd does compile-time dispatch (choice of SIMD kernels determined by capabilities of the build machine) or dynamic dispatch (SIMD
+#   kernels chosen at runtime based on cpuid instruction). Since current builds are limited to SSE 4.2 (x86) and NEON (ARM), the speedup of
+#   the former would be moderate compared to AVX-512 / SVE. The latter is at the moment too fragile with respect to portability across x86
+#   and ARM machines ... certain combinations of quantizations / distance functions / SIMD instructions are not implemented at the moment.
+
 add_library(ch_contrib::usearch ALIAS _usearch)
--- a/docker/test/upgrade/run.sh
+++ b/docker/test/upgrade/run.sh
@ -129,6 +129,7 @@ configure

 # Check that all new/changed setting were added in settings changes history.
 # Some settings can be different for builds with sanitizers, so we check
+# Also the automatic value of 'max_threads' and similar was displayed as "'auto(...)'" in previous versions instead of "auto(...)".
 # settings changes only for non-sanitizer builds.
 IS_SANITIZED=$(clickhouse-local --query "SELECT value LIKE '%-fsanitize=%' FROM system.build_options WHERE name = 'CXX_FLAGS'")
 if [ "${IS_SANITIZED}" -eq "0" ]
@ -145,7 +146,9 @@ then
      old_settings.value AS old_value
  FROM new_settings
  LEFT JOIN old_settings ON new_settings.name = old_settings.name
-  WHERE (new_settings.value != old_settings.value) AND (name NOT IN (
+  WHERE (new_value != old_value)
+      AND NOT (startsWith(new_value, 'auto(') AND old_value LIKE '%auto(%')
+      AND (name NOT IN (
      SELECT arrayJoin(tupleElement(changes, 'name'))
      FROM
      (
@ -177,7 +180,7 @@ then
  if [ -s changed_settings.txt ]
  then
      mv changed_settings.txt /test_output/
-      echo -e "Changed settings are not reflected in settings changes history (see changed_settings.txt)$FAIL$(head_escaped /test_output/changed_settings.txt)" >> /test_output/test_results.tsv
+      echo -e "Changed settings are not reflected in the settings changes history (see changed_settings.txt)$FAIL$(head_escaped /test_output/changed_settings.txt)" >> /test_output/test_results.tsv
  else
      echo -e "There are no changed settings or they are reflected in settings changes history$OK" >> /test_output/test_results.tsv
  fi
--- a/docs/en/engines/table-engines/mergetree-family/annindexes.md
+++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md
@ -22,10 +22,10 @@ ORDER BY Distance(vectors, Point)
 LIMIT N
 ```

-`vectors` contains N-dimensional values of type [Array(Float32)](../../../sql-reference/data-types/array.md), for example embeddings.
-Function `Distance` computes the distance between two vectors. Often, the Euclidean (L2) distance is chosen as distance function but [other
-distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point, e.g. `(0.17,
-0.33, ...)`, and `N` limits the number of search results.
+`vectors` contains N-dimensional values of type [Array(Float32)](../../../sql-reference/data-types/array.md) or Array(Float64), for example
+embeddings. Function `Distance` computes the distance between two vectors. Often, the Euclidean (L2) distance is chosen as distance function
+but [other distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point,
+e.g. `(0.17, 0.33, ...)`, and `N` limits the number of search results.

 This query returns the top-`N` closest points to the reference point. Parameter `N` limits the number of returned values which is useful for
 situations where `MaxDistance` is difficult to determine in advance.
--- a/src/Access/AccessBackup.cpp
+++ b/src/Access/AccessBackup.cpp
@ -93,7 +93,7 @@ namespace
                        break;
                    }

-                    UUID id = parse<UUID>(line);
+                    UUID id = parse<UUID>(line.substr(0, line.find('\t')));
                    line.clear();

                    String queries;
--- a/src/Common/ZooKeeper/ZooKeeper.h
+++ b/src/Common/ZooKeeper/ZooKeeper.h
@ -44,7 +44,7 @@ namespace ErrorCodes
 namespace zkutil
 {

-/// Preferred size of multi() command (in number of ops)
+/// Preferred size of multi command (in the number of operations)
 constexpr size_t MULTI_BATCH_SIZE = 100;

 struct ShuffleHost
--- a/src/Common/parseRemoteDescription.cpp
+++ b/src/Common/parseRemoteDescription.cpp
@ -79,11 +79,16 @@ std::vector<String> parseRemoteDescription(
            /// Look for the corresponding closing bracket
            for (m = i + 1; m < r; ++m)
            {
-                if (description[m] == '{') ++cnt;
-                if (description[m] == '}') --cnt;
-                if (description[m] == '.' && description[m-1] == '.') last_dot = m;
-                if (description[m] == separator) have_splitter = true;
-                if (cnt == 0) break;
+                if (description[m] == '{')
+                    ++cnt;
+                if (description[m] == '}')
+                    --cnt;
+                if (description[m] == '.' && description[m-1] == '.')
+                    last_dot = m;
+                if (description[m] == separator)
+                    have_splitter = true;
+                if (cnt == 0)
+                    break;
            }
            if (cnt != 0)
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}': incorrect brace sequence in first argument", func_name);
--- a/src/Coordination/KeeperSnapshotManager.cpp
+++ b/src/Coordination/KeeperSnapshotManager.cpp
@ -54,7 +54,7 @@ namespace
        std::filesystem::path path(snapshot_path);
        std::string filename = path.stem();
        Strings name_parts;
-        splitInto<'_'>(name_parts, filename);
+        splitInto<'_', '.'>(name_parts, filename);
        return parse<uint64_t>(name_parts[1]);
    }

--- a/src/Coordination/RaftServerConfig.cpp
+++ b/src/Coordination/RaftServerConfig.cpp
@ -26,12 +26,16 @@ std::optional<RaftServerConfig> RaftServerConfig::parse(std::string_view server)
    if (!with_id_endpoint && !with_server_type && !with_priority)
        return std::nullopt;

-    const std::string_view id_str = parts[0];
+    std::string_view id_str = parts[0];
    if (!id_str.starts_with("server."))
        return std::nullopt;

+    id_str = id_str.substr(7);
+    if (auto eq_pos = id_str.find('='); std::string_view::npos != eq_pos)
+        id_str = id_str.substr(0, eq_pos);
+
    Int32 id;
-    if (!tryParse(id, std::next(id_str.begin(), 7)))
+    if (!tryParse(id, id_str))
        return std::nullopt;
    if (id <= 0)
        return std::nullopt;
--- a/src/Core/MySQL/MySQLGtid.cpp
+++ b/src/Core/MySQL/MySQLGtid.cpp
@ -24,9 +24,7 @@ void GTIDSet::tryMerge(size_t i)
 void GTIDSets::parse(String gtid_format)
 {
    if (gtid_format.empty())
-    {
        return;
-    }

    std::vector<String> gtid_sets;
    boost::split(gtid_sets, gtid_format, [](char c) { return c == ','; });
--- a/src/Core/MySQL/tests/gtest_MySQLGtid.cpp
+++ b/src/Core/MySQL/tests/gtest_MySQLGtid.cpp
@ -10,20 +10,19 @@ GTEST_TEST(GTIDSetsContains, Tests)
             contained1, contained2, contained3, contained4, contained5,
             not_contained1, not_contained2, not_contained3, not_contained4, not_contained5, not_contained6;

-    gtid_set.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60");
-    contained1.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60");
+    gtid_set.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, FBC30C64-F8C9-4DDF-8CDD-066208EB433B:1-19:47-49:60");
+    contained1.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, FBC30C64-F8C9-4DDF-8CDD-066208EB433B:1-19:47-49:60");
    contained2.parse("2174B383-5441-11E8-B90A-C80AA9429562:2-3:11:47-49");
    contained3.parse("2174B383-5441-11E8-B90A-C80AA9429562:11");
-    contained4.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:2-16:47-49:60");
-    contained5.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:60");
+    contained4.parse("FBC30C64-F8C9-4DDF-8CDD-066208EB433B:2-16:47-49:60");
+    contained5.parse("FBC30C64-F8C9-4DDF-8CDD-066208EB433B:60");

-    not_contained1.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-50, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60");
+    not_contained1.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-50, FBC30C64-F8C9-4DDF-8CDD-066208EB433B:1-19:47-49:60");
    not_contained2.parse("2174B383-5441-11E8-B90A-C80AA9429562:0-3:11:47-49");
    not_contained3.parse("2174B383-5441-11E8-B90A-C80AA9429562:99");
-    not_contained4.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:2-16:46-49:60");
-    not_contained5.parse("24DA167-0C0C-11E8-8442-00059A3C7B00:99");
-    not_contained6.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, 24DA167-0C0C-11E8-8442-00059A3C7B00:1-19:47-49:60, 00000000-0000-0000-0000-000000000000");
-
+    not_contained4.parse("FBC30C64-F8C9-4DDF-8CDD-066208EB433B:2-16:46-49:60");
+    not_contained5.parse("FBC30C64-F8C9-4DDF-8CDD-066208EB433B:99");
+    not_contained6.parse("2174B383-5441-11E8-B90A-C80AA9429562:1-3:11:47-49, FBC30C64-F8C9-4DDF-8CDD-066208EB433B:1-19:47-49:60, 00000000-0000-0000-0000-000000000000");

    ASSERT_TRUE(gtid_set.contains(contained1));
    ASSERT_TRUE(gtid_set.contains(contained2));
--- a/src/Core/SettingsFields.cpp
+++ b/src/Core/SettingsFields.cpp
@ -237,7 +237,7 @@ SettingFieldMaxThreads & SettingFieldMaxThreads::operator=(const Field & f)
 String SettingFieldMaxThreads::toString() const
 {
    if (is_auto)
-        return "'auto(" + ::DB::toString(value) + ")'";
+        return "auto(" + ::DB::toString(value) + ")";
    else
        return ::DB::toString(value);
 }
--- a/src/Core/SettingsFields.h
+++ b/src/Core/SettingsFields.h
@ -153,7 +153,7 @@ struct SettingFieldMaxThreads
    operator UInt64() const { return value; } /// NOLINT
    explicit operator Field() const { return value; }

-    /// Writes "auto(<number>)" instead of simple "<number>" if `is_auto==true`.
+    /// Writes "auto(<number>)" instead of simple "<number>" if `is_auto == true`.
    String toString() const;
    void parseFromString(const String & str);

--- a/src/Databases/TablesLoader.h
+++ b/src/Databases/TablesLoader.h
@ -1,4 +1,5 @@
 #pragma once
+
 #include <map>
 #include <mutex>
 #include <unordered_map>
--- a/src/IO/ReadHelpers.h
+++ b/src/IO/ReadHelpers.h
@ -258,6 +258,20 @@ inline void readBoolText(bool & x, ReadBuffer & buf)
    char tmp = '0';
    readChar(tmp, buf);
    x = tmp != '0';
+
+    if (!buf.eof() && isAlphaASCII(tmp))
+    {
+        if (tmp == 't' || tmp == 'T')
+        {
+            assertStringCaseInsensitive("rue", buf);
+            x = true;
+        }
+        else if (tmp == 'f' || tmp == 'F')
+        {
+            assertStringCaseInsensitive("alse", buf);
+            x = false;
+        }
+    }
 }

 template <typename ReturnType = void>
@ -1735,6 +1749,7 @@ inline T parse(const char * data, size_t size)
    T res;
    ReadBufferFromMemory buf(data, size);
    readText(res, buf);
+    assertEOF(buf);
    return res;
 }

@ -1742,7 +1757,9 @@ template <typename T>
 inline bool tryParse(T & res, const char * data, size_t size)
 {
    ReadBufferFromMemory buf(data, size);
-    return tryReadText(res, buf);
+    if (!tryReadText(res, buf))
+        return false;
+    return buf.eof();
 }

 template <typename T>
--- a/src/Interpreters/Cluster.cpp
+++ b/src/Interpreters/Cluster.cpp
@ -230,21 +230,37 @@ String Cluster::Address::toFullString(bool use_compact_format) const
    }
 }

-Cluster::Address Cluster::Address::fromFullString(const String & full_string)
+Cluster::Address Cluster::Address::fromFullString(std::string_view full_string)
 {
-    const char * address_begin = full_string.data();
-    const char * address_end = address_begin + full_string.size();
-
-    const char * user_pw_end = strchr(full_string.data(), '@');
+    std::string_view user_password;
+    if (auto pos = full_string.find('@'); pos != std::string_view::npos)
+        user_password = full_string.substr(pos + 1);

    /// parsing with the new shard{shard_index}[_replica{replica_index}] format
-    if (!user_pw_end && startsWith(full_string, "shard"))
+    if (user_password.empty() && full_string.starts_with("shard"))
    {
-        const char * underscore = strchr(full_string.data(), '_');
-
        Address address;
-        address.shard_index = parse<UInt32>(address_begin + strlen("shard"));
-        address.replica_index = underscore ? parse<UInt32>(underscore + strlen("_replica")) : 0;
+
+        if (auto underscore_pos = full_string.find('_'); underscore_pos != std::string_view::npos)
+        {
+            address.shard_index = parse<UInt32>(full_string.substr(0, underscore_pos).substr(strlen("shard")));
+
+            if (full_string.substr(underscore_pos + 1).starts_with("replica"))
+            {
+                address.replica_index = parse<UInt32>(full_string.substr(underscore_pos + 1 + strlen("replica")));
+            }
+            else if (full_string.substr(underscore_pos + 1).starts_with("all_replicas"))
+            {
+                address.replica_index = 0;
+            }
+            else
+                throw Exception(ErrorCodes::SYNTAX_ERROR, "Incorrect address '{}', should be in a form of `shardN_all_replicas` or `shardN_replicaM`", full_string);
+        }
+        else
+        {
+            address.shard_index = parse<UInt32>(full_string.substr(strlen("shard")));
+            address.replica_index = 0;
+        }

        return address;
    }
@ -255,9 +271,13 @@ Cluster::Address Cluster::Address::fromFullString(const String & full_string)
        /// - credentials are exposed in file name;
        /// - the file name can be too long.

+        const char * address_begin = full_string.data();
+        const char * address_end = address_begin + full_string.size();
+        const char * user_pw_end = strchr(address_begin, '@');
+
        Protocol::Secure secure = Protocol::Secure::Disable;
        const char * secure_tag = "+secure";
-        if (endsWith(full_string, secure_tag))
+        if (full_string.ends_with(secure_tag))
        {
            address_end -= strlen(secure_tag);
            secure = Protocol::Secure::Enable;
--- a/src/Interpreters/Cluster.h
+++ b/src/Interpreters/Cluster.h
@ -168,7 +168,7 @@ public:
        String toFullString(bool use_compact_format) const;

        /// Returns address with only shard index and replica index or full address without shard index and replica index
-        static Address fromFullString(const String & address_full_string);
+        static Address fromFullString(std::string_view full_string);

        /// Returns resolved address if it does resolve.
        std::optional<Poco::Net::SocketAddress> getResolvedAddress() const;
--- a/src/Storages/MergeTree/KeyCondition.cpp
+++ b/src/Storages/MergeTree/KeyCondition.cpp
@ -888,13 +888,22 @@ static Field applyFunctionForField(
    return (*col)[0];
 }

+/// applyFunction executes the function either on a single `field` or on the column which `field` refers to.
 static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field)
 {
+    chassert(func != nullptr);
    /// Fallback for fields without block reference.
    if (field.isExplicit())
        return applyFunctionForField(func, current_type, field);

-    String result_name = "_" + func->getName() + "_" + toString(field.column_idx);
+    /// We cache the function result inside `field.columns`, because this function is called many times
+    /// for many fields of the same column. When the column is huge, for example when it has thousands of marks, a cache is needed.
+    /// The cache key has the form `_[function_pointer]_[param_column_id]` and identifies a unique <function, param> pair.
+    WriteBufferFromOwnString buf;
+    writeText("_", buf);
+    writePointerHex(func.get(), buf);
+    writeText("_" + toString(field.column_idx), buf);
+    String result_name = buf.str();
    const auto & columns = field.columns;
    size_t result_idx = columns->size();

@ -906,6 +915,7 @@ static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr &

    if (result_idx == columns->size())
    {
+        /// On a cache miss, we calculate the result for the whole column the field comes from. This avoids repeated calculation.
        ColumnsWithTypeAndName args{(*columns)[field.column_idx]};
        field.columns->emplace_back(ColumnWithTypeAndName {nullptr, func->getResultType(), result_name});
        (*columns)[result_idx].column = func->execute(args, (*columns)[result_idx].type, columns->front().column->size());
--- a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp
@ -55,6 +55,7 @@ const std::unordered_map<String, unum::usearch::scalar_kind_t> quantizationToSca
    {"f32", unum::usearch::scalar_kind_t::f32_k},
    {"f16", unum::usearch::scalar_kind_t::f16_k},
    {"i8", unum::usearch::scalar_kind_t::i8_k}};
+/// Usearch provides more quantizations but the ones above (^^) seem to be the only ones comprehensively supported across all distance functions.

 template<typename T>
 concept is_set = std::same_as<T, std::set<typename T::key_type, typename T::key_compare, typename T::allocator_type>>;
@ -98,9 +99,6 @@ USearchIndexWithSerialization::USearchIndexWithSerialization(
    unum::usearch::index_dense_config_t config(usearch_hnsw_params.m, usearch_hnsw_params.ef_construction, usearch_hnsw_params.ef_search);
    config.enable_key_lookups = false; /// we don't do row-to-vector lookups

-    if (auto error = config.validate(); error) /// already called in vectorSimilarityIndexValidator, call again because usearch may change the config in-place
-        throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid parameters passed to vector similarity index. Error: {}", String(error.release()));
-
    if (auto result = USearchIndex::make(metric, config); !result)
        throw Exception(ErrorCodes::INCORRECT_DATA, "Could not create vector similarity index. Error: {}", String(result.error.release()));
    else
@ -250,14 +248,47 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorVectorSimilarity::getGranuleAnd
    return granule;
 }

+namespace
+{
+
+template <typename Column>
+void updateImpl(const ColumnArray * column_array, const ColumnArray::Offsets & column_array_offsets, USearchIndexWithSerializationPtr & index, size_t dimensions, size_t rows)
+{
+    const auto & column_array_data = column_array->getData();
+    const auto & column_array_data_float = typeid_cast<const Column &>(column_array_data);
+    const auto & column_array_data_float_data = column_array_data_float.getData();
+
+    /// Check all sizes are the same
+    for (size_t row = 0; row < rows - 1; ++row)
+        if (column_array_offsets[row + 1] - column_array_offsets[row] != dimensions)
+            throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column with vector similarity index must have equal length");
+
+    /// Reserving space is mandatory
+    if (!index->try_reserve(roundUpToPowerOfTwoOrZero(index->size() + rows)))
+        throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for vector similarity index");
+
+    for (size_t row = 0; row < rows; ++row)
+    {
+        if (auto result = index->add(static_cast<USearchIndex::vector_key_t>(index->size()), &column_array_data_float_data[column_array_offsets[row - 1]]); !result)
+            throw Exception(ErrorCodes::INCORRECT_DATA, "Could not add data to vector similarity index. Error: {}", String(result.error.release()));
+        else
+        {
+            ProfileEvents::increment(ProfileEvents::USearchAddCount);
+            ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, result.visited_members);
+            ProfileEvents::increment(ProfileEvents::USearchAddComputedDistances, result.computed_distances);
+        }
+    }
+}
+
+}
+
 void MergeTreeIndexAggregatorVectorSimilarity::update(const Block & block, size_t * pos, size_t limit)
 {
    if (*pos >= block.rows())
        throw Exception(
            ErrorCodes::LOGICAL_ERROR,
            "The provided position is not less than the number of block rows. Position: {}, Block rows: {}.",
-            *pos,
-            block.rows());
+            *pos, block.rows());

    size_t rows_read = std::min(limit, block.rows() - *pos);

@ -271,63 +302,53 @@ void MergeTreeIndexAggregatorVectorSimilarity::update(const Block & block, size_
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected block with single column");

    const String & index_column_name = index_sample_block.getByPosition(0).name;
-    ColumnPtr column_cut = block.getByName(index_column_name).column->cut(*pos, rows_read);
+    const ColumnPtr & index_column = block.getByName(index_column_name).column;
+    ColumnPtr column_cut = index_column->cut(*pos, rows_read);

-    if (const auto & column_array = typeid_cast<const ColumnArray *>(column_cut.get()))
-    {
-        const auto & column_array_data = column_array->getData();
-        const auto & column_array_data_float = typeid_cast<const ColumnFloat32 &>(column_array_data);
-        const auto & column_array_data_float_data = column_array_data_float.getData();
+    const auto * column_array = typeid_cast<const ColumnArray *>(column_cut.get());
+    if (!column_array)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected Array(Float*) column");

-        const auto & column_array_offsets = column_array->getOffsets();
-        const size_t num_rows = column_array_offsets.size();
+    if (column_array->empty())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Array is unexpectedly empty");

-        if (column_array->empty())
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Array is unexpectedly empty");
+    /// The vector similarity algorithm naturally assumes that the indexed vectors have dimension >= 1. This condition is violated if empty arrays
+    /// are INSERTed into a vector-similarity-indexed column or if no value was specified at all, in which case the arrays take on their default
+    /// value, which is also empty.
+    if (column_array->isDefaultAt(0))
+        throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. Did you try to INSERT default values?", index_column_name);

-        /// The vector similarity algorithm naturally assumes that the indexed vectors have dimension >= 1. This condition is violated if empty arrays
-        /// are INSERTed into an vector-similarity-indexed column or if no value was specified at all in which case the arrays take on their default
-        /// values which is also empty.
-        if (column_array->isDefaultAt(0))
-            throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. Did you try to INSERT default values?", index_column_name);
+    const size_t rows = column_array->size();

-        /// Check all sizes are the same
-        const size_t dimensions = column_array_offsets[0];
-        for (size_t i = 0; i < num_rows - 1; ++i)
-            if (column_array_offsets[i + 1] - column_array_offsets[i] != dimensions)
-                throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name);
+    const auto & column_array_offsets = column_array->getOffsets();
+    const size_t dimensions = column_array_offsets[0];

-        /// Also check that previously inserted blocks have the same size as this block.
-        /// Note that this guarantees consistency of dimension only within parts. We are unable to detect inconsistent dimensions across
-        /// parts - for this, a little help from the user is needed, e.g. CONSTRAINT cnstr CHECK length(array) = 42.
-        if (index && index->dimensions() != dimensions)
-            throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name);
+    if (!index)
+        index = std::make_shared<USearchIndexWithSerialization>(dimensions, metric_kind, scalar_kind, usearch_hnsw_params);

-        if (!index)
-            index = std::make_shared<USearchIndexWithSerialization>(dimensions, metric_kind, scalar_kind, usearch_hnsw_params);
+    /// Also check that previously inserted blocks have the same size as this block.
+    /// Note that this guarantees consistency of dimension only within parts. We are unable to detect inconsistent dimensions across
+    /// parts - for this, a little help from the user is needed, e.g. CONSTRAINT cnstr CHECK length(array) = 42.
+    if (index->dimensions() != dimensions)
+        throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column with vector similarity index must have equal length");

-        /// We use Usearch's index_dense_t as index type which supports only 4 bio entries according to https://github.com/unum-cloud/usearch/tree/main/cpp
-        if (index->size() + num_rows > std::numeric_limits<UInt32>::max())
-            throw Exception(ErrorCodes::INCORRECT_DATA, "Size of vector similarity index in column {} would exceed 4 billion entries", index_column_name);
+    /// We use Usearch's index_dense_t as index type which supports only 4 billion entries according to https://github.com/unum-cloud/usearch/tree/main/cpp
+    if (index->size() + rows > std::numeric_limits<UInt32>::max())
+        throw Exception(ErrorCodes::INCORRECT_DATA, "Size of vector similarity index would exceed 4 billion entries");

-        /// Reserving space is mandatory
-        if (!index->try_reserve(roundUpToPowerOfTwoOrZero(index->size() + num_rows)))
-            throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for vector similarity index");
+    DataTypePtr data_type = block.getDataTypes()[0];
+    const auto * data_type_array = typeid_cast<const DataTypeArray *>(data_type.get());
+    if (!data_type_array)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected data type Array(Float*)");
+    const TypeIndex nested_type_index = data_type_array->getNestedType()->getTypeId();

-        for (size_t row = 0; row < num_rows; ++row)
-        {
-            if (auto result = index->add(static_cast<UInt32>(index->size()), &column_array_data_float_data[column_array_offsets[row - 1]]); !result)
-                throw Exception(ErrorCodes::INCORRECT_DATA, "Could not add data to vector similarity index. Error: {}", String(result.error.release()));
-            else
-            {
-                ProfileEvents::increment(ProfileEvents::USearchAddCount);
-                ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, result.visited_members);
-                ProfileEvents::increment(ProfileEvents::USearchAddComputedDistances, result.computed_distances);
-            }
-        }
-    }
+    if (WhichDataType(nested_type_index).isFloat32())
+        updateImpl<ColumnFloat32>(column_array, column_array_offsets, index, dimensions, rows);
+    else if (WhichDataType(nested_type_index).isFloat64())
+        updateImpl<ColumnFloat64>(column_array, column_array_offsets, index, dimensions, rows);
    else
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected Array(Float32) column");
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected data type Array(Float*)");
+

    *pos += rows_read;
 }
@ -375,7 +396,7 @@ std::vector<size_t> MergeTreeIndexConditionVectorSimilarity::getUsefulRanges(Mer
            "does not match the dimension in the index ({})",
            vector_similarity_condition.getDimensions(), index->dimensions());

-    const std::vector<float> reference_vector = vector_similarity_condition.getReferenceVector();
+    const std::vector<Float64> reference_vector = vector_similarity_condition.getReferenceVector();

    auto search_result = index->search(reference_vector.data(), limit);
    if (!search_result)
@ -486,7 +507,7 @@ void vectorSimilarityIndexValidator(const IndexDescription & index, bool /* atta
        if (!quantizationToScalarKind.contains(index.arguments[2].safeGet<String>()))
            throw Exception(ErrorCodes::INCORRECT_DATA, "Third argument (quantization) of vector similarity index is not supported. Supported quantizations are: {}", joinByComma(quantizationToScalarKind));

-        /// Call Usearche's own parameter validation method for HNSW-specific parameters
+        /// Call Usearch's own parameter validation method for HNSW-specific parameters
        UInt64 m = index.arguments[3].safeGet<UInt64>();
        UInt64 ef_construction = index.arguments[4].safeGet<UInt64>();
        UInt64 ef_search = index.arguments[5].safeGet<UInt64>();
@ -501,18 +522,14 @@ void vectorSimilarityIndexValidator(const IndexDescription & index, bool /* atta
    if (index.column_names.size() != 1 || index.data_types.size() != 1)
        throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Vector similarity indexes must be created on a single column");

-    /// Check data type of the indexed column:
+    /// Check that the data type is Array(Float*)
    DataTypePtr data_type = index.sample_block.getDataTypes()[0];
-    if (const auto * data_type_array = typeid_cast<const DataTypeArray *>(data_type.get()))
-    {
-        TypeIndex nested_type_index = data_type_array->getNestedType()->getTypeId();
-        if (!WhichDataType(nested_type_index).isFloat32())
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Vector similarity indexes can only be created on columns of type Array(Float32)");
-    }
-    else
-    {
-        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Vector similarity indexes can only be created on columns of type Array(Float32)");
-    }
+    const auto * data_type_array = typeid_cast<const DataTypeArray *>(data_type.get());
+    if (!data_type_array)
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Vector similarity indexes can only be created on columns of type Array(Float*)");
+    TypeIndex nested_type_index = data_type_array->getNestedType()->getTypeId();
+    if (!WhichDataType(nested_type_index).isFloat())
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Vector similarity indexes can only be created on columns of type Array(Float*)");
 }

 }
--- a/src/Storages/MergeTree/VectorSimilarityCondition.cpp
+++ b/src/Storages/MergeTree/VectorSimilarityCondition.cpp
@ -24,7 +24,7 @@ namespace
 {

 template <typename Literal>
-void extractReferenceVectorFromLiteral(std::vector<Float32> & reference_vector, Literal literal)
+void extractReferenceVectorFromLiteral(std::vector<Float64> & reference_vector, Literal literal)
 {
    Float64 float_element_of_reference_vector;
    Int64 int_element_of_reference_vector;
@ -72,7 +72,7 @@ UInt64 VectorSimilarityCondition::getLimit() const
    throw Exception(ErrorCodes::LOGICAL_ERROR, "No LIMIT section in query, not supported");
 }

-std::vector<float> VectorSimilarityCondition::getReferenceVector() const
+std::vector<Float64> VectorSimilarityCondition::getReferenceVector() const
 {
    if (index_is_useful && query_information.has_value())
        return query_information->reference_vector;
--- a/src/Storages/MergeTree/VectorSimilarityCondition.h
+++ b/src/Storages/MergeTree/VectorSimilarityCondition.h
@ -60,7 +60,7 @@ public:
            L2
        };

-        std::vector<Float32> reference_vector;
+        std::vector<Float64> reference_vector;
        DistanceFunction distance_function;
        String column_name;
        UInt64 limit;
@ -70,7 +70,7 @@ public:
    /// Returns false if query can be speeded up by an ANN index, true otherwise.
    bool alwaysUnknownOrTrue(String distance_function) const;

-    std::vector<float> getReferenceVector() const;
+    std::vector<Float64> getReferenceVector() const;
    size_t getDimensions() const;
    String getColumnName() const;
    Info::DistanceFunction getDistanceFunction() const;
--- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
@ -332,6 +332,8 @@ struct DeltaLakeMetadataImpl
        WhichDataType which(check_type->getTypeId());
        if (which.isStringOrFixedString())
            return value;
+        else if (isBool(check_type))
+            return parse<bool>(value);
        else if (which.isInt8())
            return parse<Int8>(value);
        else if (which.isUInt8())
--- a/src/Storages/StorageExternalDistributed.cpp
+++ b/src/Storages/StorageExternalDistributed.cpp
@ -1,4 +1,4 @@
-#include "StorageExternalDistributed.h"
+#include <Storages/StorageExternalDistributed.h>

 #include <Core/Settings.h>
 #include <Storages/StorageFactory.h>
@ -6,6 +6,8 @@
 #include <Interpreters/InterpreterSelectQuery.h>
 #include <Core/PostgreSQL/PoolWithFailover.h>
 #include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTIdentifier.h>
 #include <Common/parseAddress.h>
 #include <Processors/QueryPlan/QueryPlan.h>
 #include <Common/parseRemoteDescription.h>
@ -112,14 +114,39 @@ void registerStorageExternalDistributed(StorageFactory & factory)
        std::unordered_set<StoragePtr> shards;
        ASTs inner_engine_args(engine_args.begin() + 1, engine_args.end());

+        ASTPtr * address_arg = nullptr;
+
+        /// Look for a key-value argument `addresses_expr = ...` (as used with named collections); its value is the address expression.
+        for (auto & node : inner_engine_args)
+        {
+            if (ASTFunction * func = node->as<ASTFunction>(); func && func->name == "equals" && func->arguments)
+            {
+                if (ASTExpressionList * func_args = func->arguments->as<ASTExpressionList>(); func_args && func_args->children.size() == 2)
+                {
+                    if (ASTIdentifier * arg_name = func_args->children[0]->as<ASTIdentifier>(); arg_name && arg_name->name() == "addresses_expr")
+                    {
+                        address_arg = &func_args->children[1];
+                        break;
+                    }
+                }
+            }
+        }
+
+        /// Otherwise it is the first argument.
+        if (!address_arg)
+            address_arg = &inner_engine_args.at(0);
+
+        String addresses_expr = checkAndGetLiteralArgument<String>(*address_arg, "addresses");
+        Strings shards_addresses = get_addresses(addresses_expr);
+
        auto engine_name = checkAndGetLiteralArgument<String>(engine_args[0], "engine_name");
        if (engine_name == "URL")
        {
-            auto configuration = StorageURL::getConfiguration(inner_engine_args, context);
-            auto shards_addresses = get_addresses(configuration.addresses_expr);
            auto format_settings = StorageURL::getFormatSettingsFromArgs(args);
            for (const auto & shard_address : shards_addresses)
            {
+                *address_arg = std::make_shared<ASTLiteral>(shard_address);
+                auto configuration = StorageURL::getConfiguration(inner_engine_args, context);
                auto uri_options = parseRemoteDescription(shard_address, 0, shard_address.size(), '|', max_addresses);
                if (uri_options.size() > 1)
                {
@ -140,13 +167,12 @@ void registerStorageExternalDistributed(StorageFactory & factory)
        else if (engine_name == "MySQL")
        {
            MySQLSettings mysql_settings;
-            auto configuration = StorageMySQL::getConfiguration(inner_engine_args, context, mysql_settings);
-            auto shards_addresses = get_addresses(configuration.addresses_expr);
            for (const auto & shard_address : shards_addresses)
            {
-                auto current_configuration{configuration};
-                current_configuration.addresses = parseRemoteDescriptionForExternalDatabase(shard_address, max_addresses, 3306);
-                auto pool = createMySQLPoolWithFailover(current_configuration, mysql_settings);
+                *address_arg = std::make_shared<ASTLiteral>(shard_address);
+                auto configuration = StorageMySQL::getConfiguration(inner_engine_args, context, mysql_settings);
+                configuration.addresses = parseRemoteDescriptionForExternalDatabase(shard_address, max_addresses, 3306);
+                auto pool = createMySQLPoolWithFailover(configuration, mysql_settings);
                shards.insert(std::make_shared<StorageMySQL>(
                    args.table_id, std::move(pool), configuration.database, configuration.table,
                    /* replace_query = */ false, /* on_duplicate_clause = */ "",
@ -157,14 +183,13 @@ void registerStorageExternalDistributed(StorageFactory & factory)
 #if USE_LIBPQXX
        else if (engine_name == "PostgreSQL")
        {
-            auto configuration = StoragePostgreSQL::getConfiguration(inner_engine_args, context);
-            auto shards_addresses = get_addresses(configuration.addresses_expr);
            for (const auto & shard_address : shards_addresses)
            {
-                auto current_configuration{configuration};
-                current_configuration.addresses = parseRemoteDescriptionForExternalDatabase(shard_address, max_addresses, 5432);
+                *address_arg = std::make_shared<ASTLiteral>(shard_address);
+                auto configuration = StoragePostgreSQL::getConfiguration(inner_engine_args, context);
+                configuration.addresses = parseRemoteDescriptionForExternalDatabase(shard_address, max_addresses, 5432);
                auto pool = std::make_shared<postgres::PoolWithFailover>(
-                    current_configuration,
+                    configuration,
                    settings.postgresql_connection_pool_size,
                    settings.postgresql_connection_pool_wait_timeout,
                    settings.postgresql_connection_pool_retries,
--- a/tests/integration/test_cgroup_limit/test.py
+++ b/tests/integration/test_cgroup_limit/test.py
@ -46,7 +46,7 @@ def test_cgroup_cpu_limit():
            "clickhouse local -q \"select value from system.settings where name='max_threads'\"",
            num_cpus,
        )
-        expect_output = (r"\'auto({})\'".format(math.ceil(num_cpus))).encode()
+        expect_output = (r"auto({})".format(math.ceil(num_cpus))).encode()
        assert (
            result.strip() == expect_output
        ), f"fail for cpu limit={num_cpus}, result={result.strip()}, expect={expect_output}"
--- a/tests/queries/0_stateless/00600_replace_running_query.sh
+++ b/tests/queries/0_stateless/00600_replace_running_query.sh
@ -6,41 +6,56 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh

-TEST_PREFIX=$RANDOM
+TEST_PREFIX="${CLICKHOUSE_DATABASE}"
 ${CLICKHOUSE_CLIENT} -q "drop user if exists u_00600${TEST_PREFIX}"
 ${CLICKHOUSE_CLIENT} -q "create user u_00600${TEST_PREFIX} settings max_execution_time=60, readonly=1"
 ${CLICKHOUSE_CLIENT} -q "grant select on system.numbers to u_00600${TEST_PREFIX}"

 function wait_for_query_to_start()
 {
-    while [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() FROM system.processes WHERE query_id = '$1'") == 0 ]]; do sleep 0.1; done
+    while [[ 0 -eq $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() FROM system.processes WHERE query_id = '$1'") ]]
+    do
+        sleep 0.1
+    done
+}
+
+function wait_for_queries_to_finish()
+{
+    while [[ 0 -ne $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() FROM system.processes WHERE current_database = '${CLICKHOUSE_DATABASE}' AND query NOT LIKE '%this query%'") ]]
+    do
+        sleep 0.1
+    done
 }


-$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d 'SELECT 1, count() FROM system.numbers' > /dev/null 2>&1 &
-wait_for_query_to_start 'hello'
+$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=${CLICKHOUSE_DATABASE}hello&replace_running_query=1" -d 'SELECT 1, count() FROM system.numbers' > /dev/null 2>&1 &
+wait_for_query_to_start "${CLICKHOUSE_DATABASE}hello"

 # Replace it
-$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d 'SELECT 0'
+$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=${CLICKHOUSE_DATABASE}hello&replace_running_query=1" -d 'SELECT 0'

 # Wait for it to be replaced
 wait
+wait_for_queries_to_finish

-${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' &
-wait_for_query_to_start '42'
+${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id="${CLICKHOUSE_DATABASE}42" --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'QUERY_WAS_CANCELLED' &
+wait_for_query_to_start "${CLICKHOUSE_DATABASE}42"

 # Trying to run another query with the same query_id
-${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 43' 2>&1 | grep -cF 'is already running by user'
+${CLICKHOUSE_CLIENT} --query_id="${CLICKHOUSE_DATABASE}42" --query='SELECT 43' 2>&1 | grep -cF 'is already running by user'

 # Trying to replace query of a different user
-$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=42&replace_running_query=1" -d 'SELECT 1' | grep -cF 'is already running by user'
+$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=${CLICKHOUSE_DATABASE}42&replace_running_query=1" -d 'SELECT 1' | grep -cF 'is already running by user'

-$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = '42' SYNC" > /dev/null
+$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = '${CLICKHOUSE_DATABASE}42' SYNC" > /dev/null
 wait
+wait_for_queries_to_finish

-${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' &
-wait_for_query_to_start '42'
-${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --replace_running_query_max_wait_ms=500 --query='SELECT 43' 2>&1 | grep -F "can't be stopped" > /dev/null
+${CLICKHOUSE_CLIENT} --query_id="${CLICKHOUSE_DATABASE}42" --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'QUERY_WAS_CANCELLED' &
+wait_for_query_to_start "${CLICKHOUSE_DATABASE}42"
+${CLICKHOUSE_CLIENT} --query_id="${CLICKHOUSE_DATABASE}42" --replace_running_query=1 --replace_running_query_max_wait_ms=500 --query='SELECT 43' 2>&1 | grep -F "can't be stopped" > /dev/null
 wait
-${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --query='SELECT 44'
+wait_for_queries_to_finish
+
+${CLICKHOUSE_CLIENT} --query_id="${CLICKHOUSE_DATABASE}42" --replace_running_query=1 --query='SELECT 44'
 ${CLICKHOUSE_CLIENT} -q "drop user u_00600${TEST_PREFIX}"
--- a/tests/queries/0_stateless/02354_vector_search_bugs.reference
+++ b/tests/queries/0_stateless/02354_vector_search_bugs.reference
@ -1,3 +1,4 @@
+Rejects INSERTs of Arrays with different sizes
 Issue #52258: Empty Arrays or Arrays with default values are rejected
 It is possible to create parts with different Array vector sizes but there will be an error at query time
 Correctness of index with > 1 mark
--- a/tests/queries/0_stateless/02354_vector_search_bugs.sql
+++ b/tests/queries/0_stateless/02354_vector_search_bugs.sql
@ -7,6 +7,12 @@ SET enable_analyzer = 1; -- 0 vs. 1 produce slightly different error codes, make

 DROP TABLE IF EXISTS tab;

+SELECT 'Rejects INSERTs of Arrays with different sizes';
+
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id;
+INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2, 3.3]); -- { serverError INCORRECT_DATA }
+DROP TABLE tab;
+
 SELECT 'Issue #52258: Empty Arrays or Arrays with default values are rejected';

 CREATE TABLE tab (id UInt64, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree() ORDER BY id;
--- a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference
+++ b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference
@ -5,4 +5,3 @@ Two or six index arguments
 4nd argument (M), if given, must be UInt64 and > 1
 Must be created on single column
 Must be created on Array(Float32) columns
-Rejects INSERTs of Arrays with different sizes
--- a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql
+++ b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql
@ -35,11 +35,6 @@ SELECT 'Must be created on Array(Float32) columns';
 SET allow_suspicious_low_cardinality_types = 1;
 CREATE TABLE tab(id Int32, vec UInt64, INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
 CREATE TABLE tab(id Int32, vec Float32, INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-CREATE TABLE tab(id Int32, vec Array(Float64), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
+CREATE TABLE tab(id Int32, vec Array(UInt64), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
 CREATE TABLE tab(id Int32, vec LowCardinality(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
 CREATE TABLE tab(id Int32, vec Nullable(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-
-SELECT 'Rejects INSERTs of Arrays with different sizes';
-CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id;
-INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2, 3.3]); -- { serverError INCORRECT_DATA }
-DROP TABLE tab;
--- a/tests/queries/0_stateless/02354_vector_search_queries.reference
+++ b/tests/queries/0_stateless/02354_vector_search_queries.reference
@ -1,9 +1,7 @@
 10 rows, index_granularity = 8192, GRANULARITY = 1 million --> 1 granule, 1 indexed block
- ORDER-BY-type
 5	[0,2]	0
 6	[0,2.1]	0.09999990463256836
 7	[0,2.2]	0.20000004768371582
- ORDER-BY-type, EXPLAIN
 Expression (Projection)
  Limit (preliminary LIMIT (without OFFSET))
    Sorting (Sorting for ORDER BY)
@ -20,11 +18,9 @@ Expression (Projection)
            Parts: 1/1
            Granules: 1/1
 12 rows, index_granularity = 3, GRANULARITY = 2 --> 4 granules, 2 indexed block
- ORDER-BY-type
 6	[0,2]	0
 7	[0,2.1]	0.09999990463256836
 8	[0,2.2]	0.20000004768371582
- ORDER-BY-type, EXPLAIN
 Expression (Projection)
  Limit (preliminary LIMIT (without OFFSET))
    Sorting (Sorting for ORDER BY)
@ -41,11 +37,11 @@ Expression (Projection)
            Parts: 1/1
            Granules: 2/4
 Special cases
- ORDER-BY-type
+-- Non-default metric, M, ef_construction, ef_search
 6	[1,9.3]	0.005731362878640178
 1	[2,3.2]	0.15200169244542905
 7	[5.5,4.7]	0.3503476876550442
- Special case: setting "max_limit_for_ann_queries"
+-- Setting "max_limit_for_ann_queries"
 Expression (Projection)
  Limit (preliminary LIMIT (without OFFSET))
    Sorting (Sorting for ORDER BY)
@ -56,3 +52,62 @@ Expression (Projection)
            Condition: true
            Parts: 1/1
            Granules: 4/4
+-- Non-default quantization
+1	[2,3.2]	2.3323807824711897
+2	[4.2,3.4]	4.427188573446585
+0	[4.6,2.3]	4.609772130377966
+Expression (Projection)
+  Limit (preliminary LIMIT (without OFFSET))
+    Sorting (Sorting for ORDER BY)
+      Expression (Before ORDER BY)
+        ReadFromMergeTree (default.tab_f32)
+        Indexes:
+          PrimaryKey
+            Condition: true
+            Parts: 1/1
+            Granules: 4/4
+          Skip
+            Name: idx
+            Description: vector_similarity GRANULARITY 2
+            Parts: 1/1
+            Granules: 2/4
+1	[2,3.2]	2.3323807824711897
+2	[4.2,3.4]	4.427188573446585
+0	[4.6,2.3]	4.609772130377966
+Expression (Projection)
+  Limit (preliminary LIMIT (without OFFSET))
+    Sorting (Sorting for ORDER BY)
+      Expression (Before ORDER BY)
+        ReadFromMergeTree (default.tab_f16)
+        Indexes:
+          PrimaryKey
+            Condition: true
+            Parts: 1/1
+            Granules: 4/4
+          Skip
+            Name: idx
+            Description: vector_similarity GRANULARITY 2
+            Parts: 1/1
+            Granules: 2/4
+1	[2,3.2]	2.3323807824711897
+2	[4.2,3.4]	4.427188573446585
+0	[4.6,2.3]	4.609772130377966
+Expression (Projection)
+  Limit (preliminary LIMIT (without OFFSET))
+    Sorting (Sorting for ORDER BY)
+      Expression (Before ORDER BY)
+        ReadFromMergeTree (default.tab_i8)
+        Indexes:
+          PrimaryKey
+            Condition: true
+            Parts: 1/1
+            Granules: 4/4
+          Skip
+            Name: idx
+            Description: vector_similarity GRANULARITY 2
+            Parts: 1/1
+            Granules: 2/4
+-- Index on Array(Float64) column
+6	[0,2]	0
+7	[0,2.1]	0.10000000000000009
+8	[0,2.2]	0.20000000000000018
--- a/tests/queries/0_stateless/02354_vector_search_queries.sql
+++ b/tests/queries/0_stateless/02354_vector_search_queries.sql
@ -14,14 +14,12 @@ CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similar
 INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]);


-SELECT '- ORDER-BY-type';
 WITH [0.0, 2.0] AS reference_vec
 SELECT id, vec, L2Distance(vec, reference_vec)
 FROM tab
 ORDER BY L2Distance(vec, reference_vec)
 LIMIT 3;

-SELECT '- ORDER-BY-type, EXPLAIN';
 EXPLAIN indexes = 1
 WITH [0.0, 2.0] AS reference_vec
 SELECT id, vec, L2Distance(vec, reference_vec)
@ -37,14 +35,12 @@ SELECT '12 rows, index_granularity = 3, GRANULARITY = 2 --> 4 granules, 2 indexe
 CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance') GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3;
 INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [1.5, 0.0]), (6, [0.0, 2.0]), (7, [0.0, 2.1]), (8, [0.0, 2.2]), (9, [0.0, 2.3]), (10, [0.0, 2.4]), (11, [0.0, 2.5]);

-SELECT '- ORDER-BY-type';
 WITH [0.0, 2.0] AS reference_vec
 SELECT id, vec, L2Distance(vec, reference_vec)
 FROM tab
 ORDER BY L2Distance(vec, reference_vec)
 LIMIT 3;

-SELECT '- ORDER-BY-type, EXPLAIN';
 EXPLAIN indexes = 1
 WITH [0.0, 2.0] AS reference_vec
 SELECT id, vec, L2Distance(vec, reference_vec)
@ -56,19 +52,18 @@ DROP TABLE tab;


 SELECT 'Special cases'; -- Not a systematic test, just to check that no bad things happen.
-- Test with non-default metric, M, ef_construction, ef_search

+SELECT '-- Non-default metric, M, ef_construction, ef_search';
 CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'cosineDistance', 'f32', 42, 99, 66) GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3;
 INSERT INTO tab VALUES (0, [4.6, 2.3]), (1, [2.0, 3.2]), (2, [4.2, 3.4]), (3, [5.3, 2.9]), (4, [2.4, 5.2]), (5, [5.3, 2.3]), (6, [1.0, 9.3]), (7, [5.5, 4.7]), (8, [6.4, 3.5]), (9, [5.3, 2.5]), (10, [6.4, 3.4]), (11, [6.4, 3.2]);

-SELECT '- ORDER-BY-type';
 WITH [0.0, 2.0] AS reference_vec
 SELECT id, vec, cosineDistance(vec, reference_vec)
 FROM tab
 ORDER BY cosineDistance(vec, reference_vec)
 LIMIT 3;

-SELECT '- Special case: setting "max_limit_for_ann_queries"';
+SELECT '-- Setting "max_limit_for_ann_queries"';
 EXPLAIN indexes=1
 WITH [0.0, 2.0] as reference_vec
 SELECT id, vec, cosineDistance(vec, reference_vec)
@ -78,3 +73,66 @@ LIMIT 3
 SETTINGS max_limit_for_ann_queries = 2; -- LIMIT 3 > 2 --> don't use the ann index

 DROP TABLE tab;
+
+SELECT '-- Non-default quantization';
+CREATE TABLE tab_f32(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'f32', 0, 0, 0) GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3;
+CREATE TABLE tab_f16(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'f16', 0, 0, 0) GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3;
+CREATE TABLE tab_i8(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'i8', 0, 0, 0) GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3;
+INSERT INTO tab_f32 VALUES (0, [4.6, 2.3]), (1, [2.0, 3.2]), (2, [4.2, 3.4]), (3, [5.3, 2.9]), (4, [2.4, 5.2]), (5, [5.3, 2.3]), (6, [1.0, 9.3]), (7, [5.5, 4.7]), (8, [6.4, 3.5]), (9, [5.3, 2.5]), (10, [6.4, 3.4]), (11, [6.4, 3.2]);
+INSERT INTO tab_f16 VALUES (0, [4.6, 2.3]), (1, [2.0, 3.2]), (2, [4.2, 3.4]), (3, [5.3, 2.9]), (4, [2.4, 5.2]), (5, [5.3, 2.3]), (6, [1.0, 9.3]), (7, [5.5, 4.7]), (8, [6.4, 3.5]), (9, [5.3, 2.5]), (10, [6.4, 3.4]), (11, [6.4, 3.2]);
+INSERT INTO tab_i8 VALUES (0, [4.6, 2.3]), (1, [2.0, 3.2]), (2, [4.2, 3.4]), (3, [5.3, 2.9]), (4, [2.4, 5.2]), (5, [5.3, 2.3]), (6, [1.0, 9.3]), (7, [5.5, 4.7]), (8, [6.4, 3.5]), (9, [5.3, 2.5]), (10, [6.4, 3.4]), (11, [6.4, 3.2]);
+
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_f32
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+EXPLAIN indexes = 1
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_f32
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_f16
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+EXPLAIN indexes = 1
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_f16
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_i8
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+EXPLAIN indexes = 1
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_i8
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+DROP TABLE tab_f32;
+DROP TABLE tab_f16;
+DROP TABLE tab_i8;
+
+SELECT '-- Index on Array(Float64) column';
+CREATE TABLE tab(id Int32, vec Array(Float64), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance') GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3;
+INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [1.5, 0.0]), (6, [0.0, 2.0]), (7, [0.0, 2.1]), (8, [0.0, 2.2]), (9, [0.0, 2.3]), (10, [0.0, 2.4]), (11, [0.0, 2.5]);
+
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+DROP TABLE tab;
--- a/tests/queries/0_stateless/03221_key_condition_bug.reference
+++ b/tests/queries/0_stateless/03221_key_condition_bug.reference
@ -0,0 +1 @@
+50
--- a/tests/queries/0_stateless/03221_key_condition_bug.sql
+++ b/tests/queries/0_stateless/03221_key_condition_bug.sql
@ -0,0 +1,11 @@
+CREATE TABLE IF NOT EXISTS report_metrics_v2
+(
+	`a` UInt64
+) Engine = MergeTree()
+ORDER BY a;
+
+insert into report_metrics_v2 SELECT * FROM system.numbers LIMIT 50000;
+
+SELECT count(*) from report_metrics_v2 WHERE (intDiv(a, 50) = 200) AND (intDiv(a, 50000) = 0);
+
+DROP TABLE report_metrics_v2;
--- a/tests/queries/0_stateless/03223_analyzer_with_cube_fuzz.reference
+++ b/tests/queries/0_stateless/03223_analyzer_with_cube_fuzz.reference
--- a/tests/queries/0_stateless/03223_analyzer_with_cube_fuzz.sql
+++ b/tests/queries/0_stateless/03223_analyzer_with_cube_fuzz.sql
@ -0,0 +1,29 @@
+SET enable_analyzer = 1;
+
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+
+CREATE TABLE t1 (`a` Int64, `b` Int64) ENGINE = MergeTree ORDER BY a;
+CREATE TABLE t2 (`key` Int32, `val` Int64) ENGINE = MergeTree ORDER BY key;
+insert into t1 Select number, number from numbers(100000);
+insert into t2 Select number, number from numbers(100000);
+
+
+SELECT
+    1 * 1000.0001,
+    (count(1.) = -2147483647) AND (count(a) = 1.1920928955078125e-7) AND (count(val) = 1048577) AND (sum(val) = ((NULL * 1048576) / -9223372036854775807)) AND (sum(a) = ((9223372036854775806 * 10000000000.) / 1048575))
+FROM
+(
+    SELECT
+        a,
+        val
+    FROM t1
+    FULL OUTER JOIN t2 ON (t1.a = t2.key) OR (1 * inf) OR (t1.b = t2.key)
+)
+GROUP BY '65537'
+    WITH CUBE
+FORMAT Null
+SETTINGS max_block_size = 100, join_use_nulls = 1, max_execution_time = 1., max_result_rows = 0, max_result_bytes = 0; -- { serverError TIMEOUT_EXCEEDED }
+
+DROP TABLE t1;
+DROP TABLE t2;