From c422a8f0dc14269041aba360ea0ac645e2a635e1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 13 Sep 2023 13:41:02 +0000 Subject: [PATCH 1/4] Cosmetics --- docs/en/sql-reference/data-types/array.md | 2 +- .../MergeTree/MergeTreeIndexAnnoy.cpp | 26 +++++++++---------- .../MergeTree/MergeTreeIndexUSearch.cpp | 24 ++++++++--------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index 20ce7d2ed52..0ee7c8de93c 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -4,7 +4,7 @@ sidebar_position: 52 sidebar_label: Array(T) --- -# Array(t) +# Array(T) An array of `T`-type items, with the starting array index as 1. `T` can be any data type, including an array. diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index 3ad4f81716e..15830513162 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -154,36 +154,36 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t if (const auto & column_array = typeid_cast(column_cut.get())) { - const auto & data = column_array->getData(); - const auto & array = typeid_cast(data).getData(); + const auto & column_array_data = column_array->getData(); + const auto & column_arary_data_float_data = typeid_cast(column_array_data).getData(); - if (array.empty()) + if (column_arary_data_float_data.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Array has 0 rows, {} rows expected", rows_read); - const auto & offsets = column_array->getOffsets(); - const size_t num_rows = offsets.size(); + const auto & column_array_offsets = column_array->getOffsets(); + const size_t num_rows = column_array_offsets.size(); /// Check all sizes are the same - size_t size = offsets[0]; + size_t dimension = column_array_offsets[0]; for (size_t i = 0; i < num_rows - 1; 
++i) - if (offsets[i + 1] - offsets[i] != size) + if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension) throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); if (!index) - index = std::make_shared>(size); + index = std::make_shared>(dimension); /// Add all rows of block - index->add_item(index->get_n_items(), array.data()); + index->add_item(index->get_n_items(), column_arary_data_float_data.data()); for (size_t current_row = 1; current_row < num_rows; ++current_row) - index->add_item(index->get_n_items(), &array[offsets[current_row - 1]]); + index->add_item(index->get_n_items(), &column_arary_data_float_data[column_array_offsets[current_row - 1]]); } else if (const auto & column_tuple = typeid_cast(column_cut.get())) { - const auto & columns = column_tuple->getColumns(); + const auto & column_tuple_columns = column_tuple->getColumns(); /// TODO check if calling index->add_item() directly on the block's tuples is faster than materializing everything - std::vector> data{column_tuple->size(), std::vector()}; - for (const auto & column : columns) + std::vector> data(column_tuple->size(), std::vector()); + for (const auto & column : column_tuple_columns) { const auto & pod_array = typeid_cast(column.get())->getData(); for (size_t i = 0; i < pod_array.size(); ++i) diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp index 1ab85e6bbaf..de556eb7e07 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp @@ -173,23 +173,23 @@ void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t if (const auto & column_array = typeid_cast(column_cut.get())) { - const auto & data = column_array->getData(); - const auto & array = typeid_cast(data).getData(); + const auto & column_array_data = column_array->getData(); + const auto & column_array_data_float_data = 
typeid_cast(column_array_data).getData(); - if (array.empty()) + if (column_array_data_float_data.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Array has 0 rows, {} rows expected", rows_read); - const auto & offsets = column_array->getOffsets(); - const size_t num_rows = offsets.size(); + const auto & column_array_offsets = column_array->getOffsets(); + const size_t num_rows = column_array_offsets.size(); /// Check all sizes are the same - size_t size = offsets[0]; + size_t dimension = column_array_offsets[0]; for (size_t i = 0; i < num_rows - 1; ++i) - if (offsets[i + 1] - offsets[i] != size) + if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension) throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); if (!index) - index = std::make_shared>(size, scalar_kind); + index = std::make_shared>(dimension, scalar_kind); /// Add all rows of block if (!index->reserve(unum::usearch::ceil2(index->size() + num_rows))) @@ -197,7 +197,7 @@ void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t for (size_t current_row = 0; current_row < num_rows; ++current_row) { - auto rc = index->add(static_cast(index->size()), &array[offsets[current_row - 1]]); + auto rc = index->add(static_cast(index->size()), &column_array_data_float_data[column_array_offsets[current_row - 1]]); if (!rc) throw Exception(ErrorCodes::INCORRECT_DATA, rc.error.release()); @@ -208,9 +208,9 @@ void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t } else if (const auto & column_tuple = typeid_cast(column_cut.get())) { - const auto & columns = column_tuple->getColumns(); - std::vector> data{column_tuple->size(), std::vector()}; - for (const auto & column : columns) + const auto & column_tuple_columns = column_tuple->getColumns(); + std::vector> data(column_tuple->size(), std::vector()); + for (const auto & column : column_tuple_columns) { const auto & pod_array = 
typeid_cast(column.get())->getData(); for (size_t i = 0; i < pod_array.size(); ++i) From 945179be46bcc1b07741d180a0fdaa64396994ef Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 13 Sep 2023 14:23:09 +0000 Subject: [PATCH 2/4] Annoy: Fix LOGICAL_ERROR with default values #52258 --- .../table-engines/mergetree-family/annindexes.md | 8 +++++--- src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp | 15 +++++++++++---- src/Storages/MergeTree/MergeTreeIndexUSearch.cpp | 16 ++++++++++++---- .../0_stateless/02354_annoy_index.reference | 1 + tests/queries/0_stateless/02354_annoy_index.sql | 12 ++++++++++++ .../0_stateless/02354_usearch_index.reference | 1 + .../queries/0_stateless/02354_usearch_index.sql | 14 ++++++++++++++ 7 files changed, 56 insertions(+), 11 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index 8996133f667..d6ff7f23bb4 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -203,9 +203,10 @@ Parameter `NumTrees` is the number of trees which the algorithm creates (default more accurate search results but slower index creation / query times (approximately linearly) as well as larger index sizes. :::note -Indexes over columns of type `Array` will generally work faster than indexes on `Tuple` columns. All arrays **must** have same length. Use -[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 -CHECK length(vectors) = 256`. +Indexes over columns of type `Array` will generally work faster than indexes on `Tuple` columns. All arrays must have same length. To avoid +errors, you can use a [CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT +constraint_name_1 CHECK length(vectors) = 256`. 
Also, unspecified `Array` values in INSERT statements (i.e. default values) are not +supported. ::: Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. Larger @@ -223,6 +224,7 @@ SETTINGS annoy_index_search_k_nodes=100; The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see [here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml. ::: + ## USearch {#usearch} This type of ANN index is based on the [the USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index 15830513162..f00f11359e1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -157,18 +157,25 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t const auto & column_array_data = column_array->getData(); const auto & column_arary_data_float_data = typeid_cast(column_array_data).getData(); - if (column_arary_data_float_data.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Array has 0 rows, {} rows expected", rows_read); - const auto & column_array_offsets = column_array->getOffsets(); const size_t num_rows = column_array_offsets.size(); + /// The index dimension is inferred from the inserted arrays (array cardinality). If no value was specified in the INSERT statement + /// for the annoy-indexed column (i.e. default value), we have a problem. Reject such values. + if (column_array_offsets.empty() || column_array_offsets[0] == 0) + /// (The if condition is a bit weird but I have seen either with default values) + throw Exception(ErrorCodes::INCORRECT_DATA, "Tried to insert {} rows into Annoy index but there were no values to insert. 
Likely, the INSERT used default values - these are not supported for Annoy.", rows_read); + /// Check all sizes are the same size_t dimension = column_array_offsets[0]; for (size_t i = 0; i < num_rows - 1; ++i) if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension) throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); + /// Also check that previously inserted blocks have the same size as this block + if (index && index->getDimensions() != dimension) + throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); + if (!index) index = std::make_shared>(dimension); @@ -363,7 +370,7 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */) { throw Exception( ErrorCodes::ILLEGAL_COLUMN, - "Annoy indexes can only be created on columns of type Array(Float32) and Tuple(Float32)"); + "Annoy indexes can only be created on columns of type Array(Float32) and Tuple(Float32[, Float32[, ...]])"); }; DataTypePtr data_type = index.sample_block.getDataTypes()[0]; diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp index de556eb7e07..9531b9188bf 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp @@ -176,18 +176,25 @@ void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t const auto & column_array_data = column_array->getData(); const auto & column_array_data_float_data = typeid_cast(column_array_data).getData(); - if (column_array_data_float_data.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Array has 0 rows, {} rows expected", rows_read); - const auto & column_array_offsets = column_array->getOffsets(); const size_t num_rows = column_array_offsets.size(); + /// The index dimension is inferred from the inserted arrays (array cardinality). 
If no value was specified in the INSERT statement + /// for the usearch-indexed column (i.e. default value), we have a problem. Reject such values. + if (column_array_offsets.empty() || column_array_offsets[0] == 0) + /// (The if condition is a bit weird but I have seen either with default values) + throw Exception(ErrorCodes::INCORRECT_DATA, "Tried to insert {} rows into usearch index but there were no values to insert. Likely, the INSERT used default values - these are not supported for Annoy.", rows_read); + /// Check all sizes are the same size_t dimension = column_array_offsets[0]; for (size_t i = 0; i < num_rows - 1; ++i) if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension) throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); + /// Also check that previously inserted blocks have the same size as this block + if (index && index->getDimensions() != dimension) + throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); + if (!index) index = std::make_shared>(dimension, scalar_kind); @@ -413,7 +420,8 @@ void usearchIndexValidator(const IndexDescription & index, bool /* attach */) auto throw_unsupported_underlying_column_exception = []() { throw Exception( - ErrorCodes::ILLEGAL_COLUMN, "USearch indexes can only be created on columns of type Array(Float32) and Tuple(Float32)"); + ErrorCodes::ILLEGAL_COLUMN, + "USearch can only be created on columns of type Array(Float32) and Tuple(Float32[, Float32[, ...]])"); }; DataTypePtr data_type = index.sample_block.getDataTypes()[0]; diff --git a/tests/queries/0_stateless/02354_annoy_index.reference b/tests/queries/0_stateless/02354_annoy_index.reference index a0ffb1e1f7f..81f2ff8aa59 100644 --- a/tests/queries/0_stateless/02354_annoy_index.reference +++ b/tests/queries/0_stateless/02354_annoy_index.reference @@ -147,3 +147,4 @@ Expression (Projection) 9000 [9000,0,0,0] 1 (1,0,0,0) 9000 
(9000,0,0,0) +--- Bugs --- diff --git a/tests/queries/0_stateless/02354_annoy_index.sql b/tests/queries/0_stateless/02354_annoy_index.sql index eab7a62c5f0..67ef64cc301 100644 --- a/tests/queries/0_stateless/02354_annoy_index.sql +++ b/tests/queries/0_stateless/02354_annoy_index.sql @@ -281,3 +281,15 @@ ORDER BY L2Distance(vector, (9000.0, 0.0, 0.0, 0.0)) LIMIT 1; DROP TABLE tab; + +SELECT '--- Bugs ---'; + +-- Arrays with default values are rejected, issue #52258 +CREATE TABLE tab (`uuid` String, `vector` Array(Float32), `version` UInt32, INDEX idx vector TYPE annoy()) ENGINE = MergeTree() ORDER BY (uuid); +INSERT INTO tab (uuid, version) VALUES ('1', 3); -- { serverError INCORRECT_DATA } +DROP TABLE tab; + +-- Tuples with default value work +CREATE TABLE tab (`uuid` String, `vector` Tuple(Float32, Float32), `version` UInt32, INDEX idx vector TYPE annoy()) ENGINE = MergeTree() ORDER BY (uuid); +INSERT INTO tab (uuid, version) VALUES ('1', 3); -- works fine +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02354_usearch_index.reference b/tests/queries/0_stateless/02354_usearch_index.reference index 893a092a386..c2791e99a54 100644 --- a/tests/queries/0_stateless/02354_usearch_index.reference +++ b/tests/queries/0_stateless/02354_usearch_index.reference @@ -150,3 +150,4 @@ Expression (Projection) 1 [0,0,10] 2 [0,0,10.5] 3 [0,0,9.5] +--- Bugs --- diff --git a/tests/queries/0_stateless/02354_usearch_index.sql b/tests/queries/0_stateless/02354_usearch_index.sql index e534c91b615..fc2954d6c5d 100644 --- a/tests/queries/0_stateless/02354_usearch_index.sql +++ b/tests/queries/0_stateless/02354_usearch_index.sql @@ -274,3 +274,17 @@ SELECT * FROM tab WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0 LIMIT 3; + +DROP TABLE tab; + +SELECT '--- Bugs ---'; + +-- Arrays with default values are rejected, issue #52258 +CREATE TABLE tab (`uuid` String, `vector` Array(Float32), `version` UInt32, INDEX idx vector TYPE usearch()) ENGINE = MergeTree() ORDER BY (uuid); +INSERT INTO 
tab (uuid, version) VALUES ('1', 3); -- { serverError INCORRECT_DATA } +DROP TABLE tab; + +-- Tuples with default value work +CREATE TABLE tab (`uuid` String, `vector` Tuple(Float32, Float32), `version` UInt32, INDEX idx vector TYPE usearch()) ENGINE = MergeTree() ORDER BY (uuid); +INSERT INTO tab (uuid, version) VALUES ('1', 3); -- works fine +DROP TABLE tab; From 9f009cccd5d01be29ff8e8ab6063297ec2a73b46 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 17 Sep 2023 15:22:51 +0000 Subject: [PATCH 3/4] Incorporate review feedback --- src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp | 18 ++++++++++-------- .../MergeTree/MergeTreeIndexUSearch.cpp | 18 ++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index f00f11359e1..d15d89ad6f9 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -160,21 +160,23 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t const auto & column_array_offsets = column_array->getOffsets(); const size_t num_rows = column_array_offsets.size(); - /// The index dimension is inferred from the inserted arrays (array cardinality). If no value was specified in the INSERT statement - /// for the annoy-indexed column (i.e. default value), we have a problem. Reject such values. - if (column_array_offsets.empty() || column_array_offsets[0] == 0) - /// (The if condition is a bit weird but I have seen either with default values) - throw Exception(ErrorCodes::INCORRECT_DATA, "Tried to insert {} rows into Annoy index but there were no values to insert. Likely, the INSERT used default values - these are not supported for Annoy.", rows_read); + /// The Annoy algorithm naturally assumes that the indexed vectors have dimension >= 0. 
This condition is violated if empty arrays + /// are INSERTed into an Annoy-indexed column or if no value was specified at all in which case the arrays take on their default + /// value which is also an empty array. + if (column_array->isDefaultAt(0)) + throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. Did you try to INSERT default values?", index_column_name); /// Check all sizes are the same size_t dimension = column_array_offsets[0]; for (size_t i = 0; i < num_rows - 1; ++i) if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension) - throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); + throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name); - /// Also check that previously inserted blocks have the same size as this block + /// Also check that previously inserted blocks have the same size as this block. + /// Note that this guarantees consistency of dimension only within parts. We are unable to detect inconsistent dimensions across + /// parts - for this, a little help from the user is needed, e.g. CONSTRAINT cnstr CHECK length(array) = 42. 
if (index && index->getDimensions() != dimension) - throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); + throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name); if (!index) index = std::make_shared>(dimension); diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp index 9531b9188bf..a00cab6ca59 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp @@ -179,21 +179,23 @@ void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t const auto & column_array_offsets = column_array->getOffsets(); const size_t num_rows = column_array_offsets.size(); - /// The index dimension is inferred from the inserted arrays (array cardinality). If no value was specified in the INSERT statement - /// for the usearch-indexed column (i.e. default value), we have a problem. Reject such values. - if (column_array_offsets.empty() || column_array_offsets[0] == 0) - /// (The if condition is a bit weird but I have seen either with default values) - throw Exception(ErrorCodes::INCORRECT_DATA, "Tried to insert {} rows into usearch index but there were no values to insert. Likely, the INSERT used default values - these are not supported for Annoy.", rows_read); + /// The Usearch algorithm naturally assumes that the indexed vectors have dimension >= 0. This condition is violated if empty arrays + /// are INSERTed into an Usearch-indexed column or if no value was specified at all in which case the arrays take on their default + /// value which is also an empty array. + if (column_array->isDefaultAt(0)) + throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. 
Did you try to INSERT default values?", index_column_name); /// Check all sizes are the same size_t dimension = column_array_offsets[0]; for (size_t i = 0; i < num_rows - 1; ++i) if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension) - throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); + throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name); - /// Also check that previously inserted blocks have the same size as this block + /// Also check that previously inserted blocks have the same size as this block. + /// Note that this guarantees consistency of dimension only within parts. We are unable to detect inconsistent dimensions across + /// parts - for this, a little help from the user is needed, e.g. CONSTRAINT cnstr CHECK length(array) = 42. if (index && index->getDimensions() != dimension) - throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name); + throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name); if (!index) index = std::make_shared>(dimension, scalar_kind); From de4f22e20aa1c4d044a525474f5ed2d03e6c59a5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 17 Sep 2023 15:26:36 +0000 Subject: [PATCH 4/4] Typo --- src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeIndexUSearch.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index d15d89ad6f9..3eec8614dcd 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -160,9 +160,9 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t const auto & column_array_offsets = column_array->getOffsets(); const size_t num_rows = 
column_array_offsets.size(); - /// The Annoy algorithm naturally assumes that the indexed vectors have dimension >= 0. This condition is violated if empty arrays + /// The Annoy algorithm naturally assumes that the indexed vectors have dimension >= 1. This condition is violated if empty arrays /// are INSERTed into an Annoy-indexed column or if no value was specified at all in which case the arrays take on their default - /// value which is also an empty array. + /// value which is also empty. if (column_array->isDefaultAt(0)) throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. Did you try to INSERT default values?", index_column_name); diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp index a00cab6ca59..009c004faea 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp @@ -179,9 +179,9 @@ void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t const auto & column_array_offsets = column_array->getOffsets(); const size_t num_rows = column_array_offsets.size(); - /// The Usearch algorithm naturally assumes that the indexed vectors have dimension >= 0. This condition is violated if empty arrays + /// The Usearch algorithm naturally assumes that the indexed vectors have dimension >= 1. This condition is violated if empty arrays /// are INSERTed into an Usearch-indexed column or if no value was specified at all in which case the arrays take on their default - /// value which is also an empty array. + /// value which is also empty. if (column_array->isDefaultAt(0)) throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. Did you try to INSERT default values?", index_column_name);