diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index 4cf558fc872..3c75b8dbef0 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -111,15 +111,16 @@ ANN indexes are built during column insertion and merge. As a result, `INSERT` a tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively when are far more read requests than write requests. -ANN indexes support these queries: +ANN indexes support this type of query: - ``` sql - SELECT * - FROM table - [WHERE ...] - ORDER BY Distance(vectors, Point) - LIMIT N - ``` +``` sql +WITH [...] AS reference_vector +SELECT * +FROM table +WHERE ... -- WHERE clause is optional +ORDER BY Distance(vectors, reference_vector) +LIMIT N +``` :::tip To avoid writing out large vectors, you can use [query diff --git a/tests/queries/0_stateless/02354_vector_search_bugs.reference b/tests/queries/0_stateless/02354_vector_search_bugs.reference index 8da05c8a7c0..ce006359f5c 100644 --- a/tests/queries/0_stateless/02354_vector_search_bugs.reference +++ b/tests/queries/0_stateless/02354_vector_search_bugs.reference @@ -4,3 +4,39 @@ It is possible to create parts with different Array vector sizes but there will Correctness of index with > 1 mark 1 [1,0] 0 9000 [9000,0] 0 +Issue #69085: Reference vector computed by a subquery +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 4/4 + Skip + Name: idx + Description: vector_similarity GRANULARITY 2 + Parts: 1/1 + Granules: 2/4 +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab) + Indexes: + 
PrimaryKey + Condition: true + Parts: 1/1 + Granules: 4/4 +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + ReadFromMergeTree (default.tab) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 4/4 diff --git a/tests/queries/0_stateless/02354_vector_search_bugs.sql b/tests/queries/0_stateless/02354_vector_search_bugs.sql index 51e2e6ce2b7..e0015d04b7e 100644 --- a/tests/queries/0_stateless/02354_vector_search_bugs.sql +++ b/tests/queries/0_stateless/02354_vector_search_bugs.sql @@ -53,3 +53,52 @@ ORDER BY L2Distance(vec, reference_vec) LIMIT 1; DROP TABLE tab; + +SELECT 'Issue #69085: Reference vector computed by a subquery'; + +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'cosineDistance', 'f16', 0, 0, 0) GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; +INSERT INTO tab VALUES (0, [4.6, 2.3]), (1, [2.0, 3.2]), (2, [4.2, 3.4]), (3, [5.3, 2.9]), (4, [2.4, 5.2]), (5, [5.3, 2.3]), (6, [1.0, 9.3]), (7, [5.5, 4.7]), (8, [6.4, 3.5]), (9, [5.3, 2.5]), (10, [6.4, 3.4]), (11, [6.4, 3.2]); + +-- Literal reference vector: the expected plan uses skip index 'idx' (Granules: 2/4) +EXPLAIN indexes = 1 +WITH [0., 2.] AS reference_vec +SELECT + id, + vec, + cosineDistance(vec, reference_vec) AS distance +FROM tab +ORDER BY distance +LIMIT 1 +SETTINGS enable_analyzer = 0; + +-- Reference vector from a scalar subquery on tab: the expected plan lists only the primary key, i.e. 'idx' is not used +EXPLAIN indexes = 1 +WITH ( + SELECT vec + FROM tab + LIMIT 1 +) AS reference_vec +SELECT + id, + vec, + cosineDistance(vec, reference_vec) AS distance +FROM tab +ORDER BY distance +LIMIT 1 +SETTINGS enable_analyzer = 0; + +-- Reference vector from a constant scalar subquery: likewise, the expected plan does not use 'idx' +EXPLAIN indexes = 1 +WITH ( + SELECT [0., 2.] +) AS reference_vec +SELECT + id, + vec, + cosineDistance(vec, reference_vec) AS distance +FROM tab +ORDER BY distance +LIMIT 1 +SETTINGS enable_analyzer = 0; + +DROP TABLE tab;