mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Add testcase for ANN index usage with subquery
This commit is contained in:
parent
592910d538
commit
7a98f7fecc
@ -111,15 +111,16 @@ ANN indexes are built during column insertion and merge. As a result, `INSERT` a
|
||||
tables. ANN indexes are ideally used only with immutable or rarely changed data, that is, when there are far more read requests than write
|
||||
requests.
|
||||
|
||||
ANN indexes support these queries:
|
||||
ANN indexes support this type of query:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table
|
||||
[WHERE ...]
|
||||
ORDER BY Distance(vectors, Point)
|
||||
LIMIT N
|
||||
```
|
||||
``` sql
|
||||
WITH [...] AS reference_vector
|
||||
SELECT *
|
||||
FROM table
|
||||
WHERE ... -- WHERE clause is optional
|
||||
ORDER BY Distance(vectors, reference_vector)
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
:::tip
|
||||
To avoid writing out large vectors, you can use [query
|
||||
|
@ -4,3 +4,39 @@ It is possible to create parts with different Array vector sizes but there will
|
||||
Correctness of index with > 1 mark
|
||||
1 [1,0] 0
|
||||
9000 [9000,0] 0
|
||||
Issue #69085: Reference vector computed by a subquery
|
||||
Expression (Projection)
|
||||
Limit (preliminary LIMIT (without OFFSET))
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Expression (Before ORDER BY)
|
||||
ReadFromMergeTree (default.tab)
|
||||
Indexes:
|
||||
PrimaryKey
|
||||
Condition: true
|
||||
Parts: 1/1
|
||||
Granules: 4/4
|
||||
Skip
|
||||
Name: idx
|
||||
Description: vector_similarity GRANULARITY 2
|
||||
Parts: 1/1
|
||||
Granules: 2/4
|
||||
Expression (Projection)
|
||||
Limit (preliminary LIMIT (without OFFSET))
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Expression (Before ORDER BY)
|
||||
ReadFromMergeTree (default.tab)
|
||||
Indexes:
|
||||
PrimaryKey
|
||||
Condition: true
|
||||
Parts: 1/1
|
||||
Granules: 4/4
|
||||
Expression (Projection)
|
||||
Limit (preliminary LIMIT (without OFFSET))
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Expression (Before ORDER BY)
|
||||
ReadFromMergeTree (default.tab)
|
||||
Indexes:
|
||||
PrimaryKey
|
||||
Condition: true
|
||||
Parts: 1/1
|
||||
Granules: 4/4
|
||||
|
@ -53,3 +53,50 @@ ORDER BY L2Distance(vec, reference_vec)
|
||||
LIMIT 1;
|
||||
|
||||
DROP TABLE tab;
|
||||
|
||||
SELECT 'Issue #69085: Reference vector computed by a subquery';
|
||||
|
||||
-- Fixture for the EXPLAIN checks below: 12 rows with index_granularity = 3 yield 4 granules,
-- which matches the 'Granules: 4/4' lines in the expected plans.
-- NOTE(review): GRANULARITY 2 presumably makes one index block span two granules - confirm against the skip-index docs.
CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'cosineDistance', 'f16', 0, 0, 0) GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3;
|
||||
INSERT INTO tab VALUES (0, [4.6, 2.3]), (1, [2.0, 3.2]), (2, [4.2, 3.4]), (3, [5.3, 2.9]), (4, [2.4, 5.2]), (5, [5.3, 2.3]), (6, [1.0, 9.3]), (7, [5.5, 4.7]), (8, [6.4, 3.5]), (9, [5.3, 2.5]), (10, [6.4, 3.4]), (11, [6.4, 3.2]);
|
||||
|
||||
-- Works: with a literal array as reference vector, the expected plan lists the vector similarity index `idx` under 'Skip' (Granules: 2/4).
|
||||
EXPLAIN indexes = 1
|
||||
WITH [0., 2.] AS reference_vec
|
||||
SELECT
|
||||
id,
|
||||
vec,
|
||||
cosineDistance(vec, reference_vec) AS distance
|
||||
FROM tab
|
||||
ORDER BY distance
|
||||
LIMIT 1
|
||||
SETTINGS enable_analyzer = 0;
|
||||
|
||||
-- Does not work (issue #69085): the reference vector is a scalar subquery reading from `tab`; the expected plan shows only the primary key, i.e. no 'Skip' entry for `idx`.
|
||||
EXPLAIN indexes = 1
|
||||
WITH (
|
||||
SELECT vec
|
||||
FROM tab
|
||||
LIMIT 1
|
||||
) AS reference_vec
|
||||
SELECT
|
||||
id,
|
||||
vec,
|
||||
cosineDistance(vec, reference_vec) AS distance
|
||||
FROM tab
|
||||
ORDER BY distance
|
||||
LIMIT 1
|
||||
SETTINGS enable_analyzer = 0;
|
||||
|
||||
-- Does not work either: even a scalar subquery that returns a constant array yields a plan with only the primary key, i.e. no 'Skip' entry for `idx`.
|
||||
EXPLAIN indexes = 1
|
||||
WITH (
|
||||
SELECT [0., 2.]
|
||||
) AS reference_vec
|
||||
SELECT
|
||||
id,
|
||||
vec,
|
||||
cosineDistance(vec, reference_vec) AS distance
|
||||
FROM tab
|
||||
ORDER BY distance
|
||||
LIMIT 1
|
||||
SETTINGS enable_analyzer = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user