2023-06-08 13:13:12 +00:00
|
|
|
-- Tags: disabled, no-fasttest, no-ubsan, no-cpu-aarch64, no-upgrade-check

-- The annoy index type is gated behind an experimental setting; enable it for this session.
SET allow_experimental_annoy_index = 1;
|
|
|
|
|
2023-06-05 10:49:45 +00:00
|
|
|
SELECT '--- Test with Array ---';

DROP TABLE IF EXISTS tab;

-- Embeddings stored as Array(Float32), indexed by an annoy index with default arguments.
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity=5;

-- 15 rows in three groups of five: ids 1-5 point along the third axis,
-- ids 6-10 along the first axis and ids 11-15 along the second axis.
INSERT INTO tab (id, embedding) VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]);
|
|
|
|
|
2023-05-25 21:57:17 +00:00
|
|
|
-- WHERE-type query: up to 5 rows whose L2 distance to the reference vector is below 1.0.
SELECT 'WHERE type, L2Distance';
SELECT *
FROM tab
WHERE L2Distance(embedding, [0.0, 0.0, 10.0]) < 1.0
LIMIT 5;

-- ORDER BY-type query: the 3 rows with the smallest L2 distance to the reference vector.
SELECT 'ORDER BY type, L2Distance';
SELECT *
FROM tab
ORDER BY L2Distance(embedding, [0.0, 0.0, 10.0])
LIMIT 3;
|
|
|
|
|
2023-06-07 09:46:10 +00:00
|
|
|
-- TODO: this test is disabled because it produces a different error code with the analyzer; check and re-enable.
-- SELECT 'Reference ARRAYs with non-matching dimension are rejected';
-- SELECT *
-- FROM tab
-- ORDER BY L2Distance(embedding, [0.0, 0.0])
-- LIMIT 3; -- { serverError INCORRECT_QUERY }
|
2023-05-25 21:57:17 +00:00
|
|
|
|
|
|
|
-- Same WHERE-type query as above, but printing the plan with index information
-- so the expected output records whether the annoy index is applied.
SELECT 'WHERE type, L2Distance, check that index is used';
EXPLAIN indexes=1
SELECT *
FROM tab
WHERE L2Distance(embedding, [0.0, 0.0, 10.0]) < 1.0
LIMIT 5;

-- Same ORDER BY-type query as above, again with index information in the plan.
SELECT 'ORDER BY type, L2Distance, check that index is used';
EXPLAIN indexes=1
SELECT *
FROM tab
ORDER BY L2Distance(embedding, [0.0, 0.0, 10.0])
LIMIT 3;
|
|
|
|
|
2023-05-25 21:03:50 +00:00
|
|
|
-- Query-level setting annoy_index_search_k_nodes.
SELECT 'parameter annoy_index_search_k_nodes';
SELECT *
FROM tab
ORDER BY L2Distance(embedding, [5.3, 7.3, 2.1])
LIMIT 5
SETTINGS annoy_index_search_k_nodes=0; -- searches zero nodes --> no results

-- Query-level setting max_limit_for_ann_queries: the query's LIMIT (5) is above the configured value (2).
SELECT 'parameter max_limit_for_ann_queries';
EXPLAIN indexes=1
SELECT *
FROM tab
ORDER BY L2Distance(embedding, [5.3, 7.3, 2.1])
LIMIT 5
SETTINGS max_limit_for_ann_queries=2; -- doesn't use the ann index

-- Clean up so the next section can recreate the table with a different column type.
DROP TABLE tab;
|
|
|
|
|
2023-06-05 10:49:45 +00:00
|
|
|
SELECT '--- Test with Tuple ---';

-- Same layout as the Array test, but embeddings are stored as a 3-element Tuple of Float32.
CREATE TABLE tab(id Int32, embedding Tuple(Float32, Float32, Float32), INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity=5;

-- 15 rows in three groups of five: ids 1-5 point along the third axis,
-- ids 6-10 along the first axis and ids 11-15 along the second axis.
INSERT INTO tab (id, embedding) VALUES (1, (0.0, 0.0, 10.0)), (2, (0.0, 0.0, 10.5)), (3, (0.0, 0.0, 9.5)), (4, (0.0, 0.0, 9.7)), (5, (0.0, 0.0, 10.2)), (6, (10.0, 0.0, 0.0)), (7, (9.5, 0.0, 0.0)), (8, (9.7, 0.0, 0.0)), (9, (10.2, 0.0, 0.0)), (10, (10.5, 0.0, 0.0)), (11, (0.0, 10.0, 0.0)), (12, (0.0, 9.5, 0.0)), (13, (0.0, 9.7, 0.0)), (14, (0.0, 10.2, 0.0)), (15, (0.0, 10.5, 0.0));
|
|
|
|
|
|
|
|
-- WHERE-type query: up to 5 rows whose L2 distance to the reference tuple is below 1.0.
SELECT 'WHERE type, L2Distance';
SELECT *
FROM tab
WHERE L2Distance(embedding, (0.0, 0.0, 10.0)) < 1.0
LIMIT 5;

-- ORDER BY-type query: the 3 rows with the smallest L2 distance to the reference tuple.
SELECT 'ORDER BY type, L2Distance';
SELECT *
FROM tab
ORDER BY L2Distance(embedding, (0.0, 0.0, 10.0))
LIMIT 3;
|
|
|
|
|
|
|
|
-- Same WHERE-type query as above, but printing the plan with index information
-- so the expected output records whether the annoy index is applied.
SELECT 'WHERE type, L2Distance, check that index is used';
EXPLAIN indexes=1
SELECT *
FROM tab
WHERE L2Distance(embedding, (0.0, 0.0, 10.0)) < 1.0
LIMIT 5;

-- Same ORDER BY-type query as above, again with index information in the plan.
SELECT 'ORDER BY type, L2Distance, check that index is used';
EXPLAIN indexes=1
SELECT *
FROM tab
ORDER BY L2Distance(embedding, (0.0, 0.0, 10.0))
LIMIT 3;
|
2023-05-25 21:03:50 +00:00
|
|
|
|
2023-06-05 10:49:45 +00:00
|
|
|
-- Query-level setting annoy_index_search_k_nodes.
SELECT 'parameter annoy_index_search_k_nodes';
SELECT *
FROM tab
ORDER BY L2Distance(embedding, (5.3, 7.3, 2.1))
LIMIT 5
SETTINGS annoy_index_search_k_nodes=0; -- searches zero nodes --> no results

-- Query-level setting max_limit_for_ann_queries: the query's LIMIT (5) is above the configured value (2).
SELECT 'parameter max_limit_for_ann_queries';
EXPLAIN indexes=1
SELECT *
FROM tab
ORDER BY L2Distance(embedding, (5.3, 7.3, 2.1))
LIMIT 5
SETTINGS max_limit_for_ann_queries=2; -- doesn't use the ann index

-- Clean up so the next section can recreate the table with different index arguments.
DROP TABLE tab;
|
|
|
|
|
|
|
|
SELECT '--- Test alternative metric (cosine distance) and non-default NumTrees ---';

-- Index built with explicit arguments: distance function 'cosineDistance' and 200 as the second argument.
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index embedding TYPE annoy('cosineDistance', 200)) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity=5;

-- 15 rows in three groups of five: ids 1-5 point along the third axis,
-- ids 6-10 along the first axis and ids 11-15 along the second axis.
INSERT INTO tab (id, embedding) VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]);
|
|
|
|
|
|
|
|
-- NOTE(review): the index in this section is created with 'cosineDistance', but the queries
-- (and their labels) below use L2Distance. Presumably they should use cosineDistance to
-- exercise the alternative metric; left unchanged here because switching would alter the
-- expected output -- confirm and update the reference output together.
SELECT 'WHERE type, L2Distance';
SELECT *
FROM tab
WHERE L2Distance(embedding, [0.0, 0.0, 10.0]) < 1.0
LIMIT 5;

SELECT 'ORDER BY type, L2Distance';
SELECT *
FROM tab
ORDER BY L2Distance(embedding, [0.0, 0.0, 10.0])
LIMIT 3;

-- Clean up before the negative tests, which reuse the table name.
DROP TABLE tab;
|
|
|
|
|
|
|
|
SELECT '--- Negative tests ---';

-- Each CREATE TABLE below is expected to fail with the server error named in its trailing hint.

-- must have at most 2 arguments
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index embedding TYPE annoy('too', 'many', 'arguments')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }

-- first argument (distance_function) must be String
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index embedding TYPE annoy(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }

-- second argument (number of trees) must be UInt64
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index embedding TYPE annoy('L2Distance', 'not an UInt64')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }

-- reject unsupported distance functions
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index embedding TYPE annoy('wormholeDistance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA }

-- must be created on single column
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index (embedding, id) TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS }
|
|
|
|
|
|
|
|
-- must be created on Array/Tuple(Float32) columns
-- NOTE(review): presumably this setting is needed so that the LowCardinality(Float32) case
-- below reaches index validation instead of being rejected earlier -- confirm.
SET allow_suspicious_low_cardinality_types = 1;
-- scalar Float32 (not an Array/Tuple)
CREATE TABLE tab(id Int32, embedding Float32, INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-- Array with Float64 elements instead of Float32
CREATE TABLE tab(id Int32, embedding Array(Float64), INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-- LowCardinality wrapper around Float32
CREATE TABLE tab(id Int32, embedding LowCardinality(Float32), INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-- Nullable wrapper around Float32
CREATE TABLE tab(id Int32, embedding Nullable(Float32), INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
|