ClickHouse/tests/queries/0_stateless/02354_annoy.sh

213 lines
6.0 KiB
Bash
Raw Normal View History

2022-11-12 09:23:49 +00:00
#!/usr/bin/env bash
# Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-upgrade-check
2022-11-12 09:23:49 +00:00
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
2022-09-05 12:41:46 +00:00
2022-11-12 09:23:49 +00:00
# Check that index works correctly for L2Distance and with client parameters
$CLICKHOUSE_CLIENT -nm --allow_experimental_annoy_index=1 -q "
DROP TABLE IF EXISTS 02354_annoy_l2;
CREATE TABLE 02354_annoy_l2
(
id Int32,
embedding Array(Float32),
INDEX annoy_index embedding TYPE annoy() GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5;
INSERT INTO 02354_annoy_l2 VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]);
SELECT *
FROM 02354_annoy_l2
WHERE L2Distance(embedding, [0.0, 0.0, 10.0]) < 1.0
LIMIT 5;
SELECT *
FROM 02354_annoy_l2
ORDER BY L2Distance(embedding, [0.0, 0.0, 10.0])
LIMIT 3;
SET param_02354_target_vector='[0.0, 0.0, 10.0]';
SELECT *
FROM 02354_annoy_l2
WHERE L2Distance(embedding, {02354_target_vector: Array(Float32)}) < 1.0
LIMIT 5;
SELECT *
FROM 02354_annoy_l2
ORDER BY L2Distance(embedding, {02354_target_vector: Array(Float32)})
LIMIT 3;
SELECT *
FROM 02354_annoy_l2
ORDER BY L2Distance(embedding, [0.0, 0.0])
LIMIT 3; -- { serverError 80 }
DROP TABLE IF EXISTS 02354_annoy_l2;
2022-11-12 09:23:49 +00:00
"
2022-11-12 09:23:49 +00:00
# Check that indexes are used
$CLICKHOUSE_CLIENT -nm --allow_experimental_annoy_index=1 -q "
DROP TABLE IF EXISTS 02354_annoy_l2;
CREATE TABLE 02354_annoy_l2
(
id Int32,
embedding Array(Float32),
INDEX annoy_index embedding TYPE annoy() GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5;
INSERT INTO 02354_annoy_l2 VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]);
EXPLAIN indexes=1
SELECT *
FROM 02354_annoy_l2
WHERE L2Distance(embedding, [0.0, 0.0, 10.0]) < 1.0
LIMIT 5;
EXPLAIN indexes=1
SELECT *
FROM 02354_annoy_l2
ORDER BY L2Distance(embedding, [0.0, 0.0, 10.0])
LIMIT 3;
DROP TABLE IF EXISTS 02354_annoy_l2;
" | grep "annoy_index"
# # Check that index works correctly for cosineDistance
$CLICKHOUSE_CLIENT -nm --allow_experimental_annoy_index=1 -q "
DROP TABLE IF EXISTS 02354_annoy_cosine;
CREATE TABLE 02354_annoy_cosine
(
id Int32,
embedding Array(Float32),
2022-10-31 08:53:59 +00:00
INDEX annoy_index embedding TYPE annoy(100, 'cosineDistance') GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5;
2022-11-12 09:23:49 +00:00
INSERT INTO 02354_annoy_cosine VALUES (1, [0.0, 0.0, 10.0]), (2, [0.2, 0.0, 10.0]), (3, [-0.3, 0.0, 10.0]), (4, [0.5, 0.0, 10.1]), (5, [0.8, 0.0, 10.0]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]);
SELECT *
FROM 02354_annoy_cosine
WHERE cosineDistance(embedding, [0.0, 0.0, 10.0]) < 1.0
2022-11-12 09:23:49 +00:00
LIMIT 3;
SELECT *
FROM 02354_annoy_cosine
ORDER BY cosineDistance(embedding, [0.0, 0.0, 10.0])
2022-11-12 09:23:49 +00:00
LIMIT 3;
DROP TABLE IF EXISTS 02354_annoy_cosine;
2022-11-12 09:23:49 +00:00
"
2022-11-12 09:23:49 +00:00
# # Check that indexes are used
$CLICKHOUSE_CLIENT -nm --allow_experimental_annoy_index=1 -q "
DROP TABLE IF EXISTS 02354_annoy_cosine;
CREATE TABLE 02354_annoy_cosine
(
id Int32,
embedding Array(Float32),
INDEX annoy_index embedding TYPE annoy(100, 'cosineDistance') GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5;
2022-11-12 09:23:49 +00:00
INSERT INTO 02354_annoy_cosine VALUES (1, [0.0, 0.0, 10.0]), (2, [0.2, 0.0, 10.0]), (3, [-0.3, 0.0, 10.0]), (4, [0.5, 0.0, 10.1]), (5, [0.8, 0.0, 10.0]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]);
EXPLAIN indexes=1
SELECT *
FROM 02354_annoy_cosine
WHERE cosineDistance(embedding, [0.0, 0.0, 10.0]) < 1.0
LIMIT 3;
EXPLAIN indexes=1
SELECT *
FROM 02354_annoy_cosine
ORDER BY cosineDistance(embedding, [0.0, 0.0, 10.0])
LIMIT 3;
DROP TABLE IF EXISTS 02354_annoy_cosine;
" | grep "annoy_index"
# # Check that weird base columns are rejected
$CLICKHOUSE_CLIENT -nm --allow_experimental_annoy_index=1 -q "
DROP TABLE IF EXISTS 02354_annoy;
-- Index spans >1 column
CREATE TABLE 02354_annoy
(
id Int32,
embedding Array(Float32),
INDEX annoy_index (embedding, id) TYPE annoy(100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5; -- {serverError 7 }
-- Index must be created on Array(Float32) or Tuple(Float32)
CREATE TABLE 02354_annoy
(
id Int32,
embedding Float32,
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5; -- {serverError 44 }
CREATE TABLE 02354_annoy
(
id Int32,
embedding Array(Float64),
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5; -- {serverError 44 }
CREATE TABLE 02354_annoy
(
id Int32,
embedding Tuple(Float32, Float64),
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5; -- {serverError 44 }
CREATE TABLE 02354_annoy
(
id Int32,
embedding Array(LowCardinality(Float32)),
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5; -- {serverError 44 }
CREATE TABLE 02354_annoy
(
id Int32,
embedding Array(Nullable(Float32)),
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
2022-11-12 09:23:49 +00:00
SETTINGS index_granularity=5; -- {serverError 44 }"