Simplify Annoy parameterization

This commit is contained in:
Robert Schulze 2023-05-25 20:35:38 +00:00
parent 5d871c7fa0
commit f577bf35fc
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
4 changed files with 42 additions and 83 deletions

View File

@ -328,14 +328,16 @@ MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(
/// Creates the Annoy index object (MergeTreeIndexAnnoy) from an index description.
/// Defaults visible here: 100 for the numeric argument and "L2Distance" for the distance function name.
/// NOTE(review): this span looks like a rendered diff with the +/- markers stripped, so two variants
/// of the body are interleaved. Read literally, every statement after the first `return` is
/// unreachable. Confirm against the real source file before changing anything here.
MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index)
{
uint64_t param = 100;
uint64_t trees = 100;
String distance_name = "L2Distance";
/// Variant 1 (presumably the pre-change code): argument 0 is tried as uint64_t and, if that fails,
/// as String; argument 1 is tried as String. A failed parse throws INCORRECT_DATA.
if (!index.arguments.empty() && !index.arguments[0].tryGet<uint64_t>(param))
if (!index.arguments[0].tryGet<String>(distance_name))
throw Exception(ErrorCodes::INCORRECT_DATA, "Can't parse first argument");
if (index.arguments.size() > 1 && !index.arguments[1].tryGet<String>(distance_name))
throw Exception(ErrorCodes::INCORRECT_DATA, "Can't parse second argument");
return std::make_shared<MergeTreeIndexAnnoy>(index, param, distance_name);
/// Variant 2 (presumably the post-change code): argument 0 is read as the distance function name
/// (String) and argument 1 as the number of trees (uint64_t). No type checks are done in this
/// variant; presumably annoyIndexValidator has already checked the types -- TODO confirm.
if (!index.arguments.empty())
distance_name = index.arguments[0].get<String>();
if (index.arguments.size() > 1)
trees = index.arguments[1].get<uint64_t>();
return std::make_shared<MergeTreeIndexAnnoy>(index, trees, distance_name);
}
void annoyIndexValidator(const IndexDescription & index, bool /* attach */)
@ -345,12 +347,11 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */)
if (index.arguments.size() > 2)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index must not have more than two parameters");
if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::UInt64
&& index.arguments[0].getType() != Field::Types::String)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index first argument must be UInt64 or String");
if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::String)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Distance function argument of Annoy index must be of type String");
if (index.arguments.size() > 1 && index.arguments[1].getType() != Field::Types::String)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index second argument must be String");
if (index.arguments.size() > 1 && index.arguments[1].getType() != Field::Types::UInt64)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Number of trees argument of Annoy index must be UInt64");
/// Check that the index is created on a single column

View File

@ -91,7 +91,7 @@ CREATE TABLE 02354_annoy_cosine
(
id Int32,
embedding Array(Float32),
INDEX annoy_index embedding TYPE annoy(100, 'cosineDistance') GRANULARITY 1
INDEX annoy_index embedding TYPE annoy('cosineDistance', 100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
@ -120,7 +120,7 @@ CREATE TABLE 02354_annoy_cosine
(
id Int32,
embedding Array(Float32),
INDEX annoy_index embedding TYPE annoy(100, 'cosineDistance') GRANULARITY 1
INDEX annoy_index embedding TYPE annoy('cosineDistance', 100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
@ -141,72 +141,3 @@ ORDER BY cosineDistance(embedding, [0.0, 0.0, 10.0])
LIMIT 3;
DROP TABLE IF EXISTS 02354_annoy_cosine;
" | grep "annoy_index"
# # Check that weird base columns are rejected
$CLICKHOUSE_CLIENT -nm --allow_experimental_annoy_index=1 -q "
DROP TABLE IF EXISTS 02354_annoy;
-- Index spans >1 column
CREATE TABLE 02354_annoy
(
id Int32,
embedding Array(Float32),
INDEX annoy_index (embedding, id) TYPE annoy(100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5, index_granularity_bytes = '10Mi'; -- {serverError 7 }
-- Index must be created on Array(Float32) or Tuple(Float32)
CREATE TABLE 02354_annoy
(
id Int32,
embedding Float32,
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5, index_granularity_bytes = '10Mi'; -- {serverError 44 }
CREATE TABLE 02354_annoy
(
id Int32,
embedding Array(Float64),
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5, index_granularity_bytes = '10Mi'; -- {serverError 44 }
CREATE TABLE 02354_annoy
(
id Int32,
embedding Tuple(Float32, Float64),
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5, index_granularity_bytes = '10Mi'; -- {serverError 44 }
CREATE TABLE 02354_annoy
(
id Int32,
embedding Array(LowCardinality(Float32)),
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5, index_granularity_bytes = '10Mi'; -- {serverError 44 }
CREATE TABLE 02354_annoy
(
id Int32,
embedding Array(Nullable(Float32)),
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity=5, index_granularity_bytes = '10Mi'; -- {serverError 44 }"

View File

@ -0,0 +1 @@
Negative tests

View File

@ -0,0 +1,26 @@
-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-upgrade-check
-- Negative tests for the Annoy index: every CREATE TABLE below is expected to fail with the
-- error named in its trailing serverError hint.
SET allow_experimental_annoy_index = 1;
DROP TABLE IF EXISTS tab;
SELECT 'Negative tests';
-- must have at most 2 arguments
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index embedding TYPE annoy('too', 'many', 'arguments')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
-- first argument (distance function) must be String; a number is passed here
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index embedding TYPE annoy(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
-- second argument (number of trees) must be UInt64; a string is passed here
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index embedding TYPE annoy('L2Distance', 'not an UInt64')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
-- must be created on single column
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index (embedding, id) TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS }
-- must be created on Array/Tuple(Float32) columns
-- presumably needed so that the LowCardinality(Float32) column below is not rejected before the index check -- TODO confirm
SET allow_suspicious_low_cardinality_types = 1;
CREATE TABLE tab(id Int32, embedding Float32, INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
CREATE TABLE tab(id Int32, embedding Array(Float64), INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
CREATE TABLE tab(id Int32, embedding LowCardinality(Float32), INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
CREATE TABLE tab(id Int32, embedding Nullable(Float32), INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }