mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 00:22:29 +00:00
Simplify Annoy parameterization
This commit is contained in:
parent
5d871c7fa0
commit
f577bf35fc
@ -328,14 +328,16 @@ MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(
|
||||
|
||||
MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index)
|
||||
{
|
||||
uint64_t param = 100;
|
||||
uint64_t trees = 100;
|
||||
String distance_name = "L2Distance";
|
||||
if (!index.arguments.empty() && !index.arguments[0].tryGet<uint64_t>(param))
|
||||
if (!index.arguments[0].tryGet<String>(distance_name))
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Can't parse first argument");
|
||||
if (index.arguments.size() > 1 && !index.arguments[1].tryGet<String>(distance_name))
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Can't parse second argument");
|
||||
return std::make_shared<MergeTreeIndexAnnoy>(index, param, distance_name);
|
||||
|
||||
if (!index.arguments.empty())
|
||||
distance_name = index.arguments[0].get<String>();
|
||||
|
||||
if (index.arguments.size() > 1)
|
||||
trees = index.arguments[1].get<uint64_t>();
|
||||
|
||||
return std::make_shared<MergeTreeIndexAnnoy>(index, trees, distance_name);
|
||||
}
|
||||
|
||||
void annoyIndexValidator(const IndexDescription & index, bool /* attach */)
|
||||
@ -345,12 +347,11 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */)
|
||||
if (index.arguments.size() > 2)
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index must not have more than two parameters");
|
||||
|
||||
if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::UInt64
|
||||
&& index.arguments[0].getType() != Field::Types::String)
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index first argument must be UInt64 or String");
|
||||
if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::String)
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Distance function argument of Annoy index must be of type String");
|
||||
|
||||
if (index.arguments.size() > 1 && index.arguments[1].getType() != Field::Types::String)
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index second argument must be String");
|
||||
if (index.arguments.size() > 1 && index.arguments[1].getType() != Field::Types::UInt64)
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Number of trees argument of Annoy index must be UInt64");
|
||||
|
||||
/// Check that the index is created on a single column
|
||||
|
||||
|
@ -91,7 +91,7 @@ CREATE TABLE 02354_annoy_cosine
|
||||
(
|
||||
id Int32,
|
||||
embedding Array(Float32),
|
||||
INDEX annoy_index embedding TYPE annoy(100, 'cosineDistance') GRANULARITY 1
|
||||
INDEX annoy_index embedding TYPE annoy('cosineDistance', 100) GRANULARITY 1
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
@ -120,7 +120,7 @@ CREATE TABLE 02354_annoy_cosine
|
||||
(
|
||||
id Int32,
|
||||
embedding Array(Float32),
|
||||
INDEX annoy_index embedding TYPE annoy(100, 'cosineDistance') GRANULARITY 1
|
||||
INDEX annoy_index embedding TYPE annoy('cosineDistance', 100) GRANULARITY 1
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
@ -141,72 +141,3 @@ ORDER BY cosineDistance(embedding, [0.0, 0.0, 10.0])
|
||||
LIMIT 3;
|
||||
DROP TABLE IF EXISTS 02354_annoy_cosine;
|
||||
" | grep "annoy_index"
|
||||
|
||||
# # Check that weird base columns are rejected
|
||||
$CLICKHOUSE_CLIENT -nm --allow_experimental_annoy_index=1 -q "
|
||||
DROP TABLE IF EXISTS 02354_annoy;
|
||||
|
||||
-- Index spans >1 column
|
||||
|
||||
CREATE TABLE 02354_annoy
|
||||
(
|
||||
id Int32,
|
||||
embedding Array(Float32),
|
||||
INDEX annoy_index (embedding, id) TYPE annoy(100) GRANULARITY 1
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
SETTINGS index_granularity=5, index_granularity_bytes = '10Mi'; -- {serverError 7 }
|
||||
|
||||
-- Index must be created on Array(Float32) or Tuple(Float32)
|
||||
|
||||
CREATE TABLE 02354_annoy
|
||||
(
|
||||
id Int32,
|
||||
embedding Float32,
|
||||
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
SETTINGS index_granularity=5, index_granularity_bytes = '10Mi'; -- {serverError 44 }
|
||||
|
||||
|
||||
CREATE TABLE 02354_annoy
|
||||
(
|
||||
id Int32,
|
||||
embedding Array(Float64),
|
||||
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
SETTINGS index_granularity=5, index_granularity_bytes = '10Mi'; -- {serverError 44 }
|
||||
|
||||
CREATE TABLE 02354_annoy
|
||||
(
|
||||
id Int32,
|
||||
embedding Tuple(Float32, Float64),
|
||||
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
SETTINGS index_granularity=5, index_granularity_bytes = '10Mi'; -- {serverError 44 }
|
||||
|
||||
CREATE TABLE 02354_annoy
|
||||
(
|
||||
id Int32,
|
||||
embedding Array(LowCardinality(Float32)),
|
||||
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
SETTINGS index_granularity=5, index_granularity_bytes = '10Mi'; -- {serverError 44 }
|
||||
|
||||
CREATE TABLE 02354_annoy
|
||||
(
|
||||
id Int32,
|
||||
embedding Array(Nullable(Float32)),
|
||||
INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
SETTINGS index_granularity=5, index_granularity_bytes = '10Mi'; -- {serverError 44 }"
|
||||
|
1
tests/queries/0_stateless/02354_annoy_index.reference
Normal file
1
tests/queries/0_stateless/02354_annoy_index.reference
Normal file
@ -0,0 +1 @@
|
||||
Negative tests
|
26
tests/queries/0_stateless/02354_annoy_index.sql
Normal file
26
tests/queries/0_stateless/02354_annoy_index.sql
Normal file
@ -0,0 +1,26 @@
|
||||
-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-upgrade-check
|
||||
|
||||
SET allow_experimental_annoy_index = 1;
|
||||
|
||||
DROP TABLE IF EXISTS tab;
|
||||
|
||||
SELECT 'Negative tests';
|
||||
|
||||
-- must have at most 2 arguments
|
||||
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index embedding TYPE annoy('too', 'many', 'arguments')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
|
||||
|
||||
-- first argument (distance function) must be String
|
||||
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index embedding TYPE annoy(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
|
||||
|
||||
-- 2nd argument (number of trees) must be UInt64
|
||||
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index embedding TYPE annoy('L2Distance', 'not an UInt64')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
|
||||
|
||||
-- must be created on single column
|
||||
CREATE TABLE tab(id Int32, embedding Array(Float32), INDEX annoy_index (embedding, id) TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS }
|
||||
|
||||
-- must be created on Array/Tuple(Float32) columns
|
||||
SET allow_suspicious_low_cardinality_types = 1;
|
||||
CREATE TABLE tab(id Int32, embedding Float32, INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
|
||||
CREATE TABLE tab(id Int32, embedding Array(Float64), INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
|
||||
CREATE TABLE tab(id Int32, embedding LowCardinality(Float32), INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
|
||||
CREATE TABLE tab(id Int32, embedding Nullable(Float32), INDEX annoy_index embedding TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
|
Loading…
Reference in New Issue
Block a user