Allow un-quoted skip index parameters

Previously, a skip index could only be created with quoted (string literal) parameters:

   INDEX index_name column_name TYPE vector_similarity('hnsw', 'L2Distance')

Now, this syntax will work as well:

  INDEX index_name column_name TYPE vector_similarity(hnsw, L2Distance)
This commit is contained in:
Robert Schulze 2024-08-11 09:31:36 +00:00
parent d2e79f0b92
commit fb76cb90b1
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
4 changed files with 46 additions and 4 deletions

View File

@ -43,7 +43,7 @@ CREATE TABLE table
(
id Int64,
vectors Array(Float32),
INDEX index_name vec TYPE vector_similarity(method, distance_function[, quantization, connectivity, expansion_add, expansion_search]) [GRANULARITY N]
INDEX index_name vectors TYPE vector_similarity(method, distance_function[, quantization, connectivity, expansion_add, expansion_search]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
@ -59,6 +59,19 @@ Parameters:
- `ef_construction`: (optional, default: 128)
- `ef_search`: (optional, default: 64)
Example:
```sql
CREATE TABLE table
(
id Int64,
vectors Array(Float32),
INDEX idx vectors TYPE vector_similarity('hnsw', 'L2Distance') -- Alternative syntax: TYPE vector_similarity(hnsw, L2Distance)
)
ENGINE = MergeTree
ORDER BY id;
```
Vector similarity indexes are based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
algorithm](https://arxiv.org/abs/1603.09320), i.e., a hierarchical graph where each point represents a vector and the edges represent
similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the

View File

@ -3,6 +3,7 @@
#include <Storages/IndicesDescription.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTIndexDeclaration.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ParserCreateQuery.h>
@ -130,10 +131,15 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast
{
for (size_t i = 0; i < index_type->arguments->children.size(); ++i)
{
const auto * argument = index_type->arguments->children[i]->as<ASTLiteral>();
if (!argument)
const auto & child = index_type->arguments->children[i];
if (const auto * ast_literal = child->as<ASTLiteral>(); ast_literal != nullptr)
/// E.g. INDEX index_name column_name TYPE vector_similarity('hnsw', 'L2Distance')
result.arguments.emplace_back(ast_literal->value);
else if (const auto * ast_identifier = child->as<ASTIdentifier>(); ast_identifier != nullptr)
/// E.g. INDEX index_name column_name TYPE vector_similarity(hnsw, L2Distance)
result.arguments.emplace_back(ast_identifier->name());
else
throw Exception(ErrorCodes::INCORRECT_QUERY, "Only literals can be skip index arguments");
result.arguments.emplace_back(argument->value);
}
}

View File

@ -0,0 +1,23 @@
-- Tags: no-fasttest, no-ordinary-database
SET allow_experimental_vector_similarity_index = 1;
-- Tests that vector similarity index parameters can be passed both quoted ('hnsw', 'L2Distance')
-- and unquoted (hnsw, L2Distance).
DROP TABLE IF EXISTS tab1;
DROP TABLE IF EXISTS tab2;
-- Case 1: the index is declared inline in CREATE TABLE (tab1: quoted parameters, tab2: unquoted parameters).
CREATE TABLE tab1 (id Int32, vec Array(Float32), PRIMARY KEY id, INDEX vec_idx(vec) TYPE vector_similarity('hnsw', 'L2Distance'));
CREATE TABLE tab2 (id Int32, vec Array(Float32), PRIMARY KEY id, INDEX vec_idx(vec) TYPE vector_similarity(hnsw, L2Distance));
DROP TABLE tab1;
DROP TABLE tab2;
-- Case 2: the index is added to an existing table via ALTER TABLE ... ADD INDEX (tab1: quoted, tab2: unquoted).
CREATE TABLE tab1 (id Int32, vec Array(Float32), PRIMARY KEY id);
CREATE TABLE tab2 (id Int32, vec Array(Float32), PRIMARY KEY id);
ALTER TABLE tab1 ADD INDEX idx1(vec) TYPE vector_similarity('hnsw', 'L2Distance');
ALTER TABLE tab2 ADD INDEX idx2(vec) TYPE vector_similarity(hnsw, L2Distance);
DROP TABLE tab1;
DROP TABLE tab2;