mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Merge pull request #69090 from ucasfl/fix-vector-index
Fix bug that vector similarity index does not work for cosine distance
This commit is contained in:
commit
8c9cf37858
@ -40,10 +40,12 @@ void extractReferenceVectorFromLiteral(std::vector<Float64> & reference_vector,
|
||||
}
|
||||
}
|
||||
|
||||
VectorSimilarityCondition::Info::DistanceFunction stringToDistanceFunction(std::string_view distance_function)
|
||||
VectorSimilarityCondition::Info::DistanceFunction stringToDistanceFunction(const String & distance_function)
|
||||
{
|
||||
if (distance_function == "L2Distance")
|
||||
return VectorSimilarityCondition::Info::DistanceFunction::L2;
|
||||
else if (distance_function == "cosineDistance")
|
||||
return VectorSimilarityCondition::Info::DistanceFunction::Cosine;
|
||||
else
|
||||
return VectorSimilarityCondition::Info::DistanceFunction::Unknown;
|
||||
}
|
||||
@ -57,7 +59,7 @@ VectorSimilarityCondition::VectorSimilarityCondition(const SelectQueryInfo & que
|
||||
, index_is_useful(checkQueryStructure(query_info))
|
||||
{}
|
||||
|
||||
bool VectorSimilarityCondition::alwaysUnknownOrTrue(String distance_function) const
|
||||
bool VectorSimilarityCondition::alwaysUnknownOrTrue(const String & distance_function) const
|
||||
{
|
||||
if (!index_is_useful)
|
||||
return true; /// query isn't supported
|
||||
|
@ -57,7 +57,8 @@ public:
|
||||
enum class DistanceFunction : uint8_t
|
||||
{
|
||||
Unknown,
|
||||
L2
|
||||
L2,
|
||||
Cosine
|
||||
};
|
||||
|
||||
std::vector<Float64> reference_vector;
|
||||
@ -68,7 +69,7 @@ public:
|
||||
};
|
||||
|
||||
/// Returns false if query can be speeded up by an ANN index, true otherwise.
|
||||
bool alwaysUnknownOrTrue(String distance_function) const;
|
||||
bool alwaysUnknownOrTrue(const String & distance_function) const;
|
||||
|
||||
std::vector<Float64> getReferenceVector() const;
|
||||
size_t getDimensions() const;
|
||||
@ -141,18 +142,12 @@ private:
|
||||
/// Traverses the AST of ORDERBY section
|
||||
void traverseOrderByAST(const ASTPtr & node, RPN & rpn);
|
||||
|
||||
/// Returns true and stores ANNExpr if the query has valid WHERE section
|
||||
static bool matchRPNWhere(RPN & rpn, Info & info);
|
||||
|
||||
/// Returns true and stores ANNExpr if the query has valid ORDERBY section
|
||||
static bool matchRPNOrderBy(RPN & rpn, Info & info);
|
||||
|
||||
/// Returns true and stores Length if we have valid LIMIT clause in query
|
||||
static bool matchRPNLimit(RPNElement & rpn, UInt64 & limit);
|
||||
|
||||
/// Matches dist function, reference vector, column name
|
||||
static bool matchMainParts(RPN::iterator & iter, const RPN::iterator & end, Info & info);
|
||||
|
||||
/// Gets float or int from AST node
|
||||
static float getFloatOrIntLiteralOrPanic(const RPN::iterator& iter);
|
||||
|
||||
|
@ -41,6 +41,21 @@ Special cases
|
||||
6 [1,9.3] 0.005731362878640178
|
||||
1 [2,3.2] 0.15200169244542905
|
||||
7 [5.5,4.7] 0.3503476876550442
|
||||
Expression (Projection)
|
||||
Limit (preliminary LIMIT (without OFFSET))
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Expression (Before ORDER BY)
|
||||
ReadFromMergeTree (default.tab)
|
||||
Indexes:
|
||||
PrimaryKey
|
||||
Condition: true
|
||||
Parts: 1/1
|
||||
Granules: 4/4
|
||||
Skip
|
||||
Name: idx
|
||||
Description: vector_similarity GRANULARITY 2
|
||||
Parts: 1/1
|
||||
Granules: 2/4
|
||||
-- Setting "max_limit_for_ann_queries"
|
||||
Expression (Projection)
|
||||
Limit (preliminary LIMIT (without OFFSET))
|
||||
|
@ -63,6 +63,13 @@ FROM tab
|
||||
ORDER BY cosineDistance(vec, reference_vec)
|
||||
LIMIT 3;
|
||||
|
||||
EXPLAIN indexes = 1
|
||||
WITH [0.0, 2.0] AS reference_vec
|
||||
SELECT id, vec, cosineDistance(vec, reference_vec)
|
||||
FROM tab
|
||||
ORDER BY cosineDistance(vec, reference_vec)
|
||||
LIMIT 3;
|
||||
|
||||
SELECT '-- Setting "max_limit_for_ann_queries"';
|
||||
EXPLAIN indexes=1
|
||||
WITH [0.0, 2.0] as reference_vec
|
||||
|
Loading…
Reference in New Issue
Block a user