Rename jaccardIndex -> stringJaccardIndex, upd doc

This commit is contained in:
vdimir 2023-10-02 11:57:59 +00:00
parent 1f936d1966
commit 14ba4696f9
No known key found for this signature in database
GPG Key ID: 6EE4CE2BEDC51862
2 changed files with 21 additions and 21 deletions

View File

@ -706,30 +706,30 @@ Result:
└───────────────────────────────────────────┘
```
- Alias: mismatches
Alias: mismatches
## jaccardIndex
## stringJaccardIndex
Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings.
**Syntax**
```sql
byteJaccardIndex(string1, string2)
stringJaccardIndex(string1, string2)
```
**Examples**
``` sql
SELECT jaccardIndex('clickhouse', 'mouse');
SELECT stringJaccardIndex('clickhouse', 'mouse');
```
Result:
``` text
┌─jaccardIndex('clickhouse', 'mouse')─┐
│ 0.4 │
└─────────────────────────────────────────┘
┌─stringJaccardIndex('clickhouse', 'mouse')─┐
│ 0.4 │
└───────────────────────────────────────────┘
```
## editDistance
@ -752,8 +752,8 @@ Result:
``` text
┌─editDistance('clickhouse', 'mouse')─┐
6 │
└─────────────────────────────────────────
│ 6 │
└─────────────────────────────────────┘
```
- Alias: levenshteinDistance
Alias: levenshteinDistance

View File

@ -156,9 +156,8 @@ struct ByteEditDistanceImpl
if (haystack_size > max_string_size || needle_size > max_string_size)
throw Exception(
ErrorCodes::TOO_LARGE_STRING_SIZE,
"The string size is too big for function byteEditDistance. "
"Should be at most {}",
max_string_size);
"The string size is too big for function editDistance, "
"should be at most {}", max_string_size);
PaddedPODArray<ResultType> distances0(haystack_size + 1, 0);
PaddedPODArray<ResultType> distances1(haystack_size + 1, 0);
@ -196,18 +195,19 @@ struct NameByteHammingDistance
{
static constexpr auto name = "byteHammingDistance";
};
using FunctionByteHammingDistance = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteHammingDistanceImpl>, NameByteHammingDistance>;
/// Name tag for the `editDistance` SQL function.
/// Carries the function's registered name as a compile-time constant; it is passed as the
/// name-providing template argument of FunctionsStringSimilarity and is the target of the
/// `levenshteinDistance` alias registration.
struct NameEditDistance
{
    static constexpr const char * name = "editDistance";
};
using FunctionEditDistance = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteEditDistanceImpl>, NameEditDistance>;
using FunctionByteHammingDistance = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteHammingDistanceImpl>, NameByteHammingDistance>;
using FunctionByteEditDistance = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteEditDistanceImpl>, NameEditDistance>;
struct NameJaccardIndex { static constexpr auto name = "jaccardIndex"; };
using FunctionByteJaccardIndex = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteJaccardIndexImpl>, NameJaccardIndex>;
/// Name tag for the `stringJaccardIndex` SQL function.
/// Carries the function's registered name as a compile-time constant; it is passed as the
/// name-providing template argument of FunctionsStringSimilarity.
struct NameJaccardIndex
{
    static constexpr const char * name = "stringJaccardIndex";
};
using FunctionStringJaccardIndex = FunctionsStringSimilarity<FunctionStringDistanceImpl<ByteJaccardIndexImpl>, NameJaccardIndex>;
REGISTER_FUNCTION(StringDistance)
{
@ -215,11 +215,11 @@ REGISTER_FUNCTION(StringDistance)
FunctionDocumentation{.description = R"(Calculates Hamming distance between two byte-strings.)"});
factory.registerAlias("mismatches", NameByteHammingDistance::name);
factory.registerFunction<FunctionByteEditDistance>(
factory.registerFunction<FunctionEditDistance>(
FunctionDocumentation{.description = R"(Calculates the edit distance between two byte-strings.)"});
factory.registerAlias("levenshteinDistance", NameEditDistance::name);
factory.registerFunction<FunctionByteJaccardIndex>(
factory.registerFunction<FunctionStringJaccardIndex>(
FunctionDocumentation{.description = R"(Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings.)"});
}
}