mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Move documentation of string similarity functions to better location
This commit is contained in:
parent
3b775dee53
commit
ae1dcb5254
@ -1371,6 +1371,86 @@ Result:
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
## byteHammingDistance
|
||||
|
||||
Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
byteHammingDistance(string1, string2)
|
||||
```
|
||||
|
||||
**Examples**
|
||||
|
||||
``` sql
|
||||
SELECT byteHammingDistance('karolin', 'kathrin');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─byteHammingDistance('karolin', 'kathrin')─┐
|
||||
│ 3 │
|
||||
└───────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Alias: mismatches
|
||||
|
||||
## stringJaccardIndex
|
||||
|
||||
Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
stringJaccardIndex(string1, string2)
|
||||
```
|
||||
|
||||
**Examples**
|
||||
|
||||
``` sql
|
||||
SELECT stringJaccardIndex('clickhouse', 'mouse');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─stringJaccardIndex('clickhouse', 'mouse')─┐
|
||||
│ 0.4 │
|
||||
└───────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## stringJaccardIndexUTF8
|
||||
|
||||
Like [stringJaccardIndex](#stringJaccardIndex) but for UTF8-encoded strings.
|
||||
|
||||
## editDistance
|
||||
|
||||
Calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two byte strings.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
editDistance(string1, string2)
|
||||
```
|
||||
|
||||
**Examples**
|
||||
|
||||
``` sql
|
||||
SELECT editDistance('clickhouse', 'mouse');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─editDistance('clickhouse', 'mouse')─┐
|
||||
│ 6 │
|
||||
└─────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Alias: levenshteinDistance
|
||||
|
||||
## initcap
|
||||
|
||||
Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
|
||||
|
@ -36,12 +36,12 @@ SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xFF\xFF\xFF\xF
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\x41\xE2\x82\xAC'));
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xF0\x9F\x99\x82'));
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xFF'));
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC2\x01')); -- { serverError 36 }
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC1\x81')); -- { serverError 36 }
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xF0\x80\x80\x41')); -- { serverError 36 }
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC0\x80')); -- { serverError 36 }
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xD8\x00 ')); -- { serverError 36 }
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xDC\x00')); -- { serverError 36 }
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC2\x01')); -- { serverError BAD_ARGUMENTS }
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC1\x81')); -- { serverError BAD_ARGUMENTS }
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xF0\x80\x80\x41')); -- { serverError BAD_ARGUMENTS }
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC0\x80')); -- { serverError BAD_ARGUMENTS }
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xD8\x00 ')); -- { serverError BAD_ARGUMENTS }
|
||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xDC\x00')); -- { serverError BAD_ARGUMENTS }
|
||||
|
||||
SELECT stringJaccardIndexUTF8('😃🌍', '🙃😃🌑'), stringJaccardIndex('😃🌍', '🙃😃🌑');
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user