mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Move documentation of string similarity functions to better location
This commit is contained in:
parent
3b775dee53
commit
ae1dcb5254
@ -1371,6 +1371,86 @@ Result:
|
|||||||
└──────────────────┘
|
└──────────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## byteHammingDistance
|
||||||
|
|
||||||
|
Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.
|
||||||
|
|
||||||
|
**Syntax**
|
||||||
|
|
||||||
|
```sql
|
||||||
|
byteHammingDistance(string1, string2)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Examples**
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
SELECT byteHammingDistance('karolin', 'kathrin');
|
||||||
|
```
|
||||||
|
|
||||||
|
Result:
|
||||||
|
|
||||||
|
``` text
|
||||||
|
┌─byteHammingDistance('karolin', 'kathrin')─┐
|
||||||
|
│ 3 │
|
||||||
|
└───────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
Alias: mismatches
|
||||||
|
|
||||||
|
## stringJaccardIndex
|
||||||
|
|
||||||
|
Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings.
|
||||||
|
|
||||||
|
**Syntax**
|
||||||
|
|
||||||
|
```sql
|
||||||
|
stringJaccardIndex(string1, string2)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Examples**
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
SELECT stringJaccardIndex('clickhouse', 'mouse');
|
||||||
|
```
|
||||||
|
|
||||||
|
Result:
|
||||||
|
|
||||||
|
``` text
|
||||||
|
┌─stringJaccardIndex('clickhouse', 'mouse')─┐
|
||||||
|
│ 0.4 │
|
||||||
|
└───────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## stringJaccardIndexUTF8
|
||||||
|
|
||||||
|
Like [stringJaccardIndex](#stringJaccardIndex) but for UTF8-encoded strings.
|
||||||
|
|
||||||
|
## editDistance
|
||||||
|
|
||||||
|
Calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two byte strings.
|
||||||
|
|
||||||
|
**Syntax**
|
||||||
|
|
||||||
|
```sql
|
||||||
|
editDistance(string1, string2)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Examples**
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
SELECT editDistance('clickhouse', 'mouse');
|
||||||
|
```
|
||||||
|
|
||||||
|
Result:
|
||||||
|
|
||||||
|
``` text
|
||||||
|
┌─editDistance('clickhouse', 'mouse')─┐
|
||||||
|
│ 6 │
|
||||||
|
└─────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
Alias: levenshteinDistance
|
||||||
|
|
||||||
## initcap
|
## initcap
|
||||||
|
|
||||||
Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
|
Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
|
||||||
|
@ -36,12 +36,12 @@ SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xFF\xFF\xFF\xF
|
|||||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\x41\xE2\x82\xAC'));
|
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\x41\xE2\x82\xAC'));
|
||||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xF0\x9F\x99\x82'));
|
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xF0\x9F\x99\x82'));
|
||||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xFF'));
|
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xFF'));
|
||||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC2\x01')); -- { serverError 36 }
|
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC2\x01')); -- { serverError BAD_ARGUMENTS }
|
||||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC1\x81')); -- { serverError 36 }
|
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC1\x81')); -- { serverError BAD_ARGUMENTS }
|
||||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xF0\x80\x80\x41')); -- { serverError 36 }
|
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xF0\x80\x80\x41')); -- { serverError BAD_ARGUMENTS }
|
||||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC0\x80')); -- { serverError 36 }
|
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xC0\x80')); -- { serverError BAD_ARGUMENTS }
|
||||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xD8\x00 ')); -- { serverError 36 }
|
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xD8\x00 ')); -- { serverError BAD_ARGUMENTS }
|
||||||
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xDC\x00')); -- { serverError 36 }
|
SELECT stringJaccardIndexUTF8(materialize('hello'), materialize('\xDC\x00')); -- { serverError BAD_ARGUMENTS }
|
||||||
|
|
||||||
SELECT stringJaccardIndexUTF8('😃🌍', '🙃😃🌑'), stringJaccardIndex('😃🌍', '🙃😃🌑');
|
SELECT stringJaccardIndexUTF8('😃🌍', '🙃😃🌑'), stringJaccardIndex('😃🌍', '🙃😃🌑');
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user