mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-13 18:02:24 +00:00
56 lines
2.6 KiB
SQL
56 lines
2.6 KiB
SQL
-- Constant-argument checks: both operands are string literals and there is no
-- FROM clause. Each quoted label is emitted as its own one-row result right
-- before the value it describes.
SELECT 'const arguments byteHammingDistance';
SELECT byteHammingDistance('abcd', 'abcd');

SELECT 'const arguments editDistance';
SELECT editDistance('clickhouse', 'mouse');

SELECT 'const arguments stringJaccardIndex';
SELECT stringJaccardIndex('clickhouse', 'mouse');
-- Fixture: a two-column String table read by every non-constant query below.
-- It is dropped first so a table left over from an earlier run cannot make
-- CREATE TABLE fail.
DROP TABLE IF EXISTS t;

CREATE TABLE t
(
    s1 String,
    s2 String
)
ENGINE = MergeTree
ORDER BY s1;

-- The target columns are named explicitly so each pair stays bound to (s1, s2)
-- even if the table definition is later reordered or extended.
-- Rows: s2 missing the last character of s1, s2 missing the first character of
-- s1, an empty s2, and an s1 that is shorter than s2.
INSERT INTO t (s1, s2) VALUES
    ('abcdefg', 'abcdef'),
    ('abcdefg', 'bcdefg'),
    ('abcdefg', ''),
    ('mouse', 'clickhouse');
-- byteHammingDistance over the rows of t in three operand shapes:
-- column/column, literal/column and column/literal.
-- ORDER BY s1, s2 fixes the row order so the printed results are deterministic.
SELECT 'byteHammingDistance';
SELECT byteHammingDistance(s1, s2)
FROM t
ORDER BY s1, s2;

SELECT 'byteHammingDistance(const, non const)';
SELECT byteHammingDistance('abc', s2)
FROM t
ORDER BY s1, s2;

SELECT 'byteHammingDistance(non const, const)';
SELECT byteHammingDistance(s2, 'def')
FROM t
ORDER BY s1, s2;
-- The same three operand shapes (column/column, literal/column, column/literal)
-- called through the name `mismatches`, which the label describes as an alias.
-- A single label row precedes all three result sets.
SELECT 'mismatches(alias)';

SELECT mismatches(s1, s2)
FROM t
ORDER BY s1, s2;

SELECT mismatches('abc', s2)
FROM t
ORDER BY s1, s2;

SELECT mismatches(s2, 'def')
FROM t
ORDER BY s1, s2;
-- stringJaccardIndex and its UTF8 variant over the same column pairs.
-- Both result sets follow the one label row; ORDER BY keeps them aligned row
-- for row.
SELECT 'stringJaccardIndex';

SELECT stringJaccardIndex(s1, s2)
FROM t
ORDER BY s1, s2;

SELECT stringJaccardIndexUTF8(s1, s2)
FROM t
ORDER BY s1, s2;
-- UTF-8 validation is not exhaustive, so some malformed inputs still yield a
-- result instead of an error. materialize() turns each literal into a
-- non-constant column value.

-- Inputs that are expected to return a value.
-- ASCII bytes "Hel".
SELECT stringJaccardIndexUTF8(
    materialize('hello'),
    materialize('\x48\x65\x6C')
);
-- Four 0xFF bytes (0xFF never occurs in well-formed UTF-8).
SELECT stringJaccardIndexUTF8(
    materialize('hello'),
    materialize('\xFF\xFF\xFF\xFF')
);
-- "A" followed by the three-byte encoding of U+20AC.
SELECT stringJaccardIndexUTF8(
    materialize('hello'),
    materialize('\x41\xE2\x82\xAC')
);
-- The four-byte encoding of U+1F642.
SELECT stringJaccardIndexUTF8(
    materialize('hello'),
    materialize('\xF0\x9F\x99\x82')
);
-- A single 0xFF byte.
SELECT stringJaccardIndexUTF8(
    materialize('hello'),
    materialize('\xFF')
);

-- Inputs that are expected to be rejected. The trailing hint names the error
-- and must stay on the same line as the end of its statement.
SELECT stringJaccardIndexUTF8(
    materialize('hello'),
    materialize('\xC2\x01')
); -- { serverError BAD_ARGUMENTS }
SELECT stringJaccardIndexUTF8(
    materialize('hello'),
    materialize('\xC1\x81')
); -- { serverError BAD_ARGUMENTS }
SELECT stringJaccardIndexUTF8(
    materialize('hello'),
    materialize('\xF0\x80\x80\x41')
); -- { serverError BAD_ARGUMENTS }
SELECT stringJaccardIndexUTF8(
    materialize('hello'),
    materialize('\xC0\x80')
); -- { serverError BAD_ARGUMENTS }
SELECT stringJaccardIndexUTF8(
    materialize('hello'),
    materialize('\xD8\x00 ')
); -- { serverError BAD_ARGUMENTS }
SELECT stringJaccardIndexUTF8(
    materialize('hello'),
    materialize('\xDC\x00')
); -- { serverError BAD_ARGUMENTS }
-- The same emoji operands passed to both variants, returned side by side in a
-- single row: the UTF8 function first, the plain function second.
SELECT
    stringJaccardIndexUTF8('😃🌍', '🙃😃🌑'),
    stringJaccardIndex('😃🌍', '🙃😃🌑');
-- editDistance and levenshteinDistance run over identical operands with the
-- same ordering, each preceded by its own label row.
SELECT 'editDistance';
SELECT editDistance(s1, s2)
FROM t
ORDER BY s1, s2;

SELECT 'levenshteinDistance';
SELECT levenshteinDistance(s1, s2)
FROM t
ORDER BY s1, s2;

-- A random string of length power(2, 17) as the first operand is expected to
-- fail with the error named in the trailing hint.
SELECT editDistance(randomString(power(2, 17)), 'abc'); -- { serverError TOO_LARGE_STRING_SIZE}
-- Teardown: remove the fixture table created for the queries above.
DROP TABLE t;