mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
ngramEntry renamed to ngramSearch
This commit is contained in:
parent
b981318999
commit
b65a9499c8
@ -479,22 +479,22 @@ struct NameNgramDistanceUTF8CaseInsensitive
|
||||
static constexpr auto name = "ngramDistanceCaseInsensitiveUTF8";
|
||||
};
|
||||
|
||||
struct NameNgramEntry
|
||||
struct NameNgramSearch
|
||||
{
|
||||
static constexpr auto name = "ngramEntry";
|
||||
static constexpr auto name = "ngramSearch";
|
||||
};
|
||||
struct NameNgramEntryCaseInsensitive
|
||||
struct NameNgramSearchCaseInsensitive
|
||||
{
|
||||
static constexpr auto name = "ngramEntryCaseInsensitive";
|
||||
static constexpr auto name = "ngramSearchCaseInsensitive";
|
||||
};
|
||||
struct NameNgramEntryUTF8
|
||||
struct NameNgramSearchUTF8
|
||||
{
|
||||
static constexpr auto name = "ngramEntryUTF8";
|
||||
static constexpr auto name = "ngramSearchUTF8";
|
||||
};
|
||||
|
||||
struct NameNgramEntryUTF8CaseInsensitive
|
||||
struct NameNgramSearchUTF8CaseInsensitive
|
||||
{
|
||||
static constexpr auto name = "ngramEntryCaseInsensitiveUTF8";
|
||||
static constexpr auto name = "ngramSearchCaseInsensitiveUTF8";
|
||||
};
|
||||
|
||||
using FunctionNgramDistance = FunctionsStringSimilarity<NgramDistanceImpl<4, UInt8, false, false, true>, NameNgramDistance>;
|
||||
@ -502,10 +502,10 @@ using FunctionNgramDistanceCaseInsensitive = FunctionsStringSimilarity<NgramDist
|
||||
using FunctionNgramDistanceUTF8 = FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, false, true>, NameNgramDistanceUTF8>;
|
||||
using FunctionNgramDistanceCaseInsensitiveUTF8 = FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, true, true>, NameNgramDistanceUTF8CaseInsensitive>;
|
||||
|
||||
using FunctionNgramEntry = FunctionsStringSimilarity<NgramDistanceImpl<4, UInt8, false, false, false>, NameNgramEntry>;
|
||||
using FunctionNgramEntryCaseInsensitive = FunctionsStringSimilarity<NgramDistanceImpl<4, UInt8, false, true, false>, NameNgramEntryCaseInsensitive>;
|
||||
using FunctionNgramEntryUTF8 = FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, false, false>, NameNgramEntryUTF8>;
|
||||
using FunctionNgramEntryCaseInsensitiveUTF8 = FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, true, false>, NameNgramEntryUTF8CaseInsensitive>;
|
||||
using FunctionNgramSearch = FunctionsStringSimilarity<NgramDistanceImpl<4, UInt8, false, false, false>, NameNgramSearch>;
|
||||
using FunctionNgramSearchCaseInsensitive = FunctionsStringSimilarity<NgramDistanceImpl<4, UInt8, false, true, false>, NameNgramSearchCaseInsensitive>;
|
||||
using FunctionNgramSearchUTF8 = FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, false, false>, NameNgramSearchUTF8>;
|
||||
using FunctionNgramSearchCaseInsensitiveUTF8 = FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, true, false>, NameNgramSearchUTF8CaseInsensitive>;
|
||||
|
||||
|
||||
void registerFunctionsStringSimilarity(FunctionFactory & factory)
|
||||
@ -515,10 +515,10 @@ void registerFunctionsStringSimilarity(FunctionFactory & factory)
|
||||
factory.registerFunction<FunctionNgramDistanceUTF8>();
|
||||
factory.registerFunction<FunctionNgramDistanceCaseInsensitiveUTF8>();
|
||||
|
||||
factory.registerFunction<FunctionNgramEntry>();
|
||||
factory.registerFunction<FunctionNgramEntryCaseInsensitive>();
|
||||
factory.registerFunction<FunctionNgramEntryUTF8>();
|
||||
factory.registerFunction<FunctionNgramEntryCaseInsensitiveUTF8>();
|
||||
factory.registerFunction<FunctionNgramSearch>();
|
||||
factory.registerFunction<FunctionNgramSearchCaseInsensitive>();
|
||||
factory.registerFunction<FunctionNgramSearchUTF8>();
|
||||
factory.registerFunction<FunctionNgramSearchCaseInsensitiveUTF8>();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,180 +1,180 @@
|
||||
select round(1000 * ngramEntryUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), 'абвгдеёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), 'абвгдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), 'гдеёзд')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), 'ёёёёёёёё')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), 'абвгдеёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), 'абвгдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), 'гдеёзд')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), 'ёёёёёёёё')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramEntryUTF8(materialize(''), materialize('')))=round(1000 * ngramEntryUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8(materialize('абв'), materialize('')))=round(1000 * ngramEntryUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8(materialize(''), materialize('абв')))=round(1000 * ngramEntryUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), materialize('абвгдеёжз')))=round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), 'абвгдеёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), materialize('абвгдеёж')))=round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), 'абвгдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), materialize('гдеёзд')))=round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), 'гдеёзд')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), materialize('ёёёёёёёё')))=round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), 'ёёёёёёёё')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize(''), materialize('')))=round(1000 * ngramSearchUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize('абв'), materialize('')))=round(1000 * ngramSearchUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize(''), materialize('абв')))=round(1000 * ngramSearchUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), materialize('абвгдеёжз')))=round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), 'абвгдеёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), materialize('абвгдеёж')))=round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), 'абвгдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), materialize('гдеёзд')))=round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), 'гдеёзд')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), materialize('ёёёёёёёё')))=round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), 'ёёёёёёёё')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramEntryUTF8('', materialize('')))=round(1000 * ngramEntryUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8('абв', materialize('')))=round(1000 * ngramEntryUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8('', materialize('абв')))=round(1000 * ngramEntryUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8('абвгдеёжз', materialize('абвгдеёжз')))=round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), 'абвгдеёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8('абвгдеёжз', materialize('абвгдеёж')))=round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), 'абвгдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8('абвгдеёжз', materialize('гдеёзд')))=round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), 'гдеёзд')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryUTF8('абвгдеёжз', materialize('ёёёёёёёё')))=round(1000 * ngramEntryUTF8(materialize('абвгдеёжз'), 'ёёёёёёёё')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8('', materialize('')))=round(1000 * ngramSearchUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8('абв', materialize('')))=round(1000 * ngramSearchUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8('', materialize('абв')))=round(1000 * ngramSearchUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8('абвгдеёжз', materialize('абвгдеёжз')))=round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), 'абвгдеёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8('абвгдеёжз', materialize('абвгдеёж')))=round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), 'абвгдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8('абвгдеёжз', materialize('гдеёзд')))=round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), 'гдеёзд')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchUTF8('абвгдеёжз', materialize('ёёёёёёёё')))=round(1000 * ngramSearchUTF8(materialize('абвгдеёжз'), 'ёёёёёёёё')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramEntryUTF8('', ''));
|
||||
select round(1000 * ngramEntryUTF8('абв', ''));
|
||||
select round(1000 * ngramEntryUTF8('', 'абв'));
|
||||
select round(1000 * ngramEntryUTF8('абвгдеёжз', 'абвгдеёжз'));
|
||||
select round(1000 * ngramEntryUTF8('абвгдеёжз', 'абвгдеёж'));
|
||||
select round(1000 * ngramEntryUTF8('абвгдеёжз', 'гдеёзд'));
|
||||
select round(1000 * ngramEntryUTF8('абвгдеёжз', 'ёёёёёёёё'));
|
||||
select round(1000 * ngramSearchUTF8('', ''));
|
||||
select round(1000 * ngramSearchUTF8('абв', ''));
|
||||
select round(1000 * ngramSearchUTF8('', 'абв'));
|
||||
select round(1000 * ngramSearchUTF8('абвгдеёжз', 'абвгдеёжз'));
|
||||
select round(1000 * ngramSearchUTF8('абвгдеёжз', 'абвгдеёж'));
|
||||
select round(1000 * ngramSearchUTF8('абвгдеёжз', 'гдеёзд'));
|
||||
select round(1000 * ngramSearchUTF8('абвгдеёжз', 'ёёёёёёёё'));
|
||||
|
||||
drop table if exists test_entry_distance;
|
||||
create table test_entry_distance (Title String) engine = Memory;
|
||||
insert into test_entry_distance values ('привет как дела?... Херсон'), ('привет как дела клип - Яндекс.Видео'), ('привет'), ('пап привет как дела - Яндекс.Видео'), ('привет братан как дела - Яндекс.Видео'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://metrica.yandex.com/'), ('http://metris.ru/'), ('http://metrika.ru/'), ('');
|
||||
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryUTF8(Title, Title) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryUTF8(Title, extract(Title, 'как дела')) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryUTF8(Title, extract(Title, 'metr')) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, Title) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, extract(Title, 'как дела')) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, extract(Title, 'metr')) as distance;
|
||||
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryUTF8(Title, 'привет как дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryUTF8(Title, 'как привет дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryUTF8(Title, 'metrika') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryUTF8(Title, 'metrica') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryUTF8(Title, 'metriks') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryUTF8(Title, 'metrics') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryUTF8(Title, 'yandex') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'привет как дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'как привет дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrika') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrica') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metriks') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrics') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'yandex') as distance;
|
||||
|
||||
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абвГДЕёжз'), 'АбвгдЕёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), 'АбвГдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'гдеёЗД')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'ЁЁЁЁЁЁЁЁ')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абвГДЕёжз'), 'АбвгдЕёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), 'АбвГдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'гдеёЗД')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'ЁЁЁЁЁЁЁЁ')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize(''),materialize(''))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абв'),materialize(''))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize(''), materialize('абв'))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абвГДЕёжз'), materialize('АбвгдЕёжз'))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абвГДЕёжз'), 'АбвгдЕёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), materialize('АбвГдеёж'))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), 'АбвГдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абвгдеёжз'), materialize('гдеёЗД'))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'гдеёЗД')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абвгдеёжз'), materialize('ЁЁЁЁЁЁЁЁ'))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'ЁЁЁЁЁЁЁЁ')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize(''),materialize(''))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абв'),materialize(''))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize(''), materialize('абв'))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абвГДЕёжз'), materialize('АбвгдЕёжз'))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абвГДЕёжз'), 'АбвгдЕёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), materialize('АбвГдеёж'))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), 'АбвГдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абвгдеёжз'), materialize('гдеёЗД'))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'гдеёЗД')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абвгдеёжз'), materialize('ЁЁЁЁЁЁЁЁ'))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'ЁЁЁЁЁЁЁЁ')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('', materialize(''))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('абв',materialize(''))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('', materialize('абв'))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('абвГДЕёжз', materialize('АбвгдЕёжз'))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абвГДЕёжз'), 'АбвгдЕёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('аБВГдеёЖз', materialize('АбвГдеёж'))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), 'АбвГдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('абвгдеёжз', materialize('гдеёЗД'))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'гдеёЗД')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('абвгдеёжз', materialize('ЁЁЁЁЁЁЁЁ'))) = round(1000 * ngramEntryCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'ЁЁЁЁЁЁЁЁ')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('', materialize(''))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('абв',materialize(''))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('', materialize('абв'))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('абвГДЕёжз', materialize('АбвгдЕёжз'))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абвГДЕёжз'), 'АбвгдЕёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('аБВГдеёЖз', materialize('АбвГдеёж'))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), 'АбвГдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('абвгдеёжз', materialize('гдеёЗД'))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'гдеёЗД')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('абвгдеёжз', materialize('ЁЁЁЁЁЁЁЁ'))) = round(1000 * ngramSearchCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'ЁЁЁЁЁЁЁЁ')) from system.numbers limit 5;
|
||||
|
||||
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('', ''));
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('абв', ''));
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('', 'абв'));
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('абвГДЕёжз', 'АбвгдЕЁжз'));
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('аБВГдеёЖз', 'АбвГдеёж'));
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('абвгдеёжз', 'гдеёЗД'));
|
||||
select round(1000 * ngramEntryCaseInsensitiveUTF8('АБВГДеёжз', 'ЁЁЁЁЁЁЁЁ'));
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('', ''));
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('абв', ''));
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('', 'абв'));
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('абвГДЕёжз', 'АбвгдЕЁжз'));
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('аБВГдеёЖз', 'АбвГдеёж'));
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('абвгдеёжз', 'гдеёЗД'));
|
||||
select round(1000 * ngramSearchCaseInsensitiveUTF8('АБВГДеёжз', 'ЁЁЁЁЁЁЁЁ'));
|
||||
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitiveUTF8(Title, Title) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitiveUTF8(Title, extract(Title, 'как дела')) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitiveUTF8(Title, extract(Title, 'metr')) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, Title) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, extract(Title, 'как дела')) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, extract(Title, 'metr')) as distance;
|
||||
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitiveUTF8(Title, 'ПрИвЕт кАК ДЕЛа') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitiveUTF8(Title, 'как ПРИВЕТ дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitiveUTF8(Title, 'metrika') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitiveUTF8(Title, 'Metrika') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitiveUTF8(Title, 'mEtrica') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitiveUTF8(Title, 'metriKS') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitiveUTF8(Title, 'metrics') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitiveUTF8(Title, 'YanDEX') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - яндеКс.видео') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'ПрИвЕт кАК ДЕЛа') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'как ПРИВЕТ дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metrika') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'Metrika') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'mEtrica') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metriKS') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metrics') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'YanDEX') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - яндеКс.видео') as distance;
|
||||
|
||||
|
||||
select round(1000 * ngramEntry(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry(materialize('abcdefgh'), 'abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry(materialize('abcdefgh'), 'abcdefg')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry(materialize('abcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry(materialize('abcdefgh'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize('abcdefgh'), 'abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize('abcdefgh'), 'abcdefg')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize('abcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize('abcdefgh'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramEntry(materialize(''),materialize('')))=round(1000 * ngramEntry(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry(materialize('abc'),materialize('')))=round(1000 * ngramEntry(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry(materialize(''), materialize('abc')))=round(1000 * ngramEntry(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry(materialize('abcdefgh'), materialize('abcdefgh')))=round(1000 * ngramEntry(materialize('abcdefgh'), 'abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry(materialize('abcdefgh'), materialize('abcdefg')))=round(1000 * ngramEntry(materialize('abcdefgh'), 'abcdefg')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry(materialize('abcdefgh'), materialize('defgh')))=round(1000 * ngramEntry(materialize('abcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry(materialize('abcdefgh'), materialize('aaaaaaaa')))=round(1000 * ngramEntry(materialize('abcdefgh'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize(''),materialize('')))=round(1000 * ngramSearch(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize('abc'),materialize('')))=round(1000 * ngramSearch(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize(''), materialize('abc')))=round(1000 * ngramSearch(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize('abcdefgh'), materialize('abcdefgh')))=round(1000 * ngramSearch(materialize('abcdefgh'), 'abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize('abcdefgh'), materialize('abcdefg')))=round(1000 * ngramSearch(materialize('abcdefgh'), 'abcdefg')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize('abcdefgh'), materialize('defgh')))=round(1000 * ngramSearch(materialize('abcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch(materialize('abcdefgh'), materialize('aaaaaaaa')))=round(1000 * ngramSearch(materialize('abcdefgh'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramEntry('',materialize('')))=round(1000 * ngramEntry(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry('abc', materialize('')))=round(1000 * ngramEntry(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry('', materialize('abc')))=round(1000 * ngramEntry(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry('abcdefgh', materialize('abcdefgh')))=round(1000 * ngramEntry(materialize('abcdefgh'), 'abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry('abcdefgh', materialize('abcdefg')))=round(1000 * ngramEntry(materialize('abcdefgh'), 'abcdefg')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry('abcdefgh', materialize('defgh')))=round(1000 * ngramEntry(materialize('abcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntry('abcdefgh', materialize('aaaaaaaa')))=round(1000 * ngramEntry(materialize('abcdefgh'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch('',materialize('')))=round(1000 * ngramSearch(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch('abc', materialize('')))=round(1000 * ngramSearch(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch('', materialize('abc')))=round(1000 * ngramSearch(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch('abcdefgh', materialize('abcdefgh')))=round(1000 * ngramSearch(materialize('abcdefgh'), 'abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch('abcdefgh', materialize('abcdefg')))=round(1000 * ngramSearch(materialize('abcdefgh'), 'abcdefg')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch('abcdefgh', materialize('defgh')))=round(1000 * ngramSearch(materialize('abcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearch('abcdefgh', materialize('aaaaaaaa')))=round(1000 * ngramSearch(materialize('abcdefgh'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
|
||||
|
||||
select round(1000 * ngramEntry('', ''));
|
||||
select round(1000 * ngramEntry('abc', ''));
|
||||
select round(1000 * ngramEntry('', 'abc'));
|
||||
select round(1000 * ngramEntry('abcdefgh', 'abcdefgh'));
|
||||
select round(1000 * ngramEntry('abcdefgh', 'abcdefg'));
|
||||
select round(1000 * ngramEntry('abcdefgh', 'defgh'));
|
||||
select round(1000 * ngramEntry('abcdefghaaaaaaaaaa', 'aaaaaaaa'));
|
||||
select round(1000 * ngramSearch('', ''));
|
||||
select round(1000 * ngramSearch('abc', ''));
|
||||
select round(1000 * ngramSearch('', 'abc'));
|
||||
select round(1000 * ngramSearch('abcdefgh', 'abcdefgh'));
|
||||
select round(1000 * ngramSearch('abcdefgh', 'abcdefg'));
|
||||
select round(1000 * ngramSearch('abcdefgh', 'defgh'));
|
||||
select round(1000 * ngramSearch('abcdefghaaaaaaaaaa', 'aaaaaaaa'));
|
||||
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntry(Title, 'привет как дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntry(Title, 'как привет дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntry(Title, 'metrika') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntry(Title, 'metrica') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntry(Title, 'metriks') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntry(Title, 'metrics') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntry(Title, 'yandex') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'привет как дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'как привет дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrika') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrica') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metriks') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrics') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'yandex') as distance;
|
||||
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize('abCdefgH'), 'Abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize('abcdefgh'), 'abcdeFG')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize('AAAAbcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize('ABCdefgH'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize('abCdefgH'), 'Abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize('abcdefgh'), 'abcdeFG')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize('AAAAbcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize('ABCdefgH'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize(''), materialize('')))=round(1000 * ngramEntryCaseInsensitive(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize('abc'), materialize('')))=round(1000 * ngramEntryCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize(''), materialize('abc')))=round(1000 * ngramEntryCaseInsensitive(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize('abCdefgH'), materialize('Abcdefgh')))=round(1000 * ngramEntryCaseInsensitive(materialize('abCdefgH'), 'Abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize('abcdefgh'), materialize('abcdeFG')))=round(1000 * ngramEntryCaseInsensitive(materialize('abcdefgh'), 'abcdeFG')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize('AAAAbcdefgh'), materialize('defgh')))=round(1000 * ngramEntryCaseInsensitive(materialize('AAAAbcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive(materialize('ABCdefgH'), materialize('aaaaaaaa')))=round(1000 * ngramEntryCaseInsensitive(materialize('ABCdefgH'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize(''), materialize('')))=round(1000 * ngramSearchCaseInsensitive(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize('abc'), materialize('')))=round(1000 * ngramSearchCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize(''), materialize('abc')))=round(1000 * ngramSearchCaseInsensitive(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize('abCdefgH'), materialize('Abcdefgh')))=round(1000 * ngramSearchCaseInsensitive(materialize('abCdefgH'), 'Abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize('abcdefgh'), materialize('abcdeFG')))=round(1000 * ngramSearchCaseInsensitive(materialize('abcdefgh'), 'abcdeFG')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize('AAAAbcdefgh'), materialize('defgh')))=round(1000 * ngramSearchCaseInsensitive(materialize('AAAAbcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive(materialize('ABCdefgH'), materialize('aaaaaaaa')))=round(1000 * ngramSearchCaseInsensitive(materialize('ABCdefgH'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramEntryCaseInsensitive('', materialize('')))=round(1000 * ngramEntryCaseInsensitive(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive('abc', materialize('')))=round(1000 * ngramEntryCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive('', materialize('abc')))=round(1000 * ngramEntryCaseInsensitive(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive('abCdefgH', materialize('Abcdefgh')))=round(1000 * ngramEntryCaseInsensitive(materialize('abCdefgH'), 'Abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive('abcdefgh', materialize('abcdeFG')))=round(1000 * ngramEntryCaseInsensitive(materialize('abcdefgh'), 'abcdeFG')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive('AAAAbcdefgh', materialize('defgh')))=round(1000 * ngramEntryCaseInsensitive(materialize('AAAAbcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramEntryCaseInsensitive('ABCdefgH', materialize('aaaaaaaa')))=round(1000 * ngramEntryCaseInsensitive(materialize('ABCdefgH'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive('', materialize('')))=round(1000 * ngramSearchCaseInsensitive(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive('abc', materialize('')))=round(1000 * ngramSearchCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive('', materialize('abc')))=round(1000 * ngramSearchCaseInsensitive(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive('abCdefgH', materialize('Abcdefgh')))=round(1000 * ngramSearchCaseInsensitive(materialize('abCdefgH'), 'Abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive('abcdefgh', materialize('abcdeFG')))=round(1000 * ngramSearchCaseInsensitive(materialize('abcdefgh'), 'abcdeFG')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive('AAAAbcdefgh', materialize('defgh')))=round(1000 * ngramSearchCaseInsensitive(materialize('AAAAbcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramSearchCaseInsensitive('ABCdefgH', materialize('aaaaaaaa')))=round(1000 * ngramSearchCaseInsensitive(materialize('ABCdefgH'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramEntryCaseInsensitive('', ''));
|
||||
select round(1000 * ngramEntryCaseInsensitive('abc', ''));
|
||||
select round(1000 * ngramEntryCaseInsensitive('', 'abc'));
|
||||
select round(1000 * ngramEntryCaseInsensitive('abCdefgH', 'Abcdefgh'));
|
||||
select round(1000 * ngramEntryCaseInsensitive('abcdefgh', 'abcdeFG'));
|
||||
select round(1000 * ngramEntryCaseInsensitive('AAAAbcdefgh', 'defgh'));
|
||||
select round(1000 * ngramEntryCaseInsensitive('ABCdefgHaAaaaAaaaAA', 'aaaaaaaa'));
|
||||
select round(1000 * ngramSearchCaseInsensitive('', ''));
|
||||
select round(1000 * ngramSearchCaseInsensitive('abc', ''));
|
||||
select round(1000 * ngramSearchCaseInsensitive('', 'abc'));
|
||||
select round(1000 * ngramSearchCaseInsensitive('abCdefgH', 'Abcdefgh'));
|
||||
select round(1000 * ngramSearchCaseInsensitive('abcdefgh', 'abcdeFG'));
|
||||
select round(1000 * ngramSearchCaseInsensitive('AAAAbcdefgh', 'defgh'));
|
||||
select round(1000 * ngramSearchCaseInsensitive('ABCdefgHaAaaaAaaaAA', 'aaaaaaaa'));
|
||||
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitive(Title, 'ПрИвЕт кАК ДЕЛа') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitive(Title, 'как ПРИВЕТ дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitive(Title, 'metrika') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitive(Title, 'Metrika') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitive(Title, 'mEtrica') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitive(Title, 'metriKS') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitive(Title, 'metrics') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramEntryCaseInsensitive(Title, 'YanDEX') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'ПрИвЕт кАК ДЕЛа') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'как ПРИВЕТ дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metrika') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'Metrika') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'mEtrica') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metriKS') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metrics') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'YanDEX') as distance;
|
||||
|
||||
drop table if exists test_entry_distance;
|
||||
|
@ -106,11 +106,11 @@ Calculates the 4-gram distance between `haystack` and `needle`: counts the symme
|
||||
|
||||
For case-insensitive search and/or UTF-8 input, use the functions `ngramDistanceCaseInsensitive`, `ngramDistanceUTF8`, `ngramDistanceCaseInsensitiveUTF8`.
|
||||
|
||||
## ngramEntry(haystack, needle)
|
||||
## ngramSearch(haystack, needle)
|
||||
|
||||
Same as `ngramDistance`, but calculates the non-symmetric difference between `needle` and `haystack`: the number of n-grams from `needle` minus the number of common n-grams, normalized by the number of `needle` n-grams. Can be useful for fuzzy string search.
|
||||
|
||||
For case-insensitive search or/and in UTF-8 format use functions `ngramEntryCaseInsensitive, ngramEntryUTF8, ngramEntryCaseInsensitiveUTF8`.
|
||||
For case-insensitive search and/or UTF-8 input, use the functions `ngramSearchCaseInsensitive`, `ngramSearchUTF8`, `ngramSearchCaseInsensitiveUTF8`.
|
||||
|
||||
**Note: For the UTF-8 case we use 3-gram distance. These are not perfectly fair n-gram distances: we use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables, so collisions may occur. In the UTF-8 case-insensitive format we do not use a fair `tolower` function -- we zero the 5th bit (counting from zero) of each codepoint byte, and the first bit of the zeroth byte if the codepoint has more than one byte -- this works for Latin and for almost all Cyrillic letters.**
|
||||
|
||||
|
@ -95,11 +95,11 @@
|
||||
|
||||
Для поиска без учета регистра и/или в формате UTF-8 используйте функции `ngramDistanceCaseInsensitive, ngramDistanceUTF8, ngramDistanceCaseInsensitiveUTF8`.
|
||||
|
||||
## ngramEntry(haystack, needle)
|
||||
## ngramSearch(haystack, needle)
|
||||
|
||||
То же, что и `ngramDistance`, но вычисляет несимметричную разность между `needle` и `haystack` -- количество n-грамм из `needle` минус количество общих n-грамм, нормированное на количество n-грамм из `needle`. Может быть использовано для приближенного поиска.
|
||||
|
||||
Для поиска без учета регистра и/или в формате UTF-8 используйте функции `ngramEntryCaseInsensitive, ngramEntryUTF8, ngramEntryCaseInsensitiveUTF8`.
|
||||
Для поиска без учета регистра и/или в формате UTF-8 используйте функции `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`.
|
||||
|
||||
|
||||
**Примечание: для случая UTF-8 мы используем триграммное расстояние. Вычисление n-граммного расстояния не совсем честное. Мы используем двухбайтные хэши для хэширования n-грамм, а затем вычисляем (не)симметрическую разность между хэш-таблицами -- могут возникнуть коллизии. В формате UTF-8 без учета регистра мы не используем честную функцию `tolower` -- мы обнуляем 5-й бит (нумерация с нуля) каждого байта кодовой точки, а также первый бит нулевого байта, если байтов больше одного -- это работает для латиницы и почти для всех кириллических букв.**
|
||||
|
Loading…
Reference in New Issue
Block a user