mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-06 14:32:22 +00:00
81bb2242fd
SQL functions countSubstrings(), countSubstringsCaseInsensitive(), countSubstringsUTF8(), position(), positionCaseInsensitive(), positionUTF8() with a non-const pattern argument use the fallback searchers LibCASCIICaseSensitiveStringSearcher and LibCASCIICaseInsensitiveStringSearcher, which call ::strstr(), resp. ::strcasestr(). These functions assume that the haystack is 0-terminated, and they even document that. However, the callers did not check if the haystack contains a 0-byte (perhaps because it's sort of expensive). As a consequence, if the haystack contained a zero byte in its payload, matches behind this zero byte were ignored. Example: create table t (id UInt32, pattern String) engine = MergeTree() order by id; insert into t values (1, 'x'); select countSubstrings('aaaxxxaa\0xxx', pattern) from t; We returned 3 before this commit, now we return 6.
25 lines
1.1 KiB
SQL
25 lines
1.1 KiB
SQL
-- Regression test: substring search with a non-constant pattern (read from a
-- table column) on haystacks that contain an embedded zero byte ('\0').
-- Matches located behind the zero byte must not be ignored.

drop table if exists tab;

create table tab (id UInt32, haystack String, pattern String) engine = MergeTree() order by id;

-- Row 1: the haystack has three 'x' in front of the zero byte and three behind it.
insert into tab (id, haystack, pattern) values (1, 'aaaxxxaa\0xxx', 'x');

-- countSubstrings*: constant haystack, non-constant pattern
select countSubstrings('aaaxxxaa\0xxx', pattern) from tab where id = 1;
select countSubstringsCaseInsensitive('aaaxxxaa\0xxx', pattern) from tab where id = 1;
select countSubstringsCaseInsensitiveUTF8('aaaxxxaa\0xxx', pattern) from tab where id = 1;

-- countSubstrings*: non-constant haystack, non-constant pattern
select countSubstrings(haystack, pattern) from tab where id = 1;
select countSubstringsCaseInsensitive(haystack, pattern) from tab where id = 1;
select countSubstringsCaseInsensitiveUTF8(haystack, pattern) from tab where id = 1;

-- Row 2: the only 'x' sits behind the zero byte, so it is found only if the
-- search continues past '\0'.
insert into tab (id, haystack, pattern) values (2, 'aaaaa\0x', 'x');

-- position*: constant haystack, non-constant pattern
select position('aaaaa\0x', pattern) from tab where id = 2;
select positionCaseInsensitive('aaaaa\0x', pattern) from tab where id = 2;
select positionCaseInsensitiveUTF8('aaaaa\0x', pattern) from tab where id = 2;

-- position*: non-constant haystack, non-constant pattern
select position(haystack, pattern) from tab where id = 2;
select positionCaseInsensitive(haystack, pattern) from tab where id = 2;
select positionCaseInsensitiveUTF8(haystack, pattern) from tab where id = 2;

drop table if exists tab;