mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 01:22:04 +00:00
fix like
This commit is contained in:
parent
970f93d3c4
commit
ca4d1e137c
@ -27,7 +27,7 @@ namespace ErrorCodes
|
||||
|
||||
/// Adds all tokens from string to bloom filter.
|
||||
static void stringToBloomFilter(
|
||||
const char * data, size_t size, const std::unique_ptr<TokenExtractor> & token_extractor, StringBloomFilter & bloom_filter)
|
||||
const char * data, size_t size, const std::unique_ptr<ITokenExtractor> & token_extractor, StringBloomFilter & bloom_filter)
|
||||
{
|
||||
size_t cur = 0;
|
||||
size_t token_start = 0;
|
||||
@ -38,7 +38,7 @@ static void stringToBloomFilter(
|
||||
|
||||
/// Adds all tokens from a LIKE pattern string to the bloom filter. (Handled separately because a LIKE pattern can contain the escapes `\%` and `\_`.)
|
||||
static void likeStringToBloomFilter(
|
||||
const String & data, const std::unique_ptr<TokenExtractor> & token_extractor, StringBloomFilter & bloom_filter)
|
||||
const String & data, const std::unique_ptr<ITokenExtractor> & token_extractor, StringBloomFilter & bloom_filter)
|
||||
{
|
||||
size_t cur = 0;
|
||||
String token;
|
||||
|
@ -96,17 +96,22 @@ private:
|
||||
PreparedSets prepared_sets;
|
||||
};
|
||||
|
||||
struct TokenExtractor
|
||||
|
||||
/// Interface for string parsers.
|
||||
struct ITokenExtractor
|
||||
{
|
||||
virtual ~TokenExtractor() = default;
|
||||
virtual ~ITokenExtractor() = default;
|
||||
/// Fast in-place implementation for regular use.
|
||||
/// Gets string (data ptr and len) and start position for extracting next token (state of extractor).
|
||||
/// Returns false if parsing is finished, otherwise returns true.
|
||||
virtual bool next(const char * data, size_t len, size_t * pos, size_t * token_start, size_t * token_len) const = 0;
|
||||
/// Special implementation for creating bloom filter for LIKE function.
|
||||
/// It skips unescaped `%` and `_` and supports escaping symbols, but it is less lightweight.
|
||||
virtual bool nextLike(const String & str, size_t * pos, String & out) const = 0;
|
||||
};
|
||||
|
||||
struct NgramTokenExtractor : public TokenExtractor
|
||||
/// Parser extracting all ngrams from string.
|
||||
struct NgramTokenExtractor : public ITokenExtractor
|
||||
{
|
||||
NgramTokenExtractor(size_t n_) : n(n_) {}
|
||||
|
||||
@ -143,7 +148,7 @@ public:
|
||||
size_t bloom_filter_size_,
|
||||
size_t bloom_filter_hashes_,
|
||||
size_t seed_,
|
||||
std::unique_ptr<TokenExtractor> && token_extractor_func_)
|
||||
std::unique_ptr<ITokenExtractor> && token_extractor_func_)
|
||||
: IMergeTreeIndex(name_, expr_, columns_, data_types_, header_, granularity_)
|
||||
, bloom_filter_size(bloom_filter_size_)
|
||||
, bloom_filter_hashes(bloom_filter_hashes_)
|
||||
@ -164,7 +169,7 @@ public:
|
||||
/// Bloom filter seed.
|
||||
size_t seed;
|
||||
/// Function for selecting next token.
|
||||
std::unique_ptr<TokenExtractor> token_extractor_func;
|
||||
std::unique_ptr<ITokenExtractor> token_extractor_func;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,23 @@
|
||||
8 aбвгдеёж
|
||||
"rows_read": 2,
|
||||
13 abc
|
||||
"rows_read": 1,
|
||||
1 ClickHouse is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).
|
||||
"rows_read": 2,
|
||||
1 ClickHouse is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).
|
||||
"rows_read": 2,
|
||||
0 ClickHouse - столбцовая система управления базами данных (СУБД) для онлайн обработки аналитических запросов (OLAP).
|
||||
"rows_read": 2,
|
||||
0 ClickHouse - столбцовая система управления базами данных (СУБД) для онлайн обработки аналитических запросов (OLAP).
|
||||
5 еще строка
|
||||
"rows_read": 4,
|
||||
12 <div> странный <strong>html</strong> </div>
|
||||
"rows_read": 2,
|
||||
9 2_2%2_2\\
|
||||
"rows_read": 2,
|
||||
9 2_2%2_2\\
|
||||
"rows_read": 2,
|
||||
9 2_2%2_2\\
|
||||
"rows_read": 2,
|
||||
9 2_2%2_2\\
|
||||
"rows_read": 2,
|
36
dbms/tests/queries/0_stateless/00908_bloom_filter_index.sh
Normal file → Executable file
36
dbms/tests/queries/0_stateless/00908_bloom_filter_index.sh
Normal file → Executable file
@ -12,7 +12,7 @@ CREATE TABLE test.bloom_filter_idx
|
||||
(
|
||||
k UInt64,
|
||||
s String,
|
||||
INDEX bf (s) TYPE ngrambf(3, 16, 0) GRANULARITY 1,
|
||||
INDEX bf (s, lower(s)) TYPE ngrambf(3, 512, 0) GRANULARITY 1
|
||||
) ENGINE = MergeTree()
|
||||
ORDER BY k
|
||||
SETTINGS index_granularity = 2;"
|
||||
@ -28,27 +28,45 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO test.bloom_filter_idx VALUES
|
||||
(6, 'some string'),
|
||||
(7, 'another string'),
|
||||
(8, 'aбвгдеёж'),
|
||||
(9, '2_2%2_2\\'),
|
||||
(11, '!@#$%^&*()1234567890<>?:|{}'),
|
||||
(9, '2_2%2_2\\\\'),
|
||||
(11, '!@#$%^&*0123456789'),
|
||||
(12, '<div> странный <strong>html</strong> </div>'),
|
||||
(13, 'abc')"
|
||||
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s = 'aбвгдеёж' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s = 'aбвгдеёж' ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s = 'abc' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s = 'abc' ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%database%' AND s LIKE '%ClickHouse%' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx" | grep "rows_read"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%database%' AND s LIKE '%ClickHouse%' ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%database%' AND lower(s) LIKE '%clickhouse%' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%database%' AND lower(s) LIKE '%clickhouse%' ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%базами данных%' AND s LIKE '%ClickHouse%' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%базами данных%' AND s LIKE '%ClickHouse%' ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE (s LIKE '%базами данных%' AND s LIKE '%ClickHouse%') OR s LIKE '____строка' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE (s LIKE '%базами данных%' AND s LIKE '%ClickHouse%') OR s LIKE '____строка' ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%%<div>_%_%_</div>%%' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%%<div>_%_%_</div>%%' ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%2\\\\%2%' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%2\\\\%2%' ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%_\\\\%2\\\\__\\\\' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%_\\\\%2\\\\__\\\\' ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2\\\\' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2\\\\' ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2_' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2_' ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%2\\%2%' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%2\\_2%' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '%2\\\\' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '2\\_2\\%2_2\\\\' ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.bloom_filter_idx WHERE s LIKE '2\\_2\\%2_2_' ORDER BY k"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="DROP TABLE test.bloom_filter_idx"
|
Loading…
Reference in New Issue
Block a user