#!/usr/bin/env bash
# Functional test for bloom-filter data skipping indices (ngrambf_v1,
# tokenbf_v1, bloom_filter). Every query is sent through $CLICKHOUSE_CLIENT.

# Absolute path of the directory that contains this script.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# NOTE(review): $CLICKHOUSE_CLIENT is presumably defined by shell_config.sh and
# is left unquoted on purpose so that a value carrying extra arguments is
# word-split -- confirm against shell_config.sh.

# Start from a clean state: remove every table this test creates later, so a
# leftover table from an aborted run cannot make a CREATE TABLE fail.
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx;"
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx2;"
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx3;"

# NGRAM BF
# Create two MergeTree tables that carry an ngrambf_v1 skipping index over
# (s, lower(s)): one with a String column, one with a FixedString(15) column.
# index_granularity = 2 makes each granule hold two rows.
# NOTE(review): per the ClickHouse docs the ngrambf_v1 arguments are
# (n-gram size, filter size in bytes, number of hash functions, seed) --
# confirm against the server version under test.
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE bloom_filter_idx
(
    k UInt64,
    s String,
    INDEX bf (s, lower(s)) TYPE ngrambf_v1(3, 512, 2, 0) GRANULARITY 1
) ENGINE = MergeTree()
ORDER BY k
SETTINGS index_granularity = 2;"

$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE bloom_filter_idx2
(
    k UInt64,
    s FixedString(15),
    INDEX bf (s, lower(s)) TYPE ngrambf_v1(3, 512, 2, 0) GRANULARITY 1
) ENGINE = MergeTree()
ORDER BY k
SETTINGS index_granularity = 2;"

# Populate the String-column table. The rows mix ASCII, Cyrillic, LIKE
# metacharacters (% and _), backslashes and HTML-like text.
# The four backslashes inside the double-quoted shell string reach the client
# as two backslashes.
$CLICKHOUSE_CLIENT --query="INSERT INTO bloom_filter_idx VALUES
(0, 'ClickHouse - столбцовая система управления базами данных (СУБД) для онлайн обработки аналитических запросов (OLAP).'),
(1, 'ClickHouse is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).'),
(2, 'column-oriented database management system'),
(3, 'столбцовая система управления базами данных'),
(4, 'какая-то строка'),
(5, 'еще строка'),
(6, 'some string'),
(7, 'another string'),
(8, 'aб вг де ёж'),
(9, '2_2%2_2\\\\'),
(11, '!@#$%^&*0123456789'),
(12, '<div> странный <strong>html</strong> </div>'),
(13, 'abc')"

# Populate the FixedString(15) table with a smaller set of short values.
$CLICKHOUSE_CLIENT --query="INSERT INTO bloom_filter_idx2 VALUES
(0, 'ClickHouse'),
(1, 'column-oriented'),
(2, 'column-oriented'),
(6, 'some string'),
(8, 'aб вг де ёж'),
(9, '2_2%2_2\\\\'),
(13, 'abc')"

# EQUAL
# Each query is executed twice: first to print the matching rows, then with
# FORMAT JSON so that grep can print only the "rows_read" line of the output.
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx2 WHERE lower(s) = 'aб вг де ёж' OR s = 'aб вг де ёж' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx2 WHERE lower(s) = 'aб вг де ёж' OR s = 'aб вг де ёж' ORDER BY k FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s = 'aб вг де ёж' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s = 'aб вг де ёж' ORDER BY k FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE lower(s) = 'abc' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE lower(s) = 'abc' ORDER BY k FORMAT JSON" | grep "rows_read"

# LIKE
# Each query is executed twice: first to print the matching rows, then with
# FORMAT JSON so that grep can print only the "rows_read" line of the output.
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%database%' AND s LIKE '%ClickHouse%' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%database%' AND s LIKE '%ClickHouse%' ORDER BY k FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%database%' AND lower(s) LIKE '%clickhouse%' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%database%' AND lower(s) LIKE '%clickhouse%' ORDER BY k FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%базами данных%' AND s LIKE '%ClickHouse%' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%базами данных%' AND s LIKE '%ClickHouse%' ORDER BY k FORMAT JSON" | grep "rows_read"

# The second alternative is four single-character wildcards followed by a
# literal word; with stray spaces inside the literal it could not match any
# inserted row. NOTE(review): presumably meant to hit row 5 -- confirm against
# the reference output.
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE (s LIKE '%базами данных%' AND s LIKE '%ClickHouse%') OR s LIKE '____строка' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE (s LIKE '%базами данных%' AND s LIKE '%ClickHouse%') OR s LIKE '____строка' ORDER BY k FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%%<div>_%_%_</div>%%' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%%<div>_%_%_</div>%%' ORDER BY k FORMAT JSON" | grep "rows_read"

# Patterns below exercise escaped LIKE metacharacters. The four backslashes in
# the double-quoted shell string reach the client as two backslashes.
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%2\\\\%2%' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%2\\\\%2%' ORDER BY k FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%_\\\\%2\\\\__\\\\' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%_\\\\%2\\\\__\\\\' ORDER BY k FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2\\\\' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2\\\\' ORDER BY k FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2_' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2_' ORDER BY k FORMAT JSON" | grep "rows_read"

# IN
# Each query is executed twice: first to print the matching rows, then with
# FORMAT JSON so that grep can print only the "rows_read" line of the output.
# The second pair tests a tuple IN over both indexed expressions.
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s IN ('aб вг де ёж', 'abc') ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s IN ('aб вг де ёж', 'abc') ORDER BY k FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE (s, lower(s)) IN (('aб вг де ёж', 'aб вг де ёж'), ('abc', 'cba')) ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE (s, lower(s)) IN (('aб вг де ёж', 'aб вг де ёж'), ('abc', 'cba')) ORDER BY k FORMAT JSON" | grep "rows_read"

# TOKEN BF
# Create a MergeTree table with a tokenbf_v1 skipping index over
# (s, lower(s)) and fill it with test rows.
# NOTE(review): per the ClickHouse docs the tokenbf_v1 arguments are
# (filter size in bytes, number of hash functions, seed) -- confirm against
# the server version under test.
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE bloom_filter_idx3
(
    k UInt64,
    s String,
    INDEX bf (s, lower(s)) TYPE tokenbf_v1(512, 3, 0) GRANULARITY 1
) ENGINE = MergeTree()
ORDER BY k
SETTINGS index_granularity = 2;"

# The four backslashes inside the double-quoted shell string reach the client
# as two backslashes.
$CLICKHOUSE_CLIENT --query="INSERT INTO bloom_filter_idx3 VALUES
(0, 'ClickHouse is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).'),
(1, 'column-oriented'),
(2, 'column-oriented'),
(6, 'some string'),
(8, 'column with ints'),
(9, '2_2%2_2\\\\'),
(13, 'abc')"

# Queries against the tokenbf_v1 table. Each query is executed twice: first to
# print the matching rows, then with FORMAT JSON so that grep can print only
# the "rows_read" line of the output.

# EQUAL
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE lower(s) = 'column-oriented' OR s = 'column-oriented' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE lower(s) = 'column-oriented' OR s = 'column-oriented' ORDER BY k FORMAT JSON" | grep "rows_read"

# LIKE
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE lower(s) LIKE '%(dbms)%' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE lower(s) LIKE '%(dbms)%' ORDER BY k FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE s LIKE 'column-%' AND s LIKE '%-oriented' ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE s LIKE 'column-%' AND s LIKE '%-oriented' ORDER BY k FORMAT JSON" | grep "rows_read"

# IN
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE s IN ('some string', 'abc') ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE s IN ('some string', 'abc') ORDER BY k FORMAT JSON" | grep "rows_read"

# Remove the tables created by the index tests above.
$CLICKHOUSE_CLIENT --query="DROP TABLE bloom_filter_idx"
$CLICKHOUSE_CLIENT --query="DROP TABLE bloom_filter_idx2"
$CLICKHOUSE_CLIENT --query="DROP TABLE bloom_filter_idx3"

# Negative check: creating a bloom_filter index over Array(Array(String)) is
# expected to be rejected. stderr is merged into stdout and grep -c prints the
# number of lines containing the expected exception text.
# The DROP targets the same table name as the CREATE so that a table left by
# an unexpectedly successful CREATE does not survive into the next run.
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_na;"
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE bloom_filter_na
(
    na Array(Array(String)),
    INDEX bf na TYPE bloom_filter(0.1) GRANULARITY 1
) ENGINE = MergeTree()
ORDER BY na" 2>&1 | grep -c 'DB::Exception: Unexpected type Array(Array(String)) of bloom filter index'