Revert "Refactor tests for (experimental) statistics"

This commit is contained in:
Alexander Tokmakov 2024-08-11 13:19:36 +02:00 committed by GitHub
parent f7e7a884b5
commit 53bc1b7e35
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 399 additions and 625 deletions

View File

@ -14,7 +14,7 @@ Each functional test sends one or multiple queries to the running ClickHouse ser
Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse and it is available to general public.
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
:::note
A common mistake when testing data types `DateTime` and `DateTime64` is assuming that the server uses a specific time zone (e.g. "UTC"). This is not the case, time zones in CI test runs
@ -38,7 +38,7 @@ For more options, see `tests/clickhouse-test --help`. You can simply run all tes
### Adding a New Test
To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables.

View File

@ -8,28 +8,26 @@ sidebar_label: STATISTICS
The following operations are available:
- `ALTER TABLE [db].table ADD STATISTICS [IF NOT EXISTS] (column list) TYPE (type list)` - Adds statistic description to tables metadata.
- `ALTER TABLE [db].table ADD STATISTICS (columns list) TYPE (type list)` - Adds statistic description to tables metadata.
- `ALTER TABLE [db].table MODIFY STATISTICS (column list) TYPE (type list)` - Modifies statistic description to tables metadata.
- `ALTER TABLE [db].table MODIFY STATISTICS (columns list) TYPE (type list)` - Modifies statistic description to tables metadata.
- `ALTER TABLE [db].table DROP STATISTICS [IF EXISTS] (column list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns.
- `ALTER TABLE [db].table DROP STATISTICS (columns list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns.
- `ALTER TABLE [db].table CLEAR STATISTICS [IF EXISTS] (column list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`.
- `ALTER TABLE [db].table CLEAR STATISTICS (columns list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`.
- `ALTER TABLE [db.]table MATERIALIZE STATISTICS [IF EXISTS] (column list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
- `ALTER TABLE [db.]table MATERIALIZE STATISTICS (columns list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
The first two commands are lightweight in a sense that they only change metadata or remove files.
Also, they are replicated, syncing statistics metadata via ZooKeeper.
## Example:
Adding two statistics types to two columns:
There is an example adding two statistics types to two columns:
```
ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq;
```
:::note
Statistic are supported only for [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
:::

View File

@ -3517,7 +3517,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context
const auto & new_column = new_metadata.getColumns().get(command.column_name);
if (!old_column.type->equals(*new_column.type))
throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN,
"ALTER types of column {} with statistics is not safe "
"ALTER types of column {} with statistics is not not safe "
"because it can change the representation of statistics",
backQuoteIfNeed(command.column_name));
}

View File

@ -0,0 +1,14 @@
CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192
Test statistics count_min:
Prewhere info
Prewhere filter
Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed)
Test statistics multi-types:
Prewhere info
Prewhere filter
Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed)
Test LowCardinality and Nullable data type:
tab2

View File

@ -0,0 +1,70 @@
-- Tags: no-fasttest
DROP TABLE IF EXISTS tab SYNC;
SET allow_experimental_statistics = 1;
SET allow_statistics_optimize = 1;
SET allow_suspicious_low_cardinality_types=1;
SET mutations_sync = 2;
CREATE TABLE tab
(
a String,
b UInt64,
c Int64,
pk String,
) Engine = MergeTree() ORDER BY pk
SETTINGS min_bytes_for_wide_part = 0;
SHOW CREATE TABLE tab;
INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), generateUUIDv4() FROM system.numbers LIMIT 10000;
SELECT 'Test statistics count_min:';
ALTER TABLE tab ADD STATISTICS a TYPE count_min;
ALTER TABLE tab ADD STATISTICS b TYPE count_min;
ALTER TABLE tab ADD STATISTICS c TYPE count_min;
ALTER TABLE tab MATERIALIZE STATISTICS a, b, c;
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) xx
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
ALTER TABLE tab DROP STATISTICS a, b, c;
SELECT 'Test statistics multi-types:';
ALTER TABLE tab ADD STATISTICS a TYPE count_min;
ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest;
ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest;
ALTER TABLE tab MATERIALIZE STATISTICS a, b, c;
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/)
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/)
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
ALTER TABLE tab DROP STATISTICS a, b, c;
DROP TABLE IF EXISTS tab SYNC;
SELECT 'Test LowCardinality and Nullable data type:';
DROP TABLE IF EXISTS tab2 SYNC;
SET allow_suspicious_low_cardinality_types=1;
CREATE TABLE tab2
(
a LowCardinality(Int64) STATISTICS(count_min),
b Nullable(Int64) STATISTICS(count_min),
c LowCardinality(Nullable(Int64)) STATISTICS(count_min),
pk String,
) Engine = MergeTree() ORDER BY pk;
select name from system.tables where name = 'tab2' and database = currentDatabase();
DROP TABLE IF EXISTS tab2 SYNC;

View File

@ -1,6 +1,31 @@
CREATE TABLE default.tab\n(\n `f64` Float64,\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32,\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
CREATE TABLE default.tab\n(\n `f64` Float64,\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32,\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192
After insert
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10), less(b, 10)) (removed)
10
0
After drop statistic
Prewhere info
Prewhere filter
Prewhere filter column: and(less(b, 10), less(a, 10)) (removed)
10
CREATE TABLE default.tab\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192
After add statistic
CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192
After materialize statistic
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10), less(b, 10)) (removed)
20
After merge
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10), less(b, 10)) (removed)
20
CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192
After rename
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10), less(c, 10)) (removed)
20

View File

@ -1,195 +1,59 @@
-- Tags: no-fasttest
-- no-fasttest: 'count_min' sketches need a 3rd party library
-- Tests that DDL statements which create / drop / materialize statistics
SET mutations_sync = 1;
-- Tests that various DDL statements create/drop/materialize statistics
DROP TABLE IF EXISTS tab;
-- Error case: Can't create statistics when allow_experimental_statistics = 0
CREATE TABLE tab (col Float64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY }
SET allow_experimental_statistics = 1;
-- Error case: Unknown statistics types are rejected
CREATE TABLE tab (col Float64 STATISTICS(no_statistics_type)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY }
-- Error case: The same statistics type can't exist more than once on a column
CREATE TABLE tab (col Float64 STATISTICS(tdigest, tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY }
SET allow_suspicious_low_cardinality_types = 1;
-- Statistics can only be created on columns of specific data types (depending on the statistics kind), (*)
-- tdigest requires data_type.isValueRepresentedByInteger
-- These types work:
CREATE TABLE tab (col UInt8 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col UInt256 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Float32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Decimal32(3) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Date STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Date32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col DateTime STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col DateTime64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col IPv4 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Nullable(UInt8) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
-- These types don't work:
CREATE TABLE tab (col String STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col FixedString(1) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col Array(Float64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col UUID STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col IPv6 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
-- uniq requires data_type.isValueRepresentedByInteger
-- These types work:
CREATE TABLE tab (col UInt8 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col UInt256 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Float32 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Decimal32(3) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Date STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Date32 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col DateTime STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col DateTime64 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col IPv4 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Nullable(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
-- These types don't work:
CREATE TABLE tab (col String STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col FixedString(1) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col Array(Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col UUID STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col IPv6 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
-- count_min requires data_type.isValueRepresentedByInteger or data_type = (Fixed)String
-- These types work:
CREATE TABLE tab (col UInt8 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col UInt256 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Float32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Decimal32(3) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Date STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Date32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col DateTime STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col DateTime64 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col IPv4 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col Nullable(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col String STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
CREATE TABLE tab (col FixedString(1) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
-- These types don't work:
CREATE TABLE tab (col Array(Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col UUID STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab (col IPv6 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
-- CREATE TABLE was easy, ALTER is more fun
SET allow_statistics_optimize = 1;
CREATE TABLE tab
(
f64 Float64,
f64_tdigest Float64 STATISTICS(tdigest),
f32 Float32,
s String,
a Array(Float64)
)
Engine = MergeTree()
ORDER BY tuple();
a Float64 STATISTICS(tdigest),
b Int64 STATISTICS(tdigest),
pk String,
) Engine = MergeTree() ORDER BY pk
SETTINGS min_bytes_for_wide_part = 0;
-- Error case: Unknown statistics types are rejected
-- (relevant for ADD and MODIFY)
ALTER TABLE tab ADD STATISTICS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY }
ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY }
ALTER TABLE tab MODIFY STATISTICS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY }
-- for some reason, ALTER TABLE tab MODIFY STATISTICS IF EXISTS is not supported
-- Error case: The same statistics type can't exist more than once on a column
-- (relevant for ADD and MODIFY)
-- Create the same statistics object twice
ALTER TABLE tab ADD STATISTICS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY }
ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY }
ALTER TABLE tab MODIFY STATISTICS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY }
-- Create an statistics which exists already
ALTER TABLE tab ADD STATISTICS f64_tdigest TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64_tdigest TYPE tdigest; -- no-op
ALTER TABLE tab MODIFY STATISTICS f64_tdigest TYPE tdigest; -- no-op
-- Error case: Column does not exist
-- (relevant for ADD, MODIFY, DROP, CLEAR, and MATERIALIZE)
-- Note that the results are unfortunately quite inconsistent ...
ALTER TABLE tab ADD STATISTICS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab ADD STATISTICS IF NOT EXISTS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab MODIFY STATISTICS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab DROP STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab DROP STATISTICS IF EXISTS no_such_column; -- no-op
ALTER TABLE tab CLEAR STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab CLEAR STATISTICS IF EXISTS no_such_column; -- no-op
ALTER TABLE tab MATERIALIZE STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab MATERIALIZE STATISTICS IF EXISTS no_such_column; -- { serverError ILLEGAL_STATISTICS }
-- Error case: Column exists but has no statistics
-- (relevant for MODIFY, DROP, CLEAR, and MATERIALIZE)
-- Note that the results are unfortunately quite inconsistent ...
ALTER TABLE tab MODIFY STATISTICS s TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab DROP STATISTICS s; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab DROP STATISTICS IF EXISTS s; -- no-op
ALTER TABLE tab CLEAR STATISTICS s; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab CLEAR STATISTICS IF EXISTS s; -- no-op
ALTER TABLE tab MATERIALIZE STATISTICS s; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab MATERIALIZE STATISTICS IF EXISTS s; -- { serverError ILLEGAL_STATISTICS }
-- We don't check systematically that that statistics can only be created via ALTER ADD STATISTICS on columns of specific data types (the
-- internal type validation code is tested already above, (*)). Only do a rudimentary check for each statistics type with a data type that
-- works and one that doesn't work.
-- tdigest
-- Works:
ALTER TABLE tab ADD STATISTICS f64 TYPE tdigest; ALTER TABLE tab DROP STATISTICS f64;
ALTER TABLE tab MODIFY STATISTICS f64 TYPE tdigest; ALTER TABLE tab DROP STATISTICS f64;
-- Doesn't work:
ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
-- uniq
-- Works:
ALTER TABLE tab ADD STATISTICS f64 TYPE uniq; ALTER TABLE tab DROP STATISTICS f64;
ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64;
-- Doesn't work:
ALTER TABLE tab ADD STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab MODIFY STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS }
-- count_min
-- Works:
ALTER TABLE tab ADD STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64;
ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64;
-- Doesn't work:
ALTER TABLE tab ADD STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab MODIFY STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS }
-- Any data type changes on columns with statistics are disallowed, for simplicity even if the new data type is compatible with all existing
-- statistics objects (e.g. tdigest can be created on Float64 and UInt64)
ALTER TABLE tab MODIFY COLUMN f64_tdigest UInt64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN }
-- Finally, do a full-circle test of a good case. Print table definition after each step.
-- Intentionally specifying _two_ columns and _two_ statistics types to have that also tested.
SHOW CREATE TABLE tab;
ALTER TABLE tab ADD STATISTICS f64, f32 TYPE tdigest, uniq;
SHOW CREATE TABLE tab;
ALTER TABLE tab MODIFY STATISTICS f64, f32 TYPE tdigest, uniq;
SHOW CREATE TABLE tab;
ALTER TABLE tab CLEAR STATISTICS f64, f32;
SHOW CREATE TABLE tab;
ALTER TABLE tab MATERIALIZE STATISTICS f64, f32;
SHOW CREATE TABLE tab;
ALTER TABLE tab DROP STATISTICS f64, f32;
SHOW CREATE TABLE tab;
DROP TABLE tab;
INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000;
SELECT 'After insert';
SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT count(*) FROM tab WHERE b < 10 and a < 10;
SELECT count(*) FROM tab WHERE b < NULL and a < '10';
ALTER TABLE tab DROP STATISTICS a, b;
SELECT 'After drop statistic';
SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT count(*) FROM tab WHERE b < 10 and a < 10;
SHOW CREATE TABLE tab;
ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest;
SELECT 'After add statistic';
SHOW CREATE TABLE tab;
ALTER TABLE tab MATERIALIZE STATISTICS a, b;
INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000;
SELECT 'After materialize statistic';
SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT count(*) FROM tab WHERE b < 10 and a < 10;
OPTIMIZE TABLE tab FINAL;
SELECT 'After merge';
SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT count(*) FROM tab WHERE b < 10 and a < 10;
ALTER TABLE tab RENAME COLUMN b TO c;
SHOW CREATE TABLE tab;
SELECT 'After rename';
SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT count(*) FROM tab WHERE c < 10 and a < 10;
DROP TABLE IF EXISTS tab;

View File

@ -1,12 +0,0 @@
After insert
Prewhere info
Prewhere filter
Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)
After merge
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
After truncate, insert, and materialize
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)

View File

@ -1,36 +0,0 @@
-- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0').
-- (The concrete statistics type, column data type and predicate type don't matter)
-- Checks by the predicate evaluation order in EXPLAIN. This is quite fragile, a better approach would be helpful (maybe 'send_logs_level'?)
DROP TABLE IF EXISTS tab;
SET allow_experimental_statistics = 1;
SET allow_statistics_optimize = 1;
SET enable_analyzer = 1;
SET materialize_statistics_on_insert = 0;
CREATE TABLE tab
(
a Int64 STATISTICS(tdigest),
b Int16 STATISTICS(tdigest),
) ENGINE = MergeTree() ORDER BY tuple()
SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics.
INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000;
SELECT 'After insert';
SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks b first, then a (statistics not used)
OPTIMIZE TABLE tab FINAL;
SELECT 'After merge';
SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used)
TRUNCATE TABLE tab;
SET mutations_sync = 2;
INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000;
ALTER TABLE tab MATERIALIZE STATISTICS a, b;
SELECT 'After truncate, insert, and materialize';
SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used)
DROP TABLE tab;

View File

@ -0,0 +1,55 @@
-- Tests creating/dropping/materializing statistics produces the right exceptions.
DROP TABLE IF EXISTS tab;
-- Can't create statistics when allow_experimental_statistics = 0
CREATE TABLE tab
(
a Float64 STATISTICS(tdigest)
) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY }
SET allow_experimental_statistics = 1;
-- The same type of statistics can't exist more than once on a column
CREATE TABLE tab
(
a Float64 STATISTICS(tdigest, tdigest)
) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY }
-- Unknown statistics types are rejected
CREATE TABLE tab
(
a Float64 STATISTICS(no_statistics_type)
) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY }
-- tDigest statistics can only be created on numeric columns
CREATE TABLE tab
(
a String STATISTICS(tdigest),
) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS }
CREATE TABLE tab
(
a Float64,
b String
) Engine = MergeTree() ORDER BY tuple();
ALTER TABLE tab ADD STATISTICS a TYPE no_statistics_type; -- { serverError INCORRECT_QUERY }
ALTER TABLE tab ADD STATISTICS a TYPE tdigest;
ALTER TABLE tab ADD STATISTICS IF NOT EXISTS a TYPE tdigest;
ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest;
-- Statistics can be created only on integer columns
ALTER TABLE tab ADD STATISTICS b TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab DROP STATISTICS a;
ALTER TABLE tab DROP STATISTICS IF EXISTS a;
ALTER TABLE tab CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab CLEAR STATISTICS IF EXISTS a;
ALTER TABLE tab MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS }
ALTER TABLE tab ADD STATISTICS a TYPE tdigest;
ALTER TABLE tab MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH;
ALTER TABLE tab MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN }
DROP TABLE tab;

View File

@ -0,0 +1,10 @@
10
10
10
statistics not used Condition less(b, 10_UInt8) moved to PREWHERE
statistics not used Condition less(a, 10_UInt8) moved to PREWHERE
statistics used after merge Condition less(a, 10_UInt8) moved to PREWHERE
statistics used after merge Condition less(b, 10_UInt8) moved to PREWHERE
statistics used after materialize Condition less(a, 10_UInt8) moved to PREWHERE
statistics used after materialize Condition less(b, 10_UInt8) moved to PREWHERE
2 0

View File

@ -0,0 +1,52 @@
-- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0').
DROP TABLE IF EXISTS tab;
SET enable_analyzer = 1;
SET allow_experimental_statistics = 1;
SET allow_statistics_optimize = 1;
SET materialize_statistics_on_insert = 0;
CREATE TABLE tab
(
a Int64 STATISTICS(tdigest),
b Int16 STATISTICS(tdigest),
) ENGINE = MergeTree() ORDER BY tuple()
SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics.
INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000;
SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used';
OPTIMIZE TABLE tab FINAL;
SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge';
TRUNCATE TABLE tab;
SET mutations_sync = 2;
INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000;
ALTER TABLE tab MATERIALIZE STATISTICS a, b;
SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize';
DROP TABLE tab;
SYSTEM FLUSH LOGS;
SELECT log_comment, message FROM system.text_log JOIN
(
SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log
WHERE current_database = currentDatabase()
AND query LIKE 'SELECT count(*) FROM tab%'
AND type = 'QueryFinish'
) AS query_log USING (query_id)
WHERE message LIKE '%moved to PREWHERE%'
ORDER BY event_time_microseconds;
SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds'])
FROM system.query_log
WHERE current_database = currentDatabase()
AND query LIKE 'INSERT INTO tab SELECT%'
AND type = 'QueryFinish';

View File

@ -1,98 +0,0 @@
u64 and =
10
10
10
10
0
0
0
0
10
10
10
10
u64 and <
70
70
70
70
80
80
80
80
70
70
70
70
f64 and =
10
10
10
10
0
0
0
0
10
10
10
10
0
0
0
0
f64 and <
70
70
70
70
80
80
80
80
70
70
70
70
80
80
80
80
dt and =
0
0
0
0
10
10
10
10
dt and <
10000
10000
10000
10000
70
70
70
70
b and =
5000
5000
5000
5000
5000
5000
5000
5000
5000
5000
5000
5000
0
0
0
0
s and =
10
10

View File

@ -1,214 +0,0 @@
-- Tags: no-fasttest
-- no-fasttest: 'count_min' sketches need a 3rd party library
-- Tests the cross product of all predicates with all right-hand sides on all data types and all statistics types.
SET allow_experimental_statistics = 1;
SET allow_statistics_optimize = 1;
DROP TABLE IF EXISTS tab;
CREATE TABLE tab
(
u64 UInt64,
u64_tdigest UInt64 STATISTICS(tdigest),
u64_count_min UInt64 STATISTICS(count_min),
u64_uniq UInt64 STATISTICS(uniq),
f64 Float64,
f64_tdigest Float64 STATISTICS(tdigest),
f64_count_min Float64 STATISTICS(count_min),
f64_uniq Float64 STATISTICS(uniq),
dt DateTime,
dt_tdigest DateTime STATISTICS(tdigest),
dt_count_min DateTime STATISTICS(count_min),
dt_uniq DateTime STATISTICS(uniq),
b Bool,
b_tdigest Bool STATISTICS(tdigest),
b_count_min Bool STATISTICS(count_min),
b_uniq Bool STATISTICS(uniq),
s String,
-- s_tdigest String STATISTICS(tdigest), -- not supported by tdigest
s_count_min String STATISTICS(count_min)
-- s_uniq String STATISTICS(uniq), -- not supported by uniq
) Engine = MergeTree() ORDER BY tuple()
SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO tab
-- SELECT number % 10000, number % 1000, -(number % 100) FROM system.numbers LIMIT 10000;
SELECT number % 1000,
number % 1000,
number % 1000,
number % 1000,
number % 1000,
number % 1000,
number % 1000,
number % 1000,
number % 1000,
number % 1000,
number % 1000,
number % 1000,
number % 2,
number % 2,
number % 2,
number % 2,
toString(number % 1000),
toString(number % 1000)
FROM system.numbers LIMIT 10000;
-- u64 ----------------------------------------------------
SELECT 'u64 and =';
SELECT count(*) FROM tab WHERE u64 = 7;
SELECT count(*) FROM tab WHERE u64_tdigest = 7;
SELECT count(*) FROM tab WHERE u64_count_min = 7;
SELECT count(*) FROM tab WHERE u64_uniq = 7;
SELECT count(*) FROM tab WHERE u64 = 7.7;
SELECT count(*) FROM tab WHERE u64_tdigest = 7.7;
SELECT count(*) FROM tab WHERE u64_count_min = 7.7;
SELECT count(*) FROM tab WHERE u64_uniq = 7.7;
SELECT count(*) FROM tab WHERE u64 = '7';
SELECT count(*) FROM tab WHERE u64_tdigest = '7';
SELECT count(*) FROM tab WHERE u64_count_min = '7';
SELECT count(*) FROM tab WHERE u64_uniq = '7';
SELECT count(*) FROM tab WHERE u64 = '7.7'; -- { serverError TYPE_MISMATCH }
SELECT count(*) FROM tab WHERE u64_tdigest = '7.7'; -- { serverError TYPE_MISMATCH }
SELECT count(*) FROM tab WHERE u64_count_min = '7.7'; -- { serverError TYPE_MISMATCH }
SELECT count(*) FROM tab WHERE u64_uniq = '7.7'; -- { serverError TYPE_MISMATCH }
SELECT 'u64 and <';
SELECT count(*) FROM tab WHERE u64 < 7;
SELECT count(*) FROM tab WHERE u64_tdigest < 7;
SELECT count(*) FROM tab WHERE u64_count_min < 7;
SELECT count(*) FROM tab WHERE u64_uniq < 7;
SELECT count(*) FROM tab WHERE u64 < 7.7;
SELECT count(*) FROM tab WHERE u64_tdigest < 7.7;
SELECT count(*) FROM tab WHERE u64_count_min < 7.7;
SELECT count(*) FROM tab WHERE u64_uniq < 7.7;
SELECT count(*) FROM tab WHERE u64 < '7';
SELECT count(*) FROM tab WHERE u64_tdigest < '7';
SELECT count(*) FROM tab WHERE u64_count_min < '7';
SELECT count(*) FROM tab WHERE u64_uniq < '7';
SELECT count(*) FROM tab WHERE u64 < '7.7'; -- { serverError TYPE_MISMATCH }
SELECT count(*) FROM tab WHERE u64_tdigest < '7.7'; -- { serverError TYPE_MISMATCH }
SELECT count(*) FROM tab WHERE u64_count_min < '7.7'; -- { serverError TYPE_MISMATCH }
SELECT count(*) FROM tab WHERE u64_uniq < '7.7'; -- { serverError TYPE_MISMATCH }
-- f64 ----------------------------------------------------
SELECT 'f64 and =';
SELECT count(*) FROM tab WHERE f64 = 7;
SELECT count(*) FROM tab WHERE f64_tdigest = 7;
SELECT count(*) FROM tab WHERE f64_count_min = 7;
SELECT count(*) FROM tab WHERE f64_uniq = 7;
SELECT count(*) FROM tab WHERE f64 = 7.7;
SELECT count(*) FROM tab WHERE f64_tdigest = 7.7;
SELECT count(*) FROM tab WHERE f64_count_min = 7.7;
SELECT count(*) FROM tab WHERE f64_uniq = 7.7;
SELECT count(*) FROM tab WHERE f64 = '7';
SELECT count(*) FROM tab WHERE f64_tdigest = '7';
SELECT count(*) FROM tab WHERE f64_count_min = '7';
SELECT count(*) FROM tab WHERE f64_uniq = '7';
SELECT count(*) FROM tab WHERE f64 = '7.7';
SELECT count(*) FROM tab WHERE f64_tdigest = '7.7';
SELECT count(*) FROM tab WHERE f64_count_min = '7.7';
SELECT count(*) FROM tab WHERE f64_uniq = '7.7';
SELECT 'f64 and <';
SELECT count(*) FROM tab WHERE f64 < 7;
SELECT count(*) FROM tab WHERE f64_tdigest < 7;
SELECT count(*) FROM tab WHERE f64_count_min < 7;
SELECT count(*) FROM tab WHERE f64_uniq < 7;
SELECT count(*) FROM tab WHERE f64 < 7.7;
SELECT count(*) FROM tab WHERE f64_tdigest < 7.7;
SELECT count(*) FROM tab WHERE f64_count_min < 7.7;
SELECT count(*) FROM tab WHERE f64_uniq < 7.7;
SELECT count(*) FROM tab WHERE f64 < '7';
SELECT count(*) FROM tab WHERE f64_tdigest < '7';
SELECT count(*) FROM tab WHERE f64_count_min < '7';
SELECT count(*) FROM tab WHERE f64_uniq < '7';
SELECT count(*) FROM tab WHERE f64 < '7.7';
SELECT count(*) FROM tab WHERE f64_tdigest < '7.7';
SELECT count(*) FROM tab WHERE f64_count_min < '7.7';
SELECT count(*) FROM tab WHERE f64_uniq < '7.7';
-- dt ----------------------------------------------------
SELECT 'dt and =';
SELECT count(*) FROM tab WHERE dt = '2024-08-08 11:12:13';
SELECT count(*) FROM tab WHERE dt_tdigest = '2024-08-08 11:12:13';
SELECT count(*) FROM tab WHERE dt_count_min = '2024-08-08 11:12:13';
SELECT count(*) FROM tab WHERE dt_uniq = '2024-08-08 11:12:13';
SELECT count(*) FROM tab WHERE dt = 7;
SELECT count(*) FROM tab WHERE dt_tdigest = 7;
SELECT count(*) FROM tab WHERE dt_count_min = 7;
SELECT count(*) FROM tab WHERE dt_uniq = 7;
SELECT 'dt and <';
SELECT count(*) FROM tab WHERE dt < '2024-08-08 11:12:13';
SELECT count(*) FROM tab WHERE dt_tdigest < '2024-08-08 11:12:13';
SELECT count(*) FROM tab WHERE dt_count_min < '2024-08-08 11:12:13';
SELECT count(*) FROM tab WHERE dt_uniq < '2024-08-08 11:12:13';
SELECT count(*) FROM tab WHERE dt < 7;
SELECT count(*) FROM tab WHERE dt_tdigest < 7;
SELECT count(*) FROM tab WHERE dt_count_min < 7;
SELECT count(*) FROM tab WHERE dt_uniq < 7;
-- b ----------------------------------------------------
SELECT 'b and =';
SELECT count(*) FROM tab WHERE b = true;
SELECT count(*) FROM tab WHERE b_tdigest = true;
SELECT count(*) FROM tab WHERE b_count_min = true;
SELECT count(*) FROM tab WHERE b_uniq = true;
SELECT count(*) FROM tab WHERE b = 'true';
SELECT count(*) FROM tab WHERE b_tdigest = 'true';
SELECT count(*) FROM tab WHERE b_count_min = 'true';
SELECT count(*) FROM tab WHERE b_uniq = 'true';
SELECT count(*) FROM tab WHERE b = 1;
SELECT count(*) FROM tab WHERE b_tdigest = 1;
SELECT count(*) FROM tab WHERE b_count_min = 1;
SELECT count(*) FROM tab WHERE b_uniq = 1;
SELECT count(*) FROM tab WHERE b = 1.1;
SELECT count(*) FROM tab WHERE b_tdigest = 1.1;
SELECT count(*) FROM tab WHERE b_count_min = 1.1;
SELECT count(*) FROM tab WHERE b_uniq = 1.1;
-- s ----------------------------------------------------
SELECT 's and =';
SELECT count(*) FROM tab WHERE s = 7; -- { serverError NO_COMMON_TYPE }
-- SELECT count(*) FROM tab WHERE s_tdigest = 7; -- not supported
SELECT count(*) FROM tab WHERE s_count_min = 7; -- { serverError NO_COMMON_TYPE }
-- SELECT count(*) FROM tab WHERE s_uniq = 7; -- not supported
SELECT count(*) FROM tab WHERE s = '7';
-- SELECT count(*) FROM tab WHERE s_tdigest = '7'; -- not supported
SELECT count(*) FROM tab WHERE s_count_min = '7';
-- SELECT count(*) FROM tab WHERE s_uniq = '7'; -- not supported
DROP TABLE tab;

View File

@ -0,0 +1,35 @@
CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest, uniq),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192
After insert
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed)
After merge
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed)
After modify TDigest
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(less(c, -1), less(a, 10), less(b, 10)) (removed)
After drop
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10), less(c, -1), less(b, 10)) (removed)

View File

@ -0,0 +1,73 @@
DROP TABLE IF EXISTS t1;
SET allow_experimental_statistics = 1;
SET allow_statistics_optimize = 1;
SET mutations_sync = 1;
CREATE TABLE t1
(
a Float64 STATISTICS(tdigest),
b Int64 STATISTICS(tdigest),
c Int64 STATISTICS(tdigest, uniq),
pk String,
) Engine = MergeTree() ORDER BY pk
SETTINGS min_bytes_for_wide_part = 0;
SHOW CREATE TABLE t1;
INSERT INTO t1 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000;
INSERT INTO t1 select 0, 0, 11, generateUUIDv4();
SELECT 'After insert';
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
OPTIMIZE TABLE t1 FINAL;
SELECT 'After merge';
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT 'After modify TDigest';
ALTER TABLE t1 MODIFY STATISTICS c TYPE TDigest;
ALTER TABLE t1 MATERIALIZE STATISTICS c;
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
ALTER TABLE t1 DROP STATISTICS c;
SELECT 'After drop';
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
SET allow_suspicious_low_cardinality_types=1;
CREATE TABLE t2
(
a Float64 STATISTICS(tdigest),
b Int64 STATISTICS(tdigest),
c LowCardinality(Int64) STATISTICS(tdigest, uniq),
pk String,
) Engine = MergeTree() ORDER BY pk
SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO t2 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000;
DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t3;
CREATE TABLE t3
(
a Float64 STATISTICS(tdigest),
b Int64 STATISTICS(tdigest),
c Nullable(Int64) STATISTICS(tdigest, uniq),
pk String,
) Engine = MergeTree() ORDER BY pk
SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO t3 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000;
DROP TABLE IF EXISTS t3;

View File

@ -1,20 +0,0 @@
After insert
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
After drop statistic
Prewhere info
Prewhere filter
Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)
After add and materialize statistic
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
After merge
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
After rename
Prewhere info
Prewhere filter
Prewhere filter column: and(less(a, 10_UInt8), less(c, 10_UInt8)) (removed)

View File

@ -1,42 +0,0 @@
-- Test that the optimizer picks up column statistics
-- (The concrete statistics type, column data type and predicate type don't matter)
-- Checks by the predicate evaluation order in EXPLAIN. This is quite fragile, a better approach would be helpful (maybe 'send_logs_level'?)
SET allow_experimental_statistics = 1;
SET allow_statistics_optimize = 1;
SET mutations_sync = 1;
SET enable_analyzer = 1;
DROP TABLE IF EXISTS tab;
CREATE TABLE tab
(
a Float64 STATISTICS(tdigest),
b Int64 STATISTICS(tdigest)
) Engine = MergeTree() ORDER BY tuple()
SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO tab select number, -number FROM system.numbers LIMIT 10000;
SELECT 'After insert';
SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used)
ALTER TABLE tab DROP STATISTICS a, b;
SELECT 'After drop statistic';
SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks b first, then a (statistics not used)
ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest;
ALTER TABLE tab MATERIALIZE STATISTICS a, b;
INSERT INTO tab select number, -number FROM system.numbers LIMIT 10000;
SELECT 'After add and materialize statistic';
SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used)
OPTIMIZE TABLE tab FINAL;
SELECT 'After merge';
SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used)
ALTER TABLE tab RENAME COLUMN b TO c;
SELECT 'After rename';
SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then c (statistics used)
DROP TABLE IF EXISTS tab;