From eb800d9e3918387c243c0480ac9df37190a91dc7 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Mon, 9 Sep 2024 11:45:39 +0800 Subject: [PATCH 1/4] Rename statistics of type count_min to countmin --- .../engines/table-engines/mergetree-family/mergetree.md | 8 ++++---- src/Storages/Statistics/Statistics.cpp | 2 +- src/Storages/Statistics/StatisticsCountMinSketch.cpp | 4 ++-- src/Storages/StatisticsDescription.cpp | 8 ++++---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 74f1295ef96..317d1cca61e 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -1003,16 +1003,16 @@ They can be used for prewhere optimization only if we enable `set allow_statisti [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains. -- `count_min` +- `CountMin` - [Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column. + [CountMin](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column. ### Supported Data Types {#supported-data-types} | | (U)Int* | Float* | Decimal(*) | Date* | Boolean | Enum* | (Fixed)String | |-----------|---------|--------|------------|-------|---------|-------|------------------| -| count_min | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| CountMin | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | | MinMax | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | ✗ | | TDigest | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | ✗ | | Uniq | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | @@ -1022,7 +1022,7 @@ They can be used for prewhere optimization only if we enable `set allow_statisti | | Equality filters (==) | Range filters (>, >=, <, <=) | |-----------|-----------------------|------------------------------| -| count_min | ✔ | ✗ | +| CountMin | ✔ | ✗ | | MinMax | ✗ | ✔ | | TDigest | ✗ | ✔ | | Uniq | ✔ | ✗ | diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 795963bd55d..0557530515f 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -248,7 +248,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnDescription & co { auto it = creators.find(type); if (it == creators.end()) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'count_min', 'minmax', 'tdigest' and 'uniq'", type); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'countmin', 'minmax', 'tdigest' and 'uniq'", type); auto stat_ptr = (it->second)(desc, column_desc.type); column_stat->stats[type] = stat_ptr; } diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.cpp b/src/Storages/Statistics/StatisticsCountMinSketch.cpp index 1a2459c230d..f477181ec2d 100644 --- a/src/Storages/Statistics/StatisticsCountMinSketch.cpp +++ b/src/Storages/Statistics/StatisticsCountMinSketch.cpp @@ -49,7 +49,7 @@ Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const if (isStringOrFixedString(data_type)) return sketch.get_estimate(val.safeGet()); - throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimate data type of {}", data_type->getName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'countmin' does not support estimate data type of {}", data_type->getName()); } void StatisticsCountMinSketch::update(const ColumnPtr & column) @@ -88,7 +88,7 @@ void countMinSketchStatisticsValidator(const SingleStatisticsDescription & /*des DataTypePtr inner_data_type = removeNullable(data_type); inner_data_type = removeLowCardinalityAndNullable(inner_data_type); if (!inner_data_type->isValueRepresentedByNumber() && !isStringOrFixedString(inner_data_type)) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'countmin' does not support type {}", data_type->getName()); } StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type) diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 4e0d901d0c7..ac7fa8898de 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -48,11 +48,11 @@ static StatisticsType stringToStatisticsType(String type) return StatisticsType::TDigest; if (type == "uniq") return StatisticsType::Uniq; - if (type == "count_min") + if (type == "countmin") return StatisticsType::CountMinSketch; if (type == "minmax") return StatisticsType::MinMax; - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are 'count_min', 'minmax', 'tdigest' and 'uniq'.", type); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are 'countmin', 'minmax', 'tdigest' and 'uniq'.", type); } String SingleStatisticsDescription::getTypeName() const @@ -64,11 +64,11 @@ String SingleStatisticsDescription::getTypeName() const case StatisticsType::Uniq: return "Uniq"; case StatisticsType::CountMinSketch: - return "count_min"; + return "countmin"; case StatisticsType::MinMax: return "minmax"; default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are 'count_min', 'minmax', 'tdigest' and 'uniq'.", type); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are 'countmin', 'minmax', 'tdigest' and 'uniq'.", type); } } From 077b8239f55163ae25f901924f5a93997d084256 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Mon, 9 Sep 2024 11:52:58 +0800 Subject: [PATCH 2/4] Rename in tests --- ...atistics_create_materialize_drop.reference | 4 +- ...864_statistics_create_materialize_drop.sql | 10 +-- .../0_stateless/02864_statistics_ddl.sql | 56 ++++++++-------- .../02864_statistics_predicates.sql | 64 +++++++++---------- 4 files changed, 67 insertions(+), 67 deletions(-) diff --git a/tests/queries/0_stateless/02864_statistics_create_materialize_drop.reference b/tests/queries/0_stateless/02864_statistics_create_materialize_drop.reference index 4ca2c5e5f9b..1843964377d 100644 --- a/tests/queries/0_stateless/02864_statistics_create_materialize_drop.reference +++ b/tests/queries/0_stateless/02864_statistics_create_materialize_drop.reference @@ -1,5 +1,5 @@ Test create statistics: -CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64) STATISTICS(tdigest, uniq, count_min, minmax),\n `b` LowCardinality(Nullable(String)) STATISTICS(uniq, count_min),\n `c` LowCardinality(Nullable(Int64)) STATISTICS(tdigest, uniq, count_min, minmax),\n `d` DateTime STATISTICS(tdigest, uniq, count_min, minmax),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64) STATISTICS(tdigest, uniq, countmin, minmax),\n `b` LowCardinality(Nullable(String)) STATISTICS(uniq, countmin),\n `c` LowCardinality(Nullable(Int64)) STATISTICS(tdigest, uniq, countmin, minmax),\n `d` DateTime STATISTICS(tdigest, uniq, countmin, minmax),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 Test materialize and drop statistics: -CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64),\n `b` LowCardinality(Nullable(String)) STATISTICS(uniq, count_min),\n `c` LowCardinality(Nullable(Int64)),\n `d` DateTime,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64),\n `b` LowCardinality(Nullable(String)) STATISTICS(uniq, countmin),\n `c` LowCardinality(Nullable(Int64)),\n `d` DateTime,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64),\n `b` LowCardinality(Nullable(String)),\n `c` LowCardinality(Nullable(Int64)),\n `d` DateTime,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02864_statistics_create_materialize_drop.sql b/tests/queries/0_stateless/02864_statistics_create_materialize_drop.sql index 90a57c99624..249e3c84a51 100644 --- a/tests/queries/0_stateless/02864_statistics_create_materialize_drop.sql +++ b/tests/queries/0_stateless/02864_statistics_create_materialize_drop.sql @@ -12,10 +12,10 @@ SELECT 'Test create statistics:'; CREATE TABLE tab ( - a LowCardinality(Int64) STATISTICS(count_min, minmax, tdigest, uniq), - b LowCardinality(Nullable(String)) STATISTICS(count_min, uniq), - c LowCardinality(Nullable(Int64)) STATISTICS(count_min, minmax, tdigest, uniq), - d DateTime STATISTICS(count_min, minmax, tdigest, uniq), + a LowCardinality(Int64) STATISTICS(countmin, minmax, tdigest, uniq), + b LowCardinality(Nullable(String)) STATISTICS(countmin, uniq), + c LowCardinality(Nullable(Int64)) STATISTICS(countmin, minmax, tdigest, uniq), + d DateTime STATISTICS(countmin, minmax, tdigest, uniq), pk String, ) Engine = MergeTree() ORDER BY pk; @@ -25,7 +25,7 @@ SHOW CREATE TABLE tab; SELECT 'Test materialize and drop statistics:'; ALTER TABLE tab DROP STATISTICS a, b, c, d; -ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq; +ALTER TABLE tab ADD STATISTICS b TYPE countmin, uniq; ALTER TABLE tab MATERIALIZE STATISTICS b; SHOW CREATE TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_ddl.sql b/tests/queries/0_stateless/02864_statistics_ddl.sql index bcaaa9e7b61..5b2c5cebc1d 100644 --- a/tests/queries/0_stateless/02864_statistics_ddl.sql +++ b/tests/queries/0_stateless/02864_statistics_ddl.sql @@ -1,5 +1,5 @@ -- Tags: no-fasttest --- no-fasttest: 'count_min' sketches need a 3rd party library +-- no-fasttest: 'countmin' sketches need a 3rd party library -- Tests that DDL statements which create / drop / materialize statistics @@ -71,29 +71,29 @@ CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(uniq)) Engine = MergeTree() CREATE TABLE tab (col UUID STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } CREATE TABLE tab (col IPv6 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } --- count_min requires data_type.isValueRepresentedByInteger or data_type = (Fixed)String +-- countmin requires data_type.isValueRepresentedByInteger or data_type = (Fixed)String -- These types work: -CREATE TABLE tab (col UInt8 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col UInt256 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Float32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Decimal32(3) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Date STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Date32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col DateTime STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col DateTime64 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col IPv4 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Nullable(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col String STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col FixedString(1) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt8 STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt256 STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Float32 STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Decimal32(3) STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date32 STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime64 STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col IPv4 STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Nullable(UInt8) STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col String STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col FixedString(1) STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -- These types don't work: -CREATE TABLE tab (col Array(Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col UUID STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col IPv6 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Array(Float64) STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(countmin)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -- minmax requires data_type.isValueRepresentedByInteger -- These types work: @@ -187,17 +187,17 @@ ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATI -- uniq -- Works: ALTER TABLE tab ADD STATISTICS f64 TYPE uniq; ALTER TABLE tab DROP STATISTICS f64; -ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; +ALTER TABLE tab MODIFY STATISTICS f64 TYPE countmin; ALTER TABLE tab DROP STATISTICS f64; -- Doesn't work: ALTER TABLE tab ADD STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS } ALTER TABLE tab MODIFY STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS } --- count_min +-- countmin -- Works: -ALTER TABLE tab ADD STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; -ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; +ALTER TABLE tab ADD STATISTICS f64 TYPE countmin; ALTER TABLE tab DROP STATISTICS f64; +ALTER TABLE tab MODIFY STATISTICS f64 TYPE countmin; ALTER TABLE tab DROP STATISTICS f64; -- Doesn't work: -ALTER TABLE tab ADD STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab MODIFY STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS a TYPE countmin; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE countmin; -- { serverError ILLEGAL_STATISTICS } -- minmax -- Works: ALTER TABLE tab ADD STATISTICS f64 TYPE minmax; ALTER TABLE tab DROP STATISTICS f64; diff --git a/tests/queries/0_stateless/02864_statistics_predicates.sql b/tests/queries/0_stateless/02864_statistics_predicates.sql index 473a7bc95ad..d7afba12c1d 100644 --- a/tests/queries/0_stateless/02864_statistics_predicates.sql +++ b/tests/queries/0_stateless/02864_statistics_predicates.sql @@ -1,5 +1,5 @@ -- Tags: no-fasttest --- no-fasttest: 'count_min' sketches need a 3rd party library +-- no-fasttest: 'countmin' sketches need a 3rd party library -- Tests the cross product of all predicates with all right-hand sides on all data types and all statistics types. @@ -13,27 +13,27 @@ CREATE TABLE tab u64 UInt64, u64_tdigest UInt64 STATISTICS(tdigest), u64_minmax UInt64 STATISTICS(minmax), - u64_count_min UInt64 STATISTICS(count_min), + u64_countmin UInt64 STATISTICS(countmin), u64_uniq UInt64 STATISTICS(uniq), f64 Float64, f64_tdigest Float64 STATISTICS(tdigest), f64_minmax Float64 STATISTICS(minmax), - f64_count_min Float64 STATISTICS(count_min), + f64_countmin Float64 STATISTICS(countmin), f64_uniq Float64 STATISTICS(uniq), dt DateTime, dt_tdigest DateTime STATISTICS(tdigest), dt_minmax DateTime STATISTICS(minmax), - dt_count_min DateTime STATISTICS(count_min), + dt_countmin DateTime STATISTICS(countmin), dt_uniq DateTime STATISTICS(uniq), b Bool, b_tdigest Bool STATISTICS(tdigest), b_minmax Bool STATISTICS(minmax), - b_count_min Bool STATISTICS(count_min), + b_countmin Bool STATISTICS(countmin), b_uniq Bool STATISTICS(uniq), s String, -- s_tdigest String STATISTICS(tdigest), -- not supported by tdigest -- s_minmax String STATISTICS(minmax), -- not supported by minmax - s_count_min String STATISTICS(count_min), + s_countmin String STATISTICS(countmin), s_uniq String STATISTICS(uniq) ) Engine = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; @@ -72,25 +72,25 @@ SELECT 'u64 and ='; SELECT count(*) FROM tab WHERE u64 = 7; SELECT count(*) FROM tab WHERE u64_tdigest = 7; SELECT count(*) FROM tab WHERE u64_minmax = 7; -SELECT count(*) FROM tab WHERE u64_count_min = 7; +SELECT count(*) FROM tab WHERE u64_countmin = 7; SELECT count(*) FROM tab WHERE u64_uniq = 7; SELECT count(*) FROM tab WHERE u64 = 7.7; SELECT count(*) FROM tab WHERE u64_tdigest = 7.7; SELECT count(*) FROM tab WHERE u64_minmax = 7.7; -SELECT count(*) FROM tab WHERE u64_count_min = 7.7; +SELECT count(*) FROM tab WHERE u64_countmin = 7.7; SELECT count(*) FROM tab WHERE u64_uniq = 7.7; SELECT count(*) FROM tab WHERE u64 = '7'; SELECT count(*) FROM tab WHERE u64_tdigest = '7'; SELECT count(*) FROM tab WHERE u64_minmax = '7'; -SELECT count(*) FROM tab WHERE u64_count_min = '7'; +SELECT count(*) FROM tab WHERE u64_countmin = '7'; SELECT count(*) FROM tab WHERE u64_uniq = '7'; SELECT count(*) FROM tab WHERE u64 = '7.7'; -- { serverError TYPE_MISMATCH } SELECT count(*) FROM tab WHERE u64_tdigest = '7.7'; -- { serverError TYPE_MISMATCH } SELECT count(*) FROM tab WHERE u64_minmax = '7.7'; -- { serverError TYPE_MISMATCH } -SELECT count(*) FROM tab WHERE u64_count_min = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_countmin = '7.7'; -- { serverError TYPE_MISMATCH } SELECT count(*) FROM tab WHERE u64_uniq = '7.7'; -- { serverError TYPE_MISMATCH } SELECT 'u64 and <'; @@ -98,25 +98,25 @@ SELECT 'u64 and <'; SELECT count(*) FROM tab WHERE u64 < 7; SELECT count(*) FROM tab WHERE u64_tdigest < 7; SELECT count(*) FROM tab WHERE u64_minmax < 7; -SELECT count(*) FROM tab WHERE u64_count_min < 7; +SELECT count(*) FROM tab WHERE u64_countmin < 7; SELECT count(*) FROM tab WHERE u64_uniq < 7; SELECT count(*) FROM tab WHERE u64 < 7.7; SELECT count(*) FROM tab WHERE u64_tdigest < 7.7; SELECT count(*) FROM tab WHERE u64_minmax < 7.7; -SELECT count(*) FROM tab WHERE u64_count_min < 7.7; +SELECT count(*) FROM tab WHERE u64_countmin < 7.7; SELECT count(*) FROM tab WHERE u64_uniq < 7.7; SELECT count(*) FROM tab WHERE u64 < '7'; SELECT count(*) FROM tab WHERE u64_tdigest < '7'; SELECT count(*) FROM tab WHERE u64_minmax < '7'; -SELECT count(*) FROM tab WHERE u64_count_min < '7'; +SELECT count(*) FROM tab WHERE u64_countmin < '7'; SELECT count(*) FROM tab WHERE u64_uniq < '7'; SELECT count(*) FROM tab WHERE u64 < '7.7'; -- { serverError TYPE_MISMATCH } SELECT count(*) FROM tab WHERE u64_tdigest < '7.7'; -- { serverError TYPE_MISMATCH } SELECT count(*) FROM tab WHERE u64_minmax < '7.7'; -- { serverError TYPE_MISMATCH } -SELECT count(*) FROM tab WHERE u64_count_min < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_countmin < '7.7'; -- { serverError TYPE_MISMATCH } SELECT count(*) FROM tab WHERE u64_uniq < '7.7'; -- { serverError TYPE_MISMATCH } -- f64 ---------------------------------------------------- @@ -126,25 +126,25 @@ SELECT 'f64 and ='; SELECT count(*) FROM tab WHERE f64 = 7; SELECT count(*) FROM tab WHERE f64_tdigest = 7; SELECT count(*) FROM tab WHERE f64_minmax = 7; -SELECT count(*) FROM tab WHERE f64_count_min = 7; +SELECT count(*) FROM tab WHERE f64_countmin = 7; SELECT count(*) FROM tab WHERE f64_uniq = 7; SELECT count(*) FROM tab WHERE f64 = 7.7; SELECT count(*) FROM tab WHERE f64_tdigest = 7.7; SELECT count(*) FROM tab WHERE f64_minmax = 7.7; -SELECT count(*) FROM tab WHERE f64_count_min = 7.7; +SELECT count(*) FROM tab WHERE f64_countmin = 7.7; SELECT count(*) FROM tab WHERE f64_uniq = 7.7; SELECT count(*) FROM tab WHERE f64 = '7'; SELECT count(*) FROM tab WHERE f64_tdigest = '7'; SELECT count(*) FROM tab WHERE f64_minmax = '7'; -SELECT count(*) FROM tab WHERE f64_count_min = '7'; +SELECT count(*) FROM tab WHERE f64_countmin = '7'; SELECT count(*) FROM tab WHERE f64_uniq = '7'; SELECT count(*) FROM tab WHERE f64 = '7.7'; SELECT count(*) FROM tab WHERE f64_tdigest = '7.7'; SELECT count(*) FROM tab WHERE f64_minmax = '7.7'; -SELECT count(*) FROM tab WHERE f64_count_min = '7.7'; +SELECT count(*) FROM tab WHERE f64_countmin = '7.7'; SELECT count(*) FROM tab WHERE f64_uniq = '7.7'; SELECT 'f64 and <'; @@ -152,25 +152,25 @@ SELECT 'f64 and <'; SELECT count(*) FROM tab WHERE f64 < 7; SELECT count(*) FROM tab WHERE f64_tdigest < 7; SELECT count(*) FROM tab WHERE f64_minmax < 7; -SELECT count(*) FROM tab WHERE f64_count_min < 7; +SELECT count(*) FROM tab WHERE f64_countmin < 7; SELECT count(*) FROM tab WHERE f64_uniq < 7; SELECT count(*) FROM tab WHERE f64 < 7.7; SELECT count(*) FROM tab WHERE f64_tdigest < 7.7; SELECT count(*) FROM tab WHERE f64_minmax < 7.7; -SELECT count(*) FROM tab WHERE f64_count_min < 7.7; +SELECT count(*) FROM tab WHERE f64_countmin < 7.7; SELECT count(*) FROM tab WHERE f64_uniq < 7.7; SELECT count(*) FROM tab WHERE f64 < '7'; SELECT count(*) FROM tab WHERE f64_tdigest < '7'; SELECT count(*) FROM tab WHERE f64_minmax < '7'; -SELECT count(*) FROM tab WHERE f64_count_min < '7'; +SELECT count(*) FROM tab WHERE f64_countmin < '7'; SELECT count(*) FROM tab WHERE f64_uniq < '7'; SELECT count(*) FROM tab WHERE f64 < '7.7'; SELECT count(*) FROM tab WHERE f64_tdigest < '7.7'; SELECT count(*) FROM tab WHERE f64_minmax < '7.7'; -SELECT count(*) FROM tab WHERE f64_count_min < '7.7'; +SELECT count(*) FROM tab WHERE f64_countmin < '7.7'; SELECT count(*) FROM tab WHERE f64_uniq < '7.7'; -- dt ---------------------------------------------------- @@ -180,13 +180,13 @@ SELECT 'dt and ='; SELECT count(*) FROM tab WHERE dt = '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_tdigest = '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_minmax = '2024-08-08 11:12:13'; -SELECT count(*) FROM tab WHERE dt_count_min = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_countmin = '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_uniq = '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt = 7; SELECT count(*) FROM tab WHERE dt_tdigest = 7; SELECT count(*) FROM tab WHERE dt_minmax = 7; -SELECT count(*) FROM tab WHERE dt_count_min = 7; +SELECT count(*) FROM tab WHERE dt_countmin = 7; SELECT count(*) FROM tab WHERE dt_uniq = 7; SELECT 'dt and <'; @@ -194,13 +194,13 @@ SELECT 'dt and <'; SELECT count(*) FROM tab WHERE dt < '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_tdigest < '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_minmax < '2024-08-08 11:12:13'; -SELECT count(*) FROM tab WHERE dt_count_min < '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_countmin < '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_uniq < '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt < 7; SELECT count(*) FROM tab WHERE dt_tdigest < 7; SELECT count(*) FROM tab WHERE dt_minmax < 7; -SELECT count(*) FROM tab WHERE dt_count_min < 7; +SELECT count(*) FROM tab WHERE dt_countmin < 7; SELECT count(*) FROM tab WHERE dt_uniq < 7; -- b ---------------------------------------------------- @@ -210,25 +210,25 @@ SELECT 'b and ='; SELECT count(*) FROM tab WHERE b = true; SELECT count(*) FROM tab WHERE b_tdigest = true; SELECT count(*) FROM tab WHERE b_minmax = true; -SELECT count(*) FROM tab WHERE b_count_min = true; +SELECT count(*) FROM tab WHERE b_countmin = true; SELECT count(*) FROM tab WHERE b_uniq = true; SELECT count(*) FROM tab WHERE b = 'true'; SELECT count(*) FROM tab WHERE b_tdigest = 'true'; SELECT count(*) FROM tab WHERE b_minmax = 'true'; -SELECT count(*) FROM tab WHERE b_count_min = 'true'; +SELECT count(*) FROM tab WHERE b_countmin = 'true'; SELECT count(*) FROM tab WHERE b_uniq = 'true'; SELECT count(*) FROM tab WHERE b = 1; SELECT count(*) FROM tab WHERE b_tdigest = 1; SELECT count(*) FROM tab WHERE b_minmax = 1; -SELECT count(*) FROM tab WHERE b_count_min = 1; +SELECT count(*) FROM tab WHERE b_countmin = 1; SELECT count(*) FROM tab WHERE b_uniq = 1; SELECT count(*) FROM tab WHERE b = 1.1; SELECT count(*) FROM tab WHERE b_tdigest = 1.1; SELECT count(*) FROM tab WHERE b_minmax = 1.1; -SELECT count(*) FROM tab WHERE b_count_min = 1.1; +SELECT count(*) FROM tab WHERE b_countmin = 1.1; SELECT count(*) FROM tab WHERE b_uniq = 1.1; -- s ---------------------------------------------------- @@ -238,13 +238,13 @@ SELECT 's and ='; SELECT count(*) FROM tab WHERE s = 7; -- { serverError NO_COMMON_TYPE } -- SELECT count(*) FROM tab WHERE s_tdigest = 7; -- not supported -- SELECT count(*) FROM tab WHERE s_minmax = 7; -- not supported -SELECT count(*) FROM tab WHERE s_count_min = 7; -- { serverError NO_COMMON_TYPE } +SELECT count(*) FROM tab WHERE s_countmin = 7; -- { serverError NO_COMMON_TYPE } SELECT count(*) FROM tab WHERE s_uniq = 7; -- { serverError NO_COMMON_TYPE } SELECT count(*) FROM tab WHERE s = '7'; -- SELECT count(*) FROM tab WHERE s_tdigest = '7'; -- not supported -- SELECT count(*) FROM tab WHERE s_minmax = '7'; -- not supported -SELECT count(*) FROM tab WHERE s_count_min = '7'; +SELECT count(*) FROM tab WHERE s_countmin = '7'; SELECT count(*) FROM tab WHERE s_uniq = '7'; DROP TABLE tab; From 6539cbd1ce1997ca14c65131502cda2e0daf7676 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Mon, 9 Sep 2024 17:34:15 +0800 Subject: [PATCH 3/4] Add CountMin to dictionary file and a little fixup --- .../table-engines/mergetree-family/mergetree.md | 17 +++++++++++++++++ .../aspell-ignore/en/aspell-dict.txt | 1 + 2 files changed, 18 insertions(+) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 317d1cca61e..d17208ab099 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -991,6 +991,23 @@ They can be used for prewhere optimization only if we enable `set allow_statisti ### Available Types of Column Statistics {#available-types-of-column-statistics} +Names of statistics types are case-insensitive, i.e. both + +``` sql +CREATE TABLE tab +( +a LowCardinality(Int64) STATISTICS(minmax) +) [...] +``` +and +``` sql +CREATE TABLE tab +( +a LowCardinality(Int64) STATISTICS(MiNmAx) +) [...] +``` +work. + - `MinMax` The minimum and maximum column value which allows to estimate the selectivity of range filters on numeric columns. diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index d10db5f0d3d..a51132e6d7f 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -120,6 +120,7 @@ CMPLNT CMake CMakeLists CODECS +CountMin COVID CPUFrequencyMHz CPUs From c4720d9728ba0e264da4834dea7fcaad6b2a8322 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 9 Sep 2024 11:18:42 +0000 Subject: [PATCH 4/4] Minor doc fixups --- .../mergetree-family/mergetree.md | 37 +++++++------------ 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index d17208ab099..0bbee5f86f3 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -991,48 +991,39 @@ They can be used for prewhere optimization only if we enable `set allow_statisti ### Available Types of Column Statistics {#available-types-of-column-statistics} -Names of statistics types are case-insensitive, i.e. both - -``` sql -CREATE TABLE tab -( -a LowCardinality(Int64) STATISTICS(minmax) -) [...] -``` -and -``` sql -CREATE TABLE tab -( -a LowCardinality(Int64) STATISTICS(MiNmAx) -) [...] -``` -work. - - `MinMax` The minimum and maximum column value which allows to estimate the selectivity of range filters on numeric columns. + Syntax: `minmax` + - `TDigest` [TDigest](https://github.com/tdunning/t-digest) sketches which allow to compute approximate percentiles (e.g. the 90th percentile) for numeric columns. + Syntax: `tdigest` + - `Uniq` [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains. + Syntax: `uniq` + - `CountMin` [CountMin](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column. + Syntax `countmin` + ### Supported Data Types {#supported-data-types} -| | (U)Int* | Float* | Decimal(*) | Date* | Boolean | Enum* | (Fixed)String | -|-----------|---------|--------|------------|-------|---------|-------|------------------| -| CountMin | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | -| MinMax | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | ✗ | -| TDigest | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | ✗ | -| Uniq | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| | (U)Int*, Float*, Decimal(*), Date*, Boolean, Enum* | String or FixedString | +|-----------|----------------------------------------------------|-----------------------| +| CountMin | ✔ | ✔ | +| MinMax | ✔ | ✗ | +| TDigest | ✔ | ✗ | +| Uniq | ✔ | ✔ | ### Supported Operations {#supported-operations}