From 3395c7c74509980ad8b6f41b65f113b533391cf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 27 Nov 2023 19:29:20 +0100 Subject: [PATCH] Provide a custom implementation for respect_null aggregations --- .../aggregate-functions/reference/any.md | 9 +- .../reference/first_value.md | 36 ++-- .../AggregateFunctionAny.cpp | 201 +++++++++++++++++- .../AggregateFunctionMinMaxAny.h | 158 +++----------- .../AggregateFunctionArgMinMax.cpp | 4 +- src/AggregateFunctions/HelpersMinMaxAny.h | 6 +- .../02922_respect_nulls_Nullable.reference | 32 +++ .../02922_respect_nulls_Nullable.sql | 63 ++++++ .../02922_respect_nulls_extensive.reference | 12 +- .../02922_respect_nulls_extensive.sql | 11 +- .../02922_respect_nulls_states.reference | 3 + .../02922_respect_nulls_states.sql | 14 +- 12 files changed, 382 insertions(+), 167 deletions(-) create mode 100644 tests/queries/0_stateless/02922_respect_nulls_Nullable.reference create mode 100644 tests/queries/0_stateless/02922_respect_nulls_Nullable.sql diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md index f79fe66c05d..6286ebb3a35 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/any.md +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -5,7 +5,12 @@ sidebar_position: 6 # any -Selects the first encountered (non-NULL) value, unless all rows have NULL values in that column. +Selects the first encountered value of a column. + +By default, it ignores NULL values and returns the first NOT NULL value found in the column. Like [`first_value`](./first_value.md), it supports `RESPECT NULLS`, in which case it will select the first value passed, regardless of whether it's NULL or not. + +The return type of the function is the same as the input, except for LowCardinality, which is discarded. 
This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../combinators.md) to modify this behaviour. + The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate. To get a determinate result, you can use the ‘min’ or ‘max’ function instead of ‘any’. @@ -13,4 +18,4 @@ In some cases, you can rely on the order of execution. This applies to cases whe When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function. -- Alias: `any_value` +- Alias: `any_value`, `first_value`. diff --git a/docs/en/sql-reference/aggregate-functions/reference/first_value.md b/docs/en/sql-reference/aggregate-functions/reference/first_value.md index c1965b23fe3..6c26f6cac44 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/first_value.md +++ b/docs/en/sql-reference/aggregate-functions/reference/first_value.md @@ -5,9 +5,13 @@ sidebar_position: 7 # first_value -Selects the first encountered value, similar to `any`, but could accept NULL. -Mostly it should be used with [Window Functions](../../window-functions/index.md). -Without Window Functions the result will be random if the source stream is not ordered. +It is an alias for [`any`](./any.md), but it was introduced for compatibility with [Window Functions](../../window-functions/index.md), where sometimes it's necessary to process `NULL` values (by default all ClickHouse +aggregate functions ignore NULLs). 
+ +It supports declaring a modifier to respect nulls (`RESPECT NULLS`), both under [Window Functions](../../window-functions/index.md) and in normal aggregations. + +As with `any`, without Window Functions the result will be random if the source stream is not ordered and the return type +matches the input type (Null is only returned if the input is Nullable or -OrNull combinator is added). ## examples @@ -23,15 +27,15 @@ INSERT INTO test_data (a, b) Values (1,null), (2,3), (4, 5), (6,null); ``` ### example1 -The NULL value is ignored at default. +By default, the NULL value is ignored. ```sql select first_value(b) from test_data; ``` ```text -┌─first_value_ignore_nulls(b)─┐ -│ 3 │ -└─────────────────────────────┘ +┌─any(b)─┐ +│ 3 │ +└────────┘ ``` ### example2 @@ -41,9 +45,9 @@ select first_value(b) ignore nulls from test_data ``` ```text -┌─first_value_ignore_nulls(b)─┐ -│ 3 │ -└─────────────────────────────┘ +┌─any(b) IGNORE NULLS ─┐ +│ 3 │ +└──────────────────────┘ ``` ### example3 @@ -53,9 +57,9 @@ select first_value(b) respect nulls from test_data ``` ```text -┌─first_value_respect_nulls(b)─┐ -│ ᴺᵁᴸᴸ │ -└──────────────────────────────┘ +┌─any(b) RESPECT NULLS ─┐ +│ ᴺᵁᴸᴸ │ +└───────────────────────┘ ``` ### example4 @@ -73,8 +77,8 @@ FROM ``` ```text -┌─first_value_respect_nulls(b)─┬─first_value(b)─┐ -│ ᴺᵁᴸᴸ │ 3 │ -└──────────────────────────────┴────────────────┘ +┌─any_respect_nulls(b)─┬─any(b)─┐ +│ ᴺᵁᴸᴸ │ 3 │ +└──────────────────────┴────────┘ ``` diff --git a/src/AggregateFunctions/AggregateFunctionAny.cpp b/src/AggregateFunctions/AggregateFunctionAny.cpp index da28602972c..15681eca817 100644 --- a/src/AggregateFunctions/AggregateFunctionAny.cpp +++ b/src/AggregateFunctions/AggregateFunctionAny.cpp @@ -1,22 +1,202 @@ #include #include +#include +#include +#include namespace DB { struct Settings; +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; +} + namespace { -template