Merge pull request #71403 from petern48/alias_any_respect_nulls

Added camelCase aliases for `any`/`anyLast` aggregation functions
This commit is contained in:
Robert Schulze 2024-11-17 16:02:10 +00:00 committed by GitHub
commit 742f1eb41e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 140 additions and 37 deletions

View File

@ -5,7 +5,15 @@ sidebar_position: 102
# any
Selects the first encountered value of a column, ignoring any `NULL` values.
Selects the first encountered value of a column.
:::warning
As a query can be executed in arbitrary order, the result of this function is non-deterministic.
If you need an arbitrary but deterministic result, use functions [`min`](../reference/min.md) or [`max`](../reference/max.md).
:::
By default, the function never returns NULL, i.e. ignores NULL values in the input column.
However, if the function is used with the `RESPECT NULLS` modifier, it returns the first value reads no matter if NULL or not.
**Syntax**
@ -13,46 +21,51 @@ Selects the first encountered value of a column, ignoring any `NULL` values.
any(column) [RESPECT NULLS]
```
Aliases: `any_value`, [`first_value`](../reference/first_value.md).
Aliases `any(column)` (without `RESPECT NULLS`)
- `any_value`
- [`first_value`](../reference/first_value.md).
Alias for `any(column) RESPECT NULLS`
- `anyRespectNulls`, `any_respect_nulls`
- `firstValueRespectNulls`, `first_value_respect_nulls`
- `anyValueRespectNulls`, `any_value_respect_nulls`
**Parameters**
- `column`: The column name.
- `column`: The column name.
**Returned value**
:::note
Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the first value passed, regardless of whether it is `NULL` or not.
:::
The first value encountered.
:::note
The return type of the function is the same as the input, except for LowCardinality which is discarded. This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) ) to modify this behaviour.
:::
:::warning
The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate.
To get a determinate result, you can use the [`min`](../reference/min.md) or [`max`](../reference/max.md) function instead of `any`.
The return type of the function is the same as the input, except for LowCardinality which is discarded.
This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column).
You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) ) to modify this behaviour.
:::
**Implementation details**
In some cases, you can rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`.
In some cases, you can rely on the order of execution.
This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`.
When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function.
When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions.
In other words, each column selected from the table must be used either in keys or inside aggregate functions.
To get behavior like in MySQL, you can put the other columns in the `any` aggregate function.
**Example**
Query:
```sql
CREATE TABLE any_nulls (city Nullable(String)) ENGINE=Log;
CREATE TABLE tab (city Nullable(String)) ENGINE=Memory;
INSERT INTO any_nulls (city) VALUES (NULL), ('Amsterdam'), ('New York'), ('Tokyo'), ('Valencia'), (NULL);
INSERT INTO tab (city) VALUES (NULL), ('Amsterdam'), ('New York'), ('Tokyo'), ('Valencia'), (NULL);
SELECT any(city) FROM any_nulls;
SELECT any(city), anyRespectNulls(city) FROM tab;
```
```response
┌─any(city)─┐
│ Amsterdam │
└───────────┘
┌─any(city)─┬─anyRespectNulls(city)─
│ Amsterdam │ ᴺᵁᴸᴸ │
└───────────┴───────────────────────
```

View File

@ -5,7 +5,15 @@ sidebar_position: 105
# anyLast
Selects the last value encountered, ignoring any `NULL` values by default. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function.
Selects the last encountered value of a column.
:::warning
As a query can be executed in arbitrary order, the result of this function is non-deterministic.
If you need an arbitrary but deterministic result, use functions [`min`](../reference/min.md) or [`max`](../reference/max.md).
:::
By default, the function never returns NULL, i.e. ignores NULL values in the input column.
However, if the function is used with the `RESPECT NULLS` modifier, it returns the first value reads no matter if NULL or not.
**Syntax**
@ -13,12 +21,15 @@ Selects the last value encountered, ignoring any `NULL` values by default. The r
anyLast(column) [RESPECT NULLS]
```
**Parameters**
- `column`: The column name.
Alias `anyLast(column)` (without `RESPECT NULLS`)
- [`last_value`](../reference/last_value.md).
:::note
Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the last value passed, regardless of whether it is `NULL` or not.
:::
Aliases for `anyLast(column) RESPECT NULLS`
- `anyLastRespectNulls`, `anyLast_respect_nulls`
- `lastValueRespectNulls`, `last_value_respect_nulls`
**Parameters**
- `column`: The column name.
**Returned value**
@ -29,15 +40,15 @@ Supports the `RESPECT NULLS` modifier after the function name. Using this modifi
Query:
```sql
CREATE TABLE any_last_nulls (city Nullable(String)) ENGINE=Log;
CREATE TABLE tab (city Nullable(String)) ENGINE=Memory;
INSERT INTO any_last_nulls (city) VALUES ('Amsterdam'),(NULL),('New York'),('Tokyo'),('Valencia'),(NULL);
INSERT INTO tab (city) VALUES ('Amsterdam'),(NULL),('New York'),('Tokyo'),('Valencia'),(NULL);
SELECT anyLast(city) FROM any_last_nulls;
SELECT anyLast(city), anyLastRespectNulls(city) FROM tab;
```
```response
┌─anyLast(city)─┐
│ Valencia │
└───────────────┘
┌─anyLast(city)─┬─anyLastRespectNulls(city)─
│ Valencia │ ᴺᵁᴸᴸ │
└───────────────┴───────────────────────────
```

View File

@ -15,7 +15,7 @@ first_value (column_name) [[RESPECT NULLS] | [IGNORE NULLS]]
OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
[ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
WINDOW window_name as ([PARTITION BY grouping_column] [ORDER BY sorting_column])
```
Alias: `any`.
@ -23,6 +23,8 @@ Alias: `any`.
:::note
Using the optional modifier `RESPECT NULLS` after `first_value(column_name)` will ensure that `NULL` arguments are not skipped.
See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information.
Alias: `firstValueRespectNulls`
:::
For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
@ -48,7 +50,7 @@ CREATE TABLE salaries
)
Engine = Memory;
INSERT INTO salaries FORMAT Values
INSERT INTO salaries FORMAT VALUES
('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'),
('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'),

View File

@ -23,6 +23,8 @@ Alias: `anyLast`.
:::note
Using the optional modifier `RESPECT NULLS` after `first_value(column_name)` will ensure that `NULL` arguments are not skipped.
See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information.
Alias: `lastValueRespectNulls`
:::
For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
@ -33,7 +35,7 @@ For more detail on window function syntax see: [Window Functions - Syntax](./ind
**Example**
In this example the `last_value` function is used to find the highest paid footballer from a fictional dataset of salaries of Premier League football players.
In this example the `last_value` function is used to find the lowest paid footballer from a fictional dataset of salaries of Premier League football players.
Query:
@ -48,7 +50,7 @@ CREATE TABLE salaries
)
Engine = Memory;
INSERT INTO salaries FORMAT Values
INSERT INTO salaries FORMAT VALUES
('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'),
('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'),

View File

@ -221,11 +221,16 @@ void registerAggregateFunctionsAnyRespectNulls(AggregateFunctionFactory & factor
= {.returns_default_when_only_null = false, .is_order_dependent = true, .is_window_function = true};
factory.registerFunction("any_respect_nulls", {createAggregateFunctionAnyRespectNulls, default_properties_for_respect_nulls});
factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("anyRespectNulls", "any_respect_nulls", AggregateFunctionFactory::Case::Sensitive);
factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("firstValueRespectNulls", "any_respect_nulls", AggregateFunctionFactory::Case::Sensitive);
factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("anyValueRespectNulls", "any_respect_nulls", AggregateFunctionFactory::Case::Sensitive);
factory.registerFunction("anyLast_respect_nulls", {createAggregateFunctionAnyLastRespectNulls, default_properties_for_respect_nulls});
factory.registerAlias("anyLastRespectNulls", "anyLast_respect_nulls", AggregateFunctionFactory::Case::Sensitive);
factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias("lastValueRespectNulls", "anyLast_respect_nulls", AggregateFunctionFactory::Case::Sensitive);
/// Must happen after registering any and anyLast
factory.registerNullsActionTransformation("any", "any_respect_nulls");

View File

@ -0,0 +1,30 @@
anyRespectNulls
0
\N
\N
0
6
firstValueRespectNulls
0
\N
\N
0
6
anyValueRespectNulls
0
\N
\N
0
6
lastValueRespectNulls
4
\N
\N
0
9
anyLastRespectNulls
4
\N
\N
0
9

View File

@ -0,0 +1,40 @@
-- Tests aliases of any and anyLast functions
-- aliases of any
SELECT 'anyRespectNulls';
SELECT anyRespectNulls(number) FROM numbers(5);
SELECT arrayReduce('anyRespectNulls', [NULL, 10]::Array(Nullable(UInt8)));
SELECT anyRespectNullsMerge(t) FROM (SELECT anyRespectNullsState(NULL::Nullable(UInt8)) as t FROM numbers(5));
SELECT finalizeAggregation(CAST(unhex('01'), 'AggregateFunction(anyRespectNulls, UInt64)'));
SELECT anyRespectNullsIf (number, NOT isNull(number) AND (assumeNotNull(number) > 5)) FROM numbers(10);
SELECT 'firstValueRespectNulls';
SELECT firstValueRespectNulls(number) FROM numbers(5);
SELECT arrayReduce('firstValueRespectNulls', [NULL, 10]::Array(Nullable(UInt8)));
SELECT firstValueRespectNullsMerge(t) FROM (SELECT firstValueRespectNullsState(NULL::Nullable(UInt8)) as t FROM numbers(5));
SELECT finalizeAggregation(CAST(unhex('01'), 'AggregateFunction(firstValueRespectNulls, UInt64)'));
SELECT firstValueRespectNullsIf (number, NOT isNull(number) AND (assumeNotNull(number) > 5)) FROM numbers(10);
SELECT 'anyValueRespectNulls';
SELECT anyValueRespectNulls(number) FROM numbers(5);
SELECT arrayReduce('anyValueRespectNulls', [NULL, 10]::Array(Nullable(UInt8)));
SELECT anyValueRespectNullsMerge(t) FROM (SELECT anyValueRespectNullsState(NULL::Nullable(UInt8)) as t FROM numbers(5));
SELECT finalizeAggregation(CAST(unhex('01'), 'AggregateFunction(anyValueRespectNulls, UInt64)'));
SELECT anyValueRespectNullsIf (number, NOT isNull(number) AND (assumeNotNull(number) > 5)) FROM numbers(10);
-- aliases of anyLast
SELECT 'lastValueRespectNulls';
SELECT lastValueRespectNulls(number) FROM numbers(5);
SELECT arrayReduce('lastValueRespectNulls', [10, NULL]::Array(Nullable(UInt8)));
SELECT lastValueRespectNullsMerge(t) FROM (SELECT lastValueRespectNullsState(NULL::Nullable(UInt8)) as t FROM numbers(5));
SELECT finalizeAggregation(CAST(unhex('01'), 'AggregateFunction(lastValueRespectNulls, UInt64)'));
SELECT lastValueRespectNullsIf (number, NOT isNull(number) AND (assumeNotNull(number) > 5)) FROM numbers(10);
SELECT 'anyLastRespectNulls';
SELECT anyLastRespectNulls(number) FROM numbers(5);
SELECT arrayReduce('anyLastRespectNulls', [10, NULL]::Array(Nullable(UInt8)));
SELECT anyLastRespectNullsMerge(t) FROM (SELECT anyLastRespectNullsState(NULL::Nullable(UInt8)) as t FROM numbers(5));
SELECT finalizeAggregation(CAST(unhex('01'), 'AggregateFunction(anyLastRespectNulls, UInt64)'));
SELECT anyLastRespectNullsIf (number, NOT isNull(number) AND (assumeNotNull(number) > 5)) FROM numbers(10);