Merge pull request #46467 from bigo-sg/first_value

This commit is contained in:
vdimir 2023-05-10 14:56:14 +02:00 committed by GitHub
commit 1b7f54e886
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 336 additions and 28 deletions

View File

@ -0,0 +1,55 @@
---
slug: /en/sql-reference/aggregate-functions/reference/first_value
sidebar_position: 7
---
# first_value
Selects the first encountered value, similar to `any`, but can also accept NULL values.
## examples
```sql
insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null)
```
### example1
The NULL value is ignored by default.
```sql
select first_value(b) from test_data
```
```text
┌─first_value_ignore_nulls(b)─┐
│ 3 │
└─────────────────────────────┘
```
### example2
The NULL value is ignored.
```sql
select first_value(b) ignore nulls from test_data
```
```text
┌─first_value_ignore_nulls(b)─┐
│ 3 │
└─────────────────────────────┘
```
### example3
The NULL value is accepted.
```sql
select first_value(b) respect nulls from test_data
```
```text
┌─first_value_respect_nulls(b)─┐
│ ᴺᵁᴸᴸ │
└──────────────────────────────┘
```

View File

@ -26,6 +26,8 @@ ClickHouse-specific aggregate functions:
- [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md)
- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md)
- [first_value](../../../sql-reference/aggregate-functions/reference/first_value.md)
- [last_value](../../../sql-reference/aggregate-functions/reference/last_value.md)
- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md)
- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md)
- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md)

View File

@ -0,0 +1,53 @@
---
slug: /en/sql-reference/aggregate-functions/reference/last_value
sidebar_position: 8
---
# last_value
Selects the last encountered value, similar to `anyLast`, but can also accept NULL values.
## examples
```sql
insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null)
```
### example1
The NULL value is ignored by default.
```sql
select last_value(b) from test_data
```
```text
┌─last_value_ignore_nulls(b)─┐
│ 5 │
└────────────────────────────┘
```
### example2
The NULL value is ignored.
```sql
select last_value(b) ignore nulls from test_data
```
```text
┌─last_value_ignore_nulls(b)─┐
│ 5 │
└────────────────────────────┘
```
### example3
The NULL value is accepted.
```sql
select last_value(b) respect nulls from test_data
```
```text
┌─last_value_respect_nulls(b)─┐
│ ᴺᵁᴸᴸ │
└─────────────────────────────┘
```

View File

@ -14,11 +14,29 @@ AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData>(name, argument_types, parameters, settings));
}
/// Factory callback for the NULL-aware flavour of `any`.
/// The RespectNulls template flag is forwarded unchanged to
/// createAggregateFunctionSingleNullableValue, which picks the state type;
/// the raw pointer it returns is wrapped into an AggregateFunctionPtr here.
template <bool RespectNulls = false>
AggregateFunctionPtr createAggregateFunctionNullableAny(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    auto * raw_function = createAggregateFunctionSingleNullableValue<
        AggregateFunctionsSingleValue,
        AggregateFunctionAnyData,
        RespectNulls>(name, argument_types, parameters, settings);

    return AggregateFunctionPtr(raw_function);
}
/// Factory callback for `anyLast`: builds the single-value aggregate function
/// over AggregateFunctionAnyLastData and hands ownership to an AggregateFunctionPtr.
AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    auto * raw_function = createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData>(
        name, argument_types, parameters, settings);

    return AggregateFunctionPtr(raw_function);
}
/// Factory callback for the NULL-aware flavour of `anyLast`.
/// The RespectNulls template flag is forwarded unchanged to
/// createAggregateFunctionSingleNullableValue, which picks the state type;
/// the raw pointer it returns is wrapped into an AggregateFunctionPtr here.
template <bool RespectNulls = false>
AggregateFunctionPtr createAggregateFunctionNullableAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    auto * raw_function
        = createAggregateFunctionSingleNullableValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData, RespectNulls>(
            name, argument_types, parameters, settings);

    return AggregateFunctionPtr(raw_function);
}
AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyHeavyData>(name, argument_types, parameters, settings));
@ -38,9 +56,15 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
factory.registerFunction("first_value",
{ createAggregateFunctionAny, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("first_value_respect_nulls",
{ createAggregateFunctionNullableAny<true>, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("last_value",
{ createAggregateFunctionAnyLast, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("last_value_respect_nulls",
{ createAggregateFunctionNullableAnyLast<true>, properties },
AggregateFunctionFactory::CaseInsensitive);
}
}

View File

@ -768,19 +768,23 @@ static_assert(
/// For any other value types.
template <bool IS_NULLABLE = false>
struct SingleValueDataGeneric
{
private:
using Self = SingleValueDataGeneric;
Field value;
bool has_value = false;
public:
static constexpr bool is_nullable = false;
static constexpr bool is_nullable = IS_NULLABLE;
static constexpr bool is_any = false;
/// Tells whether this state currently holds a value.
/// In nullable mode the explicit `has_value` flag decides, so a stored NULL
/// still counts as a value; otherwise a NULL `value` means "empty".
bool has() const
{
    if constexpr (is_nullable)
        return has_value;
    else
        return !value.isNull();
}
@ -815,11 +819,15 @@ public:
/// Overwrites the stored value with row `row_num` of `column`.
/// The Arena argument is not used by this state type.
void change(const IColumn & column, size_t row_num, Arena *)
{
    column.get(row_num, value);

    /// In nullable mode emptiness is tracked by the flag rather than by
    /// `value.isNull()`, so record that a value (possibly NULL) is now present.
    if constexpr (is_nullable)
    {
        has_value = true;
    }
}
/// Overwrites the stored value with the one held by another state `to`.
/// The Arena argument is not used by this state type.
void change(const Self & to, Arena *)
{
    value = to.value;

    /// NOTE(review): in nullable mode the state is marked as filled without
    /// consulting `to.has_value` - confirm that callers only pass a populated `to`.
    if constexpr (is_nullable)
    {
        has_value = true;
    }
}
bool changeFirstTime(const IColumn & column, size_t row_num, Arena * arena)
@ -835,7 +843,7 @@ public:
bool changeFirstTime(const Self & to, Arena * arena)
{
if (!has() && to.has())
if (!has() && (is_nullable || to.has()))
{
change(to, arena);
return true;
@ -870,27 +878,61 @@ public:
}
else
{
Field new_value;
column.get(row_num, new_value);
if (new_value < value)
if constexpr (is_nullable)
{
value = new_value;
return true;
Field new_value;
column.get(row_num, new_value);
if (!value.isNull() && (new_value.isNull() || new_value < value))
{
value = new_value;
return true;
}
else
return false;
}
else
return false;
{
Field new_value;
column.get(row_num, new_value);
if (new_value < value)
{
value = new_value;
return true;
}
else
return false;
}
}
}
bool changeIfLess(const Self & to, Arena * arena)
{
if (to.has() && (!has() || to.value < value))
if (!to.has())
return false;
if constexpr (is_nullable)
{
change(to, arena);
return true;
if (!has())
{
change(to, arena);
return true;
}
if (to.value.isNull() || (!value.isNull() && to.value < value))
{
value = to.value;
return true;
}
return false;
}
else
return false;
{
if (!has() || to.value < value)
{
change(to, arena);
return true;
}
else
return false;
}
}
bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena)
@ -902,27 +944,55 @@ public:
}
else
{
Field new_value;
column.get(row_num, new_value);
if (new_value > value)
if constexpr (is_nullable)
{
value = new_value;
return true;
Field new_value;
column.get(row_num, new_value);
if (!value.isNull() && (new_value.isNull() || value < new_value))
{
value = new_value;
return true;
}
return false;
}
else
return false;
{
Field new_value;
column.get(row_num, new_value);
if (new_value > value)
{
value = new_value;
return true;
}
else
return false;
}
}
}
bool changeIfGreater(const Self & to, Arena * arena)
{
if (to.has() && (!has() || to.value > value))
if (!to.has())
return false;
if constexpr (is_nullable)
{
change(to, arena);
return true;
if (!value.isNull() && (to.value.isNull() || value < to.value))
{
value = to.value;
return true;
}
return false;
}
else
return false;
{
if (!has() || to.value > value)
{
change(to, arena);
return true;
}
else
return false;
}
}
bool isEqualTo(const IColumn & column, size_t row_num) const
@ -1359,6 +1429,17 @@ public:
this->data(place).insertResultInto(to);
}
/// Returns `nested_function` itself when the state type declares
/// `Data::is_nullable`, and nullptr otherwise. The remaining arguments are ignored.
/// NOTE(review): presumably nullptr tells the caller to apply its default
/// NULL handling - confirm against the base-class contract.
AggregateFunctionPtr getOwnNullAdapter(
    const AggregateFunctionPtr & nested_function,
    const DataTypes & /*arguments*/,
    const Array & /*params*/,
    const AggregateFunctionProperties & /*properties*/) const override
{
    if (!Data::is_nullable)
        return nullptr;

    return nested_function;
}
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override

View File

@ -9,7 +9,6 @@
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
namespace DB
{
struct Settings;
@ -22,7 +21,6 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na
assertUnary(name, argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) return new AggregateFunctionTemplate<Data<SingleValueDataFixed<TYPE>>>(argument_type); /// NOLINT
@ -46,7 +44,28 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na
if (which.idx == TypeIndex::String)
return new AggregateFunctionTemplate<Data<SingleValueDataString>>(argument_type);
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric>>(argument_type);
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<>>>(argument_type);
}
/// Creates a single-value aggregate function whose handling of NULLs is chosen at compile time.
///
/// - RespectNulls == false: defers to createAggregateFunctionSingleValue, i.e. the regular
///   type-dispatched implementation.
/// - RespectNulls == true: the result may be NULL even when rows were matched, so the state is
///   always SingleValueDataGeneric<true>, regardless of the argument type.
///
/// `name` is used for the argument checks; `parameters` must pass assertNoParameters and
/// `argument_types` must pass assertUnary. Returns a raw pointer allocated with `new`
/// in the RespectNulls branch; the caller is expected to take ownership.
template <template <typename> class AggregateFunctionTemplate, template <typename> class Data, bool RespectNulls = false>
static IAggregateFunction * createAggregateFunctionSingleNullableValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    assertNoParameters(name, parameters);
    assertUnary(name, argument_types);

    if constexpr (RespectNulls)
    {
        /// A NULL has to be storable as a value, hence the generic Field-based state.
        const DataTypePtr & argument_type = argument_types[0];
        return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<true>>>(argument_type);
    }
    else
    {
        return createAggregateFunctionSingleValue<AggregateFunctionTemplate, Data>(name, argument_types, Array(), settings);
    }
}
@ -79,7 +98,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMaxSecond(const DataTyp
if (which.idx == TypeIndex::String)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataString>>>(res_type, val_type);
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric>>>(res_type, val_type);
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric<>>>>(res_type, val_type);
}
template <template <typename> class MinMaxData>
@ -115,7 +134,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMax(const String & name
if (which.idx == TypeIndex::String)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataString>(res_type, val_type);
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric>(res_type, val_type);
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric<>>(res_type, val_type);
}
}

View File

@ -1,4 +1,5 @@
#include <string_view>
#include <unordered_map>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/ParserSetQuery.h>
@ -25,6 +26,7 @@
#include <Common/logger_useful.h>
#include <Parsers/queryToString.h>
#include <Parsers/CommonParsers.h>
using namespace std::literals;
@ -1085,6 +1087,8 @@ public:
ParserKeyword filter("FILTER");
ParserKeyword over("OVER");
ParserKeyword respect_nulls("RESPECT NULLS");
ParserKeyword ignore_nulls("IGNORE NULLS");
if (filter.ignore(pos, expected))
{
@ -1100,6 +1104,17 @@ public:
return false;
}
NullsAction nulls_action = NullsAction::EMPTY;
if (respect_nulls.ignore(pos, expected))
{
nulls_action = NullsAction::RESPECT_NULLS;
}
if (ignore_nulls.ignore(pos, expected))
{
nulls_action = NullsAction::IGNORE_NULLS;
}
function_node->name = transformFunctionNameForRepectNulls(function_node->name, nulls_action);
if (over.ignore(pos, expected))
{
function_node->is_window_function = true;
@ -1132,6 +1147,30 @@ private:
bool allow_function_parameters;
bool is_compound_name;
/// Which NULL-handling modifier (if any) was parsed after a function call.
/// The numeric values are used as indexes into the per-function name lists in
/// transformFunctionNameForRepectNulls, so they must remain 0, 1, 2 in this order.
enum NullsAction
{
EMPTY = 0, /// Neither modifier was given.
RESPECT_NULLS = 1, /// `RESPECT NULLS` was given.
IGNORE_NULLS = 2, /// `IGNORE NULLS` was given.
};
/// Maps a parsed function name to the name of the implementation that honours the
/// requested NULL-handling modifier.
///
/// - No modifier (NullsAction::EMPTY): the name is returned unchanged.
/// - RESPECT NULLS / IGNORE NULLS: the name is looked up in the table below and the
///   entry for that modifier is returned.
///
/// The lookup ignores ASCII case, because the affected functions are registered as
/// case-insensitive; an exact-case lookup would reject e.g. `FIRST_VALUE(x) RESPECT NULLS`.
///
/// Throws Exception(SYNTAX_ERROR) when a modifier is given for a function that is not in the table.
static String transformFunctionNameForRepectNulls(const String & original_function_name, NullsAction nulls_action)
{
    /// Each list is indexed by NullsAction: {EMPTY, RESPECT_NULLS, IGNORE_NULLS}.
    static const std::unordered_map<String, std::vector<String>> renamed_functions_with_nulls = {
        {"first_value", {"first_value", "first_value_respect_nulls", "first_value"}},
        {"last_value", {"last_value", "last_value_respect_nulls", "last_value"}},
    };

    /// Without a modifier nothing is renamed, whether or not the function is in the table.
    if (nulls_action == NullsAction::EMPTY)
        return original_function_name;

    /// Locale-independent ASCII lowering of the lookup key only; the original name is kept for the error message.
    String lookup_key = original_function_name;
    for (char & c : lookup_key)
    {
        if (c >= 'A' && c <= 'Z')
            c = static_cast<char>(c - 'A' + 'a');
    }

    const auto it = renamed_functions_with_nulls.find(lookup_key);
    if (it == renamed_functions_with_nulls.end())
        throw Exception(
            ErrorCodes::SYNTAX_ERROR, "Function {} does not support RESPECT NULLS or IGNORE NULLS", original_function_name);

    return it->second[nulls_action];
}
};
/// Layer for priority brackets and tuple function
@ -2353,7 +2392,6 @@ bool ParserExpressionImpl::parse(std::unique_ptr<Layer> start, IParser::Pos & po
{
if (!layers.back()->parse(pos, expected, next))
break;
if (layers.back()->isFinished())
{
if (layers.size() == 1)

View File

@ -0,0 +1,20 @@
-- { echo }
-- create table
drop table if exists test;
create table test(`a` Nullable(Int32), `b` Nullable(Int32)) ENGINE = Memory;
insert into test (a,b) values (1,null), (2,3), (4, 5), (6,null);
-- first value
select first_value(b) from test;
3
select first_value(b) ignore nulls from test;
3
select first_value(b) respect nulls from test;
\N
-- last value
select last_value(b) from test;
5
select last_value(b) ignore nulls from test;
5
select last_value(b) respect nulls from test;
\N

View File

@ -0,0 +1,16 @@
-- { echo }
-- create table
drop table if exists test;
create table test(`a` Nullable(Int32), `b` Nullable(Int32)) ENGINE = Memory;
insert into test (a,b) values (1,null), (2,3), (4, 5), (6,null);
-- first value
select first_value(b) from test;
select first_value(b) ignore nulls from test;
select first_value(b) respect nulls from test;
-- last value
select last_value(b) from test;
select last_value(b) ignore nulls from test;
select last_value(b) respect nulls from test;