ClickHouse/src/Functions/FunctionsTimeWindow.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

698 lines
31 KiB
C++
Raw Normal View History

2021-06-01 03:01:35 +00:00
#include <numeric>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
2020-01-14 03:08:54 +00:00
#include <Functions/FunctionFactory.h>
2021-06-01 03:01:35 +00:00
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsTimeWindow.h>
2020-01-14 03:08:54 +00:00
namespace DB
{
2021-06-01 03:01:35 +00:00
2021-06-02 01:52:54 +00:00
/// Error codes thrown by the time window functions defined in this file.
namespace ErrorCodes
{
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int SYNTAX_ERROR;
}
2021-06-01 03:01:35 +00:00
namespace
{
std::tuple<IntervalKind::Kind, Int64>
dispatchForIntervalColumns(const ColumnWithTypeAndName & interval_column, const String & function_name)
{
const auto * interval_type = checkAndGetDataType<DataTypeInterval>(interval_column.type.get());
if (!interval_type)
throw Exception(
"Illegal column " + interval_column.name + " of argument of function " + function_name, ErrorCodes::ILLEGAL_COLUMN);
const auto * interval_column_const_int64 = checkAndGetColumnConst<ColumnInt64>(interval_column.column.get());
if (!interval_column_const_int64)
throw Exception(
"Illegal column " + interval_column.name + " of argument of function " + function_name, ErrorCodes::ILLEGAL_COLUMN);
Int64 num_units = interval_column_const_int64->getValue<Int64>();
if (num_units <= 0)
throw Exception(
"Value for column " + interval_column.name + " of function " + function_name + " must be positive",
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
return {interval_type->getKind(), num_units};
}
/// Extracts one bound of a time window from a tuple column.
///
/// `column` must be a ColumnTuple; `index` selects the element to return (callers in this file
/// pass 0 for the window start and 1 for the window end).
/// The selected element must be a UInt32 column (DateTime bounds) or a UInt16 column (Date bounds,
/// which executeTumble/executeHop produce for Week, Month, Quarter and Year intervals).
///
/// Throws ILLEGAL_TYPE_OF_ARGUMENT if `column` is not a tuple, if the tuple has no element
/// at `index`, or if the selected element has an unsupported column type.
ColumnPtr executeWindowBound(const ColumnPtr & column, int index, const String & function_name)
{
    const auto * col_tuple = checkAndGetColumn<ColumnTuple>(column.get());
    if (!col_tuple)
        throw Exception(
            "Illegal column for first argument of function " + function_name + ". Must be Tuple",
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

    /// A user-supplied tuple may have fewer elements than expected; never index past its end.
    if (index < 0 || static_cast<size_t>(index) >= col_tuple->tupleSize())
        throw Exception(
            "Illegal column for first argument of function " + function_name
                + ". Must be a Tuple(DateTime, DateTime) or Tuple(Date, Date)",
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

    ColumnPtr bound = col_tuple->getColumnPtr(index);
    if (!checkColumn<ColumnVector<UInt32>>(*bound) && !checkColumn<ColumnVector<UInt16>>(*bound))
        throw Exception(
            "Illegal column for first argument of function " + function_name
                + ". Must be a Tuple(DateTime, DateTime) or Tuple(Date, Date)",
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

    return bound;
}
/// Ensures that the first argument of a time window function has the DateTime type.
/// Throws ILLEGAL_TYPE_OF_ARGUMENT otherwise.
void checkFirstArgument(const ColumnWithTypeAndName & argument, const String & function_name)
{
    if (isDateTime(argument.type))
        return;

    throw Exception(
        "Illegal type " + argument.type->getName() + " of argument of function " + function_name
            + ". Should be a date with time",
        ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void checkIntervalArgument(const ColumnWithTypeAndName & argument, const String & function_name, IntervalKind & interval_kind, bool & result_type_is_date)
{
2021-11-27 14:46:43 +00:00
const auto * interval_type = checkAndGetDataType<DataTypeInterval>(argument.type.get());
2021-06-01 03:01:35 +00:00
if (!interval_type)
throw Exception(
"Illegal type " + argument.type->getName() + " of argument of function " + function_name
+ ". Should be an interval of time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
interval_kind = interval_type->getKind();
result_type_is_date = (interval_type->getKind() == IntervalKind::Year) || (interval_type->getKind() == IntervalKind::Quarter)
|| (interval_type->getKind() == IntervalKind::Month) || (interval_type->getKind() == IntervalKind::Week);
}
void checkIntervalArgument(const ColumnWithTypeAndName & argument, const String & function_name, bool & result_type_is_date)
{
IntervalKind interval_kind;
checkIntervalArgument(argument, function_name, interval_kind, result_type_is_date);
}
/// Ensures that the optional time zone argument has the String type.
/// Throws ILLEGAL_TYPE_OF_ARGUMENT otherwise.
void checkTimeZoneArgument(
    const ColumnWithTypeAndName & argument,
    const String & function_name)
{
    if (WhichDataType(argument.type).isString())
        return;

    throw Exception(
        "Illegal type " + argument.type->getName() + " of argument of function " + function_name
            + ". This argument is optional and must be a constant string with timezone name",
        ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
/// Validates an argument that may be either a time zone (String) or an interval.
/// Returns true if the argument is an interval (in which case `interval_kind` and
/// `result_type_is_date` are filled in) and false if it is a time zone string.
bool checkIntervalOrTimeZoneArgument(const ColumnWithTypeAndName & argument, const String & function_name, IntervalKind & interval_kind, bool & result_type_is_date)
{
    const bool is_time_zone = WhichDataType(argument.type).isString();

    if (is_time_zone)
        checkTimeZoneArgument(argument, function_name);
    else
        checkIntervalArgument(argument, function_name, interval_kind, result_type_is_date);

    return !is_time_zone;
}
}
template <>
struct TimeWindowImpl<TUMBLE>
2021-06-01 03:01:35 +00:00
{
2021-12-07 08:14:00 +00:00
static constexpr auto name = "tumble";
2021-06-01 03:01:35 +00:00
[[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
bool result_type_is_date;
if (arguments.size() == 2)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, result_type_is_date);
}
else if (arguments.size() == 3)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, result_type_is_date);
checkTimeZoneArgument(arguments.at(2), function_name);
}
else
{
throw Exception(
"Number of arguments for function " + function_name + " doesn't match: passed " + toString(arguments.size())
+ ", should be 2 or 3",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
2021-11-27 14:46:43 +00:00
DataTypePtr data_type = nullptr;
2021-06-01 03:01:35 +00:00
if (result_type_is_date)
2021-11-27 14:46:43 +00:00
data_type = std::make_shared<DataTypeDate>();
2021-06-01 03:01:35 +00:00
else
2021-11-27 14:46:43 +00:00
data_type = std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
2021-06-01 03:01:35 +00:00
2021-11-27 14:46:43 +00:00
return std::make_shared<DataTypeTuple>(DataTypes{data_type, data_type});
2021-06-01 03:01:35 +00:00
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
const auto & time_column = arguments[0];
const auto & interval_column = arguments[1];
const auto & from_datatype = *time_column.type.get();
const auto which_type = WhichDataType(from_datatype);
const auto * time_column_vec = checkAndGetColumn<ColumnUInt32>(time_column.column.get());
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0);
if (!which_type.isDateTime() || !time_column_vec)
throw Exception(
"Illegal column " + time_column.name + " of function " + function_name + ". Must contain dates or dates with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
auto interval = dispatchForIntervalColumns(interval_column, function_name);
switch (std::get<0>(interval))
{
//TODO: add proper support for fractional seconds
// case IntervalKind::Nanosecond:
// return executeTumble<UInt32, IntervalKind::Nanosecond>(*time_column_vec, std::get<1>(interval), time_zone);
// case IntervalKind::Microsecond:
// return executeTumble<UInt32, IntervalKind::Microsecond>(*time_column_vec, std::get<1>(interval), time_zone);
// case IntervalKind::Millisecond:
// return executeTumble<UInt32, IntervalKind::Millisecond>(*time_column_vec, std::get<1>(interval), time_zone);
2021-06-01 03:01:35 +00:00
case IntervalKind::Second:
2021-11-27 14:46:43 +00:00
return executeTumble<UInt32, IntervalKind::Second>(*time_column_vec, std::get<1>(interval), time_zone);
2021-06-01 03:01:35 +00:00
case IntervalKind::Minute:
2021-11-27 14:46:43 +00:00
return executeTumble<UInt32, IntervalKind::Minute>(*time_column_vec, std::get<1>(interval), time_zone);
2021-06-01 03:01:35 +00:00
case IntervalKind::Hour:
2021-11-27 14:46:43 +00:00
return executeTumble<UInt32, IntervalKind::Hour>(*time_column_vec, std::get<1>(interval), time_zone);
2021-06-01 03:01:35 +00:00
case IntervalKind::Day:
2021-11-27 14:46:43 +00:00
return executeTumble<UInt32, IntervalKind::Day>(*time_column_vec, std::get<1>(interval), time_zone);
2021-06-01 03:01:35 +00:00
case IntervalKind::Week:
2021-11-27 14:46:43 +00:00
return executeTumble<UInt16, IntervalKind::Week>(*time_column_vec, std::get<1>(interval), time_zone);
2021-06-01 03:01:35 +00:00
case IntervalKind::Month:
2021-11-27 14:46:43 +00:00
return executeTumble<UInt16, IntervalKind::Month>(*time_column_vec, std::get<1>(interval), time_zone);
2021-06-01 03:01:35 +00:00
case IntervalKind::Quarter:
2021-11-27 14:46:43 +00:00
return executeTumble<UInt16, IntervalKind::Quarter>(*time_column_vec, std::get<1>(interval), time_zone);
2021-06-01 03:01:35 +00:00
case IntervalKind::Year:
2021-11-27 14:46:43 +00:00
return executeTumble<UInt16, IntervalKind::Year>(*time_column_vec, std::get<1>(interval), time_zone);
default:
throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
2021-06-01 03:01:35 +00:00
}
__builtin_unreachable();
}
template <typename ToType, IntervalKind::Kind unit>
2021-11-27 14:46:43 +00:00
static ColumnPtr executeTumble(const ColumnUInt32 & time_column, UInt64 num_units, const DateLUTImpl & time_zone)
2021-06-01 03:01:35 +00:00
{
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto start = ColumnVector<ToType>::create();
auto end = ColumnVector<ToType>::create();
auto & start_data = start->getData();
auto & end_data = end->getData();
start_data.resize(size);
end_data.resize(size);
for (size_t i = 0; i != size; ++i)
{
start_data[i] = ToStartOfTransform<unit>::execute(time_data[i], num_units, time_zone);
end_data[i] = AddTime<unit>::execute(start_data[i], num_units, time_zone);
}
MutableColumns result;
result.emplace_back(std::move(start));
result.emplace_back(std::move(end));
return ColumnTuple::create(std::move(result));
}
};
template <>
struct TimeWindowImpl<TUMBLE_START>
2021-06-01 03:01:35 +00:00
{
2021-12-07 08:14:00 +00:00
static constexpr auto name = "tumbleStart";
2021-06-01 03:01:35 +00:00
static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
if (arguments.size() == 1)
{
2021-11-27 14:46:43 +00:00
auto type = WhichDataType(arguments[0].type);
if (type.isTuple())
return std::static_pointer_cast<const DataTypeTuple>(arguments[0].type)->getElement(0);
2021-11-27 14:46:43 +00:00
else if (type.isUInt32())
return std::make_shared<DataTypeDateTime>();
else
2021-06-01 03:01:35 +00:00
throw Exception(
"Illegal type of first argument of function " + function_name + " should be DateTime, Tuple or UInt32",
ErrorCodes::ILLEGAL_COLUMN);
2021-06-01 03:01:35 +00:00
}
else
{
return std::static_pointer_cast<const DataTypeTuple>(TimeWindowImpl<TUMBLE>::getReturnType(arguments, function_name))
2021-11-26 18:30:22 +00:00
->getElement(0);
2021-06-01 03:01:35 +00:00
}
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
2021-12-04 12:30:04 +00:00
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
2021-11-27 14:46:43 +00:00
ColumnPtr result_column;
2021-12-04 12:30:04 +00:00
if (arguments.size() == 1)
{
if (which_type.isUInt32())
return time_column.column;
else //isTuple
result_column = time_column.column;
}
2021-06-01 03:01:35 +00:00
else
result_column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name);
2021-11-27 14:46:43 +00:00
return executeWindowBound(result_column, 0, function_name);
2021-06-01 03:01:35 +00:00
}
};
template <>
struct TimeWindowImpl<TUMBLE_END>
2021-06-01 03:01:35 +00:00
{
2021-12-07 08:14:00 +00:00
static constexpr auto name = "tumbleEnd";
2021-06-01 03:01:35 +00:00
[[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
return TimeWindowImpl<TUMBLE_START>::getReturnType(arguments, function_name);
2021-06-01 03:01:35 +00:00
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String& function_name)
{
2021-12-04 12:30:04 +00:00
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
2021-11-27 14:46:43 +00:00
ColumnPtr result_column;
2021-12-04 12:30:04 +00:00
if (arguments.size() == 1)
{
if (which_type.isUInt32())
return time_column.column;
else //isTuple
result_column = time_column.column;
}
2021-06-01 03:01:35 +00:00
else
result_column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name);
2021-11-27 14:46:43 +00:00
return executeWindowBound(result_column, 1, function_name);
2021-06-01 03:01:35 +00:00
}
};
template <>
struct TimeWindowImpl<HOP>
2021-06-01 03:01:35 +00:00
{
2021-12-07 08:14:00 +00:00
static constexpr auto name = "hop";
2021-06-01 03:01:35 +00:00
[[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
bool result_type_is_date;
IntervalKind interval_kind_1;
IntervalKind interval_kind_2;
if (arguments.size() == 3)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, interval_kind_1, result_type_is_date);
checkIntervalArgument(arguments.at(2), function_name, interval_kind_2, result_type_is_date);
}
else if (arguments.size() == 4)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, interval_kind_1, result_type_is_date);
checkIntervalArgument(arguments.at(2), function_name, interval_kind_2, result_type_is_date);
checkTimeZoneArgument(arguments.at(3), function_name);
}
else
{
throw Exception(
"Number of arguments for function " + function_name + " doesn't match: passed " + toString(arguments.size())
+ ", should be 3 or 4",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (interval_kind_1 != interval_kind_2)
throw Exception(
"Illegal type of window and hop column of function " + function_name + ", must be same", ErrorCodes::ILLEGAL_COLUMN);
2021-11-27 14:46:43 +00:00
DataTypePtr data_type = nullptr;
2021-06-01 03:01:35 +00:00
if (result_type_is_date)
2021-11-27 14:46:43 +00:00
data_type = std::make_shared<DataTypeDate>();
2021-06-01 03:01:35 +00:00
else
2021-11-27 14:46:43 +00:00
data_type = std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 3, 0));
return std::make_shared<DataTypeTuple>(DataTypes{data_type, data_type});
2021-06-01 03:01:35 +00:00
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
const auto & time_column = arguments[0];
const auto & hop_interval_column = arguments[1];
const auto & window_interval_column = arguments[2];
const auto & from_datatype = *time_column.type.get();
const auto * time_column_vec = checkAndGetColumn<ColumnUInt32>(time_column.column.get());
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 3, 0);
if (!WhichDataType(from_datatype).isDateTime() || !time_column_vec)
throw Exception(
"Illegal column " + time_column.name + " argument of function " + function_name
+ ". Must contain dates or dates with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
auto hop_interval = dispatchForIntervalColumns(hop_interval_column, function_name);
auto window_interval = dispatchForIntervalColumns(window_interval_column, function_name);
if (std::get<1>(hop_interval) > std::get<1>(window_interval))
throw Exception(
"Value for hop interval of function " + function_name + " must not larger than window interval",
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
switch (std::get<0>(window_interval))
{
//TODO: add proper support for fractional seconds
// case IntervalKind::Nanosecond:
// return executeHop<UInt32, IntervalKind::Nanosecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
// case IntervalKind::Microsecond:
// return executeHop<UInt32, IntervalKind::Microsecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
// case IntervalKind::Millisecond:
// return executeHop<UInt32, IntervalKind::Millisecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
2021-06-01 03:01:35 +00:00
case IntervalKind::Second:
2021-11-27 14:46:43 +00:00
return executeHop<UInt32, IntervalKind::Second>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Minute:
2021-11-27 14:46:43 +00:00
return executeHop<UInt32, IntervalKind::Minute>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Hour:
2021-11-27 14:46:43 +00:00
return executeHop<UInt32, IntervalKind::Hour>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Day:
2021-11-27 14:46:43 +00:00
return executeHop<UInt32, IntervalKind::Day>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Week:
2021-11-27 14:46:43 +00:00
return executeHop<UInt16, IntervalKind::Week>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Month:
2021-11-27 14:46:43 +00:00
return executeHop<UInt16, IntervalKind::Month>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Quarter:
2021-11-27 14:46:43 +00:00
return executeHop<UInt16, IntervalKind::Quarter>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Year:
2021-11-27 14:46:43 +00:00
return executeHop<UInt16, IntervalKind::Year>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
default:
throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
2021-06-01 03:01:35 +00:00
}
__builtin_unreachable();
}
template <typename ToType, IntervalKind::Kind kind>
static ColumnPtr
2021-11-27 14:46:43 +00:00
executeHop(const ColumnUInt32 & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone)
2021-06-01 03:01:35 +00:00
{
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto start = ColumnVector<ToType>::create();
auto end = ColumnVector<ToType>::create();
auto & start_data = start->getData();
auto & end_data = end->getData();
start_data.resize(size);
end_data.resize(size);
for (size_t i = 0; i < size; ++i)
{
ToType wstart = ToStartOfTransform<kind>::execute(time_data[i], hop_num_units, time_zone);
ToType wend = AddTime<kind>::execute(wstart, hop_num_units, time_zone);
2022-05-04 12:56:09 +00:00
wstart = AddTime<kind>::execute(wend, -window_num_units, time_zone);
2021-06-01 03:01:35 +00:00
ToType wend_latest;
do
{
2021-11-27 14:46:43 +00:00
wend_latest = wend;
2022-05-04 12:56:09 +00:00
wend = AddTime<kind>::execute(wend, -hop_num_units, time_zone);
2021-11-27 14:46:43 +00:00
} while (wend > time_data[i]);
2021-06-01 03:01:35 +00:00
end_data[i] = wend_latest;
2022-05-04 12:56:09 +00:00
start_data[i] = AddTime<kind>::execute(wend_latest, -window_num_units, time_zone);
2021-06-01 03:01:35 +00:00
}
MutableColumns result;
result.emplace_back(std::move(start));
result.emplace_back(std::move(end));
return ColumnTuple::create(std::move(result));
}
};
template <>
struct TimeWindowImpl<WINDOW_ID>
2021-06-01 03:01:35 +00:00
{
2021-12-07 08:14:00 +00:00
static constexpr auto name = "windowID";
2021-06-01 03:01:35 +00:00
[[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
bool result_type_is_date;
IntervalKind interval_kind_1;
IntervalKind interval_kind_2;
if (arguments.size() == 2)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, interval_kind_1, result_type_is_date);
}
else if (arguments.size() == 3)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, interval_kind_1, result_type_is_date);
if (checkIntervalOrTimeZoneArgument(arguments.at(2), function_name, interval_kind_2, result_type_is_date))
{
if (interval_kind_1 != interval_kind_2)
throw Exception(
"Illegal type of window and hop column of function " + function_name + ", must be same", ErrorCodes::ILLEGAL_COLUMN);
}
}
else if (arguments.size() == 4)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, interval_kind_1, result_type_is_date);
checkIntervalArgument(arguments.at(2), function_name, interval_kind_2, result_type_is_date);
checkTimeZoneArgument(arguments.at(3), function_name);
}
else
{
throw Exception(
"Number of arguments for function " + function_name + " doesn't match: passed " + toString(arguments.size())
+ ", should be 2, 3 or 4",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (result_type_is_date)
return std::make_shared<DataTypeUInt16>();
else
return std::make_shared<DataTypeUInt32>();
}
[[maybe_unused]] static ColumnPtr
dispatchForHopColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
const auto & time_column = arguments[0];
const auto & hop_interval_column = arguments[1];
const auto & window_interval_column = arguments[2];
const auto & from_datatype = *time_column.type.get();
const auto * time_column_vec = checkAndGetColumn<ColumnUInt32>(time_column.column.get());
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 3, 0);
if (!WhichDataType(from_datatype).isDateTime() || !time_column_vec)
throw Exception(
"Illegal column " + time_column.name + " argument of function " + function_name
+ ". Must contain dates or dates with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
auto hop_interval = dispatchForIntervalColumns(hop_interval_column, function_name);
auto window_interval = dispatchForIntervalColumns(window_interval_column, function_name);
if (std::get<1>(hop_interval) > std::get<1>(window_interval))
throw Exception(
"Value for hop interval of function " + function_name + " must not larger than window interval",
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
switch (std::get<0>(window_interval))
{
//TODO: add proper support for fractional seconds
// case IntervalKind::Nanosecond:
// return executeHopSlice<UInt32, IntervalKind::Nanosecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
// case IntervalKind::Microsecond:
// return executeHopSlice<UInt32, IntervalKind::Microsecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
// case IntervalKind::Millisecond:
// return executeHopSlice<UInt32, IntervalKind::Millisecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
2021-06-01 03:01:35 +00:00
case IntervalKind::Second:
2021-11-27 14:46:43 +00:00
return executeHopSlice<UInt32, IntervalKind::Second>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Minute:
2021-11-27 14:46:43 +00:00
return executeHopSlice<UInt32, IntervalKind::Minute>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Hour:
2021-11-27 14:46:43 +00:00
return executeHopSlice<UInt32, IntervalKind::Hour>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Day:
2021-11-27 14:46:43 +00:00
return executeHopSlice<UInt32, IntervalKind::Day>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Week:
2021-11-27 14:46:43 +00:00
return executeHopSlice<UInt16, IntervalKind::Week>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Month:
2021-11-27 14:46:43 +00:00
return executeHopSlice<UInt16, IntervalKind::Month>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Quarter:
2021-11-27 14:46:43 +00:00
return executeHopSlice<UInt16, IntervalKind::Quarter>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Year:
2021-11-27 14:46:43 +00:00
return executeHopSlice<UInt16, IntervalKind::Year>(
2021-06-01 03:01:35 +00:00
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
default:
throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
2021-06-01 03:01:35 +00:00
}
__builtin_unreachable();
}
template <typename ToType, IntervalKind::Kind kind>
static ColumnPtr
2021-11-27 14:46:43 +00:00
executeHopSlice(const ColumnUInt32 & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone)
2021-06-01 03:01:35 +00:00
{
Int64 gcd_num_units = std::gcd(hop_num_units, window_num_units);
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto end = ColumnVector<ToType>::create();
auto & end_data = end->getData();
end_data.resize(size);
for (size_t i = 0; i < size; ++i)
{
ToType wstart = ToStartOfTransform<kind>::execute(time_data[i], hop_num_units, time_zone);
ToType wend = AddTime<kind>::execute(wstart, hop_num_units, time_zone);
ToType wend_latest;
do
{
2021-11-27 14:46:43 +00:00
wend_latest = wend;
2022-05-04 12:56:09 +00:00
wend = AddTime<kind>::execute(wend, -gcd_num_units, time_zone);
2021-11-27 14:46:43 +00:00
} while (wend > time_data[i]);
2021-06-01 03:01:35 +00:00
end_data[i] = wend_latest;
}
return end;
}
[[maybe_unused]] static ColumnPtr
dispatchForTumbleColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
ColumnPtr column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name);
2021-06-01 03:01:35 +00:00
return executeWindowBound(column, 1, function_name);
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
if (arguments.size() == 2)
return dispatchForTumbleColumns(arguments, function_name);
else
{
const auto & third_column = arguments[2];
if (arguments.size() == 3 && WhichDataType(third_column.type).isString())
return dispatchForTumbleColumns(arguments, function_name);
else
return dispatchForHopColumns(arguments, function_name);
}
}
};
template <>
struct TimeWindowImpl<HOP_START>
2021-06-01 03:01:35 +00:00
{
2021-12-07 08:14:00 +00:00
static constexpr auto name = "hopStart";
2021-06-01 03:01:35 +00:00
static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
if (arguments.size() == 1)
{
2021-11-27 14:46:43 +00:00
auto type = WhichDataType(arguments[0].type);
if (type.isTuple())
return std::static_pointer_cast<const DataTypeTuple>(arguments[0].type)->getElement(0);
2021-11-27 14:46:43 +00:00
else if (type.isUInt32())
return std::make_shared<DataTypeDateTime>();
else
2021-06-01 03:01:35 +00:00
throw Exception(
"Illegal type of first argument of function " + function_name + " should be DateTime, Tuple or UInt32",
ErrorCodes::ILLEGAL_COLUMN);
}
else
{
return std::static_pointer_cast<const DataTypeTuple>(TimeWindowImpl<HOP>::getReturnType(arguments, function_name))->getElement(0);
2021-06-01 03:01:35 +00:00
}
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
2021-11-27 14:46:43 +00:00
ColumnPtr result_column;
2021-06-01 03:01:35 +00:00
if (arguments.size() == 1)
{
if (which_type.isUInt32())
return time_column.column;
else //isTuple
2021-11-27 14:46:43 +00:00
result_column = time_column.column;
2021-06-01 03:01:35 +00:00
}
else
result_column = TimeWindowImpl<HOP>::dispatchForColumns(arguments, function_name);
2021-11-27 14:46:43 +00:00
return executeWindowBound(result_column, 0, function_name);
2021-06-01 03:01:35 +00:00
}
};
template <>
struct TimeWindowImpl<HOP_END>
2021-06-01 03:01:35 +00:00
{
2021-12-07 08:14:00 +00:00
static constexpr auto name = "hopEnd";
2021-06-01 03:01:35 +00:00
[[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
return TimeWindowImpl<HOP_START>::getReturnType(arguments, function_name);
2021-06-01 03:01:35 +00:00
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
2021-11-27 14:46:43 +00:00
ColumnPtr result_column;
2021-06-01 03:01:35 +00:00
if (arguments.size() == 1)
{
if (which_type.isUInt32())
return time_column.column;
else //isTuple
2021-11-27 14:46:43 +00:00
result_column = time_column.column;
2021-06-01 03:01:35 +00:00
}
else
result_column = TimeWindowImpl<HOP>::dispatchForColumns(arguments, function_name);
2021-06-01 03:01:35 +00:00
2021-11-27 14:46:43 +00:00
return executeWindowBound(result_column, 1, function_name);
2021-06-01 03:01:35 +00:00
}
};
2021-12-10 09:59:50 +00:00
/// Resolves the result type by delegating to the TimeWindowImpl specialization for `type`.
template <TimeWindowFunctionName type>
DataTypePtr FunctionTimeWindow<type>::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const
{
    using Impl = TimeWindowImpl<type>;
    return Impl::getReturnType(arguments, name);
}
2021-12-10 09:59:50 +00:00
/// Executes the function by delegating to the TimeWindowImpl specialization for `type`.
/// The result type and the row count are not used here.
template <TimeWindowFunctionName type>
ColumnPtr FunctionTimeWindow<type>::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const
{
    using Impl = TimeWindowImpl<type>;
    return Impl::dispatchForColumns(arguments, name);
}
/// Registers every time window function defined in this file with the function factory.
REGISTER_FUNCTION(TimeWindow)
{
    /// Tumbling windows.
    factory.registerFunction<FunctionTumble>();
    factory.registerFunction<FunctionTumbleStart>();
    factory.registerFunction<FunctionTumbleEnd>();

    /// Hopping windows.
    factory.registerFunction<FunctionHop>();
    factory.registerFunction<FunctionHopStart>();
    factory.registerFunction<FunctionHopEnd>();

    /// Window identifier.
    factory.registerFunction<FunctionWindowId>();
}
2020-01-14 16:24:26 +00:00
}