ClickHouse/src/Functions/toStartOfInterval.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

265 lines
13 KiB
C++
Raw Normal View History

#include <Columns/ColumnsDateTime.h>
2019-02-11 11:59:17 +00:00
#include <Columns/ColumnsNumber.h>
2023-12-11 15:27:41 +00:00
#include <Common/DateLUTImpl.h>
#include <Common/IntervalKind.h>
2019-02-11 11:59:17 +00:00
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
2019-02-11 11:59:17 +00:00
#include <DataTypes/DataTypeInterval.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionFactory.h>
2021-05-17 07:30:42 +00:00
#include <Functions/IFunction.h>
2019-02-11 11:59:17 +00:00
#include <IO/WriteHelpers.h>
namespace DB
{
/// Error codes thrown by this function. They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ARGUMENT_OUT_OF_BOUND;
}
class FunctionToStartOfInterval : public IFunction
{
public:
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionToStartOfInterval>(); }
2019-02-11 11:59:17 +00:00
2023-09-29 16:52:48 +00:00
static constexpr auto name = "toStartOfInterval";
2019-02-11 11:59:17 +00:00
String getName() const override { return name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
2021-06-22 16:21:23 +00:00
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
2023-09-29 16:52:48 +00:00
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
bool hasInformationAboutMonotonicity() const override { return true; }
2023-12-11 15:27:41 +00:00
Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override { return { .is_monotonic = true, .is_always_monotonic = true }; }
2019-02-11 11:59:17 +00:00
/// Validates the argument types and derives the result type from the interval kind.
///
/// Accepted forms:
///   toStartOfInterval(value, interval)
///   toStartOfInterval(value, interval, timezone)
/// where `value` is a Date, DateTime or DateTime64, `interval` is an Interval and `timezone` is a String.
///
/// Result type by interval kind:
///   Nanosecond / Microsecond / Millisecond -> DateTime64 with scale 9 / 6 / 3
///   Second / Minute / Hour / Day           -> DateTime
///   Week / Month / Quarter / Year          -> Date
///
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if the number of arguments is neither 2 nor 3,
/// and ILLEGAL_TYPE_OF_ARGUMENT if an argument has an unsupported type.
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
    const size_t num_arguments = arguments.size();
    if (num_arguments != 2 && num_arguments != 3)
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3",
            getName(), num_arguments);

    /// 1st argument: the value to round down.
    const DataTypePtr & type_arg1 = arguments[0].type;
    if (!isDate(type_arg1) && !isDateTime(type_arg1) && !isDateTime64(type_arg1))
        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "Illegal type {} of 1st argument of function {}, expected a Date, DateTime or DateTime64",
            type_arg1->getName(), getName());
    const bool value_is_date = isDate(type_arg1);

    /// 2nd argument: the interval; its kind alone determines the result type.
    const DataTypePtr & type_arg2 = arguments[1].type;
    const DataTypeInterval * interval_type = checkAndGetDataType<DataTypeInterval>(type_arg2.get());
    if (!interval_type)
        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "Illegal type {} of 2nd argument of function {}, expected a time interval",
            type_arg2->getName(), getName());

    enum class ResultType
    {
        Date,
        DateTime,
        DateTime64
    };

    /// Initialized explicitly so that the value is never indeterminate: the switch below has no default case.
    ResultType result_type = ResultType::Date;
    UInt32 datetime64_scale = 0; /// only meaningful when result_type == ResultType::DateTime64

    switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case)
    {
        case IntervalKind::Kind::Nanosecond:
            result_type = ResultType::DateTime64;
            datetime64_scale = 9;
            break;
        case IntervalKind::Kind::Microsecond:
            result_type = ResultType::DateTime64;
            datetime64_scale = 6;
            break;
        case IntervalKind::Kind::Millisecond:
            result_type = ResultType::DateTime64;
            datetime64_scale = 3;
            break;
        case IntervalKind::Kind::Second:
        case IntervalKind::Kind::Minute:
        case IntervalKind::Kind::Hour:
        case IntervalKind::Kind::Day: /// Day yields DateTime rather than Date; surprising, but deliberately left unchanged
            result_type = ResultType::DateTime;
            break;
        case IntervalKind::Kind::Week:
        case IntervalKind::Kind::Month:
        case IntervalKind::Kind::Quarter:
        case IntervalKind::Kind::Year:
            result_type = ResultType::Date;
            break;
    }

    /// 3rd argument (optional): the time zone.
    if (num_arguments == 3)
    {
        const DataTypePtr & type_arg3 = arguments[2].type;
        if (!isString(type_arg3))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Illegal type {} of 3rd argument of function {}, expected a constant timezone string",
                type_arg3->getName(), getName());

        /// The condition uses && (not ||); surprising, but deliberately left unchanged.
        if (value_is_date && result_type == ResultType::Date)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has type DateTime or DateTime64",
                getName(), interval_type->getKind().toString());
    }

    switch (result_type)
    {
        case ResultType::Date:
            return std::make_shared<DataTypeDate>();
        case ResultType::DateTime:
            return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false));
        case ResultType::DateTime64:
            return std::make_shared<DataTypeDateTime64>(datetime64_scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false));
    }

    std::unreachable();
}
2022-02-15 23:43:08 +00:00
/// Entry point for execution: takes the value column (argument 0) and the interval column (argument 1),
/// resolves the time zone via extractTimeZoneFromFunctionArguments(arguments, 2, 0) and
/// delegates the rest to dispatchForTimeColumn().
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override
{
    const auto & lut = extractTimeZoneFromFunctionArguments(arguments, 2, 0);
    return dispatchForTimeColumn(arguments[0], arguments[1], result_type, lut);
}
private:
2023-09-29 16:52:48 +00:00
/// Picks the concrete column class that corresponds to the data type of the 1st argument
/// (DateTime64, DateTime or Date) and forwards to dispatchForIntervalColumn().
/// For DateTime64 the scale of the data type is forwarded as well.
///
/// Throws ILLEGAL_TYPE_OF_ARGUMENT if the type is none of the three,
/// or if the column is not of the class expected for its type.
ColumnPtr dispatchForTimeColumn(
    const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column,
    const DataTypePtr & result_type, const DateLUTImpl & time_zone) const
{
    const auto & arg_type = *time_column.type;
    const auto & arg_column = *time_column.column;

    if (isDateTime64(arg_type))
    {
        const auto & datetime64_type = assert_cast<const DataTypeDateTime64 &>(arg_type);
        auto scale = datetime64_type.getScale();
        if (const auto * typed_column = checkAndGetColumn<ColumnDateTime64>(arg_column))
            return dispatchForIntervalColumn(datetime64_type, *typed_column, interval_column, result_type, time_zone, scale);
    }
    else if (isDateTime(arg_type))
    {
        if (const auto * typed_column = checkAndGetColumn<ColumnDateTime>(arg_column))
            return dispatchForIntervalColumn(
                assert_cast<const DataTypeDateTime &>(arg_type), *typed_column, interval_column, result_type, time_zone);
    }
    else if (isDate(arg_type))
    {
        if (const auto * typed_column = checkAndGetColumn<ColumnDate>(arg_column))
            return dispatchForIntervalColumn(
                assert_cast<const DataTypeDate &>(arg_type), *typed_column, interval_column, result_type, time_zone);
    }

    /// Reached for an unsupported type as well as for a type/column mismatch.
    throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, DateTime or DateTime64", getName());
}
2023-12-11 15:27:41 +00:00
template <typename TimeDataType, typename TimeColumnType>
2019-02-11 11:59:17 +00:00
ColumnPtr dispatchForIntervalColumn(
2023-09-29 16:52:48 +00:00
const TimeDataType & time_data_type, const TimeColumnType & time_column, const ColumnWithTypeAndName & interval_column,
2023-12-11 15:27:41 +00:00
const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale = 1) const
2019-02-11 11:59:17 +00:00
{
const auto * interval_type = checkAndGetDataType<DataTypeInterval>(interval_column.type.get());
if (!interval_type)
2023-12-11 15:27:41 +00:00
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for 2nd argument of function {}, must be a time interval", getName());
2023-09-29 16:52:48 +00:00
2019-02-11 11:59:17 +00:00
const auto * interval_column_const_int64 = checkAndGetColumnConst<ColumnInt64>(interval_column.column.get());
if (!interval_column_const_int64)
2023-12-11 15:27:41 +00:00
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for 2nd argument of function {}, must be a const time interval", getName());
2023-09-29 16:52:48 +00:00
2023-12-11 15:27:41 +00:00
const Int64 num_units = interval_column_const_int64->getValue<Int64>();
2019-02-11 11:59:17 +00:00
if (num_units <= 0)
2023-12-11 15:27:41 +00:00
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for 2nd argument of function {} must be positive", getName());
2019-02-11 11:59:17 +00:00
2023-10-08 16:30:02 +00:00
switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case)
2019-02-11 11:59:17 +00:00
{
2024-02-27 13:37:21 +00:00
case IntervalKind::Kind::Nanosecond:
return execute<TimeDataType, TimeColumnType, DataTypeDateTime64, IntervalKind::Kind::Nanosecond>(time_data_type, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Kind::Microsecond:
return execute<TimeDataType, TimeColumnType, DataTypeDateTime64, IntervalKind::Kind::Microsecond>(time_data_type, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Kind::Millisecond:
return execute<TimeDataType, TimeColumnType, DataTypeDateTime64, IntervalKind::Kind::Millisecond>(time_data_type, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Kind::Second:
return execute<TimeDataType, TimeColumnType, DataTypeDateTime, IntervalKind::Kind::Second>(time_data_type, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Kind::Minute:
return execute<TimeDataType, TimeColumnType, DataTypeDateTime, IntervalKind::Kind::Minute>(time_data_type, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Kind::Hour:
return execute<TimeDataType, TimeColumnType, DataTypeDateTime, IntervalKind::Kind::Hour>(time_data_type, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Kind::Day:
return execute<TimeDataType, TimeColumnType, DataTypeDateTime, IntervalKind::Kind::Day>(time_data_type, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Kind::Week:
return execute<TimeDataType, TimeColumnType, DataTypeDate, IntervalKind::Kind::Week>(time_data_type, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Kind::Month:
return execute<TimeDataType, TimeColumnType, DataTypeDate, IntervalKind::Kind::Month>(time_data_type, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Kind::Quarter:
return execute<TimeDataType, TimeColumnType, DataTypeDate, IntervalKind::Kind::Quarter>(time_data_type, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Kind::Year:
return execute<TimeDataType, TimeColumnType, DataTypeDate, IntervalKind::Kind::Year>(time_data_type, time_column, num_units, result_type, time_zone, scale);
2019-02-11 11:59:17 +00:00
}
2023-12-11 15:27:41 +00:00
std::unreachable();
2019-02-11 11:59:17 +00:00
}
2023-12-11 15:27:41 +00:00
/// Row-wise kernel: applies ToStartOfInterval<unit>::execute() to every value of the input column
/// and stores the results, cast to the field type of ResultDataType, in a new column created from `result_type`.
///
/// The first parameter (the input data type) is unused; it only takes part in template deduction.
/// `scale` is converted to a multiplier with DecimalUtils::scaleMultiplier<DateTime64>() and handed to the transform.
template <typename TimeDataType, typename TimeColumnType, typename ResultDataType, IntervalKind::Kind unit>
ColumnPtr execute(
    const TimeDataType &, const TimeColumnType & time_column, Int64 num_units,
    const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale) const
{
    using OutColumn = typename ResultDataType::ColumnType;
    using OutField = typename ResultDataType::FieldType;

    const auto & input = time_column.getData();
    const size_t rows = input.size();

    auto output_column = result_type->createColumn();
    auto & output = assert_cast<OutColumn &>(*output_column).getData();
    output.resize(rows);

    const Int64 scale_multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);

    for (size_t row = 0; row < rows; ++row)
    {
        const auto rounded = ToStartOfInterval<unit>::execute(input[row], num_units, time_zone, scale_multiplier);
        output[row] = static_cast<OutField>(rounded);
    }

    return output_column;
}
};
/// Registers FunctionToStartOfInterval with the function factory.
REGISTER_FUNCTION(ToStartOfInterval)
2019-02-11 11:59:17 +00:00
{
factory.registerFunction<FunctionToStartOfInterval>();
}
}