Merge pull request #27704 from jasine/master

feat: add conversions  between snowflake id and  dateTime(dateTime64)
This commit is contained in:
Kseniia Sumarokova 2021-08-20 10:48:40 +03:00 committed by GitHub
commit 273b8b9bc1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 475 additions and 0 deletions

View File

@ -1339,3 +1339,149 @@ Result:
│ 2,"good" │
└───────────────────────────────────────────┘
```
## snowflakeToDateTime {#snowflakeToDateTime}
Extract time from snowflake id as DateTime format.
**Syntax**
``` sql
snowflakeToDateTime(value [, time_zone])
```
**Parameters**
- `value``snowflake id`, Int64 value.
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Returned value**
- value converted to the `DateTime` data type.
**Example**
Query:
``` sql
SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC');
```
Result:
``` text
┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐
│ 2021-08-15 10:57:56 │
└──────────────────────────────────────────────────────────────────┘
```
## snowflakeToDateTime64 {#snowflakeToDateTime64}
Extract time from snowflake id as DateTime64 format.
**Syntax**
``` sql
snowflakeToDateTime64(value [, time_zone])
```
**Parameters**
- `value``snowflake id`, Int64 value.
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Returned value**
- value converted to the `DateTime64` data type.
**Example**
Query:
``` sql
SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC');
```
Result:
``` text
┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐
│ 2021-08-15 10:58:19.841 │
└────────────────────────────────────────────────────────────────────┘
```
## dateTimeToSnowflake {#dateTimeToSnowflake}
Convert DateTime to the first snowflake id at the giving time.
**Syntax**
``` sql
dateTimeToSnowflake(value)
```
**Parameters**
- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md).
**Returned value**
- `value` converted to the `Int64` data type as the first snowflake id at that time.
**Example**
Query:
``` sql
WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt
SELECT dateTimeToSnowflake(dt);
```
Result:
``` text
┌─dateTimeToSnowflake(dt)─┐
│ 1426860702823350272 │
└─────────────────────────┘
```
## dateTime64ToSnowflake {#dateTime64ToSnowflake}
Convert DateTime64 to the first snowflake id at the giving time.
**Syntax**
``` sql
dateTime64ToSnowflake(value)
```
**Parameters**
- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**
- `value` converted to the `Int64` data type as the first snowflake id at that time.
**Example**
Query:
``` sql
WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64
SELECT dateTime64ToSnowflake(dt64);
```
Result:
``` text
┌─dateTime64ToSnowflake(dt64)─┐
│ 1426860704886947840 │
└─────────────────────────────┘
```

View File

@ -0,0 +1,207 @@
#pragma once
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include <common/arithmeticOverflow.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
/** According to Twitter's post on Snowflake, we can extract the timestamp for a snowflake ID by right shifting
* the snowflake ID by 22 bits(10 bits machine ID and 12 bits sequence ID) and adding the Twitter epoch time of 1288834974657.
* https://en.wikipedia.org/wiki/Snowflake_ID
* https://blog.twitter.com/engineering/en_us/a/2010/announcing-snowflake
* https://ws-dl.blogspot.com/2019/08/2019-08-03-tweetedat-finding-tweet.html
*/
static constexpr long snowflake_epoch = 1288834974657L;
static constexpr int time_shift = 22;
class FunctionDateTimeToSnowflake : public IFunction
{
private:
const char * name;
public:
FunctionDateTimeToSnowflake(const char * name_) : name(name_) { }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
bool isVariadic() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (!isDateTime(arguments[0].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The only argument for function {} must be DateTime", name);
return std::make_shared<DataTypeInt64>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto & src = arguments[0];
const auto & col = *src.column;
auto res_column = ColumnInt64::create(input_rows_count);
auto & result_data = res_column->getData();
const auto & source_data = typeid_cast<const ColumnUInt32 &>(col).getData();
for (size_t i = 0; i < input_rows_count; ++i)
{
result_data[i] = (Int64(source_data[i]) * 1000 - snowflake_epoch) << time_shift;
}
return res_column;
}
};
class FunctionSnowflakeToDateTime : public IFunction
{
private:
const char * name;
public:
FunctionSnowflakeToDateTime(const char * name_) : name(name_) { }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 0; }
bool isVariadic() const override { return true; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() < 1 || arguments.size() > 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", name);
if (!typeid_cast<const DataTypeInt64 *>(arguments[0].type.get()))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The first argument for function {} must be Int64", name);
std::string timezone;
if (arguments.size() == 2)
timezone = extractTimeZoneNameFromFunctionArguments(arguments, 1, 0);
return std::make_shared<DataTypeDateTime>(timezone);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto & src = arguments[0];
const auto & col = *src.column;
auto res_column = ColumnUInt32::create(input_rows_count);
auto & result_data = res_column->getData();
const auto & source_data = typeid_cast<const ColumnInt64 &>(col).getData();
for (size_t i = 0; i < input_rows_count; ++i)
{
result_data[i] = ((source_data[i] >> time_shift) + snowflake_epoch) / 1000;
}
return res_column;
}
};
class FunctionDateTime64ToSnowflake : public IFunction
{
private:
const char * name;
public:
FunctionDateTime64ToSnowflake(const char * name_) : name(name_) { }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
bool isVariadic() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (!isDateTime64(arguments[0].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The only argument for function {} must be DateTime64", name);
return std::make_shared<DataTypeInt64>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto & src = arguments[0];
const auto & col = *src.column;
auto res_column = ColumnInt64::create(input_rows_count);
auto & result_data = res_column->getData();
const auto & source_data = typeid_cast<const ColumnDecimal<DateTime64> &>(col).getData();
for (size_t i = 0; i < input_rows_count; ++i)
{
result_data[i] = (source_data[i] - snowflake_epoch) << time_shift;
}
return res_column;
}
};
class FunctionSnowflakeToDateTime64 : public IFunction
{
private:
const char * name;
public:
FunctionSnowflakeToDateTime64(const char * name_) : name(name_) { }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 0; }
bool isVariadic() const override { return true; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() < 1 || arguments.size() > 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", name);
if (!typeid_cast<const DataTypeInt64 *>(arguments[0].type.get()))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The first argument for function {} must be Int64", name);
std::string timezone;
if (arguments.size() == 2)
timezone = extractTimeZoneNameFromFunctionArguments(arguments, 1, 0);
return std::make_shared<DataTypeDateTime64>(3, timezone);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto & src = arguments[0];
const auto & col = *src.column;
auto res_column = ColumnDecimal<DateTime64>::create(input_rows_count, 3);
auto & result_data = res_column->getData();
const auto & source_data = typeid_cast<const ColumnInt64 &>(col).getData();
for (size_t i = 0; i < input_rows_count; ++i)
{
result_data[i] = (source_data[i] >> time_shift) + snowflake_epoch;
}
return res_column;
}
};
}

View File

@ -51,6 +51,7 @@ void registerFunctionBitHammingDistance(FunctionFactory & factory);
void registerFunctionTupleHammingDistance(FunctionFactory & factory);
void registerFunctionsStringHash(FunctionFactory & factory);
void registerFunctionValidateNestedArraySizes(FunctionFactory & factory);
void registerFunctionsSnowflake(FunctionFactory & factory);
#if !defined(ARCADIA_BUILD)
void registerFunctionBayesAB(FunctionFactory &);
#endif
@ -115,6 +116,7 @@ void registerFunctions()
registerFunctionTupleHammingDistance(factory);
registerFunctionsStringHash(factory);
registerFunctionValidateNestedArraySizes(factory);
registerFunctionsSnowflake(factory);
#if !defined(ARCADIA_BUILD)
registerFunctionBayesAB(factory);

View File

@ -0,0 +1,22 @@
namespace DB
{
class FunctionFactory;
void registerDateTimeToSnowflake(FunctionFactory &);
void registerSnowflakeToDateTime(FunctionFactory &);
void registerDateTime64ToSnowflake(FunctionFactory &);
void registerSnowflakeToDateTime64(FunctionFactory &);
void registerFunctionsSnowflake(FunctionFactory & factory)
{
registerDateTimeToSnowflake(factory);
registerSnowflakeToDateTime(factory);
registerDateTime64ToSnowflake(factory);
registerSnowflakeToDateTime64(factory);
}
}

View File

@ -0,0 +1,34 @@
#include <Functions/FunctionSnowflake.h>
#include <Functions/FunctionFactory.h>
namespace DB
{
void registerDateTimeToSnowflake(FunctionFactory & factory)
{
factory.registerFunction("dateTimeToSnowflake",
[](ContextPtr){ return std::make_unique<FunctionToOverloadResolverAdaptor>(
std::make_shared<FunctionDateTimeToSnowflake>("dateTimeToSnowflake")); });
}
void registerDateTime64ToSnowflake(FunctionFactory & factory)
{
factory.registerFunction("dateTime64ToSnowflake",
[](ContextPtr){ return std::make_unique<FunctionToOverloadResolverAdaptor>(
std::make_shared<FunctionDateTime64ToSnowflake>("dateTime64ToSnowflake")); });
}
void registerSnowflakeToDateTime(FunctionFactory & factory)
{
factory.registerFunction("snowflakeToDateTime",
[](ContextPtr){ return std::make_unique<FunctionToOverloadResolverAdaptor>(
std::make_shared<FunctionSnowflakeToDateTime>("snowflakeToDateTime")); });
}
void registerSnowflakeToDateTime64(FunctionFactory & factory)
{
factory.registerFunction("snowflakeToDateTime64",
[](ContextPtr){ return std::make_unique<FunctionToOverloadResolverAdaptor>(
std::make_shared<FunctionSnowflakeToDateTime64>("snowflakeToDateTime64")); });
}
}

View File

@ -0,0 +1,6 @@
const column
2021-08-15 18:57:56 1426860702823350272
2021-08-15 18:57:56.492 1426860704886947840
non-const column
2021-08-15 18:57:56 1426860702823350272
2021-08-15 18:57:56.492 1426860704886947840

View File

@ -0,0 +1,23 @@
-- Error cases
SELECT dateTimeToSnowflake(); -- {serverError 42}
SELECT dateTime64ToSnowflake(); -- {serverError 42}
SELECT dateTimeToSnowflake('abc'); -- {serverError 43}
SELECT dateTime64ToSnowflake('abc'); -- {serverError 43}
SELECT dateTimeToSnowflake('abc', 123); -- {serverError 42}
SELECT dateTime64ToSnowflake('abc', 123); -- {serverError 42}
SELECT 'const column';
WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt
SELECT dt, dateTimeToSnowflake(dt);
WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64
SELECT dt64, dateTime64ToSnowflake(dt64);
SELECT 'non-const column';
WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS x
SELECT materialize(x) as dt, dateTimeToSnowflake(dt);;
WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS x
SELECT materialize(x) as dt64, dateTime64ToSnowflake(dt64);

View File

@ -0,0 +1,3 @@
const column
UTC 1426860704886947840 2021-08-15 10:57:56 DateTime(\'UTC\') 2021-08-15 10:57:56.492 DateTime64(3, \'UTC\')
Asia/Shanghai 1426860704886947840 2021-08-15 18:57:56 DateTime(\'Asia/Shanghai\') 2021-08-15 18:57:56.492 DateTime64(3, \'Asia/Shanghai\')

View File

@ -0,0 +1,32 @@
-- -- Error cases
SELECT snowflakeToDateTime(); -- {serverError 42}
SELECT snowflakeToDateTime64(); -- {serverError 42}
SELECT snowflakeToDateTime('abc'); -- {serverError 43}
SELECT snowflakeToDateTime64('abc'); -- {serverError 43}
SELECT snowflakeToDateTime('abc', 123); -- {serverError 43}
SELECT snowflakeToDateTime64('abc', 123); -- {serverError 43}
SELECT 'const column';
WITH
CAST(1426860704886947840 AS Int64) AS i64,
'UTC' AS tz
SELECT
tz,
i64,
snowflakeToDateTime(i64, tz) as dt,
toTypeName(dt),
snowflakeToDateTime64(i64, tz) as dt64,
toTypeName(dt64);
WITH
CAST(1426860704886947840 AS Int64) AS i64,
'Asia/Shanghai' AS tz
SELECT
tz,
i64,
snowflakeToDateTime(i64, tz) as dt,
toTypeName(dt),
snowflakeToDateTime64(i64, tz) as dt64,
toTypeName(dt64);