Merge pull request #69179 from lwz9103/master

Improve compatibility of cast(timestamp as string) with spark
This commit is contained in:
Daniil Ivanik 2024-10-16 14:24:56 +00:00 committed by GitHub
commit f6fa8424cf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 154 additions and 13 deletions

View File

@ -93,6 +93,18 @@ See also:
- [DateTime data type.](../../sql-reference/data-types/datetime.md)
- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md)
## date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands {#date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands}
Dynamically trim the trailing zeros of `DateTime64` values so that the number of fractional digits in the output is `0`, `3`, or `6`, corresponding to seconds, milliseconds, and microseconds.
For example:
- 2012-01-01 00:11:22.000000 -> 2012-01-01 00:11:22
- 2012-01-01 00:11:22.120000 -> 2012-01-01 00:11:22.120
- 2012-01-01 00:11:22.123400 -> 2012-01-01 00:11:22.123400
Default value: `false`.
## date_time_overflow_behavior {#date_time_overflow_behavior}
Type: DateTimeOverflowBehavior

View File

@ -610,6 +610,9 @@ See also:
- [Interval](../../sql-reference/data-types/special-data-types/interval.md)
)", 0) \
\
M(Bool, date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands, false, R"(
Dynamically trim the trailing zeros of datetime64 values to adjust the output scale to [0, 3, 6],
corresponding to 'seconds', 'milliseconds', and 'microseconds')", 0) \
M(Bool, input_format_ipv4_default_on_conversion_error, false, R"(
Deserialization of IPv4 will use default values instead of throwing exception on conversion error.

View File

@ -102,6 +102,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"allow_experimental_refreshable_materialized_view", false, true, "Not experimental anymore"},
{"max_parts_to_move", 1000, 1000, "New setting"},
{"input_format_parquet_bloom_filter_push_down", false, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and bloom filter in the Parquet metadata."},
{"date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands", false, false, "Dynamically trim the trailing zeros of datetime64 values to adjust the output scale to (0, 3, 6), corresponding to 'seconds', 'milliseconds', and 'microseconds'."}
}
},
{"24.9",

View File

@ -26,7 +26,10 @@ void SerializationDateTime64::serializeText(const IColumn & column, size_t row_n
switch (settings.date_time_output_format)
{
case FormatSettings::DateTimeOutputFormat::Simple:
writeDateTimeText(value, scale, ostr, time_zone);
if (settings.date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands)
writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(value, scale, ostr, time_zone);
else
writeDateTimeText(value, scale, ostr, time_zone);
return;
case FormatSettings::DateTimeOutputFormat::UnixTimestamp:
writeDateTimeUnixTimestamp(value, scale, ostr);

View File

@ -142,6 +142,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.custom.allow_variable_number_of_columns = settings[Setting::input_format_custom_allow_variable_number_of_columns];
format_settings.date_time_input_format = settings[Setting::date_time_input_format];
format_settings.date_time_output_format = settings[Setting::date_time_output_format];
format_settings.date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands = settings[Setting::date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands];
format_settings.interval.output_format = settings[Setting::interval_output_format];
format_settings.input_format_ipv4_default_on_conversion_error = settings[Setting::input_format_ipv4_default_on_conversion_error];
format_settings.input_format_ipv6_default_on_conversion_error = settings[Setting::input_format_ipv6_default_on_conversion_error];

View File

@ -99,6 +99,8 @@ struct FormatSettings
Saturate
};
bool date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands = false;
DateTimeOverflowBehavior date_time_overflow_behavior = DateTimeOverflowBehavior::Ignore;
bool input_format_ipv4_default_on_conversion_error = false;

View File

@ -83,6 +83,7 @@ namespace Setting
extern const SettingsBool input_format_ipv4_default_on_conversion_error;
extern const SettingsBool input_format_ipv6_default_on_conversion_error;
extern const SettingsBool precise_float_parsing;
extern const SettingsBool date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands;
}
namespace ErrorCodes
@ -1397,10 +1398,19 @@ struct ConvertImpl
offsets_to.resize(size);
WriteBufferFromVector<ColumnString::Chars> write_buffer(data_to);
const auto & type = static_cast<const FromDataType &>(*col_with_type_and_name.type);
const FromDataType & type = static_cast<const FromDataType &>(*col_with_type_and_name.type);
ColumnUInt8::MutablePtr null_map = copyNullMap(datetime_arg.column);
bool cut_trailing_zeros_align_to_groups_of_thousands = false;
if (DB::CurrentThread::isInitialized())
{
const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext();
if (query_context)
cut_trailing_zeros_align_to_groups_of_thousands = query_context->getSettingsRef()[Setting::date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands];
}
if (!null_map && arguments.size() > 1)
null_map = copyNullMap(arguments[1].column->convertToFullColumnIfConst());
@ -1415,7 +1425,18 @@ struct ConvertImpl
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty");
}
bool is_ok = FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
bool is_ok = true;
if constexpr (std::is_same_v<FromDataType, DataTypeDateTime64>)
{
if (cut_trailing_zeros_align_to_groups_of_thousands)
writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(DateTime64(vec_from[i]), type.getScale(), write_buffer, *time_zone);
else
is_ok = FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
}
else
{
is_ok = FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
}
null_map->getData()[i] |= !is_ok;
writeChar(0, write_buffer);
offsets_to[i] = write_buffer.count();
@ -1432,7 +1453,17 @@ struct ConvertImpl
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty");
}
FormatImpl<FromDataType>::template execute<void>(vec_from[i], write_buffer, &type, time_zone);
if constexpr (std::is_same_v<FromDataType, DataTypeDateTime64>)
{
if (cut_trailing_zeros_align_to_groups_of_thousands)
writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(DateTime64(vec_from[i]), type.getScale(), write_buffer, *time_zone);
else
FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
}
else
{
FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
}
writeChar(0, write_buffer);
offsets_to[i] = write_buffer.count();
}

View File

@ -811,7 +811,7 @@ inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf)
void writeIPv4Text(const IPv4 & ip, WriteBuffer & buf);
void writeIPv6Text(const IPv6 & ip, WriteBuffer & buf);
template <typename DecimalType>
template <typename DecimalType, bool cut_trailing_zeros_align_to_groups_of_thousands = false>
inline void writeDateTime64FractionalText(typename DecimalType::NativeType fractional, UInt32 scale, WriteBuffer & buf)
{
static constexpr UInt32 MaxScale = DecimalUtils::max_precision<DecimalType>;
@ -822,7 +822,23 @@ inline void writeDateTime64FractionalText(typename DecimalType::NativeType fract
for (Int32 pos = scale - 1; pos >= 0 && fractional; --pos, fractional /= DateTime64(10))
data[pos] += fractional % DateTime64(10);
writeString(&data[0], static_cast<size_t>(scale), buf);
if constexpr (cut_trailing_zeros_align_to_groups_of_thousands)
{
/// Find the index of the last character that is not '0' among the first `scale` characters of `data`
/// (the index stays 0 when every one of them is '0').
UInt32 last_none_zero_pos = 0;
for (UInt32 pos = 0; pos < scale; ++pos)
{
if (data[pos] != '0')
{
last_none_zero_pos = pos;
}
}
/// Write 3 digits when the last significant digit is within the first three positions, otherwise write 6.
/// NOTE(review): the width is always 3 or 6 regardless of `scale`. For scale > 6 the digits after the sixth
/// are not written even if they are non-zero; when the width exceeds `scale`, characters past `scale` are
/// written as well - presumably `data` is pre-filled with '0' up to its full size; confirm.
size_t new_scale = (last_none_zero_pos >= 3 ? 6 : 3);
writeString(&data[0], new_scale, buf);
}
else
{
/// Default behaviour: write exactly `scale` fractional digits.
writeString(&data[0], static_cast<size_t>(scale), buf);
}
}
static const char digits100[201] =
@ -935,7 +951,12 @@ inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTI
}
/// In the format YYYY-MM-DD HH:MM:SS.NNNNNNNNN, according to the specified time zone.
template <char date_delimeter = '-', char time_delimeter = ':', char between_date_time_delimiter = ' ', char fractional_time_delimiter = '.'>
template <
char date_delimeter = '-',
char time_delimeter = ':',
char between_date_time_delimiter = ' ',
char fractional_time_delimiter = '.',
bool cut_trailing_zeros_align_to_groups_of_thousands = false>
inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance())
{
static constexpr UInt32 MaxScale = DecimalUtils::max_precision<DateTime64>;
@ -960,12 +981,27 @@ inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer &
}
writeDateTimeText<date_delimeter, time_delimeter, between_date_time_delimiter>(LocalDateTime(components.whole, time_zone), buf);
if (scale > 0)
if constexpr (cut_trailing_zeros_align_to_groups_of_thousands)
{
buf.write(fractional_time_delimiter);
writeDateTime64FractionalText<DateTime64>(components.fractional, scale, buf);
if (scale > 0 && components.fractional != 0)
{
buf.write(fractional_time_delimiter);
writeDateTime64FractionalText<DateTime64, true>(components.fractional, scale, buf);
}
}
else
{
if (scale > 0)
{
buf.write(fractional_time_delimiter);
writeDateTime64FractionalText<DateTime64, false>(components.fractional, scale, buf);
}
}
}
/// Writes a DateTime64 value through writeDateTimeText using the default delimiters ('-', ':', ' ', '.')
/// with the cut_trailing_zeros_align_to_groups_of_thousands template flag switched on.
inline void writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance())
{
    constexpr char date_separator = '-';
    constexpr char time_separator = ':';
    constexpr char date_time_separator = ' ';
    constexpr char fraction_separator = '.';
    constexpr bool cut_trailing_zeros = true;

    writeDateTimeText<date_separator, time_separator, date_time_separator, fraction_separator, cut_trailing_zeros>(
        datetime64, scale, buf, time_zone);
}
/// In the RFC 1123 format: "Tue, 03 Dec 2019 00:11:50 GMT". You must provide GMT DateLUT.

View File

@ -78,7 +78,7 @@ TEST(DateTimeToStringTest, RFC1123)
ASSERT_EQ(out.str(), "Fri, 18 Mar 2005 01:58:31 GMT");
}
template <typename ValueType>
template <typename ValueType, bool date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands = false>
class DateTimeToStringParamTestBase : public ::testing::TestWithParam<DateTimeToStringParamTestCase<ValueType>>
{
public:
@ -99,7 +99,10 @@ public:
}
else if constexpr (std::is_same_v<ValueType, DateTime64WithScale>)
{
writeDateTimeText(input.value, input.scale, out, DateLUT::instance(timezone_name));
if constexpr (date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands)
writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(input.value, input.scale, out, DateLUT::instance(timezone_name));
else
writeDateTimeText(input.value, input.scale, out, DateLUT::instance(timezone_name));
}
ASSERT_EQ(expected, out.str());
@ -130,6 +133,14 @@ TEST_P(DateTimeToStringParamTestDateTime64, writeDateText)
ASSERT_NO_FATAL_FAILURE(test(GetParam()));
}
/// Parameterized fixture for DateTime64 values; the second template argument enables the
/// "cut trailing zeros, align to groups of thousands" writer in the base fixture.
class DateTimeToStringParamTestDateTime64TrimZeros
    : public DateTimeToStringParamTestBase<DateTime64WithScale, /* cut trailing zeros */ true>
{
};
/// Runs the fixture's check for every instantiated parameter.
TEST_P(DateTimeToStringParamTestDateTime64TrimZeros, writeDateText)
{
    const auto & test_case = GetParam();
    ASSERT_NO_FATAL_FAILURE(test(test_case));
}
static const Int32 NON_ZERO_TIME_T = 10 * 365 * 3600 * 24 + 123456; /// NOTE This arithmetic is obviously wrong but it's ok for test.
INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDayNum,
@ -212,3 +223,36 @@ INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDateTime64,
// },
})
);
/// Cases for the DateTime64 writer that cuts trailing zeros of the fractional part.
/// Each entry is {description, {raw value, scale}, expected text}.
/// New cases are appended at the end so that the indices of the existing parameters do not change.
INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDateTime64TrimZeros,
    ::testing::ValuesIn(std::initializer_list<DateTimeToStringParamTestCase<DateTime64WithScale>>
    {
        /// Inside basic LUT boundaries
        {
            "Zero DateTime64 with scale 0",
            DateTime64WithScale{0, 0},
            "1970-01-01 00:00:00"
        },
        {
            "Zero DateTime64 with scale 6, fractional is trimmed",
            DateTime64WithScale{0, 6},
            "1970-01-01 00:00:00"
        },
        {
            "DateTime64 with scale 3, fractional is trimmed",
            DateTime64WithScale{NON_ZERO_TIME_T * 1000LL, 3},
            "1979-12-31 10:17:36"
        },
        {
            "DateTime64 with scale 6, fractional is partially trimmed",
            DateTime64WithScale{120000, 6},
            "1970-01-01 00:00:00.120"
        },
        {
            "DateTime64 with scale 6, fractional is kept",
            DateTime64WithScale{123456, 6},
            "1970-01-01 00:00:00.123456"
        },
        /// Boundary between the 3-digit and the 6-digit output:
        /// the last non-zero digit is the third one, so only milliseconds are printed.
        {
            "DateTime64 with scale 6, fractional is trimmed to milliseconds",
            DateTime64WithScale{123000, 6},
            "1970-01-01 00:00:00.123"
        },
        /// The last non-zero digit is the fourth one, so all six digits are printed.
        {
            "DateTime64 with scale 6, fourth digit is non-zero, six digits are kept",
            DateTime64WithScale{123400, 6},
            "1970-01-01 00:00:00.123400"
        },
    })
);

View File

@ -22,3 +22,4 @@
(8,'8',[0,1,2,3,4,5,6,7]) (8,\'8\',[0,1,2,3,4,5,6,7])
(9,'9',[0,1,2,3,4,5,6,7,8]) (9,\'9\',[0,1,2,3,4,5,6,7,8])
0A
2024-01-01 00:00:00 2024-01-01 00:00:00.100 (1,\'2024-01-01 00:00:00.120\') [\'2024-01-01 00:00:00.123\',\'2024-01-01 00:00:00.123400\'] 2024-01-01 00:00:00

View File

@ -5,3 +5,10 @@ SELECT hex(toString(countState())) FROM (SELECT * FROM system.numbers LIMIT 10);
SELECT CAST((1, 'Hello', toDate('2016-01-01')) AS String), CAST([1, 2, 3] AS String);
SELECT (number, toString(number), range(number)) AS x, CAST(x AS String) FROM system.numbers LIMIT 10;
SELECT hex(CAST(countState() AS String)) FROM (SELECT * FROM system.numbers LIMIT 10);
-- DateTime64(6) values rendered as text with trailing zeros cut to 0, 3 or 6 fractional digits:
-- a plain column, an explicit cast to String, a value inside a tuple, values inside an array,
-- and a value obtained by casting a string extracted from JSON.
SELECT
    toDateTime64('2024-01-01 00:00:00.00', 6),
    CAST(toDateTime64('2024-01-01 00:00:00.100', 6) AS String),
    toString((1, toDateTime64('2024-01-01 00:00:00.12000', 6))),
    toString([toDateTime64('2024-01-01 00:00:00.123000', 6), toDateTime64('2024-01-01 00:00:00.123400', 6)]),
    JSONExtractString('{"a" : "2024-01-01 00:00:00"}', 'a')::DateTime64(6)
SETTINGS date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands = true;