mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 09:32:01 +00:00
Merge pull request #69179 from lwz9103/master
Improve compatibility of cast(timestamp as string) with spark
This commit is contained in:
commit
f6fa8424cf
@ -93,6 +93,18 @@ See also:
|
||||
- [DateTime data type.](../../sql-reference/data-types/datetime.md)
|
||||
- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md)
|
||||
|
||||
## date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands {#date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands}
|
||||
|
||||
Dynamically trims the trailing zeros of `DateTime64` values so that the output scale becomes one of `(0, 3, 6)` fractional digits, corresponding to `seconds`, `milliseconds`, and `microseconds`.
|
||||
|
||||
The following examples show how the output changes:
|
||||
|
||||
- 2012-01-01 00:11:22.000000 -> 2012-01-01 00:11:22
|
||||
- 2012-01-01 00:11:22.120000 -> 2012-01-01 00:11:22.120
|
||||
- 2012-01-01 00:11:22.123400 -> 2012-01-01 00:11:22.123400
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
## date_time_overflow_behavior {#date_time_overflow_behavior}
|
||||
|
||||
Type: DateTimeOverflowBehavior
|
||||
|
@ -610,6 +610,9 @@ See also:
|
||||
- [Interval](../../sql-reference/data-types/special-data-types/interval.md)
|
||||
)", 0) \
|
||||
\
|
||||
M(Bool, date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands, false, R"(
|
||||
Dynamically trim the trailing zeros of datetime64 values to adjust the output scale to [0, 3, 6],
|
||||
corresponding to 'seconds', 'milliseconds', and 'microseconds')", 0) \
|
||||
M(Bool, input_format_ipv4_default_on_conversion_error, false, R"(
|
||||
Deserialization of IPv4 will use default values instead of throwing exception on conversion error.
|
||||
|
||||
|
@ -102,6 +102,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
||||
{"allow_experimental_refreshable_materialized_view", false, true, "Not experimental anymore"},
|
||||
{"max_parts_to_move", 1000, 1000, "New setting"},
|
||||
{"input_format_parquet_bloom_filter_push_down", false, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and bloom filter in the Parquet metadata."},
|
||||
{"date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands", false, false, "Dynamically trim the trailing zeros of datetime64 values to adjust the output scale to (0, 3, 6), corresponding to 'seconds', 'milliseconds', and 'microseconds'."}
|
||||
}
|
||||
},
|
||||
{"24.9",
|
||||
|
@ -26,6 +26,9 @@ void SerializationDateTime64::serializeText(const IColumn & column, size_t row_n
|
||||
switch (settings.date_time_output_format)
|
||||
{
|
||||
case FormatSettings::DateTimeOutputFormat::Simple:
|
||||
if (settings.date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands)
|
||||
writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(value, scale, ostr, time_zone);
|
||||
else
|
||||
writeDateTimeText(value, scale, ostr, time_zone);
|
||||
return;
|
||||
case FormatSettings::DateTimeOutputFormat::UnixTimestamp:
|
||||
|
@ -142,6 +142,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
|
||||
format_settings.custom.allow_variable_number_of_columns = settings[Setting::input_format_custom_allow_variable_number_of_columns];
|
||||
format_settings.date_time_input_format = settings[Setting::date_time_input_format];
|
||||
format_settings.date_time_output_format = settings[Setting::date_time_output_format];
|
||||
format_settings.date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands = settings[Setting::date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands];
|
||||
format_settings.interval.output_format = settings[Setting::interval_output_format];
|
||||
format_settings.input_format_ipv4_default_on_conversion_error = settings[Setting::input_format_ipv4_default_on_conversion_error];
|
||||
format_settings.input_format_ipv6_default_on_conversion_error = settings[Setting::input_format_ipv6_default_on_conversion_error];
|
||||
|
@ -99,6 +99,8 @@ struct FormatSettings
|
||||
Saturate
|
||||
};
|
||||
|
||||
bool date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands = false;
|
||||
|
||||
DateTimeOverflowBehavior date_time_overflow_behavior = DateTimeOverflowBehavior::Ignore;
|
||||
|
||||
bool input_format_ipv4_default_on_conversion_error = false;
|
||||
|
@ -83,6 +83,7 @@ namespace Setting
|
||||
extern const SettingsBool input_format_ipv4_default_on_conversion_error;
|
||||
extern const SettingsBool input_format_ipv6_default_on_conversion_error;
|
||||
extern const SettingsBool precise_float_parsing;
|
||||
extern const SettingsBool date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands;
|
||||
}
|
||||
|
||||
namespace ErrorCodes
|
||||
@ -1397,10 +1398,19 @@ struct ConvertImpl
|
||||
offsets_to.resize(size);
|
||||
|
||||
WriteBufferFromVector<ColumnString::Chars> write_buffer(data_to);
|
||||
const auto & type = static_cast<const FromDataType &>(*col_with_type_and_name.type);
|
||||
const FromDataType & type = static_cast<const FromDataType &>(*col_with_type_and_name.type);
|
||||
|
||||
ColumnUInt8::MutablePtr null_map = copyNullMap(datetime_arg.column);
|
||||
|
||||
bool cut_trailing_zeros_align_to_groups_of_thousands = false;
|
||||
if (DB::CurrentThread::isInitialized())
|
||||
{
|
||||
const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext();
|
||||
|
||||
if (query_context)
|
||||
cut_trailing_zeros_align_to_groups_of_thousands = query_context->getSettingsRef()[Setting::date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands];
|
||||
}
|
||||
|
||||
if (!null_map && arguments.size() > 1)
|
||||
null_map = copyNullMap(arguments[1].column->convertToFullColumnIfConst());
|
||||
|
||||
@ -1415,7 +1425,18 @@ struct ConvertImpl
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty");
|
||||
}
|
||||
bool is_ok = FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
|
||||
bool is_ok = true;
|
||||
if constexpr (std::is_same_v<FromDataType, DataTypeDateTime64>)
|
||||
{
|
||||
if (cut_trailing_zeros_align_to_groups_of_thousands)
|
||||
writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(DateTime64(vec_from[i]), type.getScale(), write_buffer, *time_zone);
|
||||
else
|
||||
is_ok = FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
|
||||
}
|
||||
else
|
||||
{
|
||||
is_ok = FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
|
||||
}
|
||||
null_map->getData()[i] |= !is_ok;
|
||||
writeChar(0, write_buffer);
|
||||
offsets_to[i] = write_buffer.count();
|
||||
@ -1432,7 +1453,17 @@ struct ConvertImpl
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty");
|
||||
}
|
||||
FormatImpl<FromDataType>::template execute<void>(vec_from[i], write_buffer, &type, time_zone);
|
||||
if constexpr (std::is_same_v<FromDataType, DataTypeDateTime64>)
|
||||
{
|
||||
if (cut_trailing_zeros_align_to_groups_of_thousands)
|
||||
writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(DateTime64(vec_from[i]), type.getScale(), write_buffer, *time_zone);
|
||||
else
|
||||
FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
|
||||
}
|
||||
else
|
||||
{
|
||||
FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
|
||||
}
|
||||
writeChar(0, write_buffer);
|
||||
offsets_to[i] = write_buffer.count();
|
||||
}
|
||||
|
@ -811,7 +811,7 @@ inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf)
|
||||
void writeIPv4Text(const IPv4 & ip, WriteBuffer & buf);
|
||||
void writeIPv6Text(const IPv6 & ip, WriteBuffer & buf);
|
||||
|
||||
template <typename DecimalType>
|
||||
template <typename DecimalType, bool cut_trailing_zeros_align_to_groups_of_thousands = false>
|
||||
inline void writeDateTime64FractionalText(typename DecimalType::NativeType fractional, UInt32 scale, WriteBuffer & buf)
|
||||
{
|
||||
static constexpr UInt32 MaxScale = DecimalUtils::max_precision<DecimalType>;
|
||||
@ -822,8 +822,24 @@ inline void writeDateTime64FractionalText(typename DecimalType::NativeType fract
|
||||
for (Int32 pos = scale - 1; pos >= 0 && fractional; --pos, fractional /= DateTime64(10))
|
||||
data[pos] += fractional % DateTime64(10);
|
||||
|
||||
if constexpr (cut_trailing_zeros_align_to_groups_of_thousands)
|
||||
{
|
||||
UInt32 last_none_zero_pos = 0;
|
||||
for (UInt32 pos = 0; pos < scale; ++pos)
|
||||
{
|
||||
if (data[pos] != '0')
|
||||
{
|
||||
last_none_zero_pos = pos;
|
||||
}
|
||||
}
|
||||
size_t new_scale = (last_none_zero_pos >= 3 ? 6 : 3);
|
||||
writeString(&data[0], new_scale, buf);
|
||||
}
|
||||
else
|
||||
{
|
||||
writeString(&data[0], static_cast<size_t>(scale), buf);
|
||||
}
|
||||
}
|
||||
|
||||
static const char digits100[201] =
|
||||
"00010203040506070809"
|
||||
@ -935,7 +951,12 @@ inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTI
|
||||
}
|
||||
|
||||
/// In the format YYYY-MM-DD HH:MM:SS.NNNNNNNNN, according to the specified time zone.
|
||||
template <char date_delimeter = '-', char time_delimeter = ':', char between_date_time_delimiter = ' ', char fractional_time_delimiter = '.'>
|
||||
template <
|
||||
char date_delimeter = '-',
|
||||
char time_delimeter = ':',
|
||||
char between_date_time_delimiter = ' ',
|
||||
char fractional_time_delimiter = '.',
|
||||
bool cut_trailing_zeros_align_to_groups_of_thousands = false>
|
||||
inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance())
|
||||
{
|
||||
static constexpr UInt32 MaxScale = DecimalUtils::max_precision<DateTime64>;
|
||||
@ -960,13 +981,28 @@ inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer &
|
||||
}
|
||||
|
||||
writeDateTimeText<date_delimeter, time_delimeter, between_date_time_delimiter>(LocalDateTime(components.whole, time_zone), buf);
|
||||
|
||||
if constexpr (cut_trailing_zeros_align_to_groups_of_thousands)
|
||||
{
|
||||
if (scale > 0 && components.fractional != 0)
|
||||
{
|
||||
buf.write(fractional_time_delimiter);
|
||||
writeDateTime64FractionalText<DateTime64, true>(components.fractional, scale, buf);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (scale > 0)
|
||||
{
|
||||
buf.write(fractional_time_delimiter);
|
||||
writeDateTime64FractionalText<DateTime64>(components.fractional, scale, buf);
|
||||
writeDateTime64FractionalText<DateTime64, false>(components.fractional, scale, buf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes a DateTime64 in the "YYYY-MM-DD HH:MM:SS[.fraction]" form, according to the specified time zone,
/// using the default delimiters and with the `cut_trailing_zeros_align_to_groups_of_thousands`
/// mode of writeDateTimeText switched on.
/// In that mode the fractional part (and its delimiter) is omitted when `scale` is 0 or the fraction is zero;
/// otherwise the fraction is written with 3 or 6 digits.
inline void writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance())
|
||||
{
|
||||
/// The first four template arguments repeat the defaults of writeDateTimeText; only the last flag differs.
writeDateTimeText<'-', ':', ' ', '.', true>(datetime64, scale, buf, time_zone);
|
||||
}
|
||||
|
||||
/// In the RFC 1123 format: "Tue, 03 Dec 2019 00:11:50 GMT". You must provide GMT DateLUT.
|
||||
/// This is needed for HTTP requests.
|
||||
|
@ -78,7 +78,7 @@ TEST(DateTimeToStringTest, RFC1123)
|
||||
ASSERT_EQ(out.str(), "Fri, 18 Mar 2005 01:58:31 GMT");
|
||||
}
|
||||
|
||||
template <typename ValueType>
|
||||
template <typename ValueType, bool date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands = false>
|
||||
class DateTimeToStringParamTestBase : public ::testing::TestWithParam<DateTimeToStringParamTestCase<ValueType>>
|
||||
{
|
||||
public:
|
||||
@ -99,6 +99,9 @@ public:
|
||||
}
|
||||
else if constexpr (std::is_same_v<ValueType, DateTime64WithScale>)
|
||||
{
|
||||
if constexpr (date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands)
|
||||
writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(input.value, input.scale, out, DateLUT::instance(timezone_name));
|
||||
else
|
||||
writeDateTimeText(input.value, input.scale, out, DateLUT::instance(timezone_name));
|
||||
}
|
||||
|
||||
@ -130,6 +133,14 @@ TEST_P(DateTimeToStringParamTestDateTime64, writeDateText)
|
||||
ASSERT_NO_FATAL_FAILURE(test(GetParam()));
|
||||
}
|
||||
|
||||
/// Parameterized fixture for DateTime64WithScale inputs that instantiates the base with the
/// trim-zeros flag set to true, so values are formatted through
/// writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands instead of writeDateTimeText.
class DateTimeToStringParamTestDateTime64TrimZeros : public DateTimeToStringParamTestBase<DateTime64WithScale, true>
|
||||
{};
|
||||
|
||||
/// Runs the fixture's check once per instantiated trim-zeros case; a fatal failure inside test() fails this test.
/// NOTE(review): test() is presumably the helper defined on DateTimeToStringParamTestBase - confirm.
TEST_P(DateTimeToStringParamTestDateTime64TrimZeros, writeDateText)
|
||||
{
|
||||
ASSERT_NO_FATAL_FAILURE(test(GetParam()));
|
||||
}
|
||||
|
||||
static const Int32 NON_ZERO_TIME_T = 10 * 365 * 3600 * 24 + 123456; /// NOTE This arithmetic is obviously wrong but it's ok for test.
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDayNum,
|
||||
@ -212,3 +223,36 @@ INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDateTime64,
|
||||
// },
|
||||
})
|
||||
);
|
||||
|
||||
|
||||
/// Cases for the trim-zeros DateTime64 writer. Each entry is {description, input value with scale, expected text}.
/// Expectations covered here: a zero fractional part is dropped together with the delimiter,
/// and a non-zero fractional part is printed with either 3 or 6 digits.
INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDateTime64TrimZeros,
|
||||
::testing::ValuesIn(std::initializer_list<DateTimeToStringParamTestCase<DateTime64WithScale>>
|
||||
{
|
||||
/// Inside basic LUT boundaries
|
||||
{
|
||||
"Zero DateTime64 with scale 0",
|
||||
DateTime64WithScale{0, 0},
|
||||
"1970-01-01 00:00:00"
|
||||
},
|
||||
{
|
||||
"Zero DateTime64 with scale 6, fractional is trimmed",
|
||||
DateTime64WithScale{0, 6},
|
||||
"1970-01-01 00:00:00"
|
||||
},
|
||||
{
|
||||
"DateTime64 with scale 3, fractional is trimmed",
|
||||
/// Whole seconds only: the value is a multiple of 1000 at scale 3, so the fraction is zero.
DateTime64WithScale{NON_ZERO_TIME_T * 1000LL, 3},
|
||||
"1979-12-31 10:17:36"
|
||||
},
|
||||
{
|
||||
"DateTime64 with scale 6, fractional is partially trimmed",
|
||||
/// 0.120000 seconds: the last non-zero digit is within the first three, so 3 digits are expected.
DateTime64WithScale{120000, 6},
|
||||
"1970-01-01 00:00:00.120"
|
||||
},
|
||||
{
|
||||
"DateTime64 with scale 6, fractional is kept",
|
||||
/// 0.123456 seconds: non-zero digits extend past the third position, so all 6 digits are expected.
DateTime64WithScale{123456, 6},
|
||||
"1970-01-01 00:00:00.123456"
|
||||
},
|
||||
})
|
||||
);
|
||||
|
@ -22,3 +22,4 @@
|
||||
(8,'8',[0,1,2,3,4,5,6,7]) (8,\'8\',[0,1,2,3,4,5,6,7])
|
||||
(9,'9',[0,1,2,3,4,5,6,7,8]) (9,\'9\',[0,1,2,3,4,5,6,7,8])
|
||||
0A
|
||||
2024-01-01 00:00:00 2024-01-01 00:00:00.100 (1,\'2024-01-01 00:00:00.120\') [\'2024-01-01 00:00:00.123\',\'2024-01-01 00:00:00.123400\'] 2024-01-01 00:00:00
|
||||
|
@ -5,3 +5,10 @@ SELECT hex(toString(countState())) FROM (SELECT * FROM system.numbers LIMIT 10);
|
||||
SELECT CAST((1, 'Hello', toDate('2016-01-01')) AS String), CAST([1, 2, 3] AS String);
|
||||
SELECT (number, toString(number), range(number)) AS x, CAST(x AS String) FROM system.numbers LIMIT 10;
|
||||
SELECT hex(CAST(countState() AS String)) FROM (SELECT * FROM system.numbers LIMIT 10);
|
||||
|
||||
SELECT toDateTime64('2024-01-01 00:00:00.00', 6),
|
||||
cast(toDateTime64('2024-01-01 00:00:00.100', 6) as String),
|
||||
toString((1, toDateTime64('2024-01-01 00:00:00.12000', 6))),
|
||||
toString([toDateTime64('2024-01-01 00:00:00.123000', 6), toDateTime64('2024-01-01 00:00:00.123400', 6)]),
|
||||
JSONExtractString('{"a" : "2024-01-01 00:00:00"}', 'a')::DateTime64(6)
|
||||
SETTINGS date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands = true;
|
||||
|
Loading…
Reference in New Issue
Block a user