mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-04 13:32:13 +00:00
Merge pull request #70957 from bigo-sg/tostring-fix
Function cast support Spark text output
This commit is contained in:
commit
e8dab58d2d
@ -1242,6 +1242,9 @@ Set the quoting rule for identifiers in SHOW CREATE query
|
||||
)", 0) \
|
||||
DECLARE(IdentifierQuotingStyle, show_create_query_identifier_quoting_style, IdentifierQuotingStyle::Backticks, R"(
|
||||
Set the quoting style for identifiers in SHOW CREATE query
|
||||
)", 0) \
|
||||
DECLARE(String, composed_data_type_output_format_mode, "default", R"(
|
||||
Set composed data type output format mode, default or spark.
|
||||
)", 0) \
|
||||
|
||||
// End of FORMAT_FACTORY_SETTINGS
|
||||
|
@ -67,6 +67,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
||||
{"max_bytes_ratio_before_external_group_by", 0., 0., "New setting."},
|
||||
{"max_bytes_ratio_before_external_sort", 0., 0., "New setting."},
|
||||
{"use_async_executor_for_materialized_views", false, false, "New setting."},
|
||||
{"composed_data_type_output_format_mode", "default", "default", "New setting"},
|
||||
{"http_response_headers", "", "", "New setting."},
|
||||
}
|
||||
},
|
||||
|
@ -401,7 +401,7 @@ void SerializationArray::deserializeBinaryBulkWithMultipleStreams(
|
||||
|
||||
|
||||
template <typename Writer>
|
||||
static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && write_nested)
|
||||
static void serializeTextImpl(const IColumn & column, size_t row_num, const FormatSettings & settings, WriteBuffer & ostr, Writer && write_nested)
|
||||
{
|
||||
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
|
||||
const ColumnArray::Offsets & offsets = column_array.getOffsets();
|
||||
@ -412,10 +412,14 @@ static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffe
|
||||
const IColumn & nested_column = column_array.getData();
|
||||
|
||||
writeChar('[', ostr);
|
||||
for (size_t i = offset; i < next_offset; ++i)
|
||||
|
||||
if (next_offset != offset)
|
||||
write_nested(nested_column, offset);
|
||||
for (size_t i = offset + 1; i < next_offset; ++i)
|
||||
{
|
||||
if (i != offset)
|
||||
writeChar(',', ostr);
|
||||
writeChar(',', ostr);
|
||||
if (settings.composed_data_type_output_format_mode == "spark")
|
||||
writeChar(' ', ostr);
|
||||
write_nested(nested_column, i);
|
||||
}
|
||||
writeChar(']', ostr);
|
||||
@ -520,10 +524,13 @@ static ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reade
|
||||
|
||||
void SerializationArray::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeTextImpl(column, row_num, ostr,
|
||||
serializeTextImpl(column, row_num, settings, ostr,
|
||||
[&](const IColumn & nested_column, size_t i)
|
||||
{
|
||||
nested->serializeTextQuoted(nested_column, i, ostr, settings);
|
||||
if (settings.composed_data_type_output_format_mode == "spark")
|
||||
nested->serializeText(nested_column, i, ostr, settings);
|
||||
else
|
||||
nested->serializeTextQuoted(nested_column, i, ostr, settings);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -90,6 +90,7 @@ template <typename KeyWriter, typename ValueWriter>
|
||||
void SerializationMap::serializeTextImpl(
|
||||
const IColumn & column,
|
||||
size_t row_num,
|
||||
const FormatSettings & settings,
|
||||
WriteBuffer & ostr,
|
||||
KeyWriter && key_writer,
|
||||
ValueWriter && value_writer) const
|
||||
@ -104,15 +105,31 @@ void SerializationMap::serializeTextImpl(
|
||||
size_t next_offset = offsets[row_num];
|
||||
|
||||
writeChar('{', ostr);
|
||||
for (size_t i = offset; i < next_offset; ++i)
|
||||
if (offset != next_offset)
|
||||
{
|
||||
if (i != offset)
|
||||
writeChar(',', ostr);
|
||||
|
||||
key_writer(ostr, key, nested_tuple.getColumn(0), i);
|
||||
writeChar(':', ostr);
|
||||
value_writer(ostr, value, nested_tuple.getColumn(1), i);
|
||||
key_writer(ostr, key, nested_tuple.getColumn(0), offset);
|
||||
if (settings.composed_data_type_output_format_mode == "spark")
|
||||
writeString(std::string_view(" -> "), ostr);
|
||||
else
|
||||
writeChar(':', ostr);
|
||||
value_writer(ostr, value, nested_tuple.getColumn(1), offset);
|
||||
}
|
||||
if (settings.composed_data_type_output_format_mode == "spark")
|
||||
for (size_t i = offset + 1; i < next_offset; ++i)
|
||||
{
|
||||
writeString(std::string_view(", "), ostr);
|
||||
key_writer(ostr, key, nested_tuple.getColumn(0), i);
|
||||
writeString(std::string_view(" -> "), ostr);
|
||||
value_writer(ostr, value, nested_tuple.getColumn(1), i);
|
||||
}
|
||||
else
|
||||
for (size_t i = offset + 1; i < next_offset; ++i)
|
||||
{
|
||||
writeChar(',', ostr);
|
||||
key_writer(ostr, key, nested_tuple.getColumn(0), i);
|
||||
writeChar(':', ostr);
|
||||
value_writer(ostr, value, nested_tuple.getColumn(1), i);
|
||||
}
|
||||
writeChar('}', ostr);
|
||||
}
|
||||
|
||||
@ -221,10 +238,13 @@ void SerializationMap::serializeText(const IColumn & column, size_t row_num, Wri
|
||||
{
|
||||
auto writer = [&settings](WriteBuffer & buf, const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
|
||||
{
|
||||
subcolumn_serialization->serializeTextQuoted(subcolumn, pos, buf, settings);
|
||||
if (settings.composed_data_type_output_format_mode == "spark")
|
||||
subcolumn_serialization->serializeText(subcolumn, pos, buf, settings);
|
||||
else
|
||||
subcolumn_serialization->serializeTextQuoted(subcolumn, pos, buf, settings);
|
||||
};
|
||||
|
||||
serializeTextImpl(column, row_num, ostr, writer, writer);
|
||||
serializeTextImpl(column, row_num, settings, ostr, writer, writer);
|
||||
}
|
||||
|
||||
void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
|
||||
@ -266,7 +286,7 @@ bool SerializationMap::tryDeserializeText(IColumn & column, ReadBuffer & istr, c
|
||||
|
||||
void SerializationMap::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeTextImpl(column, row_num, ostr,
|
||||
serializeTextImpl(column, row_num, settings, ostr,
|
||||
[&settings](WriteBuffer & buf, const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
|
||||
{
|
||||
/// We need to double-quote all keys (including integers) to produce valid JSON.
|
||||
|
@ -70,7 +70,7 @@ public:
|
||||
|
||||
private:
|
||||
template <typename KeyWriter, typename ValueWriter>
|
||||
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const;
|
||||
void serializeTextImpl(const IColumn & column, size_t row_num, const FormatSettings & settings, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const;
|
||||
|
||||
template <typename ReturnType = void, typename Reader>
|
||||
ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;
|
||||
|
@ -137,12 +137,25 @@ void SerializationTuple::deserializeBinary(IColumn & column, ReadBuffer & istr,
|
||||
void SerializationTuple::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeChar('(', ostr);
|
||||
for (size_t i = 0; i < elems.size(); ++i)
|
||||
if (!elems.empty())
|
||||
{
|
||||
if (i != 0)
|
||||
writeChar(',', ostr);
|
||||
elems[i]->serializeTextQuoted(extractElementColumn(column, i), row_num, ostr, settings);
|
||||
if (settings.composed_data_type_output_format_mode == "spark")
|
||||
elems[0]->serializeText(extractElementColumn(column, 0), row_num, ostr, settings);
|
||||
else
|
||||
elems[0]->serializeTextQuoted(extractElementColumn(column, 0), row_num, ostr, settings);
|
||||
}
|
||||
if (settings.composed_data_type_output_format_mode == "spark")
|
||||
for (size_t i = 1; i < elems.size(); ++i)
|
||||
{
|
||||
writeString(std::string_view(", "), ostr);
|
||||
elems[i]->serializeText(extractElementColumn(column, i), row_num, ostr, settings);
|
||||
}
|
||||
else
|
||||
for (size_t i = 1; i < elems.size(); ++i)
|
||||
{
|
||||
writeChar(',', ostr);
|
||||
elems[i]->serializeTextQuoted(extractElementColumn(column, i), row_num, ostr, settings);
|
||||
}
|
||||
writeChar(')', ostr);
|
||||
}
|
||||
|
||||
|
@ -251,6 +251,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
|
||||
format_settings.values.deduce_templates_of_expressions = settings[Setting::input_format_values_deduce_templates_of_expressions];
|
||||
format_settings.values.interpret_expressions = settings[Setting::input_format_values_interpret_expressions];
|
||||
format_settings.values.escape_quote_with_quote = settings[Setting::output_format_values_escape_quote_with_quote];
|
||||
format_settings.composed_data_type_output_format_mode = settings[Setting::composed_data_type_output_format_mode];
|
||||
format_settings.with_names_use_header = settings[Setting::input_format_with_names_use_header];
|
||||
format_settings.with_types_use_header = settings[Setting::input_format_with_types_use_header];
|
||||
format_settings.write_statistics = settings[Setting::output_format_write_statistics];
|
||||
|
@ -38,6 +38,7 @@ struct FormatSettings
|
||||
bool try_infer_variant = false;
|
||||
|
||||
bool seekable_read = true;
|
||||
String composed_data_type_output_format_mode = "default";
|
||||
UInt64 max_rows_to_read_for_schema_inference = 25000;
|
||||
UInt64 max_bytes_to_read_for_schema_inference = 32 * 1024 * 1024;
|
||||
|
||||
|
@ -0,0 +1,16 @@
|
||||
-- array format --
|
||||
[\'1\']
|
||||
[1, 2, abc, \'1\']
|
||||
[1, 2, abc, \'1\']
|
||||
[1, 2, abc, \'1\']
|
||||
[1, 2, abc, \'1\']
|
||||
-- map format --
|
||||
{1343 -> fe, afe -> fefe}
|
||||
{1343 -> fe, afe -> fefe}
|
||||
{1343 -> fe, afe -> fefe}
|
||||
{1343 -> fe, afe -> fefe}
|
||||
-- tuple format --
|
||||
(1, 3, abc)
|
||||
(1, 3, abc)
|
||||
(1, 3, abc)
|
||||
(1, 3, abc)
|
18
tests/queries/0_stateless/03259_to_string_spark_format.sql
Normal file
18
tests/queries/0_stateless/03259_to_string_spark_format.sql
Normal file
@ -0,0 +1,18 @@
|
||||
SELECT '-- array format --';
|
||||
SELECT CAST(array('\'1\'') , 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||
SELECT CAST([materialize('1'), '2', 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||
SELECT CAST([materialize('1'), materialize('2'), 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||
SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||
SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), materialize('\'1\'')], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||
|
||||
SELECT '-- map format --';
|
||||
SELECT toString(map('1343', 'fe', 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||
SELECT toString(map(materialize('1343'), materialize('fe'), 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||
SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||
SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), materialize('fefe'))) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||
|
||||
SELECT '-- tuple format --';
|
||||
SELECT toString(('1', '3', 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||
SELECT toString((materialize('1'), '3', 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||
SELECT toString((materialize('1'), materialize('3'), 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||
SELECT toString((materialize('1'), materialize('3'), materialize('abc'))) SETTINGS composed_data_type_output_format_mode = 'spark';
|
Loading…
Reference in New Issue
Block a user