mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-04 13:32:13 +00:00
Merge pull request #70957 from bigo-sg/tostring-fix
Function cast support Spark text output
This commit is contained in:
commit
e8dab58d2d
@ -1242,6 +1242,9 @@ Set the quoting rule for identifiers in SHOW CREATE query
|
|||||||
)", 0) \
|
)", 0) \
|
||||||
DECLARE(IdentifierQuotingStyle, show_create_query_identifier_quoting_style, IdentifierQuotingStyle::Backticks, R"(
|
DECLARE(IdentifierQuotingStyle, show_create_query_identifier_quoting_style, IdentifierQuotingStyle::Backticks, R"(
|
||||||
Set the quoting style for identifiers in SHOW CREATE query
|
Set the quoting style for identifiers in SHOW CREATE query
|
||||||
|
)", 0) \
|
||||||
|
DECLARE(String, composed_data_type_output_format_mode, "default", R"(
|
||||||
|
Set composed data type output format mode, default or spark.
|
||||||
)", 0) \
|
)", 0) \
|
||||||
|
|
||||||
// End of FORMAT_FACTORY_SETTINGS
|
// End of FORMAT_FACTORY_SETTINGS
|
||||||
|
@ -67,6 +67,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
|||||||
{"max_bytes_ratio_before_external_group_by", 0., 0., "New setting."},
|
{"max_bytes_ratio_before_external_group_by", 0., 0., "New setting."},
|
||||||
{"max_bytes_ratio_before_external_sort", 0., 0., "New setting."},
|
{"max_bytes_ratio_before_external_sort", 0., 0., "New setting."},
|
||||||
{"use_async_executor_for_materialized_views", false, false, "New setting."},
|
{"use_async_executor_for_materialized_views", false, false, "New setting."},
|
||||||
|
{"composed_data_type_output_format_mode", "default", "default", "New setting"},
|
||||||
{"http_response_headers", "", "", "New setting."},
|
{"http_response_headers", "", "", "New setting."},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -401,7 +401,7 @@ void SerializationArray::deserializeBinaryBulkWithMultipleStreams(
|
|||||||
|
|
||||||
|
|
||||||
template <typename Writer>
|
template <typename Writer>
|
||||||
static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && write_nested)
|
static void serializeTextImpl(const IColumn & column, size_t row_num, const FormatSettings & settings, WriteBuffer & ostr, Writer && write_nested)
|
||||||
{
|
{
|
||||||
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
|
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
|
||||||
const ColumnArray::Offsets & offsets = column_array.getOffsets();
|
const ColumnArray::Offsets & offsets = column_array.getOffsets();
|
||||||
@ -412,10 +412,14 @@ static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffe
|
|||||||
const IColumn & nested_column = column_array.getData();
|
const IColumn & nested_column = column_array.getData();
|
||||||
|
|
||||||
writeChar('[', ostr);
|
writeChar('[', ostr);
|
||||||
for (size_t i = offset; i < next_offset; ++i)
|
|
||||||
|
if (next_offset != offset)
|
||||||
|
write_nested(nested_column, offset);
|
||||||
|
for (size_t i = offset + 1; i < next_offset; ++i)
|
||||||
{
|
{
|
||||||
if (i != offset)
|
writeChar(',', ostr);
|
||||||
writeChar(',', ostr);
|
if (settings.composed_data_type_output_format_mode == "spark")
|
||||||
|
writeChar(' ', ostr);
|
||||||
write_nested(nested_column, i);
|
write_nested(nested_column, i);
|
||||||
}
|
}
|
||||||
writeChar(']', ostr);
|
writeChar(']', ostr);
|
||||||
@ -520,10 +524,13 @@ static ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reade
|
|||||||
|
|
||||||
void SerializationArray::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
void SerializationArray::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||||
{
|
{
|
||||||
serializeTextImpl(column, row_num, ostr,
|
serializeTextImpl(column, row_num, settings, ostr,
|
||||||
[&](const IColumn & nested_column, size_t i)
|
[&](const IColumn & nested_column, size_t i)
|
||||||
{
|
{
|
||||||
nested->serializeTextQuoted(nested_column, i, ostr, settings);
|
if (settings.composed_data_type_output_format_mode == "spark")
|
||||||
|
nested->serializeText(nested_column, i, ostr, settings);
|
||||||
|
else
|
||||||
|
nested->serializeTextQuoted(nested_column, i, ostr, settings);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,6 +90,7 @@ template <typename KeyWriter, typename ValueWriter>
|
|||||||
void SerializationMap::serializeTextImpl(
|
void SerializationMap::serializeTextImpl(
|
||||||
const IColumn & column,
|
const IColumn & column,
|
||||||
size_t row_num,
|
size_t row_num,
|
||||||
|
const FormatSettings & settings,
|
||||||
WriteBuffer & ostr,
|
WriteBuffer & ostr,
|
||||||
KeyWriter && key_writer,
|
KeyWriter && key_writer,
|
||||||
ValueWriter && value_writer) const
|
ValueWriter && value_writer) const
|
||||||
@ -104,15 +105,31 @@ void SerializationMap::serializeTextImpl(
|
|||||||
size_t next_offset = offsets[row_num];
|
size_t next_offset = offsets[row_num];
|
||||||
|
|
||||||
writeChar('{', ostr);
|
writeChar('{', ostr);
|
||||||
for (size_t i = offset; i < next_offset; ++i)
|
if (offset != next_offset)
|
||||||
{
|
{
|
||||||
if (i != offset)
|
key_writer(ostr, key, nested_tuple.getColumn(0), offset);
|
||||||
writeChar(',', ostr);
|
if (settings.composed_data_type_output_format_mode == "spark")
|
||||||
|
writeString(std::string_view(" -> "), ostr);
|
||||||
key_writer(ostr, key, nested_tuple.getColumn(0), i);
|
else
|
||||||
writeChar(':', ostr);
|
writeChar(':', ostr);
|
||||||
value_writer(ostr, value, nested_tuple.getColumn(1), i);
|
value_writer(ostr, value, nested_tuple.getColumn(1), offset);
|
||||||
}
|
}
|
||||||
|
if (settings.composed_data_type_output_format_mode == "spark")
|
||||||
|
for (size_t i = offset + 1; i < next_offset; ++i)
|
||||||
|
{
|
||||||
|
writeString(std::string_view(", "), ostr);
|
||||||
|
key_writer(ostr, key, nested_tuple.getColumn(0), i);
|
||||||
|
writeString(std::string_view(" -> "), ostr);
|
||||||
|
value_writer(ostr, value, nested_tuple.getColumn(1), i);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
for (size_t i = offset + 1; i < next_offset; ++i)
|
||||||
|
{
|
||||||
|
writeChar(',', ostr);
|
||||||
|
key_writer(ostr, key, nested_tuple.getColumn(0), i);
|
||||||
|
writeChar(':', ostr);
|
||||||
|
value_writer(ostr, value, nested_tuple.getColumn(1), i);
|
||||||
|
}
|
||||||
writeChar('}', ostr);
|
writeChar('}', ostr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -221,10 +238,13 @@ void SerializationMap::serializeText(const IColumn & column, size_t row_num, Wri
|
|||||||
{
|
{
|
||||||
auto writer = [&settings](WriteBuffer & buf, const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
|
auto writer = [&settings](WriteBuffer & buf, const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
|
||||||
{
|
{
|
||||||
subcolumn_serialization->serializeTextQuoted(subcolumn, pos, buf, settings);
|
if (settings.composed_data_type_output_format_mode == "spark")
|
||||||
|
subcolumn_serialization->serializeText(subcolumn, pos, buf, settings);
|
||||||
|
else
|
||||||
|
subcolumn_serialization->serializeTextQuoted(subcolumn, pos, buf, settings);
|
||||||
};
|
};
|
||||||
|
|
||||||
serializeTextImpl(column, row_num, ostr, writer, writer);
|
serializeTextImpl(column, row_num, settings, ostr, writer, writer);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
|
void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
|
||||||
@ -266,7 +286,7 @@ bool SerializationMap::tryDeserializeText(IColumn & column, ReadBuffer & istr, c
|
|||||||
|
|
||||||
void SerializationMap::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
void SerializationMap::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||||
{
|
{
|
||||||
serializeTextImpl(column, row_num, ostr,
|
serializeTextImpl(column, row_num, settings, ostr,
|
||||||
[&settings](WriteBuffer & buf, const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
|
[&settings](WriteBuffer & buf, const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
|
||||||
{
|
{
|
||||||
/// We need to double-quote all keys (including integers) to produce valid JSON.
|
/// We need to double-quote all keys (including integers) to produce valid JSON.
|
||||||
|
@ -70,7 +70,7 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename KeyWriter, typename ValueWriter>
|
template <typename KeyWriter, typename ValueWriter>
|
||||||
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const;
|
void serializeTextImpl(const IColumn & column, size_t row_num, const FormatSettings & settings, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const;
|
||||||
|
|
||||||
template <typename ReturnType = void, typename Reader>
|
template <typename ReturnType = void, typename Reader>
|
||||||
ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;
|
ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;
|
||||||
|
@ -137,12 +137,25 @@ void SerializationTuple::deserializeBinary(IColumn & column, ReadBuffer & istr,
|
|||||||
void SerializationTuple::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
void SerializationTuple::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||||
{
|
{
|
||||||
writeChar('(', ostr);
|
writeChar('(', ostr);
|
||||||
for (size_t i = 0; i < elems.size(); ++i)
|
if (!elems.empty())
|
||||||
{
|
{
|
||||||
if (i != 0)
|
if (settings.composed_data_type_output_format_mode == "spark")
|
||||||
writeChar(',', ostr);
|
elems[0]->serializeText(extractElementColumn(column, 0), row_num, ostr, settings);
|
||||||
elems[i]->serializeTextQuoted(extractElementColumn(column, i), row_num, ostr, settings);
|
else
|
||||||
|
elems[0]->serializeTextQuoted(extractElementColumn(column, 0), row_num, ostr, settings);
|
||||||
}
|
}
|
||||||
|
if (settings.composed_data_type_output_format_mode == "spark")
|
||||||
|
for (size_t i = 1; i < elems.size(); ++i)
|
||||||
|
{
|
||||||
|
writeString(std::string_view(", "), ostr);
|
||||||
|
elems[i]->serializeText(extractElementColumn(column, i), row_num, ostr, settings);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
for (size_t i = 1; i < elems.size(); ++i)
|
||||||
|
{
|
||||||
|
writeChar(',', ostr);
|
||||||
|
elems[i]->serializeTextQuoted(extractElementColumn(column, i), row_num, ostr, settings);
|
||||||
|
}
|
||||||
writeChar(')', ostr);
|
writeChar(')', ostr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -251,6 +251,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
|
|||||||
format_settings.values.deduce_templates_of_expressions = settings[Setting::input_format_values_deduce_templates_of_expressions];
|
format_settings.values.deduce_templates_of_expressions = settings[Setting::input_format_values_deduce_templates_of_expressions];
|
||||||
format_settings.values.interpret_expressions = settings[Setting::input_format_values_interpret_expressions];
|
format_settings.values.interpret_expressions = settings[Setting::input_format_values_interpret_expressions];
|
||||||
format_settings.values.escape_quote_with_quote = settings[Setting::output_format_values_escape_quote_with_quote];
|
format_settings.values.escape_quote_with_quote = settings[Setting::output_format_values_escape_quote_with_quote];
|
||||||
|
format_settings.composed_data_type_output_format_mode = settings[Setting::composed_data_type_output_format_mode];
|
||||||
format_settings.with_names_use_header = settings[Setting::input_format_with_names_use_header];
|
format_settings.with_names_use_header = settings[Setting::input_format_with_names_use_header];
|
||||||
format_settings.with_types_use_header = settings[Setting::input_format_with_types_use_header];
|
format_settings.with_types_use_header = settings[Setting::input_format_with_types_use_header];
|
||||||
format_settings.write_statistics = settings[Setting::output_format_write_statistics];
|
format_settings.write_statistics = settings[Setting::output_format_write_statistics];
|
||||||
|
@ -38,6 +38,7 @@ struct FormatSettings
|
|||||||
bool try_infer_variant = false;
|
bool try_infer_variant = false;
|
||||||
|
|
||||||
bool seekable_read = true;
|
bool seekable_read = true;
|
||||||
|
String composed_data_type_output_format_mode = "default";
|
||||||
UInt64 max_rows_to_read_for_schema_inference = 25000;
|
UInt64 max_rows_to_read_for_schema_inference = 25000;
|
||||||
UInt64 max_bytes_to_read_for_schema_inference = 32 * 1024 * 1024;
|
UInt64 max_bytes_to_read_for_schema_inference = 32 * 1024 * 1024;
|
||||||
|
|
||||||
|
@ -0,0 +1,16 @@
|
|||||||
|
-- array format --
|
||||||
|
[\'1\']
|
||||||
|
[1, 2, abc, \'1\']
|
||||||
|
[1, 2, abc, \'1\']
|
||||||
|
[1, 2, abc, \'1\']
|
||||||
|
[1, 2, abc, \'1\']
|
||||||
|
-- map format --
|
||||||
|
{1343 -> fe, afe -> fefe}
|
||||||
|
{1343 -> fe, afe -> fefe}
|
||||||
|
{1343 -> fe, afe -> fefe}
|
||||||
|
{1343 -> fe, afe -> fefe}
|
||||||
|
-- tuple format --
|
||||||
|
(1, 3, abc)
|
||||||
|
(1, 3, abc)
|
||||||
|
(1, 3, abc)
|
||||||
|
(1, 3, abc)
|
18
tests/queries/0_stateless/03259_to_string_spark_format.sql
Normal file
18
tests/queries/0_stateless/03259_to_string_spark_format.sql
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
SELECT '-- array format --';
|
||||||
|
SELECT CAST(array('\'1\'') , 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||||
|
SELECT CAST([materialize('1'), '2', 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||||
|
SELECT CAST([materialize('1'), materialize('2'), 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||||
|
SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||||
|
SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), materialize('\'1\'')], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||||
|
|
||||||
|
SELECT '-- map format --';
|
||||||
|
SELECT toString(map('1343', 'fe', 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||||
|
SELECT toString(map(materialize('1343'), materialize('fe'), 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||||
|
SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||||
|
SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), materialize('fefe'))) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||||
|
|
||||||
|
SELECT '-- tuple format --';
|
||||||
|
SELECT toString(('1', '3', 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||||
|
SELECT toString((materialize('1'), '3', 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||||
|
SELECT toString((materialize('1'), materialize('3'), 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark';
|
||||||
|
SELECT toString((materialize('1'), materialize('3'), materialize('abc'))) SETTINGS composed_data_type_output_format_mode = 'spark';
|
Loading…
Reference in New Issue
Block a user