From f413ccd75912b8d9abeec0f3ebfbca659bdd972a Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Wed, 23 Oct 2024 16:35:57 +0800 Subject: [PATCH 01/10] cast support spark text output --- src/Core/FormatFactorySettingsDeclaration.h | 3 ++ .../Serializations/SerializationArray.cpp | 19 ++++++--- .../Serializations/SerializationMap.cpp | 41 +++++++++++++++---- .../Serializations/SerializationMap.h | 3 +- .../Serializations/SerializationTuple.cpp | 18 ++++++-- src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + .../03259_to_string_spark_format.reference | 16 ++++++++ .../03259_to_string_spark_format.sql | 18 ++++++++ 9 files changed, 103 insertions(+), 17 deletions(-) create mode 100644 tests/queries/0_stateless/03259_to_string_spark_format.reference create mode 100644 tests/queries/0_stateless/03259_to_string_spark_format.sql diff --git a/src/Core/FormatFactorySettingsDeclaration.h b/src/Core/FormatFactorySettingsDeclaration.h index 28fae8e305f..9b4b89c9b78 100644 --- a/src/Core/FormatFactorySettingsDeclaration.h +++ b/src/Core/FormatFactorySettingsDeclaration.h @@ -1230,6 +1230,9 @@ Set the quoting rule for identifiers in SHOW CREATE query )", 0) \ M(IdentifierQuotingStyle, show_create_query_identifier_quoting_style, IdentifierQuotingStyle::Backticks, R"( Set the quoting style for identifiers in SHOW CREATE query +)", 0) \ + M(Bool, spark_text_output_format, false, R"( +If true, use Apache Spark output format )", 0) \ // End of FORMAT_FACTORY_SETTINGS diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index b7c002c95fc..a1bdfa5e68a 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -401,7 +401,7 @@ void SerializationArray::deserializeBinaryBulkWithMultipleStreams( template -static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && write_nested) +static void serializeTextImpl(const IColumn & column, size_t row_num, const FormatSettings & settings, WriteBuffer & ostr, Writer && write_nested) { const ColumnArray & column_array = assert_cast(column); const ColumnArray::Offsets & offsets = column_array.getOffsets(); @@ -412,10 +412,14 @@ static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffe const IColumn & nested_column = column_array.getData(); writeChar('[', ostr); - for (size_t i = offset; i < next_offset; ++i) + + if (next_offset != offset) + write_nested(nested_column, offset); + for (size_t i = offset + 1; i < next_offset; ++i) { - if (i != offset) - writeChar(',', ostr); + writeChar(',', ostr); + if (settings.spark_text_output_format) + writeChar(' ', ostr); write_nested(nested_column, i); } writeChar(']', ostr); @@ -520,10 +524,13 @@ static ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reade void SerializationArray::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - serializeTextImpl(column, row_num, ostr, + serializeTextImpl(column, row_num, settings, ostr, [&](const IColumn & nested_column, size_t i) { - nested->serializeTextQuoted(nested_column, i, ostr, settings); + if (settings.spark_text_output_format) + nested->serializeText(nested_column, i, ostr, settings); + else + nested->serializeTextQuoted(nested_column, i, ostr, settings); }); } diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 6538589e4f8..b7140de2893 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -90,6 +90,7 @@ template void SerializationMap::serializeTextImpl( const IColumn & column, size_t row_num, + const FormatSettings & settings, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const @@ -104,13 +105,36 @@ void SerializationMap::serializeTextImpl( size_t next_offset = offsets[row_num]; writeChar('{', ostr); - for (size_t i = offset; i < next_offset; ++i) + if (offset != next_offset) { - if (i != offset) - writeChar(',', ostr); + key_writer(ostr, key, nested_tuple.getColumn(0), offset); + if (settings.spark_text_output_format) + { + writeChar(' ', ostr); + writeChar('-', ostr); + writeChar('>', ostr); + writeChar(' ', ostr); + } + else + writeChar(':', ostr); + value_writer(ostr, value, nested_tuple.getColumn(1), offset); + } + for (size_t i = offset + 1; i < next_offset; ++i) + { + writeChar(',', ostr); + if (settings.spark_text_output_format) + writeChar(' ', ostr); key_writer(ostr, key, nested_tuple.getColumn(0), i); - writeChar(':', ostr); + if (settings.spark_text_output_format) + { + writeChar(' ', ostr); + writeChar('-', ostr); + writeChar('>', ostr); + writeChar(' ', ostr); + } + else + writeChar(':', ostr); value_writer(ostr, value, nested_tuple.getColumn(1), i); } writeChar('}', ostr); @@ -221,10 +245,13 @@ void SerializationMap::serializeText(const IColumn & column, size_t row_num, Wri { auto writer = [&settings](WriteBuffer & buf, const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos) { - subcolumn_serialization->serializeTextQuoted(subcolumn, pos, buf, settings); + if (settings.spark_text_output_format) + subcolumn_serialization->serializeText(subcolumn, pos, buf, settings); + else + subcolumn_serialization->serializeTextQuoted(subcolumn, pos, buf, settings); }; - serializeTextImpl(column, row_num, ostr, writer, writer); + serializeTextImpl(column, row_num, settings, ostr, writer, writer); } void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const @@ -266,7 +293,7 @@ bool SerializationMap::tryDeserializeText(IColumn & column, ReadBuffer & istr, c void SerializationMap::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - serializeTextImpl(column, row_num, ostr, + serializeTextImpl(column, row_num, settings, ostr, [&settings](WriteBuffer & buf, const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos) { /// We need to double-quote all keys (including integers) to produce valid JSON. diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index 007d153ec7e..49e31756286 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -1,6 +1,7 @@ #pragma once #include +#include "Formats/FormatSettings.h" namespace DB @@ -70,7 +71,7 @@ public: private: template - void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const; + void serializeTextImpl(const IColumn & column, size_t row_num, const FormatSettings & settings, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const; template ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const; diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 366949e7ac0..6ccb2cfd604 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -137,11 +137,23 @@ void SerializationTuple::deserializeBinary(IColumn & column, ReadBuffer & istr, void SerializationTuple::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('(', ostr); + if (elems.size()) + { + if (settings.spark_text_output_format) + elems[0]->serializeText(extractElementColumn(column, 0), row_num, ostr, settings); + else + elems[0]->serializeTextQuoted(extractElementColumn(column, 0), row_num, ostr, settings); + } for (size_t i = 0; i < elems.size(); ++i) { - if (i != 0) - writeChar(',', ostr); - elems[i]->serializeTextQuoted(extractElementColumn(column, i), row_num, ostr, settings); + writeChar(',', ostr); + if (settings.spark_text_output_format) + { + writeChar(' ', ostr); + elems[i]->serializeText(extractElementColumn(column, i), row_num, ostr, settings); + } + else + elems[i]->serializeTextQuoted(extractElementColumn(column, i), row_num, ostr, settings); } writeChar(')', ostr); } diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index f2142b857cf..38104eebd5b 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -249,6 +249,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.values.deduce_templates_of_expressions = settings[Setting::input_format_values_deduce_templates_of_expressions]; format_settings.values.interpret_expressions = settings[Setting::input_format_values_interpret_expressions]; format_settings.values.escape_quote_with_quote = settings[Setting::output_format_values_escape_quote_with_quote]; + format_settings.spark_text_output_format = settings[Setting::spark_text_output_format]; format_settings.with_names_use_header = settings[Setting::input_format_with_names_use_header]; format_settings.with_types_use_header = settings[Setting::input_format_with_types_use_header]; format_settings.write_statistics = settings[Setting::output_format_write_statistics]; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 8f551f54e7f..a8c5a11944f 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -38,6 +38,7 @@ struct FormatSettings bool try_infer_variant = false; bool seekable_read = true; + bool spark_text_output_format = false; UInt64 max_rows_to_read_for_schema_inference = 25000; UInt64 max_bytes_to_read_for_schema_inference = 32 * 1024 * 1024; diff --git a/tests/queries/0_stateless/03259_to_string_spark_format.reference b/tests/queries/0_stateless/03259_to_string_spark_format.reference new file mode 100644 index 00000000000..b9c59143d24 --- /dev/null +++ b/tests/queries/0_stateless/03259_to_string_spark_format.reference @@ -0,0 +1,16 @@ +-- array format +[\'1\'] +[1, 2, abc, \'1\'] +[1, 2, abc, \'1\'] +[1, 2, abc, \'1\'] +[1, 2, abc, \'1\'] +-- map format +{1343 -> fe, afe -> fefe} +{1343 -> fe, afe -> fefe} +{1343 -> fe, afe -> fefe} +{1343 -> fe, afe -> fefe} +-- tuple format +(1, 1, 3, abc) +(1, 1, 3, abc) +(1, 1, 3, abc) +(1, 1, 3, abc) diff --git a/tests/queries/0_stateless/03259_to_string_spark_format.sql b/tests/queries/0_stateless/03259_to_string_spark_format.sql new file mode 100644 index 00000000000..a202d417a64 --- /dev/null +++ b/tests/queries/0_stateless/03259_to_string_spark_format.sql @@ -0,0 +1,18 @@ +SELECT '-- array format'; +SELECT CAST(array('\'1\'') , 'String') SETTINGS spark_text_output_format=1; +SELECT CAST([materialize('1'), '2', 'abc', '\'1\''], 'String') SETTINGS spark_text_output_format = 1; +SELECT CAST([materialize('1'), materialize('2'), 'abc', '\'1\''], 'String') SETTINGS spark_text_output_format = 1; +SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), '\'1\''], 'String') SETTINGS spark_text_output_format = 1; +SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), materialize('\'1\'')], 'String') SETTINGS spark_text_output_format = 1; + +SELECT '-- map format'; +SELECT toString(map('1343', 'fe', 'afe', 'fefe')) SETTINGS spark_text_output_format = 1; +SELECT toString(map(materialize('1343'), materialize('fe'), 'afe', 'fefe')) SETTINGS spark_text_output_format = 1; +SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), 'fefe')) SETTINGS spark_text_output_format = 1; +SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), materialize('fefe'))) SETTINGS spark_text_output_format = 1; + +SELECT '-- tuple format'; +SELECT toString(('1', '3', 'abc')) SETTINGS spark_text_output_format = 1; +SELECT toString((materialize('1'), '3', 'abc')) SETTINGS spark_text_output_format = 1; +SELECT toString((materialize('1'), materialize('3'), 'abc')) SETTINGS spark_text_output_format = 1; +SELECT toString((materialize('1'), materialize('3'), materialize('abc'))) SETTINGS spark_text_output_format = 1; From d73c302d44b506320baaeb6b386c56a1a74920a4 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Wed, 23 Oct 2024 17:16:41 +0800 Subject: [PATCH 02/10] clean code --- src/DataTypes/Serializations/SerializationMap.cpp | 14 ++------------ src/DataTypes/Serializations/SerializationMap.h | 2 +- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index b7140de2893..737b357a565 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -109,12 +109,7 @@ void SerializationMap::serializeTextImpl( { key_writer(ostr, key, nested_tuple.getColumn(0), offset); if (settings.spark_text_output_format) - { - writeChar(' ', ostr); - writeChar('-', ostr); - writeChar('>', ostr); - writeChar(' ', ostr); - } + writeString(std::string_view(" -> "), ostr); else writeChar(':', ostr); value_writer(ostr, value, nested_tuple.getColumn(1), offset); @@ -127,12 +122,7 @@ void SerializationMap::serializeTextImpl( key_writer(ostr, key, nested_tuple.getColumn(0), i); if (settings.spark_text_output_format) - { - writeChar(' ', ostr); - writeChar('-', ostr); - writeChar('>', ostr); - writeChar(' ', ostr); - } + writeString(std::string_view(" -> "), ostr); else writeChar(':', ostr); value_writer(ostr, value, nested_tuple.getColumn(1), i); diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index 49e31756286..ad6f7808cb6 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -1,7 +1,7 @@ #pragma once #include -#include "Formats/FormatSettings.h" +//#include namespace DB From 24a3d1574d9ce15337e9234dfab2780ada6eba68 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Wed, 23 Oct 2024 17:18:18 +0800 Subject: [PATCH 03/10] delete useless header --- src/DataTypes/Serializations/SerializationMap.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index ad6f7808cb6..74229bcfb15 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -1,7 +1,6 @@ #pragma once #include -//#include namespace DB From 67d2210dd8843e748f15d22b02124801c0cce450 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Thu, 24 Oct 2024 09:30:10 +0800 Subject: [PATCH 04/10] fix tuple index bug and clean code --- .../Serializations/SerializationMap.cpp | 25 +++++++++++-------- .../Serializations/SerializationTuple.cpp | 15 +++++------ .../03259_to_string_spark_format.reference | 8 +++--- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 737b357a565..aa38ef2aab4 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -114,19 +114,22 @@ void SerializationMap::serializeTextImpl( writeChar(':', ostr); value_writer(ostr, value, nested_tuple.getColumn(1), offset); } - for (size_t i = offset + 1; i < next_offset; ++i) - { - writeChar(',', ostr); - if (settings.spark_text_output_format) - writeChar(' ', ostr); - - key_writer(ostr, key, nested_tuple.getColumn(0), i); - if (settings.spark_text_output_format) + if (settings.spark_text_output_format) + for (size_t i = offset + 1; i < next_offset; ++i) + { + writeString(std::string_view(", "), ostr); + key_writer(ostr, key, nested_tuple.getColumn(0), i); writeString(std::string_view(" -> "), ostr); - else + value_writer(ostr, value, nested_tuple.getColumn(1), i); + } + else + for (size_t i = offset + 1; i < next_offset; ++i) + { + writeChar(',', ostr); + key_writer(ostr, key, nested_tuple.getColumn(0), i); writeChar(':', ostr); - value_writer(ostr, value, nested_tuple.getColumn(1), i); - } + value_writer(ostr, value, nested_tuple.getColumn(1), i); + } writeChar('}', ostr); } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 6ccb2cfd604..b2cba0d1b71 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -144,17 +144,18 @@ void SerializationTuple::serializeText(const IColumn & column, size_t row_num, W else elems[0]->serializeTextQuoted(extractElementColumn(column, 0), row_num, ostr, settings); } - for (size_t i = 0; i < elems.size(); ++i) - { - writeChar(',', ostr); - if (settings.spark_text_output_format) + if (settings.spark_text_output_format) + for (size_t i = 1; i < elems.size(); ++i) { - writeChar(' ', ostr); + writeString(std::string_view(", "), ostr); elems[i]->serializeText(extractElementColumn(column, i), row_num, ostr, settings); } - else + else + for (size_t i = 1; i < elems.size(); ++i) + { + writeChar(',', ostr); elems[i]->serializeTextQuoted(extractElementColumn(column, i), row_num, ostr, settings); - } + } writeChar(')', ostr); } diff --git a/tests/queries/0_stateless/03259_to_string_spark_format.reference b/tests/queries/0_stateless/03259_to_string_spark_format.reference index b9c59143d24..14f5b157ca8 100644 --- a/tests/queries/0_stateless/03259_to_string_spark_format.reference +++ b/tests/queries/0_stateless/03259_to_string_spark_format.reference @@ -10,7 +10,7 @@ {1343 -> fe, afe -> fefe} {1343 -> fe, afe -> fefe} -- tuple format -(1, 1, 3, abc) -(1, 1, 3, abc) -(1, 1, 3, abc) -(1, 1, 3, abc) +(1, 3, abc) +(1, 3, abc) +(1, 3, abc) +(1, 3, abc) From 17c88bf5a26f356e883405c58e0d5bcc2091d0b0 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Thu, 24 Oct 2024 11:24:58 +0800 Subject: [PATCH 05/10] add SettingsChangesHistory --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 0cec234b7cd..9406422dcb6 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -98,6 +98,7 @@ static std::initializer_list Date: Thu, 24 Oct 2024 18:18:25 +0800 Subject: [PATCH 06/10] fix debug build fail --- src/DataTypes/Serializations/SerializationTuple.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index b2cba0d1b71..c0eff79037c 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -137,7 +137,7 @@ void SerializationTuple::deserializeBinary(IColumn & column, ReadBuffer & istr, void SerializationTuple::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('(', ostr); - if (elems.size()) + if (!elems.empty()) { if (settings.spark_text_output_format) elems[0]->serializeText(extractElementColumn(column, 0), row_num, ostr, settings); From d592bd147dbc0ab3ad24dcc3d71a920e55ade9db Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Fri, 25 Oct 2024 09:22:59 +0800 Subject: [PATCH 07/10] fix build after refactor --- src/Core/FormatFactorySettings.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Core/FormatFactorySettings.h b/src/Core/FormatFactorySettings.h index a095bffc4c9..208ad66611a 100644 --- a/src/Core/FormatFactorySettings.h +++ b/src/Core/FormatFactorySettings.h @@ -1242,6 +1242,9 @@ Set the quoting rule for identifiers in SHOW CREATE query )", 0) \ DECLARE(IdentifierQuotingStyle, show_create_query_identifier_quoting_style, IdentifierQuotingStyle::Backticks, R"( Set the quoting style for identifiers in SHOW CREATE query +)", 0) \ + DECLARE(Bool, spark_text_output_format, false, R"( +Set whether use Apache Spark output text format for arrays, map and tuples. )", 0) \ // End of FORMAT_FACTORY_SETTINGS From 7dbead934c602f3e6c0ec9da80a726208ec2d639 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Wed, 20 Nov 2024 18:25:41 +0800 Subject: [PATCH 08/10] add composed_data_type_output_format_mode --- src/Core/FormatFactorySettings.h | 4 +-- src/Core/SettingsChangesHistory.cpp | 2 +- .../Serializations/SerializationArray.cpp | 4 +-- .../Serializations/SerializationMap.cpp | 6 ++-- .../Serializations/SerializationTuple.cpp | 4 +-- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- .../03259_to_string_spark_format.sql | 32 +++++++++---------- 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/src/Core/FormatFactorySettings.h b/src/Core/FormatFactorySettings.h index 208ad66611a..45f8d9bb9b6 100644 --- a/src/Core/FormatFactorySettings.h +++ b/src/Core/FormatFactorySettings.h @@ -1243,8 +1243,8 @@ Set the quoting rule for identifiers in SHOW CREATE query DECLARE(IdentifierQuotingStyle, show_create_query_identifier_quoting_style, IdentifierQuotingStyle::Backticks, R"( Set the quoting style for identifiers in SHOW CREATE query )", 0) \ - DECLARE(Bool, spark_text_output_format, false, R"( -Set whether use Apache Spark output text format for arrays, map and tuples. + DECLARE(String, composed_data_type_output_format_mode, "default", R"( +Set composed data type output format mode, default or spark. )", 0) \ // End of FORMAT_FACTORY_SETTINGS diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 0a2ce0e13b1..8a73de3e422 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -112,7 +112,7 @@ static std::initializer_listserializeText(nested_column, i, ostr, settings); else nested->serializeTextQuoted(nested_column, i, ostr, settings); diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index aa38ef2aab4..c51255d1dce 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -108,13 +108,13 @@ void SerializationMap::serializeTextImpl( if (offset != next_offset) { key_writer(ostr, key, nested_tuple.getColumn(0), offset); - if (settings.spark_text_output_format) + if (settings.composed_data_type_output_format_mode == "spark") writeString(std::string_view(" -> "), ostr); else writeChar(':', ostr); value_writer(ostr, value, nested_tuple.getColumn(1), offset); } - if (settings.spark_text_output_format) + if (settings.composed_data_type_output_format_mode == "spark") for (size_t i = offset + 1; i < next_offset; ++i) { writeString(std::string_view(", "), ostr); @@ -238,7 +238,7 @@ void SerializationMap::serializeText(const IColumn & column, size_t row_num, Wri { auto writer = [&settings](WriteBuffer & buf, const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos) { - if (settings.spark_text_output_format) + if (settings.composed_data_type_output_format_mode == "spark") subcolumn_serialization->serializeText(subcolumn, pos, buf, settings); else subcolumn_serialization->serializeTextQuoted(subcolumn, pos, buf, settings); diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index c0eff79037c..ec92ceb9110 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -139,12 +139,12 @@ void SerializationTuple::serializeText(const IColumn & column, size_t row_num, W writeChar('(', ostr); if (!elems.empty()) { - if (settings.spark_text_output_format) + if (settings.composed_data_type_output_format_mode == "spark") elems[0]->serializeText(extractElementColumn(column, 0), row_num, ostr, settings); else elems[0]->serializeTextQuoted(extractElementColumn(column, 0), row_num, ostr, settings); } - if (settings.spark_text_output_format) + if (settings.composed_data_type_output_format_mode == "spark") for (size_t i = 1; i < elems.size(); ++i) { writeString(std::string_view(", "), ostr); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 813a550a124..fa464b9fb7c 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -251,7 +251,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.values.deduce_templates_of_expressions = settings[Setting::input_format_values_deduce_templates_of_expressions]; format_settings.values.interpret_expressions = settings[Setting::input_format_values_interpret_expressions]; format_settings.values.escape_quote_with_quote = settings[Setting::output_format_values_escape_quote_with_quote]; - format_settings.spark_text_output_format = settings[Setting::spark_text_output_format]; + format_settings.composed_data_type_output_format_mode = settings[Setting::composed_data_type_output_format_mode]; format_settings.with_names_use_header = settings[Setting::input_format_with_names_use_header]; format_settings.with_types_use_header = settings[Setting::input_format_with_types_use_header]; format_settings.write_statistics = settings[Setting::output_format_write_statistics]; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index c726b918a19..b81616bf017 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -38,7 +38,7 @@ struct FormatSettings bool try_infer_variant = false; bool seekable_read = true; - bool spark_text_output_format = false; + String composed_data_type_output_format_mode = "default"; UInt64 max_rows_to_read_for_schema_inference = 25000; UInt64 max_bytes_to_read_for_schema_inference = 32 * 1024 * 1024; diff --git a/tests/queries/0_stateless/03259_to_string_spark_format.sql b/tests/queries/0_stateless/03259_to_string_spark_format.sql index a202d417a64..ff9682c7571 100644 --- a/tests/queries/0_stateless/03259_to_string_spark_format.sql +++ b/tests/queries/0_stateless/03259_to_string_spark_format.sql @@ -1,18 +1,18 @@ -SELECT '-- array format'; -SELECT CAST(array('\'1\'') , 'String') SETTINGS spark_text_output_format=1; -SELECT CAST([materialize('1'), '2', 'abc', '\'1\''], 'String') SETTINGS spark_text_output_format = 1; -SELECT CAST([materialize('1'), materialize('2'), 'abc', '\'1\''], 'String') SETTINGS spark_text_output_format = 1; -SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), '\'1\''], 'String') SETTINGS spark_text_output_format = 1; -SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), materialize('\'1\'')], 'String') SETTINGS spark_text_output_format = 1; +SELECT '-- array format --'; +SELECT CAST(array('\'1\'') , 'String') SETTINGS composed_data_type_output_format_mode = "spark"; +SELECT CAST([materialize('1'), '2', 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = "spark"; +SELECT CAST([materialize('1'), materialize('2'), 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = "spark"; +SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = "spark"; +SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), materialize('\'1\'')], 'String') SETTINGS composed_data_type_output_format_mode = "spark"; -SELECT '-- map format'; -SELECT toString(map('1343', 'fe', 'afe', 'fefe')) SETTINGS spark_text_output_format = 1; -SELECT toString(map(materialize('1343'), materialize('fe'), 'afe', 'fefe')) SETTINGS spark_text_output_format = 1; -SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), 'fefe')) SETTINGS spark_text_output_format = 1; -SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), materialize('fefe'))) SETTINGS spark_text_output_format = 1; +SELECT '-- map format --'; +SELECT toString(map('1343', 'fe', 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = "spark"; +SELECT toString(map(materialize('1343'), materialize('fe'), 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = "spark"; +SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), 'fefe')) SETTINGS composed_data_type_output_format_mode = "spark"; +SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), materialize('fefe'))) SETTINGS composed_data_type_output_format_mode = "spark"; -SELECT '-- tuple format'; -SELECT toString(('1', '3', 'abc')) SETTINGS spark_text_output_format = 1; -SELECT toString((materialize('1'), '3', 'abc')) SETTINGS spark_text_output_format = 1; -SELECT toString((materialize('1'), materialize('3'), 'abc')) SETTINGS spark_text_output_format = 1; -SELECT toString((materialize('1'), materialize('3'), materialize('abc'))) SETTINGS spark_text_output_format = 1; +SELECT '-- tuple format --'; +SELECT toString(('1', '3', 'abc')) SETTINGS composed_data_type_output_format_mode = "spark"; +SELECT toString((materialize('1'), '3', 'abc')) SETTINGS composed_data_type_output_format_mode = "spark"; +SELECT toString((materialize('1'), materialize('3'), 'abc')) SETTINGS composed_data_type_output_format_mode = "spark"; +SELECT toString((materialize('1'), materialize('3'), materialize('abc'))) SETTINGS composed_data_type_output_format_mode = "spark"; From de626a88c8b1af2e2bbc97124aaacaadeb1b64c4 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Wed, 20 Nov 2024 18:31:30 +0800 Subject: [PATCH 09/10] fix compile error --- src/Core/SettingsChangesHistory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 6b92d1e8954..d320bf88536 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -137,7 +137,7 @@ static std::initializer_list Date: Thu, 21 Nov 2024 08:50:55 +0800 Subject: [PATCH 10/10] add composed_data_type_output_format_mode --- src/Core/SettingsChangesHistory.cpp | 2 +- .../03259_to_string_spark_format.reference | 6 ++--- .../03259_to_string_spark_format.sql | 26 +++++++++---------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index d320bf88536..a892e077ecd 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -138,7 +138,7 @@ static std::initializer_list fe, afe -> fefe} {1343 -> fe, afe -> fefe} {1343 -> fe, afe -> fefe} {1343 -> fe, afe -> fefe} --- tuple format +-- tuple format -- (1, 3, abc) (1, 3, abc) (1, 3, abc) diff --git a/tests/queries/0_stateless/03259_to_string_spark_format.sql b/tests/queries/0_stateless/03259_to_string_spark_format.sql index ff9682c7571..7a657f803bd 100644 --- a/tests/queries/0_stateless/03259_to_string_spark_format.sql +++ b/tests/queries/0_stateless/03259_to_string_spark_format.sql @@ -1,18 +1,18 @@ SELECT '-- array format --'; -SELECT CAST(array('\'1\'') , 'String') SETTINGS composed_data_type_output_format_mode = "spark"; -SELECT CAST([materialize('1'), '2', 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = "spark"; -SELECT CAST([materialize('1'), materialize('2'), 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = "spark"; -SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = "spark"; -SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), materialize('\'1\'')], 'String') SETTINGS composed_data_type_output_format_mode = "spark"; +SELECT CAST(array('\'1\'') , 'String') SETTINGS composed_data_type_output_format_mode = 'spark'; +SELECT CAST([materialize('1'), '2', 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark'; +SELECT CAST([materialize('1'), materialize('2'), 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark'; +SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark'; +SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), materialize('\'1\'')], 'String') SETTINGS composed_data_type_output_format_mode = 'spark'; SELECT '-- map format --'; -SELECT toString(map('1343', 'fe', 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = "spark"; -SELECT toString(map(materialize('1343'), materialize('fe'), 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = "spark"; -SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), 'fefe')) SETTINGS composed_data_type_output_format_mode = "spark"; -SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), materialize('fefe'))) SETTINGS composed_data_type_output_format_mode = "spark"; +SELECT toString(map('1343', 'fe', 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark'; +SELECT toString(map(materialize('1343'), materialize('fe'), 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark'; +SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark'; +SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), materialize('fefe'))) SETTINGS composed_data_type_output_format_mode = 'spark'; SELECT '-- tuple format --'; -SELECT toString(('1', '3', 'abc')) SETTINGS composed_data_type_output_format_mode = "spark"; -SELECT toString((materialize('1'), '3', 'abc')) SETTINGS composed_data_type_output_format_mode = "spark"; -SELECT toString((materialize('1'), materialize('3'), 'abc')) SETTINGS composed_data_type_output_format_mode = "spark"; -SELECT toString((materialize('1'), materialize('3'), materialize('abc'))) SETTINGS composed_data_type_output_format_mode = "spark"; +SELECT toString(('1', '3', 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark'; +SELECT toString((materialize('1'), '3', 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark'; +SELECT toString((materialize('1'), materialize('3'), 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark'; +SELECT toString((materialize('1'), materialize('3'), materialize('abc'))) SETTINGS composed_data_type_output_format_mode = 'spark';