diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 4045a61de5a..6836a597047 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -441,6 +441,7 @@ class IColumn;
     M(Bool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \
     \
     M(Bool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \
+    M(Bool, output_format_json_named_tuples_as_objects, false, "Serialize named tuple columns as JSON objects.", 0) \
     M(Bool, output_format_json_array_of_rows, false, "Output a JSON array of all rows in JSONEachRow(Compact) format.", 0) \
     \
     M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \
diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp
index 453cb7f37a3..02fc49f7e9a 100644
--- a/src/DataTypes/DataTypeTuple.cpp
+++ b/src/DataTypes/DataTypeTuple.cpp
@@ -25,12 +25,13 @@ namespace DB
 
 namespace ErrorCodes
 {
-    extern const int LOGICAL_ERROR;
-    extern const int EMPTY_DATA_PASSED;
-    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-    extern const int DUPLICATE_COLUMN;
     extern const int BAD_ARGUMENTS;
+    extern const int DUPLICATE_COLUMN;
+    extern const int EMPTY_DATA_PASSED;
+    extern const int LOGICAL_ERROR;
     extern const int NOT_FOUND_COLUMN_IN_BLOCK;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
 }
 
 
@@ -145,6 +146,20 @@ static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl)
     try
     {
         impl();
+
+        // Check that all columns now have the same size.
+        size_t new_size = column.size();
+        for (auto i : ext::range(1, ext::size(elems)))
+        {
+            const auto & element_column = extractElementColumn(column, i);
+            if (element_column.size() != new_size)
+            {
+                // This is not a logical error because it may work with
+                // user-supplied data.
+                throw Exception(ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH,
+                    "Cannot read a tuple because not all elements are present");
+            }
+        }
     }
     catch (...)
     {
@@ -213,37 +228,93 @@ void DataTypeTuple::deserializeText(IColumn & column, ReadBuffer & istr, const F
 
 void DataTypeTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
 {
-    writeChar('[', ostr);
-    for (const auto i : ext::range(0, ext::size(elems)))
+    if (settings.json.named_tuples_as_objects
+        && have_explicit_names)
     {
-        if (i != 0)
-            writeChar(',', ostr);
-        elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings);
+        writeChar('{', ostr);
+        for (const auto i : ext::range(0, ext::size(elems)))
+        {
+            if (i != 0)
+            {
+                writeChar(',', ostr);
+            }
+            writeJSONString(names[i], ostr, settings);
+            writeChar(':', ostr);
+            elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings);
+        }
+        writeChar('}', ostr);
+    }
+    else
+    {
+        writeChar('[', ostr);
+        for (const auto i : ext::range(0, ext::size(elems)))
+        {
+            if (i != 0)
+                writeChar(',', ostr);
+            elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings);
+        }
+        writeChar(']', ostr);
     }
-    writeChar(']', ostr);
 }
 
 void DataTypeTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
 {
-    const size_t size = elems.size();
-    assertChar('[', istr);
-
-    addElementSafe(elems, column, [&]
+    if (settings.json.named_tuples_as_objects
+        && have_explicit_names)
     {
-        for (const auto i : ext::range(0, size))
-        {
-            skipWhitespaceIfAny(istr);
-            if (i != 0)
-            {
-                assertChar(',', istr);
-                skipWhitespaceIfAny(istr);
-            }
-            elems[i]->deserializeAsTextJSON(extractElementColumn(column, i), istr, settings);
-        }
-    });
+        skipWhitespaceIfAny(istr);
+        assertChar('{', istr);
+        skipWhitespaceIfAny(istr);
-
-    skipWhitespaceIfAny(istr);
-    assertChar(']', istr);
+
+        addElementSafe(elems, column, [&]
+        {
+            // Require all elements but in arbitrary order.
+            for (auto i : ext::range(0, ext::size(elems)))
+            {
+                if (i > 0)
+                {
+                    skipWhitespaceIfAny(istr);
+                    assertChar(',', istr);
+                    skipWhitespaceIfAny(istr);
+                }
+
+                std::string name;
+                readDoubleQuotedString(name, istr);
+                skipWhitespaceIfAny(istr);
+                assertChar(':', istr);
+                skipWhitespaceIfAny(istr);
+
+                const size_t element_pos = getPositionByName(name);
+                auto & element_column = extractElementColumn(column, element_pos);
+                elems[element_pos]->deserializeAsTextJSON(element_column, istr, settings);
+            }
+        });
+
+        skipWhitespaceIfAny(istr);
+        assertChar('}', istr);
+    }
+    else
+    {
+        const size_t size = elems.size();
+        assertChar('[', istr);
+
+        addElementSafe(elems, column, [&]
+        {
+            for (const auto i : ext::range(0, size))
+            {
+                skipWhitespaceIfAny(istr);
+                if (i != 0)
+                {
+                    assertChar(',', istr);
+                    skipWhitespaceIfAny(istr);
+                }
+                elems[i]->deserializeAsTextJSON(extractElementColumn(column, i), istr, settings);
+            }
+        });
+
+        skipWhitespaceIfAny(istr);
+        assertChar(']', istr);
+    }
 }
 
 void DataTypeTuple::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index e5337ad72a7..da63151613e 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -80,6 +80,7 @@ FormatSettings getFormatSettings(const Context & context,
     format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;
     format_settings.json.array_of_rows = settings.output_format_json_array_of_rows;
     format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes;
+    format_settings.json.named_tuples_as_objects = settings.output_format_json_named_tuples_as_objects;
     format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
     format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
     format_settings.null_as_default = settings.input_format_null_as_default;
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index ead0900afc7..8fe3756e012 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -90,6 +90,7 @@ struct FormatSettings
         bool quote_64bit_integers = true;
         bool quote_denormals = true;
         bool escape_forward_slashes = true;
+        bool named_tuples_as_objects = false;
         bool serialize_as_strings = false;
     } json;
 
diff --git a/src/Functions/tuple.cpp b/src/Functions/tuple.cpp
index 6d24391ed46..e4cbde6f9d3 100644
--- a/src/Functions/tuple.cpp
+++ b/src/Functions/tuple.cpp
@@ -66,9 +66,13 @@ public:
             names.emplace_back(argument.name);
         }
 
-        /// Create named tuple if possible.
+        /// Create named tuple if possible. We don't print tuple element names
+        /// because they are bad anyway -- aliases are not used, e.g. tuple(1 a)
+        /// will have element name '1' and not 'a'. If we ever change this, and
+        /// add the ability to access tuple elements by name, like tuple(1 a).a,
+        /// we should probably enable printing for better discoverability.
         if (DataTypeTuple::canBeCreatedWithNames(names))
-            return std::make_shared<DataTypeTuple>(types, names, false);
+            return std::make_shared<DataTypeTuple>(types, names, false /*print names*/);
 
         return std::make_shared<DataTypeTuple>(types);
     }
diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp
index 73ee0dfcd95..8232ce0b22d 100644
--- a/src/IO/ReadHelpers.cpp
+++ b/src/IO/ReadHelpers.cpp
@@ -493,8 +493,12 @@ template
 static void readAnyQuotedStringInto(Vector & s, ReadBuffer & buf)
 {
     if (buf.eof() || *buf.position() != quote)
-        throw Exception("Cannot parse quoted string: expected opening quote",
-            ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
+    {
+        throw Exception(ErrorCodes::CANNOT_PARSE_QUOTED_STRING,
+            "Cannot parse quoted string: expected opening quote '{}', got '{}'",
+            std::string{quote}, buf.eof() ? "EOF" : std::string{*buf.position()});
+    }
+
     ++buf.position();
 
     while (!buf.eof())
diff --git a/tests/queries/0_stateless/01499_json_named_tuples.reference b/tests/queries/0_stateless/01499_json_named_tuples.reference
new file mode 100644
index 00000000000..6c7c03aa4a5
--- /dev/null
+++ b/tests/queries/0_stateless/01499_json_named_tuples.reference
@@ -0,0 +1,3 @@
+{"c":{"a":0,"b":0}}
+{"c":{"a":1,"b":2}}
+{"c":{"a":2,"b":4}}
diff --git a/tests/queries/0_stateless/01499_json_named_tuples.sql b/tests/queries/0_stateless/01499_json_named_tuples.sql
new file mode 100644
index 00000000000..77c667f16df
--- /dev/null
+++ b/tests/queries/0_stateless/01499_json_named_tuples.sql
@@ -0,0 +1,6 @@
+create table named_tuples engine File(JSONEachRow)
+    settings output_format_json_named_tuples_as_objects = 1
+    as select cast(tuple(number, number * 2), 'Tuple(a int, b int)') c
+    from numbers(3);
+
+select * from named_tuples format JSONEachRow settings output_format_json_named_tuples_as_objects = 1;