Allow formatting named tuples as JSON objects

This commit is contained in:
Alexander Kuzmenkov 2020-11-18 13:38:30 +03:00
parent 0bc60e2d53
commit f2b3f5f8b6
8 changed files with 123 additions and 32 deletions

View File

@ -441,6 +441,7 @@ class IColumn;
M(Bool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \ M(Bool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \
\ \
M(Bool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \ M(Bool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \
M(Bool, output_format_json_named_tuples_as_objects, false, "Serialize named tuple columns as JSON objects.", 0) \
\ \
M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \ M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \
M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \ M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \
@ -517,7 +518,7 @@ struct Settings : public BaseSettings<SettingsTraits>
}; };
/* /*
* User-specified file format settings for File and ULR engines. * User-specified file format settings for File and URL engines.
*/ */
DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS) DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS)

View File

@ -25,12 +25,13 @@ namespace DB
/// Error codes referenced by the tuple data type implementation.
/// They are only declared here (extern); the definitions live elsewhere.
/// Kept in alphabetical order so that additions are easy to spot in a diff.
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int DUPLICATE_COLUMN;
    extern const int EMPTY_DATA_PASSED;
    extern const int LOGICAL_ERROR;
    extern const int NOT_FOUND_COLUMN_IN_BLOCK;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
}
@ -145,6 +146,20 @@ static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl)
try try
{ {
impl(); impl();
// Check that all columns now have the same size.
size_t new_size = column.size();
for (auto i : ext::range(1, ext::size(elems)))
{
const auto & element_column = extractElementColumn(column, i);
if (element_column.size() != new_size)
{
// This is not a logical error because it may work with
// user-supplied data.
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH,
"Cannot read a tuple because not all elements are present");
}
}
} }
catch (...) catch (...)
{ {
@ -213,37 +228,93 @@ void DataTypeTuple::deserializeText(IColumn & column, ReadBuffer & istr, const F
void DataTypeTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const void DataTypeTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{ {
writeChar('[', ostr); if (settings.json.named_tuples_as_objects
for (const auto i : ext::range(0, ext::size(elems))) && have_explicit_names)
{ {
if (i != 0) writeChar('{', ostr);
writeChar(',', ostr); for (const auto i : ext::range(0, ext::size(elems)))
elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings); {
if (i != 0)
{
writeChar(',', ostr);
}
writeJSONString(names[i], ostr, settings);
writeChar(':', ostr);
elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings);
}
writeChar('}', ostr);
}
else
{
writeChar('[', ostr);
for (const auto i : ext::range(0, ext::size(elems)))
{
if (i != 0)
writeChar(',', ostr);
elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings);
}
writeChar(']', ostr);
} }
writeChar(']', ostr);
} }
/// Reads one tuple value from JSON text in `istr` and appends it to `column`.
///
/// Accepted input shape (selected by the same condition as on output):
///  - a JSON object {"name": value, ...} when settings.json.named_tuples_as_objects
///    is enabled and this tuple type has explicit element names;
///  - a JSON array [value, ...] in element order otherwise.
///
/// Element values are parsed inside addElementSafe, which also verifies that
/// all element columns end up with the same size after the read.
void DataTypeTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    if (settings.json.named_tuples_as_objects
        && have_explicit_names)
    {
        skipWhitespaceIfAny(istr);
        assertChar('{', istr);
        skipWhitespaceIfAny(istr);

        addElementSafe(elems, column, [&]
        {
            // Require all elements but in arbitrary order: exactly
            // ext::size(elems) key/value pairs are read, and each key is
            // mapped to its element position by name. A repeated or missing
            // key leaves element columns with different sizes, which the
            // size check in addElementSafe reports as an error.
            for (auto i : ext::range(0, ext::size(elems)))
            {
                if (i > 0)
                {
                    skipWhitespaceIfAny(istr);
                    assertChar(',', istr);
                    skipWhitespaceIfAny(istr);
                }

                std::string name;
                readDoubleQuotedString(name, istr);
                skipWhitespaceIfAny(istr);
                assertChar(':', istr);
                skipWhitespaceIfAny(istr);

                // NOTE(review): getPositionByName presumably throws for a name
                // that is not an element of this tuple -- confirm.
                const size_t element_pos = getPositionByName(name);
                auto & element_column = extractElementColumn(column, element_pos);
                elems[element_pos]->deserializeAsTextJSON(element_column, istr, settings);
            }
        });

        skipWhitespaceIfAny(istr);
        assertChar('}', istr);
    }
    else
    {
        const size_t size = elems.size();
        assertChar('[', istr);

        addElementSafe(elems, column, [&]
        {
            // Positional form: values must appear in element order.
            for (const auto i : ext::range(0, size))
            {
                skipWhitespaceIfAny(istr);
                if (i != 0)
                {
                    assertChar(',', istr);
                    skipWhitespaceIfAny(istr);
                }
                elems[i]->deserializeAsTextJSON(extractElementColumn(column, i), istr, settings);
            }
        });

        skipWhitespaceIfAny(istr);
        assertChar(']', istr);
    }
}
void DataTypeTuple::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const void DataTypeTuple::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const

View File

@ -79,6 +79,7 @@ FormatSettings getFormatSettings(const Context & context,
format_settings.input_allow_errors_num = settings.input_format_allow_errors_num; format_settings.input_allow_errors_num = settings.input_format_allow_errors_num;
format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio; format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;
format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes; format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes;
format_settings.json.named_tuples_as_objects = settings.output_format_json_named_tuples_as_objects;
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers; format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals; format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
format_settings.null_as_default = settings.input_format_null_as_default; format_settings.null_as_default = settings.input_format_null_as_default;

View File

@ -89,6 +89,7 @@ struct FormatSettings
bool quote_64bit_integers = true; bool quote_64bit_integers = true;
bool quote_denormals = true; bool quote_denormals = true;
bool escape_forward_slashes = true; bool escape_forward_slashes = true;
bool named_tuples_as_objects = false;
bool serialize_as_strings = false; bool serialize_as_strings = false;
} json; } json;

View File

@ -66,9 +66,13 @@ public:
names.emplace_back(argument.name); names.emplace_back(argument.name);
} }
/// Create named tuple if possible. /// Create named tuple if possible. We don't print tuple element names
/// because they are bad anyway -- aliases are not used, e.g. tuple(1 a)
/// will have element name '1' and not 'a'. If we ever change this, and
/// add the ability to access tuple elements by name, like tuple(1 a).a,
/// we should probably enable printing for better discoverability.
if (DataTypeTuple::canBeCreatedWithNames(names)) if (DataTypeTuple::canBeCreatedWithNames(names))
return std::make_shared<DataTypeTuple>(types, names, false); return std::make_shared<DataTypeTuple>(types, names, false /*print names*/);
return std::make_shared<DataTypeTuple>(types); return std::make_shared<DataTypeTuple>(types);
} }

View File

@ -493,8 +493,12 @@ template <char quote, bool enable_sql_style_quoting, typename Vector>
static void readAnyQuotedStringInto(Vector & s, ReadBuffer & buf) static void readAnyQuotedStringInto(Vector & s, ReadBuffer & buf)
{ {
if (buf.eof() || *buf.position() != quote) if (buf.eof() || *buf.position() != quote)
throw Exception("Cannot parse quoted string: expected opening quote", {
ErrorCodes::CANNOT_PARSE_QUOTED_STRING); throw Exception(ErrorCodes::CANNOT_PARSE_QUOTED_STRING,
"Cannot parse quoted string: expected opening quote '{}', got '{}'",
std::string{quote}, buf.eof() ? "EOF" : std::string{*buf.position()});
}
++buf.position(); ++buf.position();
while (!buf.eof()) while (!buf.eof())

View File

@ -0,0 +1,3 @@
{"c":{"a":0,"b":0}}
{"c":{"a":1,"b":2}}
{"c":{"a":2,"b":4}}

View File

@ -0,0 +1,6 @@
-- Checks that named tuples are written as JSON objects when
-- output_format_json_named_tuples_as_objects is enabled, both when the
-- File(JSONEachRow) table is populated and when it is read back.

-- Start from a clean state so the test can be re-run after a failed run.
drop table if exists named_tuples;

create table named_tuples engine File(JSONEachRow)
    settings output_format_json_named_tuples_as_objects = 1
    as select cast(tuple(number, number * 2), 'Tuple(a int, b int)') c
    from numbers(3);

select * from named_tuples format JSONEachRow settings output_format_json_named_tuples_as_objects = 1;

-- Do not leave the table behind for other tests.
drop table named_tuples;