Merge pull request #17175 from ClickHouse/aku/named-tuple

Allow formatting named tuples as JSON objects
This commit is contained in:
Alexander Kuzmenkov 2020-11-20 12:48:24 +03:00 committed by GitHub
commit bde805cb3e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 122 additions and 31 deletions

View File

@ -441,6 +441,7 @@ class IColumn;
M(Bool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \
\
M(Bool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \
M(Bool, output_format_json_named_tuples_as_objects, false, "Serialize named tuple columns as JSON objects.", 0) \
M(Bool, output_format_json_array_of_rows, false, "Output a JSON array of all rows in JSONEachRow(Compact) format.", 0) \
\
M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \

View File

@ -25,12 +25,13 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int EMPTY_DATA_PASSED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int DUPLICATE_COLUMN;
extern const int BAD_ARGUMENTS;
extern const int DUPLICATE_COLUMN;
extern const int EMPTY_DATA_PASSED;
extern const int LOGICAL_ERROR;
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
}
@ -145,6 +146,20 @@ static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl)
try
{
impl();
// Check that all columns now have the same size.
size_t new_size = column.size();
for (auto i : ext::range(1, ext::size(elems)))
{
const auto & element_column = extractElementColumn(column, i);
if (element_column.size() != new_size)
{
// This is not a logical error: the size mismatch can be caused by
// user-supplied data rather than by a bug in the code.
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH,
"Cannot read a tuple because not all elements are present");
}
}
}
catch (...)
{
@ -213,37 +228,93 @@ void DataTypeTuple::deserializeText(IColumn & column, ReadBuffer & istr, const F
void DataTypeTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('[', ostr);
for (const auto i : ext::range(0, ext::size(elems)))
if (settings.json.named_tuples_as_objects
&& have_explicit_names)
{
if (i != 0)
writeChar(',', ostr);
elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings);
writeChar('{', ostr);
for (const auto i : ext::range(0, ext::size(elems)))
{
if (i != 0)
{
writeChar(',', ostr);
}
writeJSONString(names[i], ostr, settings);
writeChar(':', ostr);
elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings);
}
writeChar('}', ostr);
}
else
{
writeChar('[', ostr);
for (const auto i : ext::range(0, ext::size(elems)))
{
if (i != 0)
writeChar(',', ostr);
elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings);
}
writeChar(']', ostr);
}
writeChar(']', ostr);
}
void DataTypeTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
const size_t size = elems.size();
assertChar('[', istr);
addElementSafe(elems, column, [&]
if (settings.json.named_tuples_as_objects
&& have_explicit_names)
{
for (const auto i : ext::range(0, size))
{
skipWhitespaceIfAny(istr);
if (i != 0)
{
assertChar(',', istr);
skipWhitespaceIfAny(istr);
}
elems[i]->deserializeAsTextJSON(extractElementColumn(column, i), istr, settings);
}
});
skipWhitespaceIfAny(istr);
assertChar('{', istr);
skipWhitespaceIfAny(istr);
skipWhitespaceIfAny(istr);
assertChar(']', istr);
addElementSafe(elems, column, [&]
{
// Require all elements but in arbitrary order.
for (auto i : ext::range(0, ext::size(elems)))
{
if (i > 0)
{
skipWhitespaceIfAny(istr);
assertChar(',', istr);
skipWhitespaceIfAny(istr);
}
std::string name;
readDoubleQuotedString(name, istr);
skipWhitespaceIfAny(istr);
assertChar(':', istr);
skipWhitespaceIfAny(istr);
const size_t element_pos = getPositionByName(name);
auto & element_column = extractElementColumn(column, element_pos);
elems[element_pos]->deserializeAsTextJSON(element_column, istr, settings);
}
});
skipWhitespaceIfAny(istr);
assertChar('}', istr);
}
else
{
const size_t size = elems.size();
assertChar('[', istr);
addElementSafe(elems, column, [&]
{
for (const auto i : ext::range(0, size))
{
skipWhitespaceIfAny(istr);
if (i != 0)
{
assertChar(',', istr);
skipWhitespaceIfAny(istr);
}
elems[i]->deserializeAsTextJSON(extractElementColumn(column, i), istr, settings);
}
});
skipWhitespaceIfAny(istr);
assertChar(']', istr);
}
}
void DataTypeTuple::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const

View File

@ -80,6 +80,7 @@ FormatSettings getFormatSettings(const Context & context,
format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;
format_settings.json.array_of_rows = settings.output_format_json_array_of_rows;
format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes;
format_settings.json.named_tuples_as_objects = settings.output_format_json_named_tuples_as_objects;
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
format_settings.null_as_default = settings.input_format_null_as_default;

View File

@ -90,6 +90,7 @@ struct FormatSettings
bool quote_64bit_integers = true;
bool quote_denormals = true;
bool escape_forward_slashes = true;
bool named_tuples_as_objects = false;
bool serialize_as_strings = false;
} json;

View File

@ -66,9 +66,13 @@ public:
names.emplace_back(argument.name);
}
/// Create named tuple if possible.
/// Create named tuple if possible. We don't print tuple element names
/// because they are bad anyway -- aliases are not used, e.g. tuple(1 a)
/// will have element name '1' and not 'a'. If we ever change this, and
/// add the ability to access tuple elements by name, like tuple(1 a).a,
/// we should probably enable printing for better discoverability.
if (DataTypeTuple::canBeCreatedWithNames(names))
return std::make_shared<DataTypeTuple>(types, names, false);
return std::make_shared<DataTypeTuple>(types, names, false /*print names*/);
return std::make_shared<DataTypeTuple>(types);
}

View File

@ -493,8 +493,12 @@ template <char quote, bool enable_sql_style_quoting, typename Vector>
static void readAnyQuotedStringInto(Vector & s, ReadBuffer & buf)
{
if (buf.eof() || *buf.position() != quote)
throw Exception("Cannot parse quoted string: expected opening quote",
ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
{
throw Exception(ErrorCodes::CANNOT_PARSE_QUOTED_STRING,
"Cannot parse quoted string: expected opening quote '{}', got '{}'",
std::string{quote}, buf.eof() ? "EOF" : std::string{*buf.position()});
}
++buf.position();
while (!buf.eof())

View File

@ -0,0 +1,3 @@
{"c":{"a":0,"b":0}}
{"c":{"a":1,"b":2}}
{"c":{"a":2,"b":4}}

View File

@ -0,0 +1,6 @@
create table named_tuples engine File(JSONEachRow)
settings output_format_json_named_tuples_as_objects = 1
as select cast(tuple(number, number * 2), 'Tuple(a int, b int)') c
from numbers(3);
select * from named_tuples format JSONEachRow settings output_format_json_named_tuples_as_objects = 1;