mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Write rows as JSON array in JSONEachRow output format
This commit is contained in:
parent
ffb83155d7
commit
8cde88440b
@ -441,6 +441,7 @@ class IColumn;
|
||||
M(Bool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \
|
||||
\
|
||||
M(Bool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \
|
||||
M(Bool, output_format_json_array_of_rows, false, "Output a JSON array of all rows in JSONEachRow(Compact) format.", 0) \
|
||||
\
|
||||
M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \
|
||||
M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \
|
||||
@ -517,7 +518,7 @@ struct Settings : public BaseSettings<SettingsTraits>
|
||||
};
|
||||
|
||||
/*
|
||||
* User-specified file format settings for File and ULR engines.
|
||||
* User-specified file format settings for File and URL engines.
|
||||
*/
|
||||
DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS)
|
||||
|
||||
|
@ -78,6 +78,7 @@ FormatSettings getFormatSettings(const Context & context,
|
||||
format_settings.import_nested_json = settings.input_format_import_nested_json;
|
||||
format_settings.input_allow_errors_num = settings.input_format_allow_errors_num;
|
||||
format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;
|
||||
format_settings.json.array_of_rows = settings.output_format_json_array_of_rows;
|
||||
format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes;
|
||||
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
|
||||
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
|
||||
|
@ -86,6 +86,7 @@ struct FormatSettings
|
||||
|
||||
struct
|
||||
{
|
||||
bool array_of_rows = false;
|
||||
bool quote_64bit_integers = true;
|
||||
bool quote_denormals = true;
|
||||
bool escape_forward_slashes = true;
|
||||
|
@ -12,9 +12,9 @@ JSONEachRowRowOutputFormat::JSONEachRowRowOutputFormat(
|
||||
WriteBuffer & out_,
|
||||
const Block & header_,
|
||||
const RowOutputFormatParams & params_,
|
||||
const FormatSettings & settings_,
|
||||
bool yield_strings_)
|
||||
: IRowOutputFormat(header_, out_, params_), settings(settings_), yield_strings(yield_strings_)
|
||||
const FormatSettings & settings_)
|
||||
: IRowOutputFormat(header_, out_, params_),
|
||||
settings(settings_)
|
||||
{
|
||||
const auto & sample = getPort(PortKind::Main).getHeader();
|
||||
size_t columns = sample.columns();
|
||||
@ -33,7 +33,7 @@ void JSONEachRowRowOutputFormat::writeField(const IColumn & column, const IDataT
|
||||
writeString(fields[field_number], out);
|
||||
writeChar(':', out);
|
||||
|
||||
if (yield_strings)
|
||||
if (settings.json.serialize_as_strings)
|
||||
{
|
||||
WriteBufferFromOwnString buf;
|
||||
|
||||
@ -61,29 +61,94 @@ void JSONEachRowRowOutputFormat::writeRowStartDelimiter()
|
||||
|
||||
void JSONEachRowRowOutputFormat::writeRowEndDelimiter()
|
||||
{
|
||||
writeCString("}\n", out);
|
||||
// Why do we need this weird `if`?
|
||||
//
|
||||
// The reason is the formatRow function that is broken with respect to
|
||||
// row-between delimiters. It should not write them, but it does, and then
|
||||
// hacks around it by having a special formatRowNoNewline version, which, as
|
||||
// you guessed, removes the newline from the end of row. But the row-between
|
||||
// delimiter goes into a second row, so it turns out to be in the beginning
|
||||
// of the line, and the removal doesn't work. There is also a second bug --
|
||||
// the row-between delimiter in this format is written incorrectly. In fact,
|
||||
// it is not written at all, and the newline is written in a row-end
|
||||
// delimiter ("}\n" instead of the correct "}"). With these two bugs
|
||||
// combined, the test 01420_format_row works perfectly.
|
||||
//
|
||||
// A proper implementation of formatRow would use IRowOutputFormat directly,
|
||||
// and not write row-between delimiters, instead of using IOutputFormat
|
||||
// processor and its crutch row callback. This would require exposing
|
||||
// IRowOutputFormat, which we don't do now, but which can be generally useful
|
||||
// for other cases such as parallel formatting, that also require a control
|
||||
// flow different from the usual IOutputFormat.
|
||||
//
|
||||
// I just don't have time or energy to redo all of this, but I need to
|
||||
// support JSON array output here, which requires proper ",\n" row-between
|
||||
// delimiters. For compatibility, I preserve the bug in case of non-array
|
||||
// output.
|
||||
if (settings.json.array_of_rows)
|
||||
{
|
||||
writeCString("}", out);
|
||||
}
|
||||
else
|
||||
{
|
||||
writeCString("}\n", out);
|
||||
}
|
||||
field_number = 0;
|
||||
}
|
||||
|
||||
|
||||
void JSONEachRowRowOutputFormat::writeRowBetweenDelimiter()
|
||||
{
|
||||
// We preserve an existing bug here for compatibility. See the comment above.
|
||||
if (settings.json.array_of_rows)
|
||||
{
|
||||
writeCString(",\n", out);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void JSONEachRowRowOutputFormat::writePrefix()
|
||||
{
|
||||
if (settings.json.array_of_rows)
|
||||
{
|
||||
writeCString("[\n", out);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void JSONEachRowRowOutputFormat::writeSuffix()
|
||||
{
|
||||
if (settings.json.array_of_rows)
|
||||
{
|
||||
writeCString("\n]\n", out);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Registers the "JSONEachRow" and "JSONStringsEachRow" output formats in the factory.
/// Both are backed by JSONEachRowRowOutputFormat and differ only in the
/// json.serialize_as_strings flag that is forced on a private copy of the settings.
void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory)
{
    factory.registerOutputFormatProcessor("JSONEachRow", [](
        WriteBuffer & buf,
        const Block & sample,
        const RowOutputFormatParams & params,
        const FormatSettings & _format_settings)
    {
        // Copy the settings so the flag can be overridden for this format only.
        FormatSettings settings = _format_settings;
        settings.json.serialize_as_strings = false;
        return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params,
            settings);
    });

    factory.registerOutputFormatProcessor("JSONStringsEachRow", [](
        WriteBuffer & buf,
        const Block & sample,
        const RowOutputFormatParams & params,
        const FormatSettings & _format_settings)
    {
        // Same output class, but every value is serialized as a string.
        FormatSettings settings = _format_settings;
        settings.json.serialize_as_strings = true;
        return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params,
            settings);
    });
}
|
||||
|
||||
|
@ -19,8 +19,7 @@ public:
|
||||
WriteBuffer & out_,
|
||||
const Block & header_,
|
||||
const RowOutputFormatParams & params_,
|
||||
const FormatSettings & settings_,
|
||||
bool yield_strings_);
|
||||
const FormatSettings & settings_);
|
||||
|
||||
String getName() const override { return "JSONEachRowRowOutputFormat"; }
|
||||
|
||||
@ -28,6 +27,9 @@ public:
|
||||
void writeFieldDelimiter() override;
|
||||
void writeRowStartDelimiter() override;
|
||||
void writeRowEndDelimiter() override;
|
||||
void writeRowBetweenDelimiter() override;
|
||||
void writePrefix() override;
|
||||
void writeSuffix() override;
|
||||
|
||||
protected:
|
||||
/// No totals and extremes.
|
||||
@ -40,9 +42,6 @@ private:
|
||||
Names fields;
|
||||
|
||||
FormatSettings settings;
|
||||
|
||||
protected:
|
||||
bool yield_strings;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -34,18 +34,24 @@ void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factor
|
||||
WriteBuffer & buf,
|
||||
const Block & sample,
|
||||
const RowOutputFormatParams & params,
|
||||
const FormatSettings & format_settings)
|
||||
const FormatSettings & _format_settings)
|
||||
{
|
||||
return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf, sample, params, format_settings, false);
|
||||
FormatSettings settings = _format_settings;
|
||||
settings.json.serialize_as_strings = false;
|
||||
return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf,
|
||||
sample, params, settings);
|
||||
});
|
||||
|
||||
factory.registerOutputFormatProcessor("JSONStringsEachRowWithProgress", [](
|
||||
WriteBuffer & buf,
|
||||
const Block & sample,
|
||||
const RowOutputFormatParams & params,
|
||||
const FormatSettings & format_settings)
|
||||
const FormatSettings & _format_settings)
|
||||
{
|
||||
return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf, sample, params, format_settings, true);
|
||||
FormatSettings settings = _format_settings;
|
||||
settings.json.serialize_as_strings = true;
|
||||
return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf,
|
||||
sample, params, settings);
|
||||
});
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user