ClickHouse/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp

158 lines
4.7 KiB
C++
Raw Normal View History

2019-02-19 18:41:18 +00:00
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferValidUTF8.h>
#include <Processors/Formats/Impl/JSONEachRowRowOutputFormat.h>
#include <Formats/FormatFactory.h>
namespace DB
{
2020-09-02 04:05:02 +00:00
/// Builds the output format and pre-renders every header column name as a JSON
/// string literal into `fields`, so that writeField() can emit the key of each
/// pair by copying the cached text.
JSONEachRowRowOutputFormat::JSONEachRowRowOutputFormat(
    WriteBuffer & out_,
    const Block & header_,
    const RowOutputFormatParams & params_,
    const FormatSettings & settings_)
    : IRowOutputFormat(header_, out_, params_),
        settings(settings_)
{
    const auto & header = getPort(PortKind::Main).getHeader();
    const size_t num_columns = header.columns();

    fields.resize(num_columns);
    for (size_t pos = 0; pos != num_columns; ++pos)
    {
        const auto & column_name = header.getByPosition(pos).name;

        /// The buffer is scoped to one iteration and writes straight into fields[pos].
        WriteBufferFromString name_buf(fields[pos]);
        writeJSONString(column_name, name_buf, settings);
    }
}
2021-03-09 14:46:52 +00:00
void JSONEachRowRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num)
2019-02-19 18:41:18 +00:00
{
writeString(fields[field_number], out);
writeChar(':', out);
2020-09-02 04:05:02 +00:00
if (settings.json.serialize_as_strings)
2020-09-02 04:05:02 +00:00
{
WriteBufferFromOwnString buf;
2021-03-09 14:46:52 +00:00
serialization.serializeText(column, row_num, buf, settings);
2020-09-02 04:05:02 +00:00
writeJSONString(buf.str(), out, settings);
}
else
2021-03-09 14:46:52 +00:00
serialization.serializeTextJSON(column, row_num, out, settings);
2020-09-02 04:05:02 +00:00
2019-02-19 18:41:18 +00:00
++field_number;
}
/// Writes the separator between two adjacent fields of a row.
void JSONEachRowRowOutputFormat::writeFieldDelimiter()
{
    constexpr char field_separator = ',';
    writeChar(field_separator, out);
}
/// Opens the JSON object that represents one row.
void JSONEachRowRowOutputFormat::writeRowStartDelimiter()
{
    constexpr char object_open = '{';
    writeChar(object_open, out);
}
void JSONEachRowRowOutputFormat::writeRowEndDelimiter()
{
// Why do we need this weird `if`?
//
// The reason is the formatRow function that is broken with respect to
// row-between delimiters. It should not write them, but it does, and then
// hacks around it by having a special formatRowNoNewline version, which, as
// you guessed, removes the newline from the end of row. But the row-between
// delimiter goes into a second row, so it turns out to be in the beginning
// of the line, and the removal doesn't work. There is also a second bug --
// the row-between delimiter in this format is written incorrectly. In fact,
// it is not written at all, and the newline is written in a row-end
// delimiter ("}\n" instead of the correct "}"). With these two bugs
// combined, the test 01420_format_row works perfectly.
//
// A proper implementation of formatRow would use IRowOutputFormat directly,
// and not write row-between delimiters, instead of using IOutputFormat
// processor and its crutch row callback. This would require exposing
// IRowOutputFormat, which we don't do now, but which can be generally useful
// for other cases such as parallel formatting, that also require a control
// flow different from the usual IOutputFormat.
//
// I just don't have time or energy to redo all of this, but I need to
// support JSON array output here, which requires proper ",\n" row-between
// delimiters. For compatibility, I preserve the bug in case of non-array
// output.
if (settings.json.array_of_rows)
{
writeCString("}", out);
}
else
{
writeCString("}\n", out);
}
2019-02-19 18:41:18 +00:00
field_number = 0;
}
/// Writes the separator between two consecutive rows; only array output has one.
void JSONEachRowRowOutputFormat::writeRowBetweenDelimiter()
{
    // For non-array output nothing is written here: the newline is emitted by
    // writeRowEndDelimiter() instead. This is an existing bug kept for
    // compatibility; the explanation is in writeRowEndDelimiter().
    if (!settings.json.array_of_rows)
        return;

    writeCString(",\n", out);
}
/// Writes the opening bracket of the enclosing JSON array when `array_of_rows` is set;
/// otherwise writes nothing.
void JSONEachRowRowOutputFormat::writePrefix()
{
    if (!settings.json.array_of_rows)
        return;

    writeCString("[\n", out);
}
/// Writes the closing bracket of the enclosing JSON array when `array_of_rows` is set;
/// otherwise writes nothing.
void JSONEachRowRowOutputFormat::writeSuffix()
{
    if (!settings.json.array_of_rows)
        return;

    writeCString("\n]\n", out);
}
2021-10-11 16:11:50 +00:00
/// Registers the two output formats implemented by JSONEachRowRowOutputFormat:
///  - "JSONEachRow"        — values are serialized as native JSON values;
///  - "JSONStringsEachRow" — values are serialized as text and written as JSON strings.
/// Both are marked as supporting parallel formatting.
void registerOutputFormatJSONEachRow(FormatFactory & factory)
{
    /// Register a format and mark it under the *same* name. The name is passed
    /// once so the two calls cannot drift apart; previously the second format
    /// was registered as "JSONStringsEachRow" but marked as "JSONStringEachRow",
    /// so the flag was set on a name that was never registered.
    auto register_format = [&factory](const String & format_name, bool serialize_as_strings)
    {
        factory.registerOutputFormat(format_name, [serialize_as_strings](
            WriteBuffer & buf,
            const Block & sample,
            const RowOutputFormatParams & params,
            const FormatSettings & _format_settings)
        {
            /// The format name, not the incoming settings, decides how values are serialized.
            FormatSettings settings = _format_settings;
            settings.json.serialize_as_strings = serialize_as_strings;
            return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params,
                settings);
        });
        factory.markOutputFormatSupportsParallelFormatting(format_name);
    };

    register_format("JSONEachRow", false);
    register_format("JSONStringsEachRow", true);
}
}