ClickHouse/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp

283 lines
11 KiB
C++
Raw Normal View History

2019-08-23 19:47:22 +00:00
#include <Processors/Formats/Impl/TemplateBlockOutputFormat.h>
2019-02-10 15:42:56 +00:00
#include <Formats/FormatFactory.h>
#include <IO/WriteHelpers.h>
#include <DataTypes/DataTypesNumber.h>
2019-09-24 19:56:45 +00:00
#include <Interpreters/Context.h>
2019-02-10 15:42:56 +00:00
namespace DB
{
2019-02-10 15:42:56 +00:00
namespace ErrorCodes
{
2019-08-29 19:29:54 +00:00
extern const int SYNTAX_ERROR;
2019-02-10 15:42:56 +00:00
}
2019-09-24 14:25:22 +00:00
/// Constructs the template-driven output format.
///
/// Stores the settings and both parsed templates, caches the default serialization of
/// every column of the main port header, and then validates the two templates:
///  - `format` (resultset level): every part must be named and known, data/totals/min/max
///    must have no serialization type, the scalar parts must have one, and `${data}` must
///    be the first (index 0) part;
///  - `row_format` (row level): at least one column, every field mapped to an existing
///    header column, and every field given a serialization type.
/// Violations are reported via ParsedTemplateFormatString::throwInvalidFormat
/// (the code below relies on that call not returning).
TemplateBlockOutputFormat::TemplateBlockOutputFormat(const Block & header_, WriteBuffer & out_, const FormatSettings & settings_,
                                                     ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_,
                                                     std::string row_between_delimiter_)
    : IOutputFormat(header_, out_)
    , settings(settings_)
    , format(std::move(format_))
    , row_format(std::move(row_format_))
    , row_between_delimiter(std::move(row_between_delimiter_))
{
    /// Cache one default serialization per header column.
    const auto & port_header = getPort(PortKind::Main).getHeader();
    const size_t header_size = port_header.columns();
    serializations.resize(header_size);
    for (size_t pos = 0; pos != header_size; ++pos)
    {
        const auto & column_type = port_header.safeGetByPosition(pos).type;
        serializations[pos] = column_type->getDefaultSerialization();
    }

    /// Validate format string for whole output
    const size_t parts_count = format.format_idx_to_column_idx.size();
    /// Starts at a value that can never be a valid position, so "no data part at all"
    /// also fails the "must be first" check below.
    size_t data_part_pos = parts_count + 1;
    for (size_t part = 0; part < parts_count; ++part)
    {
        const auto & part_kind = format.format_idx_to_column_idx[part];
        if (!part_kind)
            format.throwInvalidFormat("Output part name cannot be empty.", part);

        switch (*part_kind)
        {
            case static_cast<size_t>(ResultsetPart::Data):
                data_part_pos = part;
                [[fallthrough]];
            case static_cast<size_t>(ResultsetPart::Totals):
            case static_cast<size_t>(ResultsetPart::ExtremesMin):
            case static_cast<size_t>(ResultsetPart::ExtremesMax):
            {
                /// Row-shaped parts are rendered with the row template, so they take no type here.
                if (format.formats[part] != ColumnFormat::None)
                    format.throwInvalidFormat("Serialization type for data, totals, min and max must be empty or None", part);
                break;
            }
            case static_cast<size_t>(ResultsetPart::Rows):
            case static_cast<size_t>(ResultsetPart::RowsBeforeLimit):
            case static_cast<size_t>(ResultsetPart::TimeElapsed):
            case static_cast<size_t>(ResultsetPart::RowsRead):
            case static_cast<size_t>(ResultsetPart::BytesRead):
            {
                /// Scalar parts are serialized as single values and need an explicit type.
                if (format.formats[part] == ColumnFormat::None)
                    format.throwInvalidFormat("Serialization type for output part rows, rows_before_limit, time, "
                                              "rows_read or bytes_read is not specified", part);
                break;
            }
            default:
                format.throwInvalidFormat("Invalid output part", part);
        }
    }

    if (data_part_pos != 0)
        format.throwInvalidFormat("${data} must be the first output part", 0);

    /// Validate format string for rows
    if (row_format.delimiters.size() == 1)
        row_format.throwInvalidFormat("No columns specified", 0);

    const size_t header_columns = header_.columns();
    const size_t row_fields = row_format.columnsCount();
    for (size_t field = 0; field < row_fields; ++field)
    {
        const auto & mapped_column = row_format.format_idx_to_column_idx[field];
        if (!mapped_column)
            row_format.throwInvalidFormat("Cannot skip format field for output, it's a bug.", field);

        if (*mapped_column >= header_columns)
            row_format.throwInvalidFormat("Column index " + std::to_string(*mapped_column) +
                                          " must be less then number of columns (" + std::to_string(header_columns) + ")", field);

        if (row_format.formats[field] == ColumnFormat::None)
            row_format.throwInvalidFormat("Serialization type for file column is not specified", field);
    }
}
2019-09-24 14:25:22 +00:00
/// Maps the textual name of a resultset-level placeholder (e.g. "data", "rows_read")
/// to the corresponding ResultsetPart value.
/// Throws an Exception with code SYNTAX_ERROR when the name is not one of the known parts.
TemplateBlockOutputFormat::ResultsetPart TemplateBlockOutputFormat::stringToResultsetPart(const String & part)
{
    struct NamedPart
    {
        const char * name;
        ResultsetPart value;
    };

    /// Every placeholder name accepted in a resultset template.
    static const NamedPart known_parts[] =
    {
        {"data", ResultsetPart::Data},
        {"totals", ResultsetPart::Totals},
        {"min", ResultsetPart::ExtremesMin},
        {"max", ResultsetPart::ExtremesMax},
        {"rows", ResultsetPart::Rows},
        {"rows_before_limit", ResultsetPart::RowsBeforeLimit},
        {"time", ResultsetPart::TimeElapsed},
        {"rows_read", ResultsetPart::RowsRead},
        {"bytes_read", ResultsetPart::BytesRead},
    };

    for (const auto & candidate : known_parts)
    {
        if (part == candidate.name)
            return candidate.value;
    }

    throw Exception("Unknown output part " + part, ErrorCodes::SYNTAX_ERROR);
}
2019-08-23 19:47:22 +00:00
void TemplateBlockOutputFormat::writeRow(const Chunk & chunk, size_t row_num)
2019-02-10 15:42:56 +00:00
{
size_t columns = row_format.format_idx_to_column_idx.size();
for (size_t j = 0; j < columns; ++j)
{
2019-08-23 19:47:22 +00:00
writeString(row_format.delimiters[j], out);
2019-08-26 13:02:15 +00:00
size_t col_idx = *row_format.format_idx_to_column_idx[j];
2021-03-09 14:46:52 +00:00
serializeField(*chunk.getColumns()[col_idx], *serializations[col_idx], row_num, row_format.formats[j]);
}
2019-08-23 19:47:22 +00:00
writeString(row_format.delimiters[columns], out);
}
2021-03-09 14:46:52 +00:00
void TemplateBlockOutputFormat::serializeField(const IColumn & column, const ISerialization & serialization, size_t row_num, ColumnFormat col_format)
{
switch (col_format)
2019-02-10 15:42:56 +00:00
{
case ColumnFormat::Escaped:
2021-03-09 14:46:52 +00:00
serialization.serializeTextEscaped(column, row_num, out, settings);
2019-02-10 15:42:56 +00:00
break;
case ColumnFormat::Quoted:
2021-03-09 14:46:52 +00:00
serialization.serializeTextQuoted(column, row_num, out, settings);
2019-02-10 15:42:56 +00:00
break;
2019-04-17 20:15:57 +00:00
case ColumnFormat::Csv:
2021-03-09 14:46:52 +00:00
serialization.serializeTextCSV(column, row_num, out, settings);
2019-04-17 20:15:57 +00:00
break;
2019-02-10 15:42:56 +00:00
case ColumnFormat::Json:
2021-03-09 14:46:52 +00:00
serialization.serializeTextJSON(column, row_num, out, settings);
2019-02-10 15:42:56 +00:00
break;
case ColumnFormat::Xml:
2021-03-09 14:46:52 +00:00
serialization.serializeTextXML(column, row_num, out, settings);
2019-02-10 15:42:56 +00:00
break;
case ColumnFormat::Raw:
2021-03-09 14:46:52 +00:00
serialization.serializeText(column, row_num, out, settings);
2019-02-10 15:42:56 +00:00
break;
default:
__builtin_unreachable();
2019-02-10 15:42:56 +00:00
}
}
2019-08-23 19:47:22 +00:00
/// Writes a single scalar `value` in the requested column format.
/// `V` is the data type class used to build a temporary one-element column holding
/// `value`; that column is then serialized with the type's default serialization.
template <typename U, typename V> void TemplateBlockOutputFormat::writeValue(U value, ColumnFormat col_format)
{
    const auto data_type = std::make_unique<V>();

    auto single_value_column = data_type->createColumn();
    single_value_column->insert(value);

    const auto serialization = data_type->getDefaultSerialization();
    serializeField(*single_value_column, *serialization, 0, col_format);
}
2019-08-23 19:47:22 +00:00
/// Writes all rows of a data chunk. The resultset prefix is emitted first if it has not
/// been written yet, and `row_between_delimiter` is placed between consecutive rows
/// (across chunks as well, since `row_count` persists between calls).
void TemplateBlockOutputFormat::consume(Chunk chunk)
{
    doWritePrefix();

    const size_t rows_in_chunk = chunk.getNumRows();
    for (size_t row = 0; row != rows_in_chunk; ++row)
    {
        /// No delimiter before the very first row of the whole output.
        if (row_count != 0)
            writeString(row_between_delimiter, out);

        writeRow(chunk, row);
        ++row_count;
    }
}
2019-08-23 19:47:22 +00:00
void TemplateBlockOutputFormat::doWritePrefix()
2019-02-10 15:42:56 +00:00
{
2019-08-23 19:47:22 +00:00
if (need_write_prefix)
{
writeString(format.delimiters.front(), out);
need_write_prefix = false;
}
}
2019-02-10 15:42:56 +00:00
2019-08-23 19:47:22 +00:00
void TemplateBlockOutputFormat::finalize()
{
2019-08-23 19:47:22 +00:00
if (finalized)
return;
doWritePrefix();
size_t parts = format.format_idx_to_column_idx.size();
2019-08-29 19:29:54 +00:00
for (size_t i = 0; i < parts; ++i)
{
auto type = std::make_shared<DataTypeUInt64>();
ColumnWithTypeAndName col(type->createColumnConst(1, row_count), type, String("tmp"));
2019-09-24 14:25:22 +00:00
switch (static_cast<ResultsetPart>(*format.format_idx_to_column_idx[i]))
{
2019-09-24 14:25:22 +00:00
case ResultsetPart::Totals:
2020-09-04 12:34:36 +00:00
if (!totals || !totals.hasRows())
2019-08-29 19:29:54 +00:00
format.throwInvalidFormat("Cannot print totals for this request", i);
writeRow(totals, 0);
break;
2019-09-24 14:25:22 +00:00
case ResultsetPart::ExtremesMin:
if (!extremes)
2019-08-29 19:29:54 +00:00
format.throwInvalidFormat("Cannot print extremes for this request", i);
writeRow(extremes, 0);
break;
2019-09-24 14:25:22 +00:00
case ResultsetPart::ExtremesMax:
if (!extremes)
2019-08-29 19:29:54 +00:00
format.throwInvalidFormat("Cannot print extremes for this request", i);
writeRow(extremes, 1);
break;
2019-09-24 14:25:22 +00:00
case ResultsetPart::Rows:
2019-08-29 19:29:54 +00:00
writeValue<size_t, DataTypeUInt64>(row_count, format.formats[i]);
break;
2019-09-24 14:25:22 +00:00
case ResultsetPart::RowsBeforeLimit:
2019-04-17 18:10:24 +00:00
if (!rows_before_limit_set)
2019-08-29 19:29:54 +00:00
format.throwInvalidFormat("Cannot print rows_before_limit for this request", i);
writeValue<size_t, DataTypeUInt64>(rows_before_limit, format.formats[i]);
break;
2019-09-24 14:25:22 +00:00
case ResultsetPart::TimeElapsed:
2019-08-29 19:29:54 +00:00
writeValue<double, DataTypeFloat64>(watch.elapsedSeconds(), format.formats[i]);
break;
2019-09-24 14:25:22 +00:00
case ResultsetPart::RowsRead:
2019-08-29 19:29:54 +00:00
writeValue<size_t, DataTypeUInt64>(progress.read_rows.load(), format.formats[i]);
break;
2019-09-24 14:25:22 +00:00
case ResultsetPart::BytesRead:
2019-08-29 19:29:54 +00:00
writeValue<size_t, DataTypeUInt64>(progress.read_bytes.load(), format.formats[i]);
break;
default:
break;
}
2019-08-29 19:29:54 +00:00
writeString(format.delimiters[i + 1], out);
}
2019-08-23 19:47:22 +00:00
finalized = true;
2019-02-10 15:42:56 +00:00
}
2021-10-11 16:11:50 +00:00
/// Registers the two output formats implemented by TemplateBlockOutputFormat:
///  - "Template": resultset and row templates come from the schema files named in
///    `settings.template_settings`; an empty resultset template name means "${data}";
///  - "CustomSeparated": both templates are derived from `settings.custom`.
void registerOutputFormatTemplate(FormatFactory & factory)
{
    factory.registerOutputFormat("Template", [](
        WriteBuffer & buf,
        const Block & sample,
        const RowOutputFormatParams &,
        const FormatSettings & settings)
    {
        ParsedTemplateFormatString resultset_format;
        if (settings.template_settings.resultset_format.empty())
        {
            /// Default format string: "${data}"
            resultset_format.delimiters.resize(2);
            resultset_format.formats.emplace_back(ParsedTemplateFormatString::ColumnFormat::None);
            resultset_format.format_idx_to_column_idx.emplace_back(0);
            resultset_format.column_names.emplace_back("data");
        }
        else
        {
            /// Read format string from file
            resultset_format = ParsedTemplateFormatString(
                    FormatSchemaInfo(settings.template_settings.resultset_format, "Template", false,
                                     settings.schema.is_server, settings.schema.format_schema_path),
                    [](const String & part_name)
                    {
                        /// Placeholder names in the resultset template are output parts, not columns.
                        return static_cast<size_t>(TemplateBlockOutputFormat::stringToResultsetPart(part_name));
                    });
        }

        ParsedTemplateFormatString row_format = ParsedTemplateFormatString(
                FormatSchemaInfo(settings.template_settings.row_format, "Template", false,
                                 settings.schema.is_server, settings.schema.format_schema_path),
                [&](const String & column_name)
                {
                    /// Placeholder names in the row template are column names of the header.
                    return sample.getPositionByName(column_name);
                });

        /// The constructor takes both templates by value; move them to avoid deep copies.
        return std::make_shared<TemplateBlockOutputFormat>(
            sample, buf, settings, std::move(resultset_format), std::move(row_format),
            settings.template_settings.row_between_delimiter);
    });

    factory.registerOutputFormat("CustomSeparated", [](
        WriteBuffer & buf,
        const Block & sample,
        const RowOutputFormatParams &,
        const FormatSettings & settings)
    {
        ParsedTemplateFormatString resultset_format = ParsedTemplateFormatString::setupCustomSeparatedResultsetFormat(settings.custom);
        ParsedTemplateFormatString row_format = ParsedTemplateFormatString::setupCustomSeparatedRowFormat(settings.custom, sample);

        /// The constructor takes both templates by value; move them to avoid deep copies.
        return std::make_shared<TemplateBlockOutputFormat>(
            sample, buf, settings, std::move(resultset_format), std::move(row_format),
            settings.custom.row_between_delimiter);
    });
}
}