2019-08-23 19:47:22 +00:00
|
|
|
#include <Processors/Formats/Impl/TemplateBlockOutputFormat.h>
|
2019-02-10 15:42:56 +00:00
|
|
|
#include <Formats/FormatFactory.h>
|
|
|
|
#include <IO/WriteHelpers.h>
|
2019-02-17 21:23:44 +00:00
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
2019-02-10 15:42:56 +00:00
|
|
|
|
|
|
|
|
2019-02-12 19:40:03 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
2019-02-10 15:42:56 +00:00
|
|
|
|
2019-02-12 19:40:03 +00:00
|
|
|
/// Error codes used in this translation unit; only declared here, the definition is not in this file.
namespace ErrorCodes
{
    extern const int SYNTAX_ERROR;
}
|
|
|
|
|
2019-08-30 14:38:24 +00:00
|
|
|
/// Builds the output format: remembers the header column types, then parses and validates
/// two template strings taken from settings.template_settings:
///  - `format`     describes the whole output (falls back to "${data}" when empty);
///  - `row_format` describes a single row.
/// Any validation failure is reported through ParsedTemplateFormatString::throwInvalidFormat.
TemplateBlockOutputFormat::TemplateBlockOutputFormat(const Block & header_, WriteBuffer & out_, const FormatSettings & settings_)
    : IOutputFormat(header_, out_), settings(settings_)
{
    /// Cache the data type of every header column, indexed by column position.
    auto & sample = getPort(PortKind::Main).getHeader();
    const size_t num_columns = sample.columns();
    types.resize(num_columns);
    for (size_t pos = 0; pos != num_columns; ++pos)
        types[pos] = sample.safeGetByPosition(pos).type;

    /// Parse format string for whole output
    static const String default_format("${data}");
    const auto & template_settings = settings.template_settings;
    const String & format_str = template_settings.format.empty() ? default_format : template_settings.format;
    format = ParsedTemplateFormatString(format_str, [&](const String & part_name)
    {
        /// Placeholder names of the whole-output template are output part names, not column names.
        return static_cast<size_t>(stringToOutputPart(part_name));
    });

    /// Validate format string for whole output
    const size_t num_parts = format.format_idx_to_column_idx.size();
    /// Initialised to an index no part can have, so "no ${data} at all" is rejected below as well.
    size_t data_idx = num_parts + 1;
    for (size_t i = 0; i < num_parts; ++i)
    {
        const auto & part_idx = format.format_idx_to_column_idx[i];
        if (!part_idx)
            format.throwInvalidFormat("Output part name cannot be empty, it's a bug.", i);

        const auto part = static_cast<OutputPart>(*part_idx);
        if (part == OutputPart::Data)
            data_idx = i;

        /// Data, totals and extremes are written row by row with `row_format`,
        /// so they must not carry a serialization type of their own.
        const bool is_row_based_part = part == OutputPart::Data || part == OutputPart::Totals
            || part == OutputPart::ExtremesMin || part == OutputPart::ExtremesMax;
        const bool has_serialization_type = format.formats[i] != ColumnFormat::None;

        if (is_row_based_part && has_serialization_type)
            format.throwInvalidFormat("Serialization type for data, totals, min and max must be empty or None", i);

        /// Every other part is a single value and needs an explicit serialization type.
        if (!is_row_based_part && !has_serialization_type)
            format.throwInvalidFormat("Serialization type for output part rows, rows_before_limit, time, "
                                      "rows_read or bytes_read is not specified", i);
    }

    if (data_idx != 0)
        format.throwInvalidFormat("${data} must be the first output part", 0);

    /// Parse format string for rows
    row_format = ParsedTemplateFormatString(template_settings.row_format, [&](const String & column_name)
    {
        return sample.getPositionByName(column_name);
    });

    /// Validate format string for rows
    /// A single delimiter means the template contains no placeholders.
    if (row_format.delimiters.size() == 1)
        row_format.throwInvalidFormat("No columns specified", 0);

    for (size_t field = 0; field < row_format.columnsCount(); ++field)
    {
        if (!row_format.format_idx_to_column_idx[field])
            row_format.throwInvalidFormat("Cannot skip format field for output, it's a bug.", field);

        if (row_format.formats[field] == ColumnFormat::None)
            row_format.throwInvalidFormat("Serialization type for file column is not specified", field);
    }
}
|
|
|
|
|
2019-08-23 19:47:22 +00:00
|
|
|
/// Maps a placeholder name from the whole-output template to the corresponding OutputPart.
/// Throws Exception with SYNTAX_ERROR if the name is not one of the known parts.
TemplateBlockOutputFormat::OutputPart TemplateBlockOutputFormat::stringToOutputPart(const String & part)
{
    struct NamedPart
    {
        const char * name;
        OutputPart value;
    };

    /// Names are matched exactly (case-sensitive).
    static constexpr NamedPart known_parts[] =
    {
        {"data", OutputPart::Data},
        {"totals", OutputPart::Totals},
        {"min", OutputPart::ExtremesMin},
        {"max", OutputPart::ExtremesMax},
        {"rows", OutputPart::Rows},
        {"rows_before_limit", OutputPart::RowsBeforeLimit},
        {"time", OutputPart::TimeElapsed},
        {"rows_read", OutputPart::RowsRead},
        {"bytes_read", OutputPart::BytesRead},
    };

    for (const auto & candidate : known_parts)
        if (part == candidate.name)
            return candidate.value;

    throw Exception("Unknown output part " + part, ErrorCodes::SYNTAX_ERROR);
}
|
|
|
|
|
2019-08-23 19:47:22 +00:00
|
|
|
void TemplateBlockOutputFormat::writeRow(const Chunk & chunk, size_t row_num)
|
2019-02-10 15:42:56 +00:00
|
|
|
{
|
2019-02-17 21:23:44 +00:00
|
|
|
size_t columns = row_format.format_idx_to_column_idx.size();
|
|
|
|
for (size_t j = 0; j < columns; ++j)
|
|
|
|
{
|
2019-08-23 19:47:22 +00:00
|
|
|
writeString(row_format.delimiters[j], out);
|
2019-02-17 21:23:44 +00:00
|
|
|
|
2019-08-26 13:02:15 +00:00
|
|
|
size_t col_idx = *row_format.format_idx_to_column_idx[j];
|
2019-08-23 19:47:22 +00:00
|
|
|
serializeField(*chunk.getColumns()[col_idx], *types[col_idx], row_num, row_format.formats[j]);
|
2019-02-17 21:23:44 +00:00
|
|
|
}
|
2019-08-23 19:47:22 +00:00
|
|
|
writeString(row_format.delimiters[columns], out);
|
2019-02-17 21:23:44 +00:00
|
|
|
}
|
|
|
|
|
2019-08-23 19:47:22 +00:00
|
|
|
void TemplateBlockOutputFormat::serializeField(const IColumn & column, const IDataType & type, size_t row_num, ColumnFormat col_format)
|
2019-02-17 21:23:44 +00:00
|
|
|
{
|
|
|
|
switch (col_format)
|
2019-02-10 15:42:56 +00:00
|
|
|
{
|
|
|
|
case ColumnFormat::Escaped:
|
2019-08-23 19:47:22 +00:00
|
|
|
type.serializeAsTextEscaped(column, row_num, out, settings);
|
2019-02-10 15:42:56 +00:00
|
|
|
break;
|
|
|
|
case ColumnFormat::Quoted:
|
2019-08-23 19:47:22 +00:00
|
|
|
type.serializeAsTextQuoted(column, row_num, out, settings);
|
2019-02-10 15:42:56 +00:00
|
|
|
break;
|
2019-04-17 20:15:57 +00:00
|
|
|
case ColumnFormat::Csv:
|
2019-08-23 19:47:22 +00:00
|
|
|
type.serializeAsTextCSV(column, row_num, out, settings);
|
2019-04-17 20:15:57 +00:00
|
|
|
break;
|
2019-02-10 15:42:56 +00:00
|
|
|
case ColumnFormat::Json:
|
2019-08-23 19:47:22 +00:00
|
|
|
type.serializeAsTextJSON(column, row_num, out, settings);
|
2019-02-10 15:42:56 +00:00
|
|
|
break;
|
|
|
|
case ColumnFormat::Xml:
|
2019-08-23 19:47:22 +00:00
|
|
|
type.serializeAsTextXML(column, row_num, out, settings);
|
2019-02-10 15:42:56 +00:00
|
|
|
break;
|
|
|
|
case ColumnFormat::Raw:
|
2019-08-23 19:47:22 +00:00
|
|
|
type.serializeAsText(column, row_num, out, settings);
|
2019-02-10 15:42:56 +00:00
|
|
|
break;
|
2019-08-27 16:53:26 +00:00
|
|
|
default:
|
|
|
|
__builtin_unreachable();
|
2019-02-10 15:42:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-23 19:47:22 +00:00
|
|
|
/// Serializes a single scalar `value` with the given `col_format`.
/// U is the C++ type of the value, V is the data type class used to serialize it:
/// the value is put into a temporary one-row column of type V and written via serializeField.
template <typename U, typename V> void TemplateBlockOutputFormat::writeValue(U value, ColumnFormat col_format)
{
    auto value_type = std::make_unique<V>();

    auto single_value_column = value_type->createColumn();
    single_value_column->insert(value);

    /// The temporary column holds exactly one row, so the value is at index 0.
    serializeField(*single_value_column, *value_type, 0, col_format);
}
|
|
|
|
|
2019-08-23 19:47:22 +00:00
|
|
|
/// Writes all rows of `chunk`, separating consecutive rows (across chunks too)
/// with settings.template_settings.row_between_delimiter.
void TemplateBlockOutputFormat::consume(Chunk chunk)
{
    doWritePrefix();

    const size_t num_rows = chunk.getNumRows();

    /// `row_count` counts rows written so far over all chunks; it is advanced only after
    /// a row has been written successfully.
    for (size_t row = 0; row != num_rows; ++row, ++row_count)
    {
        /// No delimiter before the very first row of the output.
        if (row_count != 0)
            writeString(settings.template_settings.row_between_delimiter, out);

        writeRow(chunk, row);
    }
}
|
|
|
|
|
2019-08-23 19:47:22 +00:00
|
|
|
void TemplateBlockOutputFormat::doWritePrefix()
|
2019-02-10 15:42:56 +00:00
|
|
|
{
|
2019-08-23 19:47:22 +00:00
|
|
|
if (need_write_prefix)
|
|
|
|
{
|
|
|
|
writeString(format.delimiters.front(), out);
|
|
|
|
need_write_prefix = false;
|
|
|
|
}
|
2019-02-12 19:40:03 +00:00
|
|
|
}
|
2019-02-10 15:42:56 +00:00
|
|
|
|
2019-08-23 19:47:22 +00:00
|
|
|
void TemplateBlockOutputFormat::finalize()
|
2019-02-12 19:40:03 +00:00
|
|
|
{
|
2019-08-23 19:47:22 +00:00
|
|
|
if (finalized)
|
|
|
|
return;
|
|
|
|
|
|
|
|
doWritePrefix();
|
2019-02-17 21:23:44 +00:00
|
|
|
|
|
|
|
size_t parts = format.format_idx_to_column_idx.size();
|
|
|
|
|
2019-08-29 19:29:54 +00:00
|
|
|
for (size_t i = 0; i < parts; ++i)
|
2019-02-17 21:23:44 +00:00
|
|
|
{
|
|
|
|
auto type = std::make_shared<DataTypeUInt64>();
|
|
|
|
ColumnWithTypeAndName col(type->createColumnConst(1, row_count), type, String("tmp"));
|
2019-08-29 19:29:54 +00:00
|
|
|
switch (static_cast<OutputPart>(*format.format_idx_to_column_idx[i]))
|
2019-02-17 21:23:44 +00:00
|
|
|
{
|
|
|
|
case OutputPart::Totals:
|
|
|
|
if (!totals)
|
2019-08-29 19:29:54 +00:00
|
|
|
format.throwInvalidFormat("Cannot print totals for this request", i);
|
2019-02-17 21:23:44 +00:00
|
|
|
writeRow(totals, 0);
|
|
|
|
break;
|
|
|
|
case OutputPart::ExtremesMin:
|
|
|
|
if (!extremes)
|
2019-08-29 19:29:54 +00:00
|
|
|
format.throwInvalidFormat("Cannot print extremes for this request", i);
|
2019-02-17 21:23:44 +00:00
|
|
|
writeRow(extremes, 0);
|
|
|
|
break;
|
|
|
|
case OutputPart::ExtremesMax:
|
|
|
|
if (!extremes)
|
2019-08-29 19:29:54 +00:00
|
|
|
format.throwInvalidFormat("Cannot print extremes for this request", i);
|
2019-02-17 21:23:44 +00:00
|
|
|
writeRow(extremes, 1);
|
|
|
|
break;
|
|
|
|
case OutputPart::Rows:
|
2019-08-29 19:29:54 +00:00
|
|
|
writeValue<size_t, DataTypeUInt64>(row_count, format.formats[i]);
|
2019-02-17 21:23:44 +00:00
|
|
|
break;
|
|
|
|
case OutputPart::RowsBeforeLimit:
|
2019-04-17 18:10:24 +00:00
|
|
|
if (!rows_before_limit_set)
|
2019-08-29 19:29:54 +00:00
|
|
|
format.throwInvalidFormat("Cannot print rows_before_limit for this request", i);
|
|
|
|
writeValue<size_t, DataTypeUInt64>(rows_before_limit, format.formats[i]);
|
2019-02-17 21:23:44 +00:00
|
|
|
break;
|
|
|
|
case OutputPart::TimeElapsed:
|
2019-08-29 19:29:54 +00:00
|
|
|
writeValue<double, DataTypeFloat64>(watch.elapsedSeconds(), format.formats[i]);
|
2019-02-17 21:23:44 +00:00
|
|
|
break;
|
|
|
|
case OutputPart::RowsRead:
|
2019-08-29 19:29:54 +00:00
|
|
|
writeValue<size_t, DataTypeUInt64>(progress.read_rows.load(), format.formats[i]);
|
2019-02-17 21:23:44 +00:00
|
|
|
break;
|
|
|
|
case OutputPart::BytesRead:
|
2019-08-29 19:29:54 +00:00
|
|
|
writeValue<size_t, DataTypeUInt64>(progress.read_bytes.load(), format.formats[i]);
|
2019-02-17 21:23:44 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2019-08-29 19:29:54 +00:00
|
|
|
writeString(format.delimiters[i + 1], out);
|
2019-02-17 21:23:44 +00:00
|
|
|
}
|
|
|
|
|
2019-08-23 19:47:22 +00:00
|
|
|
finalized = true;
|
2019-02-10 15:42:56 +00:00
|
|
|
}
|
|
|
|
|
2019-02-12 19:40:03 +00:00
|
|
|
|
2019-08-23 19:47:22 +00:00
|
|
|
/// Registers the "Template" output format in the given factory.
void registerOutputFormatProcessorTemplate(FormatFactory & factory)
{
    /// The context and the write callback are not needed by this format and are ignored.
    auto create_template_format = [](
        WriteBuffer & buf,
        const Block & sample,
        const Context &,
        FormatFactory::WriteCallback,
        const FormatSettings & settings)
    {
        return std::make_shared<TemplateBlockOutputFormat>(sample, buf, settings);
    };

    factory.registerOutputFormatProcessor("Template", create_template_format);
}
|
|
|
|
}
|