ClickHouse/dbms/src/Formats/TemplateBlockOutputStream.cpp

311 lines
10 KiB
C++
Raw Normal View History

#include <Formats/TemplateBlockOutputStream.h>
2019-02-10 15:42:56 +00:00
#include <Formats/FormatFactory.h>
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
#include <DataTypes/DataTypesNumber.h>
2019-02-10 15:42:56 +00:00
namespace DB
{
2019-02-10 15:42:56 +00:00
namespace ErrorCodes
{
    /// Error code attached to every exception thrown from this file; only declared here.
    extern const int INVALID_TEMPLATE_FORMAT;
}
2019-04-07 21:30:54 +00:00
/// Splits a template string into literal delimiters and `${name}` / `${name:format}` placeholders.
///
/// After parsing, `delimiters` holds one more element than `format_idx_to_column_idx` / `formats`:
/// delimiters[i] is the literal text preceding placeholder i, and the last delimiter is the trailing text.
/// `$$` in literal text produces a single '$'.
///
/// @param format_string  template text; scanning stops at the first NUL character.
/// @param idxByName      maps a placeholder name to the index stored in format_idx_to_column_idx.
/// @throws Exception(INVALID_TEMPLATE_FORMAT) if '$' is followed by anything other than '{' or '$',
///         if a placeholder is left unterminated, or if stringToFormat rejects the format name.
///         Whatever idxByName throws is propagated as is.
ParsedTemplateFormat::ParsedTemplateFormat(const String & format_string, const ColumnIdxGetter & idxByName)
{
    enum class ParserState
    {
        Delimiter,  /// inside literal text
        Column,     /// inside "${", reading the placeholder name
        Format      /// after ':', reading the serialization format name
    };

    const char * const begin = format_string.c_str();
    const char * token_begin = begin;
    const char * cur = begin;
    ParserState state = ParserState::Delimiter;

    /// Text accumulated since token_begin, up to but not including `end`.
    auto take_token = [&](const char * end) { return String(token_begin, end - token_begin); };

    delimiters.emplace_back();

    while (*cur)
    {
        const char ch = *cur;

        if (state == ParserState::Delimiter)
        {
            if (ch == '$')
            {
                /// Flush the literal text seen so far, then look at the character after '$'.
                delimiters.back().append(token_begin, cur - token_begin);
                ++cur;

                if (*cur == '{')
                {
                    token_begin = cur + 1;
                    state = ParserState::Column;
                }
                else if (*cur == '$')
                {
                    /// "$$": the next literal token starts at the second '$', so exactly one '$' is emitted.
                    token_begin = cur;
                }
                else
                {
                    /// Also reached when '$' is the last character (*cur is the terminating NUL).
                    throw Exception("invalid template: pos " + std::to_string(cur - begin) +
                                    ": expected '{' or '$' after '$'", ErrorCodes::INVALID_TEMPLATE_FORMAT);
                }
            }
        }
        else if (state == ParserState::Column)
        {
            if (ch == ':' || ch == '}')
            {
                format_idx_to_column_idx.push_back(idxByName(take_token(cur)));

                if (ch == ':')
                {
                    state = ParserState::Format;
                }
                else
                {
                    /// Placeholder without an explicit format.
                    formats.push_back(ColumnFormat::Default);
                    delimiters.emplace_back();
                    state = ParserState::Delimiter;
                }
                token_begin = cur + 1;
            }
        }
        else
        {
            /// ParserState::Format
            if (ch == '}')
            {
                formats.push_back(stringToFormat(take_token(cur)));
                delimiters.emplace_back();
                token_begin = cur + 1;
                state = ParserState::Delimiter;
            }
        }

        ++cur;
    }

    if (state != ParserState::Delimiter)
        throw Exception("invalid template: check parentheses balance", ErrorCodes::INVALID_TEMPLATE_FORMAT);

    /// Trailing literal text after the last placeholder.
    delimiters.back().append(token_begin, cur - token_begin);
}
2019-04-07 21:30:54 +00:00
/// Translates the format name written after ':' in a placeholder into a ColumnFormat.
/// An empty name means ColumnFormat::Default; names are matched case-sensitively.
/// @throws Exception(INVALID_TEMPLATE_FORMAT) for any other name.
ParsedTemplateFormat::ColumnFormat ParsedTemplateFormat::stringToFormat(const String & col_format)
{
    if (col_format.empty())
        return ColumnFormat::Default;

    if (col_format == "Escaped")
        return ColumnFormat::Escaped;

    if (col_format == "Quoted")
        return ColumnFormat::Quoted;

    if (col_format == "JSON")
        return ColumnFormat::Json;

    if (col_format == "XML")
        return ColumnFormat::Xml;

    if (col_format == "Raw")
        return ColumnFormat::Raw;

    throw Exception("invalid template: unknown field format " + col_format, ErrorCodes::INVALID_TEMPLATE_FORMAT);
}
2019-02-10 15:42:56 +00:00
2019-04-07 21:30:54 +00:00
/// Number of placeholders found in the parsed template.
size_t ParsedTemplateFormat::columnsCount() const
{
    const size_t placeholders = format_idx_to_column_idx.size();
    return placeholders;
}
/// Parses and validates both templates taken from settings.template_settings:
///  - `format`: the result-set template, whose placeholders are output parts (see stringToOutputPart);
///    "${result}" is used when the setting is empty;
///  - `row_format`: the per-row template, whose placeholders are column names of `sample`.
///
/// @throws Exception(INVALID_TEMPLATE_FORMAT) if result/totals/min/max carry an explicit serialization
///         format, if ${result} is not the first output part, or if the row template has no columns.
///         Parsing errors from ParsedTemplateFormat are propagated.
TemplateBlockOutputStream::TemplateBlockOutputStream(WriteBuffer & ostr_, const Block & sample, const FormatSettings & settings_)
    : ostr(ostr_), header(sample), settings(settings_)
{
    static const String default_format("${result}");
    const String & configured_format = settings.template_settings.format;
    const String & format_str = configured_format.empty() ? default_format : configured_format;

    format = ParsedTemplateFormat(format_str, [&](const String & partName)
    {
        return static_cast<size_t>(stringToOutputPart(partName));
    });

    const size_t parts = format.format_idx_to_column_idx.size();

    /// Sentinel that can never be a valid index; stays in place if ${result} is absent.
    size_t resultIdx = parts + 1;

    for (size_t i = 0; i < parts; ++i)
    {
        const auto part = static_cast<OutputPart>(format.format_idx_to_column_idx[i]);

        if (part == OutputPart::Result)
            resultIdx = i;

        /// These parts are printed as whole rows via the row template, so a per-part format is rejected.
        const bool printed_as_rows = part == OutputPart::Result
            || part == OutputPart::Totals
            || part == OutputPart::ExtremesMin
            || part == OutputPart::ExtremesMax;

        if (printed_as_rows && format.formats[i] != ColumnFormat::Default)
            throw Exception("invalid template: wrong serialization type for result, totals, min or max",
                            ErrorCodes::INVALID_TEMPLATE_FORMAT);
    }

    if (resultIdx != 0)
        throw Exception("invalid template: ${result} must be the first output part", ErrorCodes::INVALID_TEMPLATE_FORMAT);

    row_format = ParsedTemplateFormat(settings.template_settings.row_format, [&](const String & colName)
    {
        return header.getPositionByName(colName);
    });

    /// A single delimiter means the parser found no placeholders at all.
    if (row_format.delimiters.size() == 1)
        throw Exception("invalid template: no columns specified", ErrorCodes::INVALID_TEMPLATE_FORMAT);
}
/// Translates a placeholder name from the result-set template into an OutputPart.
/// Names are matched case-sensitively.
/// @throws Exception(INVALID_TEMPLATE_FORMAT) if the name is not one of the known parts.
TemplateBlockOutputStream::OutputPart TemplateBlockOutputStream::stringToOutputPart(const String & part)
{
    struct NamedPart
    {
        const char * name;
        OutputPart value;
    };

    static const NamedPart known_parts[] =
    {
        {"result", OutputPart::Result},
        {"totals", OutputPart::Totals},
        {"min", OutputPart::ExtremesMin},
        {"max", OutputPart::ExtremesMax},
        {"rows", OutputPart::Rows},
        {"rows_before_limit", OutputPart::RowsBeforeLimit},
        {"time", OutputPart::TimeElapsed},
        {"rows_read", OutputPart::RowsRead},
        {"bytes_read", OutputPart::BytesRead},
    };

    for (const NamedPart & candidate : known_parts)
        if (part == candidate.name)
            return candidate.value;

    throw Exception("invalid template: unknown output part " + part, ErrorCodes::INVALID_TEMPLATE_FORMAT);
}
void TemplateBlockOutputStream::flush()
2019-02-10 15:42:56 +00:00
{
ostr.next();
}
void TemplateBlockOutputStream::writeRow(const Block & block, size_t row_num)
2019-02-10 15:42:56 +00:00
{
size_t columns = row_format.format_idx_to_column_idx.size();
for (size_t j = 0; j < columns; ++j)
{
writeString(row_format.delimiters[j], ostr);
size_t col_idx = row_format.format_idx_to_column_idx[j];
const ColumnWithTypeAndName & col = block.getByPosition(col_idx);
serializeField(*col.column, *col.type, row_num, row_format.formats[j]);
}
writeString(row_format.delimiters[columns], ostr);
}
void TemplateBlockOutputStream::serializeField(const IColumn & column, const IDataType & type, size_t row_num, ColumnFormat col_format)
{
switch (col_format)
2019-02-10 15:42:56 +00:00
{
case ColumnFormat::Default:
case ColumnFormat::Escaped:
type.serializeAsTextEscaped(column, row_num, ostr, settings);
2019-02-10 15:42:56 +00:00
break;
case ColumnFormat::Quoted:
type.serializeAsTextQuoted(column, row_num, ostr, settings);
2019-02-10 15:42:56 +00:00
break;
case ColumnFormat::Json:
type.serializeAsTextJSON(column, row_num, ostr, settings);
2019-02-10 15:42:56 +00:00
break;
case ColumnFormat::Xml:
type.serializeAsTextXML(column, row_num, ostr, settings);
2019-02-10 15:42:56 +00:00
break;
case ColumnFormat::Raw:
type.serializeAsText(column, row_num, ostr, settings);
2019-02-10 15:42:56 +00:00
break;
}
}
/// Prints a single scalar: wraps `value` into a temporary one-element column of data type V
/// and serializes that element with `col_format`.
///
/// @tparam U  type of the value being printed.
/// @tparam V  data type class used to create the column and serialize the value.
template <typename U, typename V> void TemplateBlockOutputStream::writeValue(U value, ColumnFormat col_format)
{
    auto data_type = std::make_unique<V>();

    auto single_value_column = data_type->createColumn();
    single_value_column->insert(value);

    serializeField(*single_value_column, *data_type, 0, col_format);
}
void TemplateBlockOutputStream::write(const Block & block)
2019-02-10 15:42:56 +00:00
{
size_t rows = block.rows();
for (size_t i = 0; i < rows; ++i)
2019-02-10 15:42:56 +00:00
{
if (row_count)
writeString(settings.template_settings.row_between_delimiter, ostr);
2019-02-10 15:42:56 +00:00
writeRow(block, i);
++row_count;
2019-02-10 15:42:56 +00:00
}
}
void TemplateBlockOutputStream::writePrefix()
2019-02-10 15:42:56 +00:00
{
writeString(format.delimiters.front(), ostr);
}
2019-02-10 15:42:56 +00:00
/// Emits everything in the result-set template that follows the first delimiter:
/// for each output part, its value (if any) and then the delimiter after it.
///
/// Totals and extremes are printed as rows through the row template; counters and elapsed time
/// are printed as single values using the format given in the placeholder. Parts handled by the
/// `default` branch (OutputPart::Result, whose rows are produced by write()) emit nothing here.
///
/// @throws Exception(INVALID_TEMPLATE_FORMAT) if the template asks for totals or extremes
///         but the corresponding block is empty.
void TemplateBlockOutputStream::writeSuffix()
{
    const size_t parts = format.format_idx_to_column_idx.size();
    for (size_t j = 0; j < parts; ++j)
    {
        /// NOTE: the original built an unused DataTypeUInt64 / ColumnWithTypeAndName pair here
        /// on every iteration; it had no effect on the output and has been dropped.
        switch (static_cast<OutputPart>(format.format_idx_to_column_idx[j]))
        {
            case OutputPart::Totals:
                if (!totals)
                    throw Exception("invalid template: cannot print totals for this request", ErrorCodes::INVALID_TEMPLATE_FORMAT);
                writeRow(totals, 0);
                break;
            case OutputPart::ExtremesMin:
                if (!extremes)
                    throw Exception("invalid template: cannot print extremes for this request", ErrorCodes::INVALID_TEMPLATE_FORMAT);
                /// Row 0 of the extremes block is printed for ${min}.
                writeRow(extremes, 0);
                break;
            case OutputPart::ExtremesMax:
                if (!extremes)
                    throw Exception("invalid template: cannot print extremes for this request", ErrorCodes::INVALID_TEMPLATE_FORMAT);
                /// Row 1 of the extremes block is printed for ${max}.
                writeRow(extremes, 1);
                break;
            case OutputPart::Rows:
                writeValue<size_t, DataTypeUInt64>(row_count, format.formats[j]);
                break;
            case OutputPart::RowsBeforeLimit:
                writeValue<size_t, DataTypeUInt64>(rows_before_limit, format.formats[j]);
                break;
            case OutputPart::TimeElapsed:
                writeValue<double, DataTypeFloat64>(watch.elapsedSeconds(), format.formats[j]);
                break;
            case OutputPart::RowsRead:
                writeValue<size_t, DataTypeUInt64>(progress.rows.load(), format.formats[j]);
                break;
            case OutputPart::BytesRead:
                writeValue<size_t, DataTypeUInt64>(progress.bytes.load(), format.formats[j]);
                break;
            default:
                break;
        }

        /// delimiters[0] was written by writePrefix(); part j is followed by delimiters[j + 1].
        writeString(format.delimiters[j + 1], ostr);
    }
}
/// Registers the "Template" output format in `factory`.
/// The registered creator ignores the Context argument and builds a TemplateBlockOutputStream
/// from the write buffer, the sample block and the format settings.
void registerOutputFormatTemplate(FormatFactory & factory)
{
    auto create_template_stream = [](
        WriteBuffer & buf,
        const Block & sample,
        const Context &,
        const FormatSettings & settings)
    {
        return std::make_shared<TemplateBlockOutputStream>(buf, sample, settings);
    };

    factory.registerOutputFormat("Template", create_template_stream);
}
}