Better rendering of multiline strings in Pretty formats

This commit is contained in:
Volodya Giro 2024-02-13 15:17:52 +03:00 committed by Volodya
parent bf38e8b3eb
commit 2bcb3d975b
4 changed files with 136 additions and 6 deletions

View File

@ -1,3 +1,4 @@
#include <cstddef>
#include <Processors/Formats/Impl/PrettyBlockOutputFormat.h>
#include <Formats/FormatFactory.h>
#include <IO/WriteBuffer.h>
@ -68,6 +69,17 @@ void PrettyBlockOutputFormat::calculateWidths(
}
widths[i][j] = UTF8::computeWidth(reinterpret_cast<const UInt8 *>(serialized_value.data()), serialized_value.size(), prefix);
if (serialized_value.contains('\n')) {
size_t row_width = 0;
size_t row_start = 0;
for (size_t k = 0; k < serialized_value.size(); ++k) {
if (serialized_value[k] == '\n') {
row_width = std::max(row_width, k - row_start + 1 + (row_start != 0));
row_start = k + 1;
}
}
widths[i][j] = std::max(row_width, serialized_value.size() - row_start + 1);
}
max_padded_widths[i] = std::max<UInt64>(max_padded_widths[i],
std::min<UInt64>(format_settings.pretty.max_column_pad_width,
std::min<UInt64>(format_settings.pretty.max_value_width, widths[i][j])));
@ -303,19 +315,38 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind
writeCString(grid_symbols.bar, out);
std::vector<String> transferred_row(num_columns);
bool has_transferred_row = false;
for (size_t j = 0; j < num_columns; ++j)
{
if (j != 0)
writeCString(grid_symbols.bar, out);
const auto & type = *header.getByPosition(j).type;
bool has_break_line = false;
writeValueWithPadding(*columns[j], *serializations[j], i,
widths[j].empty() ? max_widths[j] : widths[j][i],
max_widths[j], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type));
max_widths[j], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type), has_break_line);
if (has_break_line) {
has_transferred_row = true;
String serialized_value = " ";
{
WriteBufferFromString out_serialize(serialized_value, AppendModeTag());
serializations[j]->serializeText(*columns[j], i, out_serialize, format_settings);
}
size_t break_line_pos = serialized_value.find_first_of('\n');
transferred_row[j] = serialized_value.substr(break_line_pos + 1);
}
}
writeCString(grid_symbols.bar, out);
writeReadableNumberTip(chunk);
writeCString("\n", out);
if (has_transferred_row) {
writeTransferredRow(max_widths, transferred_row);
}
}
if (format_settings.pretty.output_format_pretty_row_numbers)
@ -397,7 +428,7 @@ static String highlightDigitGroups(String source)
void PrettyBlockOutputFormat::writeValueWithPadding(
const IColumn & column, const ISerialization & serialization, size_t row_num,
size_t value_width, size_t pad_to_width, size_t cut_to_width, bool align_right, bool is_number)
size_t value_width, size_t pad_to_width, size_t cut_to_width, bool align_right, bool is_number, bool & has_line_breake)
{
String serialized_value = " ";
{
@ -405,6 +436,12 @@ void PrettyBlockOutputFormat::writeValueWithPadding(
serialization.serializeText(column, row_num, out_serialize, format_settings);
}
if (size_t line_breake_pos = serialized_value.find_first_of('\n'); line_breake_pos != String::npos) {
has_line_breake = true;
serialized_value = serialized_value.substr(0, line_breake_pos) + "";
value_width = serialized_value.size() - 3;
}
if (cut_to_width && value_width > cut_to_width)
{
serialized_value.resize(UTF8::computeBytesBeforeWidth(
@ -448,6 +485,59 @@ void PrettyBlockOutputFormat::writeValueWithPadding(
}
}
void PrettyBlockOutputFormat::writeTransferredRow(const Widths & max_widths, const std::vector<String> & transferred_row) {
const GridSymbols & grid_symbols = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ?
utf8_grid_symbols :
ascii_grid_symbols;
size_t num_columns = max_widths.size();
writeCString(grid_symbols.bar, out);
std::vector<String> new_transferred_row(num_columns);
bool has_transferred_row = false;
size_t cur_width = 0;
for (size_t j = 0; j < num_columns; ++j)
{
if (j != 0)
writeCString(grid_symbols.bar, out);
String value = transferred_row[j];
cur_width = value.size();
if (size_t break_line_pos = value.find_first_of('\n'); break_line_pos != String::npos) {
has_transferred_row = true;
new_transferred_row[j] = value.substr(break_line_pos + 1);
value = value.substr(0, break_line_pos) + "";
cur_width = value.size() - 2;
}
if (!value.empty()) {
value = "" + value;
cur_width += 1;
}
value = " " + value + " ";
auto write_padding = [&]()
{
if (max_widths[j] > cur_width)
for (size_t k = 0; k < max_widths[j] - cur_width; ++k)
writeChar(' ', out);
};
out.write(value.data(), value.size());
write_padding();
}
writeCString(grid_symbols.bar, out);
writeCString("\n", out);
if (has_transferred_row) {
writeTransferredRow(max_widths, new_transferred_row);
}
}
void PrettyBlockOutputFormat::consume(Chunk chunk)
{

View File

@ -1,5 +1,6 @@
#pragma once
#include <cstddef>
#include <Core/Block.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Formats/FormatSettings.h>
@ -48,7 +49,9 @@ protected:
void writeValueWithPadding(
const IColumn & column, const ISerialization & serialization, size_t row_num,
size_t value_width, size_t pad_to_width, size_t cut_to_width, bool align_right, bool is_number);
size_t value_width, size_t pad_to_width, size_t cut_to_width, bool align_right, bool is_number, bool & has_line_breake);
void writeTransferredRow(const Widths & max_widths, const std::vector<String> & transferred_row);
void resetFormatterImpl() override
{

View File

@ -168,6 +168,8 @@ void PrettyCompactBlockOutputFormat::writeRow(
writeCString(grid_symbols.bar, out);
std::vector<String> transferred_row(num_columns);
bool has_transferred_row = false;
for (size_t j = 0; j < num_columns; ++j)
{
if (j != 0)
@ -175,12 +177,28 @@ void PrettyCompactBlockOutputFormat::writeRow(
const auto & type = *header.getByPosition(j).type;
const auto & cur_widths = widths[j].empty() ? max_widths[j] : widths[j][row_num];
writeValueWithPadding(*columns[j], *serializations[j], row_num, cur_widths, max_widths[j], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type));
bool has_break_line = false;
writeValueWithPadding(*columns[j], *serializations[j], row_num, cur_widths, max_widths[j], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type), has_break_line);
if (has_break_line) {
has_transferred_row = true;
String serialized_value = " ";
{
WriteBufferFromString out_serialize(serialized_value, AppendModeTag());
serializations[j]->serializeText(*columns[j], row_num, out_serialize, format_settings);
}
size_t break_line_pos = serialized_value.find_first_of('\n');
transferred_row[j] = serialized_value.substr(break_line_pos + 1);
}
}
writeCString(grid_symbols.bar, out);
writeReadableNumberTip(chunk);
writeCString("\n", out);
if (has_transferred_row) {
writeTransferredRow(max_widths, transferred_row);
}
}
void PrettyCompactBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind)

View File

@ -70,6 +70,9 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port
}
writeCString("\n\n", out);
std::vector<String> transferred_row(num_columns);
bool has_transferred_row = false;
for (size_t row = 0; row < num_rows && total_rows + row < max_rows; ++row)
{
if (format_settings.pretty.output_format_pretty_row_numbers)
@ -92,12 +95,28 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port
const auto & type = *header.getByPosition(column).type;
auto & cur_width = widths[column].empty() ? max_widths[column] : widths[column][row];
writeValueWithPadding(
*columns[column], *serializations[column], row, cur_width, max_widths[column], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type));
bool has_break_line = false;
writeValueWithPadding(*columns[column], *serializations[column],
row, cur_width, max_widths[column], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type), has_break_line);
if (has_break_line) {
has_transferred_row = true;
String serialized_value = " ";
{
WriteBufferFromString out_serialize(serialized_value, AppendModeTag());
serializations[column]->serializeText(*columns[column], row, out_serialize, format_settings);
}
size_t break_line_pos = serialized_value.find_first_of('\n');
transferred_row[column] = serialized_value.substr(break_line_pos + 1);
}
}
writeReadableNumberTip(chunk);
writeChar('\n', out);
if (has_transferred_row) {
writeTransferredRow(max_widths, transferred_row);
}
}
total_rows += num_rows;