ClickHouse/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp

427 lines
13 KiB
C++
Raw Normal View History

2019-02-19 18:41:18 +00:00
#include <sys/ioctl.h>
#if defined(OS_SUNOS)
# include <sys/termios.h>
#endif
2020-02-17 14:27:09 +00:00
#include <unistd.h>
2019-02-19 18:41:18 +00:00
#include <Processors/Formats/Impl/PrettyBlockOutputFormat.h>
#include <Formats/FormatFactory.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromString.h>
2020-11-10 18:22:26 +00:00
#include <IO/Operators.h>
2019-10-07 18:56:03 +00:00
#include <Common/PODArray.h>
2019-02-19 18:41:18 +00:00
#include <Common/UTF8Helpers.h>
namespace DB
{
namespace ErrorCodes
{
}
/// Creates the formatter for the given header and output buffer.
/// Additionally asks the terminal attached to standard output for its size and,
/// if the request succeeds, remembers the number of columns in `terminal_width`.
PrettyBlockOutputFormat::PrettyBlockOutputFormat(
    WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_)
    : IOutputFormat(header_, out_)
    , format_settings(format_settings_)
    , serializations(header_.getSerializations())
{
    /// `window_size` is only read when ioctl reports success;
    /// otherwise `terminal_width` is left untouched.
    struct winsize window_size;
    const bool size_is_known = (ioctl(STDOUT_FILENO, TIOCGWINSZ, &window_size) == 0);
    if (size_is_known)
        terminal_width = window_size.ws_col;
}
/// Evaluate the visible width of the values and column names.
/// Note that number of code points is just a rough approximation of visible string width.
void PrettyBlockOutputFormat::calculateWidths(
const Block & header, const Chunk & chunk,
WidthsPerColumn & widths, Widths & max_padded_widths, Widths & name_widths)
2019-02-19 18:41:18 +00:00
{
2020-05-31 19:22:59 +00:00
size_t num_rows = std::min(chunk.getNumRows(), format_settings.pretty.max_rows);
2020-09-30 01:50:23 +00:00
/// len(num_rows) + len(". ")
row_number_width = std::floor(std::log10(num_rows)) + 3;
2019-02-19 18:41:18 +00:00
size_t num_columns = chunk.getNumColumns();
2020-04-22 06:01:33 +00:00
const auto & columns = chunk.getColumns();
2019-02-19 18:41:18 +00:00
widths.resize(num_columns);
max_padded_widths.resize_fill(num_columns);
2019-02-19 18:41:18 +00:00
name_widths.resize(num_columns);
/// Calculate widths of all values.
String serialized_value;
size_t prefix = 2; // Tab character adjustment
for (size_t i = 0; i < num_columns; ++i)
{
2020-04-22 06:01:33 +00:00
const auto & elem = header.getByPosition(i);
const auto & column = columns[i];
2019-02-19 18:41:18 +00:00
widths[i].resize(num_rows);
for (size_t j = 0; j < num_rows; ++j)
{
{
WriteBufferFromString out_serialize(serialized_value);
2021-03-09 14:46:52 +00:00
auto serialization = elem.type->getDefaultSerialization();
serialization->serializeText(*column, j, out_serialize, format_settings);
2019-02-19 18:41:18 +00:00
}
2020-05-31 22:17:40 +00:00
/// Avoid calculating width of too long strings by limiting the size in bytes.
/// Note that it is just an estimation. 4 is the maximum size of Unicode code point in bytes in UTF-8.
/// But it's possible that the string is long in bytes but very short in visible size.
/// (e.g. non-printable characters, diacritics, combining characters)
2020-06-02 02:39:21 +00:00
if (format_settings.pretty.max_value_width)
{
size_t max_byte_size = format_settings.pretty.max_value_width * 4;
if (serialized_value.size() > max_byte_size)
serialized_value.resize(max_byte_size);
}
2020-05-31 22:17:40 +00:00
widths[i][j] = UTF8::computeWidth(reinterpret_cast<const UInt8 *>(serialized_value.data()), serialized_value.size(), prefix);
2020-06-02 13:56:03 +00:00
max_padded_widths[i] = std::max<UInt64>(max_padded_widths[i],
std::min<UInt64>(format_settings.pretty.max_column_pad_width,
std::min<UInt64>(format_settings.pretty.max_value_width, widths[i][j])));
2019-02-19 18:41:18 +00:00
}
/// And also calculate widths for names of columns.
{
// name string doesn't contain Tab, no need to pass `prefix`
name_widths[i] = std::min<UInt64>(format_settings.pretty.max_column_pad_width,
UTF8::computeWidth(reinterpret_cast<const UInt8 *>(elem.name.data()), elem.name.size()));
2020-06-02 13:56:03 +00:00
max_padded_widths[i] = std::max<UInt64>(max_padded_widths[i], name_widths[i]);
2019-02-19 18:41:18 +00:00
}
prefix += max_padded_widths[i] + 3;
2019-02-19 18:41:18 +00:00
}
}
namespace
{

/// Grid symbols are used for printing grid borders in a terminal.
/// Default values are UTF-8 box-drawing characters.
/// They are spelled as explicit byte escapes so that the table does not depend on the
/// encoding of this source file; the glyph and code point are given next to each entry.
struct GridSymbols
{
    const char * bold_left_top_corner = "\xE2\x94\x8F";     // U+250F ┏
    const char * bold_right_top_corner = "\xE2\x94\x93";    // U+2513 ┓
    const char * left_bottom_corner = "\xE2\x94\x94";       // U+2514 └
    const char * right_bottom_corner = "\xE2\x94\x98";      // U+2518 ┘
    const char * bold_left_separator = "\xE2\x94\xA1";      // U+2521 ┡
    const char * left_separator = "\xE2\x94\x9C";           // U+251C ├
    const char * bold_right_separator = "\xE2\x94\xA9";     // U+2529 ┩
    const char * right_separator = "\xE2\x94\xA4";          // U+2524 ┤
    const char * bold_top_separator = "\xE2\x94\xB3";       // U+2533 ┳
    const char * bold_middle_separator = "\xE2\x95\x87";    // U+2547 ╇
    const char * middle_separator = "\xE2\x94\xBC";         // U+253C ┼
    const char * bottom_separator = "\xE2\x94\xB4";         // U+2534 ┴
    const char * bold_dash = "\xE2\x94\x81";                // U+2501 ━
    const char * dash = "\xE2\x94\x80";                     // U+2500 ─
    const char * bold_bar = "\xE2\x94\x83";                 // U+2503 ┃
    const char * bar = "\xE2\x94\x82";                      // U+2502 │
};

/// Symbol table for the UTF-8 charset: all defaults.
const GridSymbols utf8_grid_symbols{};

/// Symbol table for the ASCII charset. The initializers follow the member order of GridSymbols:
/// twelve corners and junctions, then the two dashes, then the two bars.
const GridSymbols ascii_grid_symbols {
    "+",
    "+",
    "+",
    "+",
    "+",
    "+",
    "+",
    "+",
    "+",
    "+",
    "+",
    "+",
    "-",
    "-",
    "|",
    "|"
};

}
2019-02-19 18:41:18 +00:00
2022-05-06 16:48:48 +00:00
/// Prints one chunk as a table with borders: a top border, a line with column names,
/// a separator, the data rows (with a thin separator between consecutive rows) and a bottom border.
/// The header used for names and types is taken from the port given by `port_kind`.
/// Once `total_rows` has reached pretty.max_rows nothing is printed and the rows are only counted.
void PrettyBlockOutputFormat::write(Chunk chunk, PortKind port_kind)
{
    const UInt64 row_limit = format_settings.pretty.max_rows;

    /// The limit was exhausted by previous chunks: keep counting, print nothing.
    if (total_rows >= row_limit)
    {
        total_rows += chunk.getNumRows();
        return;
    }

    const auto rows_in_chunk = chunk.getNumRows();
    const auto column_count = chunk.getNumColumns();
    const auto & columns = chunk.getColumns();
    const auto & header = getPort(port_kind).getHeader();

    WidthsPerColumn widths;
    Widths max_widths;
    Widths name_widths;
    calculateWidths(header, chunk, widths, max_widths, name_widths);

    const bool utf8_charset = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8;
    const GridSymbols & grid_symbols = utf8_charset ? utf8_grid_symbols : ascii_grid_symbols;

    const bool colored = format_settings.pretty.color;
    const bool with_row_numbers = format_settings.pretty.output_format_pretty_row_numbers;

    /// Lines that carry no row number are shifted right by the width reserved for row numbers.
    auto write_row_number_indent = [&]
    {
        if (with_row_numbers)
            writeString(String(row_number_width, ' '), out);
    };

    auto write_spaces = [&](size_t count)
    {
        for (size_t k = 0; k < count; ++k)
            writeChar(' ', out);
    };

    /// Build the four horizontal lines of the grid.
    WriteBufferFromOwnString top_separator;
    WriteBufferFromOwnString middle_names_separator;
    WriteBufferFromOwnString middle_values_separator;
    WriteBufferFromOwnString bottom_separator;

    top_separator << grid_symbols.bold_left_top_corner;
    middle_names_separator << grid_symbols.bold_left_separator;
    middle_values_separator << grid_symbols.left_separator;
    bottom_separator << grid_symbols.left_bottom_corner;

    for (size_t col = 0; col < column_count; ++col)
    {
        if (col > 0)
        {
            top_separator << grid_symbols.bold_top_separator;
            middle_names_separator << grid_symbols.bold_middle_separator;
            middle_values_separator << grid_symbols.middle_separator;
            bottom_separator << grid_symbols.bottom_separator;
        }

        /// The padded column width plus one space on each side.
        for (size_t k = 0; k < max_widths[col] + 2; ++k)
        {
            top_separator << grid_symbols.bold_dash;
            middle_names_separator << grid_symbols.bold_dash;
            middle_values_separator << grid_symbols.dash;
            bottom_separator << grid_symbols.dash;
        }
    }

    top_separator << grid_symbols.bold_right_top_corner << "\n";
    middle_names_separator << grid_symbols.bold_right_separator << "\n";
    middle_values_separator << grid_symbols.right_separator << "\n";
    bottom_separator << grid_symbols.right_bottom_corner << "\n";

    const std::string top_line = top_separator.str();
    const std::string names_line = middle_names_separator.str();
    const std::string values_line = middle_values_separator.str();
    const std::string bottom_line = bottom_separator.str();

    /// Top border.
    write_row_number_indent();
    writeString(top_line, out);

    /// Column names.
    write_row_number_indent();
    writeCString(grid_symbols.bold_bar, out);
    writeCString(" ", out);

    for (size_t col = 0; col < column_count; ++col)
    {
        if (col > 0)
        {
            writeCString(" ", out);
            writeCString(grid_symbols.bold_bar, out);
            writeCString(" ", out);
        }

        const auto & header_column = header.getByPosition(col);
        const size_t name_padding = max_widths[col] - name_widths[col];
        const bool align_right = header_column.type->shouldAlignRightInPrettyFormats();

        if (colored)
            writeCString("\033[1m", out);

        /// The padding goes before the name for right-aligned columns and after it otherwise.
        if (align_right)
            write_spaces(name_padding);
        writeString(header_column.name, out);
        if (!align_right)
            write_spaces(name_padding);

        if (colored)
            writeCString("\033[0m", out);
    }

    writeCString(" ", out);
    writeCString(grid_symbols.bold_bar, out);
    writeCString("\n", out);

    /// Separator between the names and the values.
    write_row_number_indent();
    writeString(names_line, out);

    /// Data rows; stop as soon as the overall row limit is reached.
    for (size_t row = 0; row < rows_in_chunk && total_rows + row < row_limit; ++row)
    {
        if (row > 0)
        {
            write_row_number_indent();
            writeString(values_line, out);
        }

        if (with_row_numbers)
        {
            /// The row number is right-aligned within the reserved width.
            const auto row_label = std::to_string(row + 1) + ". ";
            for (size_t k = 0; k < row_number_width - row_label.size(); ++k)
                writeCString(" ", out);
            writeString(row_label, out);
        }

        writeCString(grid_symbols.bar, out);

        for (size_t col = 0; col < column_count; ++col)
        {
            if (col > 0)
                writeCString(grid_symbols.bar, out);

            const auto & type = *header.getByPosition(col).type;
            const size_t cell_width = widths[col].empty() ? max_widths[col] : widths[col][row];

            writeValueWithPadding(*columns[col], *serializations[col], row,
                cell_width, max_widths[col], type.shouldAlignRightInPrettyFormats());
        }

        writeCString(grid_symbols.bar, out);
        writeCString("\n", out);
    }

    /// Bottom border.
    write_row_number_indent();
    writeString(bottom_line, out);

    total_rows += rows_in_chunk;
}
void PrettyBlockOutputFormat::writeValueWithPadding(
2021-03-09 14:46:52 +00:00
const IColumn & column, const ISerialization & serialization, size_t row_num,
size_t value_width, size_t pad_to_width, bool align_right)
2019-02-19 18:41:18 +00:00
{
String serialized_value = " ";
2020-05-31 19:22:59 +00:00
{
WriteBufferFromString out_serialize(serialized_value, AppendModeTag());
2021-03-09 14:46:52 +00:00
serialization.serializeText(column, row_num, out_serialize, format_settings);
2020-05-31 19:22:59 +00:00
}
if (value_width > format_settings.pretty.max_value_width)
2020-05-31 19:22:59 +00:00
{
serialized_value.resize(UTF8::computeBytesBeforeWidth(
reinterpret_cast<const UInt8 *>(serialized_value.data()), serialized_value.size(), 0, 1 + format_settings.pretty.max_value_width));
2020-07-10 21:31:25 +00:00
const char * ellipsis = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ? "" : "~";
if (format_settings.pretty.color)
{
serialized_value += "\033[31;1m";
serialized_value += ellipsis;
serialized_value += "\033[0m";
}
else
serialized_value += ellipsis;
value_width = format_settings.pretty.max_value_width;
2020-05-31 19:22:59 +00:00
}
else
serialized_value += ' ';
auto write_padding = [&]()
{
if (pad_to_width > value_width)
for (size_t k = 0; k < pad_to_width - value_width; ++k)
writeChar(' ', out);
};
2020-05-31 19:22:59 +00:00
2021-03-09 14:46:52 +00:00
if (align_right)
2019-02-19 18:41:18 +00:00
{
write_padding();
2020-05-31 19:22:59 +00:00
out.write(serialized_value.data(), serialized_value.size());
2019-02-19 18:41:18 +00:00
}
else
{
2020-05-31 19:22:59 +00:00
out.write(serialized_value.data(), serialized_value.size());
write_padding();
2019-02-19 18:41:18 +00:00
}
}
/// Prints a chunk of the main result, using the header of the main port.
void PrettyBlockOutputFormat::consume(Chunk chunk)
{
    write(std::move(chunk), PortKind::Main);
}
/// Prints the totals as a separate table under a "Totals:" caption,
/// using the header of the totals port.
void PrettyBlockOutputFormat::consumeTotals(Chunk chunk)
{
    /// The row counter is restarted, so rows printed earlier do not count
    /// against the row limit for this table.
    total_rows = 0;

    writeCString("\nTotals:\n", out);
    write(std::move(chunk), PortKind::Totals);
}
/// Prints the extremes as a separate table under an "Extremes:" caption,
/// using the header of the extremes port.
void PrettyBlockOutputFormat::consumeExtremes(Chunk chunk)
{
    /// The row counter is restarted, so rows printed earlier do not count
    /// against the row limit for this table.
    total_rows = 0;

    writeCString("\nExtremes:\n", out);
    write(std::move(chunk), PortKind::Extremes);
}
/// Once the number of counted rows has reached pretty.max_rows,
/// prints a closing note that states the limit.
void PrettyBlockOutputFormat::writeSuffix()
{
    const auto row_limit = format_settings.pretty.max_rows;

    /// Fewer rows than the limit were counted: there is nothing to report.
    if (total_rows < row_limit)
        return;

    writeCString(" Showed first ", out);
    writeIntText(row_limit, out);
    writeCString(".\n", out);
}
2021-10-11 16:11:50 +00:00
/// Registers two output formats in the factory and marks both as supporting parallel formatting:
///   "Pretty"          - uses the format settings as given;
///   "PrettyNoEscapes" - the same, but with pretty.color switched off.
void registerOutputFormatPretty(FormatFactory & factory)
{
    auto create_pretty = [](
        WriteBuffer & buf,
        const Block & sample,
        const RowOutputFormatParams &,
        const FormatSettings & format_settings)
    {
        return std::make_shared<PrettyBlockOutputFormat>(buf, sample, format_settings);
    };

    auto create_pretty_no_escapes = [](
        WriteBuffer & buf,
        const Block & sample,
        const RowOutputFormatParams &,
        const FormatSettings & format_settings)
    {
        /// Work on a copy: the settings passed in are const.
        FormatSettings settings_without_color = format_settings;
        settings_without_color.pretty.color = false;
        return std::make_shared<PrettyBlockOutputFormat>(buf, sample, settings_without_color);
    };

    factory.registerOutputFormat("Pretty", create_pretty);
    factory.markOutputFormatSupportsParallelFormatting("Pretty");

    factory.registerOutputFormat("PrettyNoEscapes", create_pretty_no_escapes);
    factory.markOutputFormatSupportsParallelFormatting("PrettyNoEscapes");
}
}