ClickHouse/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp

386 lines
12 KiB
C++
Raw Normal View History

2019-02-19 18:41:18 +00:00
#include <sys/ioctl.h>
2020-02-17 14:27:09 +00:00
#include <unistd.h>
2019-02-19 18:41:18 +00:00
#include <Processors/Formats/Impl/PrettyBlockOutputFormat.h>
#include <Formats/FormatFactory.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromString.h>
2019-10-07 18:56:03 +00:00
#include <Common/PODArray.h>
2019-02-19 18:41:18 +00:00
#include <Common/UTF8Helpers.h>
namespace DB
{
/// Empty in the visible source: no error codes are declared here.
namespace ErrorCodes
{
}
/// Construct the format over the given output buffer and header, remembering the format settings.
/// Additionally queries the terminal attached to stdout for its size; when the query succeeds,
/// the reported number of columns is stored in `terminal_width` (otherwise the member is left untouched).
PrettyBlockOutputFormat::PrettyBlockOutputFormat(
    WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_)
    : IOutputFormat(header_, out_), format_settings(format_settings_)
{
    struct winsize terminal_size;
    const bool size_is_known = (ioctl(STDOUT_FILENO, TIOCGWINSZ, &terminal_size) == 0);

    /// `terminal_size` is only meaningful when ioctl reported success.
    if (size_is_known)
        terminal_width = terminal_size.ws_col;
}
/// Evaluate the visible width of the values and column names.
/// Note that number of code points is just a rough approximation of visible string width.
void PrettyBlockOutputFormat::calculateWidths(
const Block & header, const Chunk & chunk,
WidthsPerColumn & widths, Widths & max_padded_widths, Widths & name_widths)
2019-02-19 18:41:18 +00:00
{
2020-05-31 19:22:59 +00:00
size_t num_rows = std::min(chunk.getNumRows(), format_settings.pretty.max_rows);
auto max_row_number = std::to_string(num_rows);
row_number_width = max_row_number.size() + 2;
2019-02-19 18:41:18 +00:00
size_t num_columns = chunk.getNumColumns();
2020-04-22 06:01:33 +00:00
const auto & columns = chunk.getColumns();
2019-02-19 18:41:18 +00:00
widths.resize(num_columns);
max_padded_widths.resize_fill(num_columns);
2019-02-19 18:41:18 +00:00
name_widths.resize(num_columns);
/// Calculate widths of all values.
String serialized_value;
size_t prefix = 2; // Tab character adjustment
for (size_t i = 0; i < num_columns; ++i)
{
2020-04-22 06:01:33 +00:00
const auto & elem = header.getByPosition(i);
const auto & column = columns[i];
2019-02-19 18:41:18 +00:00
widths[i].resize(num_rows);
for (size_t j = 0; j < num_rows; ++j)
{
{
WriteBufferFromString out_serialize(serialized_value);
elem.type->serializeAsText(*column, j, out_serialize, format_settings);
2019-02-19 18:41:18 +00:00
}
2020-05-31 22:17:40 +00:00
/// Avoid calculating width of too long strings by limiting the size in bytes.
/// Note that it is just an estimation. 4 is the maximum size of Unicode code point in bytes in UTF-8.
/// But it's possible that the string is long in bytes but very short in visible size.
/// (e.g. non-printable characters, diacritics, combining characters)
2020-06-02 02:39:21 +00:00
if (format_settings.pretty.max_value_width)
{
size_t max_byte_size = format_settings.pretty.max_value_width * 4;
if (serialized_value.size() > max_byte_size)
serialized_value.resize(max_byte_size);
}
2020-05-31 22:17:40 +00:00
widths[i][j] = UTF8::computeWidth(reinterpret_cast<const UInt8 *>(serialized_value.data()), serialized_value.size(), prefix);
2020-06-02 13:56:03 +00:00
max_padded_widths[i] = std::max<UInt64>(max_padded_widths[i],
std::min<UInt64>(format_settings.pretty.max_column_pad_width,
std::min<UInt64>(format_settings.pretty.max_value_width, widths[i][j])));
2019-02-19 18:41:18 +00:00
}
/// And also calculate widths for names of columns.
{
// name string doesn't contain Tab, no need to pass `prefix`
name_widths[i] = std::min<UInt64>(format_settings.pretty.max_column_pad_width,
UTF8::computeWidth(reinterpret_cast<const UInt8 *>(elem.name.data()), elem.name.size()));
2020-06-02 13:56:03 +00:00
max_padded_widths[i] = std::max<UInt64>(max_padded_widths[i], name_widths[i]);
2019-02-19 18:41:18 +00:00
}
prefix += max_padded_widths[i] + 3;
2019-02-19 18:41:18 +00:00
}
}
namespace
{

/// Grid symbols are used for printing grid borders in a terminal.
/// The default member values are the UTF-8 box-drawing characters:
/// "bold" variants frame the header row, the plain variants frame the value rows.
struct GridSymbols
{
    const char * bold_left_top_corner = "┏";
    const char * bold_right_top_corner = "┓";
    const char * left_bottom_corner = "└";
    const char * right_bottom_corner = "┘";
    const char * bold_left_separator = "┡";
    const char * left_separator = "├";
    const char * bold_right_separator = "┩";
    const char * right_separator = "┤";
    const char * bold_top_separator = "┳";
    const char * bold_middle_separator = "╇";
    const char * middle_separator = "┼";
    const char * bottom_separator = "┴";
    const char * bold_dash = "━";
    const char * dash = "─";
    const char * bold_bar = "┃";
    const char * bar = "│";
};

/// UTF-8 grid: every field keeps its default value.
const GridSymbols utf8_grid_symbols{};

/// ASCII grid: the initializers follow the declaration order of the fields above.
const GridSymbols ascii_grid_symbols
{
    "+",    /// bold_left_top_corner
    "+",    /// bold_right_top_corner
    "+",    /// left_bottom_corner
    "+",    /// right_bottom_corner
    "+",    /// bold_left_separator
    "+",    /// left_separator
    "+",    /// bold_right_separator
    "+",    /// right_separator
    "+",    /// bold_top_separator
    "+",    /// bold_middle_separator
    "+",    /// middle_separator
    "+",    /// bottom_separator
    "-",    /// bold_dash
    "-",    /// dash
    "|",    /// bold_bar
    "|"     /// bar
};

}
2019-02-19 18:41:18 +00:00
void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind)
{
UInt64 max_rows = format_settings.pretty.max_rows;
if (total_rows >= max_rows)
{
total_rows += chunk.getNumRows();
return;
}
auto num_rows = chunk.getNumRows();
auto num_columns = chunk.getNumColumns();
2020-04-22 06:01:33 +00:00
const auto & columns = chunk.getColumns();
const auto & header = getPort(port_kind).getHeader();
2019-02-19 18:41:18 +00:00
WidthsPerColumn widths;
Widths max_widths;
Widths name_widths;
calculateWidths(header, chunk, widths, max_widths, name_widths);
const GridSymbols & grid_symbols = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ?
utf8_grid_symbols :
ascii_grid_symbols;
2019-02-19 18:41:18 +00:00
/// Create separators
std::stringstream top_separator;
std::stringstream middle_names_separator;
std::stringstream middle_values_separator;
std::stringstream bottom_separator;
top_separator << grid_symbols.bold_left_top_corner;
middle_names_separator << grid_symbols.bold_left_separator;
middle_values_separator << grid_symbols.left_separator;
bottom_separator << grid_symbols.left_bottom_corner;
2019-02-19 18:41:18 +00:00
for (size_t i = 0; i < num_columns; ++i)
{
if (i != 0)
{
top_separator << grid_symbols.bold_top_separator;
middle_names_separator << grid_symbols.bold_middle_separator;
middle_values_separator << grid_symbols.middle_separator;
bottom_separator << grid_symbols.bottom_separator;
2019-02-19 18:41:18 +00:00
}
for (size_t j = 0; j < max_widths[i] + 2; ++j)
{
top_separator << grid_symbols.bold_dash;
middle_names_separator << grid_symbols.bold_dash;
middle_values_separator << grid_symbols.dash;
bottom_separator << grid_symbols.dash;
2019-02-19 18:41:18 +00:00
}
}
top_separator << grid_symbols.bold_right_top_corner << "\n";
middle_names_separator << grid_symbols.bold_right_separator << "\n";
middle_values_separator << grid_symbols.right_separator << "\n";
bottom_separator << grid_symbols.right_bottom_corner << "\n";
2019-02-19 18:41:18 +00:00
std::string top_separator_s = top_separator.str();
std::string middle_names_separator_s = middle_names_separator.str();
std::string middle_values_separator_s = middle_values_separator.str();
std::string bottom_separator_s = bottom_separator.str();
/// Output the block
writeString(top_separator_s, out);
/// Names
writeCString(grid_symbols.bold_bar, out);
writeCString(" ", out);
2019-02-19 18:41:18 +00:00
for (size_t i = 0; i < num_columns; ++i)
{
if (i != 0)
{
writeCString(" ", out);
writeCString(grid_symbols.bold_bar, out);
writeCString(" ", out);
}
2019-02-19 18:41:18 +00:00
2020-04-22 06:01:33 +00:00
const auto & col = header.getByPosition(i);
2019-02-19 18:41:18 +00:00
if (format_settings.pretty.color)
writeCString("\033[1m", out);
if (col.type->shouldAlignRightInPrettyFormats())
{
for (size_t k = 0; k < max_widths[i] - name_widths[i]; ++k)
writeChar(' ', out);
writeString(col.name, out);
}
else
{
writeString(col.name, out);
for (size_t k = 0; k < max_widths[i] - name_widths[i]; ++k)
writeChar(' ', out);
}
if (format_settings.pretty.color)
writeCString("\033[0m", out);
}
writeCString(" ", out);
writeCString(grid_symbols.bold_bar, out);
writeCString("\n", out);
2019-02-19 18:41:18 +00:00
writeString(middle_names_separator_s, out);
for (size_t i = 0; i < num_rows && total_rows + i < max_rows; ++i)
{
if (i != 0)
writeString(middle_values_separator_s, out);
writeCString(grid_symbols.bar, out);
2019-02-19 18:41:18 +00:00
for (size_t j = 0; j < num_columns; ++j)
{
if (j != 0)
writeCString(grid_symbols.bar, out);
2019-02-19 18:41:18 +00:00
2020-04-22 06:01:33 +00:00
const auto & type = *header.getByPosition(j).type;
writeValueWithPadding(*columns[j], type, i,
widths[j].empty() ? max_widths[j] : widths[j][i],
max_widths[j]);
2019-02-19 18:41:18 +00:00
}
writeCString(grid_symbols.bar, out);
writeCString("\n", out);
2019-02-19 18:41:18 +00:00
}
writeString(bottom_separator_s, out);
total_rows += num_rows;
}
void PrettyBlockOutputFormat::writeValueWithPadding(
const IColumn & column, const IDataType & type, size_t row_num, size_t value_width, size_t pad_to_width)
{
String serialized_value = " ";
2020-05-31 19:22:59 +00:00
{
WriteBufferFromString out_serialize(serialized_value, WriteBufferFromString::AppendModeTag());
2020-05-31 19:22:59 +00:00
type.serializeAsText(column, row_num, out_serialize, format_settings);
}
if (value_width > format_settings.pretty.max_value_width)
2020-05-31 19:22:59 +00:00
{
serialized_value.resize(UTF8::computeBytesBeforeWidth(
reinterpret_cast<const UInt8 *>(serialized_value.data()), serialized_value.size(), 0, 1 + format_settings.pretty.max_value_width));
2020-07-10 21:31:25 +00:00
const char * ellipsis = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ? "" : "~";
if (format_settings.pretty.color)
{
serialized_value += "\033[31;1m";
serialized_value += ellipsis;
serialized_value += "\033[0m";
}
else
serialized_value += ellipsis;
value_width = format_settings.pretty.max_value_width;
2020-05-31 19:22:59 +00:00
}
else
serialized_value += ' ';
auto write_padding = [&]()
{
if (pad_to_width > value_width)
for (size_t k = 0; k < pad_to_width - value_width; ++k)
writeChar(' ', out);
};
2020-05-31 19:22:59 +00:00
2019-02-19 18:41:18 +00:00
if (type.shouldAlignRightInPrettyFormats())
{
write_padding();
2020-05-31 19:22:59 +00:00
out.write(serialized_value.data(), serialized_value.size());
2019-02-19 18:41:18 +00:00
}
else
{
2020-05-31 19:22:59 +00:00
out.write(serialized_value.data(), serialized_value.size());
write_padding();
2019-02-19 18:41:18 +00:00
}
}
/// Handle a chunk of the main result: render it with write() using the header of the Main port.
void PrettyBlockOutputFormat::consume(Chunk chunk)
{
write(chunk, PortKind::Main);
}
/// Print the totals chunk as a separate table under a "Totals:" caption,
/// using the header of the Totals port.
void PrettyBlockOutputFormat::consumeTotals(Chunk chunk)
{
    /// Restart the row counter, so that write() applies the `pretty.max_rows` limit to this table from zero.
    /// NOTE(review): the counter is reset before writeSuffixIfNot(); if that helper forwards to writeSuffix(),
    /// the check there sees the zeroed counter - confirm this order is intended.
    total_rows = 0;
    writeSuffixIfNot();

    writeCString("\nTotals:\n", out);
    write(chunk, PortKind::Totals);
}
/// Print the extremes chunk as a separate table under an "Extremes:" caption,
/// using the header of the Extremes port.
void PrettyBlockOutputFormat::consumeExtremes(Chunk chunk)
{
    /// Restart the row counter, so that write() applies the `pretty.max_rows` limit to this table from zero.
    /// NOTE(review): the counter is reset before writeSuffixIfNot(); if that helper forwards to writeSuffix(),
    /// the check there sees the zeroed counter - confirm this order is intended.
    total_rows = 0;
    writeSuffixIfNot();

    writeCString("\nExtremes:\n", out);
    write(chunk, PortKind::Extremes);
}
/// Print the " Showed first N." note, where N is `pretty.max_rows`,
/// when the number of rows seen so far has reached that limit. Otherwise print nothing.
void PrettyBlockOutputFormat::writeSuffix()
{
    const auto row_limit = format_settings.pretty.max_rows;

    /// Fewer rows than the limit were seen: nothing to report.
    if (total_rows < row_limit)
        return;

    writeCString(" Showed first ", out);
    writeIntText(row_limit, out);
    writeCString(".\n", out);
}
/// Finish the output by calling writeSuffixIfNot(), which is declared outside this file.
/// NOTE(review): judging by its name it writes the suffix only if that has not happened yet - confirm in the class declaration.
void PrettyBlockOutputFormat::finalize()
{
writeSuffixIfNot();
}
/// Register two output formats backed by PrettyBlockOutputFormat:
///   "Pretty"          - uses the format settings as given;
///   "PrettyNoEscapes" - same, but with `pretty.color` forced to false.
void registerOutputFormatProcessorPretty(FormatFactory & factory)
{
    factory.registerOutputFormatProcessor("Pretty", [](
        WriteBuffer & out_buffer,
        const Block & header,
        FormatFactory::WriteCallback,
        const FormatSettings & settings)
    {
        return std::make_shared<PrettyBlockOutputFormat>(out_buffer, header, settings);
    });

    factory.registerOutputFormatProcessor("PrettyNoEscapes", [](
        WriteBuffer & out_buffer,
        const Block & header,
        FormatFactory::WriteCallback,
        const FormatSettings & settings)
    {
        /// Work on a copy: the caller's settings are const and must stay as they are.
        FormatSettings colorless_settings = settings;
        colorless_settings.pretty.color = false;
        return std::make_shared<PrettyBlockOutputFormat>(out_buffer, header, colorless_settings);
    });
}
}