From a76bd99e84b56929272180c420de359afb80d1e8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Jun 2020 01:12:13 +0300 Subject: [PATCH] Added setting "output_format_pretty_max_value_width" --- src/Common/UTF8Helpers.cpp | 4 +- src/Common/UTF8Helpers.h | 3 + src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + .../Formats/Impl/PrettyBlockOutputFormat.cpp | 59 +++++---- .../Impl/PrettyCompactBlockOutputFormat.cpp | 6 +- .../Impl/PrettySpaceBlockOutputFormat.cpp | 4 +- .../01293_pretty_max_value_width.reference | 114 ++++++++++++++++++ .../01293_pretty_max_value_width.sql | 43 +++++++ 9 files changed, 206 insertions(+), 29 deletions(-) create mode 100644 tests/queries/0_stateless/01293_pretty_max_value_width.reference create mode 100644 tests/queries/0_stateless/01293_pretty_max_value_width.sql diff --git a/src/Common/UTF8Helpers.cpp b/src/Common/UTF8Helpers.cpp index 9be11a4785d..f6bb8e944d6 100644 --- a/src/Common/UTF8Helpers.cpp +++ b/src/Common/UTF8Helpers.cpp @@ -143,7 +143,7 @@ static size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, s ++i; } - if (mode == BytesBeforLimit && width >= limit) + if (mode == BytesBeforLimit && width > limit) return i - (width - limit); switch (decoder.decode(data[i])) @@ -172,7 +172,7 @@ static size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, s else next_width += wcwidth(decoder.codepoint); - if (mode == BytesBeforLimit && next_width >= limit) + if (mode == BytesBeforLimit && next_width > limit) return i - rollback; width = next_width; diff --git a/src/Common/UTF8Helpers.h b/src/Common/UTF8Helpers.h index c1c73783870..94115c7c331 100644 --- a/src/Common/UTF8Helpers.h +++ b/src/Common/UTF8Helpers.h @@ -99,6 +99,9 @@ int queryConvert(const CharT * bytes, int length) /// and include `\t` to the nearest longer length with multiple of eight. size_t computeWidth(const UInt8 * data, size_t size, size_t prefix = 0) noexcept; +/// Calculate the maximum number of bytes, so that substring of this size fits in 'limit' width. +size_t computeBytesBeforeWidth(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept; + } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c3c8eedbb12..d1e36e9f3de 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -211,6 +211,7 @@ struct Settings : public SettingsCollection \ M(SettingUInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \ M(SettingUInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \ + M(SettingUInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \ M(SettingBool, output_format_pretty_color, true, "Use ANSI escape sequences to paint colors in Pretty formats", 0) \ M(SettingUInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \ M(SettingString, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 669baace2f5..9182c728600 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -101,6 +101,7 @@ static FormatSettings getOutputFormatSetting(const Settings & settings, const Co format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line; format_settings.pretty.max_rows = settings.output_format_pretty_max_rows; format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; + format_settings.pretty.max_value_width = settings.output_format_pretty_max_value_width; format_settings.pretty.color = settings.output_format_pretty_color; format_settings.template_settings.resultset_format = settings.format_template_resultset; format_settings.template_settings.row_format = settings.format_template_row; diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 761832d5492..6ad5c2ad719 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -30,14 +30,14 @@ PrettyBlockOutputFormat::PrettyBlockOutputFormat( /// Note that number of code points is just a rough approximation of visible string width. void PrettyBlockOutputFormat::calculateWidths( const Block & header, const Chunk & chunk, - WidthsPerColumn & widths, Widths & max_widths, Widths & name_widths) + WidthsPerColumn & widths, Widths & max_padded_widths, Widths & name_widths) { size_t num_rows = std::min(chunk.getNumRows(), format_settings.pretty.max_rows); size_t num_columns = chunk.getNumColumns(); const auto & columns = chunk.getColumns(); widths.resize(num_columns); - max_widths.resize_fill(num_columns); + max_padded_widths.resize_fill(num_columns); name_widths.resize(num_columns); /// Calculate widths of all values. @@ -57,10 +57,10 @@ void PrettyBlockOutputFormat::calculateWidths( elem.type->serializeAsText(*column, j, out_serialize, format_settings); } - widths[i][j] = std::min(format_settings.pretty.max_column_pad_width + 1, - std::min(format_settings.pretty.max_value_width, - UTF8::computeWidth(reinterpret_cast(serialized_value.data()), serialized_value.size(), prefix))); - max_widths[i] = std::max(max_widths[i], widths[i][j]); + widths[i][j] = UTF8::computeWidth(reinterpret_cast(serialized_value.data()), serialized_value.size(), prefix); + max_padded_widths[i] = std::max(max_padded_widths[i], + std::min(format_settings.pretty.max_column_pad_width, + std::min(format_settings.pretty.max_value_width, widths[i][j]))); } /// And also calculate widths for names of columns. @@ -68,9 +68,9 @@ void PrettyBlockOutputFormat::calculateWidths( // name string doesn't contain Tab, no need to pass `prefix` name_widths[i] = std::min(format_settings.pretty.max_column_pad_width, UTF8::computeWidth(reinterpret_cast(elem.name.data()), elem.name.size())); - max_widths[i] = std::max(max_widths[i], name_widths[i]); + max_padded_widths[i] = std::max(max_padded_widths[i], name_widths[i]); } - prefix += max_widths[i] + 3; + prefix += max_padded_widths[i] + 3; } } @@ -175,18 +175,20 @@ void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) if (i != 0) writeString(middle_values_separator_s, out); - writeCString("│ ", out); + writeCString("│", out); for (size_t j = 0; j < num_columns; ++j) { if (j != 0) - writeCString(" │ ", out); + writeCString("│", out); const auto & type = *header.getByPosition(j).type; - writeValueWithPadding(*columns[j], type, i, widths[j].empty() ? max_widths[j] : widths[j][i], max_widths[j]); + writeValueWithPadding(*columns[j], type, i, + widths[j].empty() ? max_widths[j] : widths[j][i], + max_widths[j]); } - writeCString(" │\n", out); + writeCString("│\n", out); } writeString(bottom_separator_s, out); @@ -198,22 +200,33 @@ void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) void PrettyBlockOutputFormat::writeValueWithPadding( const IColumn & column, const IDataType & type, size_t row_num, size_t value_width, size_t pad_to_width) { - auto write_padding = [&]() + String serialized_value = " "; { - for (size_t k = 0; k < pad_to_width - value_width; ++k) - writeChar(' ', out); - }; - - String serialized_value; - { - WriteBufferFromString out_serialize(serialized_value); + WriteBufferFromString out_serialize(serialized_value, WriteBufferFromString::AppendModeTag()); type.serializeAsText(column, row_num, out_serialize, format_settings); } - if (serialized_value.size() > format_settings.pretty.max_value_width) + + if (value_width > format_settings.pretty.max_value_width) { - serialized_value.resize(format_settings.pretty.max_value_width); - serialized_value += "⋯"; + serialized_value.resize(UTF8::computeBytesBeforeWidth( + reinterpret_cast(serialized_value.data()), serialized_value.size(), 0, 1 + format_settings.pretty.max_value_width)); + + if (format_settings.pretty.color) + serialized_value += "\033[31;1m⋯\033[0m"; + else + serialized_value += "⋯"; + + value_width = format_settings.pretty.max_value_width; } + else + serialized_value += ' '; + + auto write_padding = [&]() + { + if (pad_to_width > value_width) + for (size_t k = 0; k < pad_to_width - value_width; ++k) + writeChar(' ', out); + }; if (type.shouldAlignRightInPrettyFormats()) { diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index 4726332e554..e9040f672b4 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -83,19 +83,19 @@ void PrettyCompactBlockOutputFormat::writeRow( { size_t num_columns = max_widths.size(); - writeCString("│ ", out); + writeCString("│", out); for (size_t j = 0; j < num_columns; ++j) { if (j != 0) - writeCString(" │ ", out); + writeCString("│", out); const auto & type = *header.getByPosition(j).type; const auto & cur_widths = widths[j].empty() ? max_widths[j] : widths[j][row_num]; writeValueWithPadding(*columns[j], type, row_num, cur_widths, max_widths[j]); } - writeCString(" │\n", out); + writeCString("│\n", out); } void PrettyCompactBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp index ca9d1b674d4..6b42ea57e1f 100644 --- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp @@ -34,6 +34,8 @@ void PrettySpaceBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind { if (i != 0) writeCString(" ", out); + else + writeChar(' ', out); const ColumnWithTypeAndName & col = header.getByPosition(i); @@ -67,7 +69,7 @@ void PrettySpaceBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind for (size_t column = 0; column < num_columns; ++column) { if (column != 0) - writeCString(" ", out); + writeCString(" ", out); const auto & type = *header.getByPosition(column).type; auto & cur_width = widths[column].empty() ? max_widths[column] : widths[column][row]; diff --git a/tests/queries/0_stateless/01293_pretty_max_value_width.reference b/tests/queries/0_stateless/01293_pretty_max_value_width.reference new file mode 100644 index 00000000000..9887169f7af --- /dev/null +++ b/tests/queries/0_stateless/01293_pretty_max_value_width.reference @@ -0,0 +1,114 @@ +┏━━━━━━━━┳━━━━━┓ +┃ x  ┃ y  ┃ +┡━━━━━━━━╇━━━━━┩ +│ привет │ мир │ +└────────┴─────┘ +┏━━━━━━━┳━━━━━┓ +┃ x  ┃ y  ┃ +┡━━━━━━━╇━━━━━┩ +│ приве⋯│ мир │ +└───────┴─────┘ +┌─x─────┬─y───┐ +│ приве⋯│ мир │ +└───────┴─────┘ + x y + + приве⋯ мир +┏━━━━━━━┳━━━━━━━┓ +┃ x  ┃ y  ┃ +┡━━━━━━━╇━━━━━━━┩ +│ приве⋯│ мир │ +├───────┼───────┤ +│ мир │ приве⋯│ +└───────┴───────┘ +┌─x─────┬─y─────┐ +│ приве⋯│ мир │ +│ мир │ приве⋯│ +└───────┴───────┘ + x y + + приве⋯ мир + мир приве⋯ +┏━━━━━━━━┳━━━━━┓ +┃ x  ┃ y  ┃ +┡━━━━━━━━╇━━━━━┩ +│ привет │ мир │ +└────────┴─────┘ +┌─x──────┬─y───┐ +│ привет │ мир │ +└────────┴─────┘ + x y + + привет мир +┏━━━━━━━━┳━━━━━━━━┓ +┃ x  ┃ y  ┃ +┡━━━━━━━━╇━━━━━━━━┩ +│ привет │ мир │ +├────────┼────────┤ +│ мир │ привет │ +└────────┴────────┘ +┌─x──────┬─y──────┐ +│ привет │ мир │ +│ мир │ привет │ +└────────┴────────┘ + x y + + привет мир + мир привет +┏━━━┳━━━┓ +┃ x ┃ y ┃ +┡━━━╇━━━┩ +│ п⋯│ м⋯│ +└───┴───┘ +┌─x─┬─y─┐ +│ п⋯│ м⋯│ +└───┴───┘ + x y + + п⋯ м⋯ +┏━━━┳━━━┓ +┃ x ┃ y ┃ +┡━━━╇━━━┩ +│ п⋯│ м⋯│ +├───┼───┤ +│ м⋯│ п⋯│ +└───┴───┘ +┌─x─┬─y─┐ +│ п⋯│ м⋯│ +│ м⋯│ п⋯│ +└───┴───┘ + x y + + п⋯ м⋯ + м⋯ п⋯ +┏━━━┳━━━┓ +┃ x ┃ y ┃ +┡━━━╇━━━┩ +│ ⋯ │ ⋯ │ +└───┴───┘ +┌─x─┬─y─┐ +│ ⋯ │ ⋯ │ +└───┴───┘ + x y + + ⋯ ⋯ +┏━━━┳━━━┓ +┃ x ┃ y ┃ +┡━━━╇━━━┩ +│ ⋯ │ ⋯ │ +├───┼───┤ +│ ⋯ │ ⋯ │ +└───┴───┘ +┌─x─┬─y─┐ +│ ⋯ │ ⋯ │ +│ ⋯ │ ⋯ │ +└───┴───┘ + x y + + ⋯ ⋯ + ⋯ ⋯ +┏━━━┳━━━┓ +┃ x ┃ y ┃ +┡━━━╇━━━┩ +│ ⋯ │ ⋯ │ +└───┴───┘ diff --git a/tests/queries/0_stateless/01293_pretty_max_value_width.sql b/tests/queries/0_stateless/01293_pretty_max_value_width.sql new file mode 100644 index 00000000000..992aec06f0a --- /dev/null +++ b/tests/queries/0_stateless/01293_pretty_max_value_width.sql @@ -0,0 +1,43 @@ +SELECT 'привет' AS x, 'мир' AS y FORMAT Pretty; + +SET output_format_pretty_max_value_width = 5; +SELECT 'привет' AS x, 'мир' AS y FORMAT Pretty; +SELECT 'привет' AS x, 'мир' AS y FORMAT PrettyCompact; +SELECT 'привет' AS x, 'мир' AS y FORMAT PrettySpace; + +SELECT * FROM VALUES('x String, y String', ('привет', 'мир'), ('мир', 'привет')) FORMAT Pretty; +SELECT * FROM VALUES('x String, y String', ('привет', 'мир'), ('мир', 'привет')) FORMAT PrettyCompact; +SELECT * FROM VALUES('x String, y String', ('привет', 'мир'), ('мир', 'привет')) FORMAT PrettySpace; + +SET output_format_pretty_max_value_width = 6; + +SELECT 'привет' AS x, 'мир' AS y FORMAT Pretty; +SELECT 'привет' AS x, 'мир' AS y FORMAT PrettyCompact; +SELECT 'привет' AS x, 'мир' AS y FORMAT PrettySpace; + +SELECT * FROM VALUES('x String, y String', ('привет', 'мир'), ('мир', 'привет')) FORMAT Pretty; +SELECT * FROM VALUES('x String, y String', ('привет', 'мир'), ('мир', 'привет')) FORMAT PrettyCompact; +SELECT * FROM VALUES('x String, y String', ('привет', 'мир'), ('мир', 'привет')) FORMAT PrettySpace; + +SET output_format_pretty_max_value_width = 1; + +SELECT 'привет' AS x, 'мир' AS y FORMAT Pretty; +SELECT 'привет' AS x, 'мир' AS y FORMAT PrettyCompact; +SELECT 'привет' AS x, 'мир' AS y FORMAT PrettySpace; + +SELECT * FROM VALUES('x String, y String', ('привет', 'мир'), ('мир', 'привет')) FORMAT Pretty; +SELECT * FROM VALUES('x String, y String', ('привет', 'мир'), ('мир', 'привет')) FORMAT PrettyCompact; +SELECT * FROM VALUES('x String, y String', ('привет', 'мир'), ('мир', 'привет')) FORMAT PrettySpace; + +SET output_format_pretty_max_value_width = 0; + +SELECT 'привет' AS x, 'мир' AS y FORMAT Pretty; +SELECT 'привет' AS x, 'мир' AS y FORMAT PrettyCompact; +SELECT 'привет' AS x, 'мир' AS y FORMAT PrettySpace; + +SELECT * FROM VALUES('x String, y String', ('привет', 'мир'), ('мир', 'привет')) FORMAT Pretty; +SELECT * FROM VALUES('x String, y String', ('привет', 'мир'), ('мир', 'привет')) FORMAT PrettyCompact; +SELECT * FROM VALUES('x String, y String', ('привет', 'мир'), ('мир', 'привет')) FORMAT PrettySpace; + +SET output_format_pretty_color = 0; +SELECT 'привет' AS x, 'мир' AS y FORMAT Pretty;