namespace DB::ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}

namespace
{

/// Advance `pos` past any leading run of whitespace and `--` single-line comments.
///
/// @param pos             Cursor into the query text; updated in place. On return it points
///                        either at `end` or at the first byte that is neither whitespace
///                        nor the start of a `--` comment.
/// @param end             One-past-the-last byte of the query text.
/// @param print_comments  When true, every skipped comment (from its leading `--` up to, but
///                        not including, the terminating newline) is written to std::cout
///                        followed by "\n".
///
/// Only `--` comments are recognised here; anything else is left for the caller.
void skipSpacesAndComments(const char *& pos, const char * end, bool print_comments)
{
    while (true)
    {
        /// Skip whitespace so that blanks after the last query never reach the parser.
        /// The cast is required: passing a negative `char` (any byte >= 0x80 where `char`
        /// is signed, e.g. UTF-8 text) to std::isspace is undefined behaviour.
        while (pos != end && std::isspace(static_cast<unsigned char>(*pos)))
            ++pos;

        /// A comment needs at least the two dashes. Using `>= 2` rather than `> 2` means a
        /// bare "--" sitting at the very end of the input is also consumed as a comment.
        if (end - pos < 2 || pos[0] != '-' || pos[1] != '-')
            return;

        const char * comment_begin = pos;
        pos += 2;

        /// The comment extends to the end of the line; the newline itself is consumed by the
        /// whitespace loop on the next iteration.
        while (pos != end && *pos != '\n')
            ++pos;

        if (print_comments)
            std::cout << std::string_view(comment_begin, static_cast<size_t>(pos - comment_begin)) << "\n";
    }
}

}
-} - int mainEntryClickHouseFormat(int argc, char ** argv) { using namespace DB; @@ -55,8 +83,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv) desc.add_options() ("query", po::value(), "query to format") ("help,h", "produce help message") + ("comments", "keep comments in the output") ("hilite", "add syntax highlight with ANSI terminal escape sequences") ("oneline", "format in single line") + ("max_line_length", po::value()->default_value(0), "format in single line queries with length less than specified") ("quiet,q", "just check syntax, no output on success") ("multiquery,n", "allow multiple queries in the same file") ("obfuscate", "obfuscate instead of formatting") @@ -88,6 +118,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv) bool oneline = options.count("oneline"); bool quiet = options.count("quiet"); bool multiple = options.count("multiquery"); + bool print_comments = options.count("comments"); + size_t max_line_length = options["max_line_length"].as(); bool obfuscate = options.count("obfuscate"); bool backslash = options.count("backslash"); bool allow_settings_after_format_in_insert = options.count("allow_settings_after_format_in_insert"); @@ -104,6 +136,19 @@ int mainEntryClickHouseFormat(int argc, char ** argv) return 2; } + if (oneline && max_line_length) + { + std::cerr << "Options 'oneline' and 'max_line_length' are mutually exclusive." << std::endl; + return 2; + } + + if (max_line_length > 255) + { + std::cerr << "Option 'max_line_length' must be less than 256." 
<< std::endl; + return 2; + } + + String query; if (options.count("query")) @@ -124,7 +169,6 @@ int mainEntryClickHouseFormat(int argc, char ** argv) if (options.count("seed")) { - std::string seed; hash_func.update(options["seed"].as()); } @@ -179,30 +223,68 @@ int mainEntryClickHouseFormat(int argc, char ** argv) { const char * pos = query.data(); const char * end = pos + query.size(); + skipSpacesAndComments(pos, end, print_comments); ParserQuery parser(end, allow_settings_after_format_in_insert); - do + while (pos != end) { + size_t approx_query_length = multiple ? find_first_symbols<';'>(pos, end) - pos : end - pos; + ASTPtr res = parseQueryAndMovePosition( parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth); - /// For insert query with data(INSERT INTO ... VALUES ...), that will lead to the formatting failure, - /// we should throw an exception early, and make exception message more readable. - if (const auto * insert_query = res->as(); insert_query && insert_query->data) + std::unique_ptr insert_query_payload = nullptr; + /// If the query is INSERT ... VALUES, then we will try to parse the data. 
+ if (auto * insert_query = res->as(); insert_query && insert_query->data) { - throw Exception(DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA, - "Can't format ASTInsertQuery with data, since data will be lost"); + if ("Values" != insert_query->format) + throw Exception(DB::ErrorCodes::NOT_IMPLEMENTED, + "Can't format INSERT query with data format '{}'", insert_query->format); + + /// We assume that data ends with a newline character (same as client does) + const char * this_query_end = find_first_symbols<'\n'>(insert_query->data, end); + insert_query->end = this_query_end; + pos = this_query_end; + insert_query_payload = getReadBufferFromASTInsertQuery(res); } if (!quiet) { if (!backslash) { - WriteBufferFromOStream res_buf(std::cout, 4096); - formatAST(*res, res_buf, hilite, oneline); - res_buf.finalize(); - if (multiple) - std::cout << "\n;\n"; + WriteBufferFromOwnString str_buf; + formatAST(*res, str_buf, hilite, oneline || approx_query_length < max_line_length); + + if (insert_query_payload) + { + str_buf.write(' '); + copyData(*insert_query_payload, str_buf); + if (multiple) + str_buf.write('\n'); + } + + String res_string = str_buf.str(); + const char * s_pos = res_string.data(); + const char * s_end = s_pos + res_string.size(); + WriteBufferFromOStream res_cout(std::cout, 4096); + /// For multiline queries we print ';' at new line, + /// but for single line queries we print ';' at the same line + bool has_multiple_lines = false; + while (s_pos != s_end) + { + if (*s_pos == '\n') + has_multiple_lines = true; + res_cout.write(*s_pos++); + } + res_cout.finalize(); + + if (multiple && !insert_query_payload) + { + if (oneline || !has_multiple_lines) + std::cout << ";\n"; + else + std::cout << "\n;\n"; + } std::cout << std::endl; } /// add additional '\' at the end of each line; @@ -230,27 +312,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv) std::cout << std::endl; } } - - do - { - /// skip spaces to avoid throw exception after last query - 
while (pos != end && std::isspace(*pos)) - ++pos; - - /// for skip comment after the last query and to not throw exception - if (end - pos > 2 && *pos == '-' && *(pos + 1) == '-') - { - pos += 2; - /// skip until the end of the line - while (pos != end && *pos != '\n') - ++pos; - } - /// need to parse next sql - else - break; - } while (pos != end); - - } while (multiple && pos != end); + skipSpacesAndComments(pos, end, print_comments); + if (!multiple) + break; + } } } catch (...) diff --git a/tests/queries/0_stateless/02946_format_values.reference b/tests/queries/0_stateless/02946_format_values.reference new file mode 100644 index 00000000000..8f896fd3ba0 --- /dev/null +++ b/tests/queries/0_stateless/02946_format_values.reference @@ -0,0 +1,139 @@ +INSERT INTO table1 FORMAT Values (1, [1,3], 'fd'), (2, [2,4], 'sd'), (3, [3,5], 'td') +====================================== +SELECT a +FROM table1 +; + +INSERT INTO table1 FORMAT Values (1, [1,3], 'fd'), (2, [2,4], 'sd'), (3, [3,5], 'td'); + +SELECT b +FROM table1 +; + +====================================== +-- begin +SELECT a +FROM table1 +; + +-- some insert query +INSERT INTO table1 FORMAT Values (1, [1,3], 'fd'), (2, [2,4], 'sd'), (3, [3,5], 'td'); + +-- more comments +-- in a row +SELECT b +FROM table1 +; + +-- end +====================================== +SELECT b FROM table1; + +SELECT b, c FROM table1; + +SELECT + b, + c, + d +FROM table1 +; + +SELECT + b, + c, + d, + e +FROM table1 +; + +SELECT + b, + c, + d, + e, + f +FROM table1 +; + +SELECT + b, + c +FROM +( + SELECT + b, + c + FROM table1 +) +; + +SELECT + b, + c, + d, + e, + f +FROM +( + SELECT + b, + c, + d, + e, + f + FROM table1 +) +; + +====================================== +SELECT b FROM table1; + +SELECT b, c FROM table1; + +SELECT b, c, d FROM table1; + +SELECT b, c, d, e FROM table1; + +SELECT b, c, d, e, f FROM table1; + +SELECT b, c FROM (SELECT b, c FROM table1); + +SELECT + b, + c, + d, + e, + f +FROM +( + SELECT + b, + c, + d, + e, + f + 
FROM table1 +) +; + +====================================== +SELECT + b, + c, + d, + e, + f +FROM +( + SELECT + b, + c, + d, + e, + f + FROM table1 +) +SELECT b, c, d, e, f FROM (SELECT b, c, d, e, f FROM table1) +====================================== +BAD_ARGUMENTS +BAD_ARGUMENTS diff --git a/tests/queries/0_stateless/02946_format_values.sh b/tests/queries/0_stateless/02946_format_values.sh new file mode 100755 index 00000000000..cd44d31d1a1 --- /dev/null +++ b/tests/queries/0_stateless/02946_format_values.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo "insert into table1 values (1, [1,3], 'fd'), (2, [2,4], 'sd'), (3, [3,5], 'td')" | ${CLICKHOUSE_FORMAT} + +echo "======================================" + +cat <&1 | grep -o 'BAD_ARGUMENTS' ||: +echo "select 1" | ${CLICKHOUSE_FORMAT} --comments --max_line_length=120 --oneline 2>&1 | grep -o 'BAD_ARGUMENTS' ||: