diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index b05fb2ba334..e039548e50a 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -25,7 +25,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list [LIMIT [n, ]m] [WITH TIES] [SETTINGS ...] [UNION ...] -[INTO OUTFILE filename [COMPRESSION type] ] +[INTO OUTFILE filename [COMPRESSION type [LEVEL level]] ] [FORMAT format] ``` diff --git a/docs/en/sql-reference/statements/select/into-outfile.md b/docs/en/sql-reference/statements/select/into-outfile.md index db1ed2551a7..f101c10ff60 100644 --- a/docs/en/sql-reference/statements/select/into-outfile.md +++ b/docs/en/sql-reference/statements/select/into-outfile.md @@ -6,16 +6,18 @@ sidebar_label: INTO OUTFILE `INTO OUTFILE` clause redirects the result of a `SELECT` query to a file on the **client** side. -Compressed files are supported. Compression type is detected by the extension of the file name (mode `'auto'` is used by default). Or it can be explicitly specified in a `COMPRESSION` clause. +Compressed files are supported. Compression type is detected by the extension of the file name (mode `'auto'` is used by default). Or it can be explicitly specified in a `COMPRESSION` clause. The compression level for a certain compression type can be specified in a `LEVEL` clause. **Syntax** ```sql -SELECT INTO OUTFILE file_name [COMPRESSION type] +SELECT INTO OUTFILE file_name [COMPRESSION type [LEVEL level]] ``` `file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`. +`level` is a numeric literal. Positive integers in the following ranges are supported: `1-12` for `lz4` type, `1-22` for `zstd` type and `1-9` for other compression types. 
+ ## Implementation Details - This functionality is available in the [command-line client](../../../interfaces/cli.md) and [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Thus a query sent via [HTTP interface](../../../interfaces/http.md) will fail. diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md index 6222efe5dd1..1edf93faeaa 100644 --- a/docs/ru/sql-reference/statements/select/index.md +++ b/docs/ru/sql-reference/statements/select/index.md @@ -24,7 +24,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list [LIMIT [n, ]m] [WITH TIES] [SETTINGS ...] [UNION ALL ...] -[INTO OUTFILE filename [COMPRESSION type] ] +[INTO OUTFILE filename [COMPRESSION type [LEVEL level]] ] [FORMAT format] ``` diff --git a/docs/ru/sql-reference/statements/select/into-outfile.md b/docs/ru/sql-reference/statements/select/into-outfile.md index 2952ef2a3a2..81d48badbe6 100644 --- a/docs/ru/sql-reference/statements/select/into-outfile.md +++ b/docs/ru/sql-reference/statements/select/into-outfile.md @@ -6,16 +6,18 @@ sidebar_label: INTO OUTFILE Секция `INTO OUTFILE` перенаправляет результат запроса `SELECT` в файл на стороне **клиента**. -Поддерживаются сжатые файлы. Формат сжатия определяется по расширению файла (по умолчанию используется режим `'auto'`), либо он может быть задан явно в секции `COMPRESSION`. +Поддерживаются сжатые файлы. Формат сжатия определяется по расширению файла (по умолчанию используется режим `'auto'`), либо он может быть задан явно в секции `COMPRESSION`. Уровень сжатия для конкретного алгоритма может быть задан в секции `LEVEL`. **Синтаксис** ```sql -SELECT INTO OUTFILE file_name [COMPRESSION type] +SELECT INTO OUTFILE file_name [COMPRESSION type [LEVEL level]] ``` `file_name` и `type` задаются в виде строковых литералов. Поддерживаются форматы сжатия: `'none`', `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`. +`level` задается в виде числового литерала. 
Поддерживаются положительные значения в следующих диапазонах: `1-12` для формата `lz4`, `1-22` для формата `zstd` и `1-9` для остальных форматов. + ## Детали реализации {#implementation-details} - Эта функция доступна только в следующих интерфейсах: [клиент командной строки](../../../interfaces/cli.md) и [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Таким образом, запрос, отправленный через [HTTP интерфейс](../../../interfaces/http.md) вернет ошибку. diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 8230c97f49c..0e243f97aaf 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -524,17 +524,35 @@ try const auto & out_file_node = query_with_output->out_file->as(); out_file = out_file_node.value.safeGet(); - std::string compression_method; + std::string compression_method_string; + if (query_with_output->compression) { const auto & compression_method_node = query_with_output->compression->as(); - compression_method = compression_method_node.value.safeGet(); + compression_method_string = compression_method_node.value.safeGet(); + } + + CompressionMethod compression_method = chooseCompressionMethod(out_file, compression_method_string); + UInt64 compression_level = 3; + + if (query_with_output->compression_level) + { + const auto & compression_level_node = query_with_output->compression_level->as(); + bool res = compression_level_node.value.tryGet(compression_level); + auto range = getCompressionLevelRange(compression_method); + + if (!res || compression_level < range.first || compression_level > range.second) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Invalid compression level, must be positive integer in range {}-{}", + range.first, + range.second); } out_file_buf = wrapWriteBufferWithCompressionMethod( std::make_unique(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT), - chooseCompressionMethod(out_file, compression_method), - /* compression level = */ 3 + compression_method, + 
compression_level ); // We are writing to file, so default format is the same as in non-interactive mode. diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index 0da235c074c..449335407aa 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -98,6 +98,19 @@ CompressionMethod chooseCompressionMethod(const std::string & path, const std::s ErrorCodes::NOT_IMPLEMENTED); } +std::pair getCompressionLevelRange(const CompressionMethod & method) +{ + switch (method) + { + case CompressionMethod::Zstd: + return {1, 22}; + case CompressionMethod::Lz4: + return {1, 12}; + default: + return {1, 9}; + } +} + static std::unique_ptr createCompressedWrapper( std::unique_ptr nested, CompressionMethod method, size_t buf_size, char * existing_memory, size_t alignment, int zstd_window_log_max) { diff --git a/src/IO/CompressionMethod.h b/src/IO/CompressionMethod.h index a399a756c13..29a18e5e978 100644 --- a/src/IO/CompressionMethod.h +++ b/src/IO/CompressionMethod.h @@ -46,6 +46,9 @@ std::string toContentEncodingName(CompressionMethod method); */ CompressionMethod chooseCompressionMethod(const std::string & path, const std::string & hint); +/// Get a range of the valid compression levels for the compression method. 
+std::pair getCompressionLevelRange(const CompressionMethod & method); + std::unique_ptr wrapReadBufferWithCompressionMethod( std::unique_ptr nested, CompressionMethod method, diff --git a/src/Parsers/ASTQueryWithOutput.h b/src/Parsers/ASTQueryWithOutput.h index 10852467a76..a34826d128c 100644 --- a/src/Parsers/ASTQueryWithOutput.h +++ b/src/Parsers/ASTQueryWithOutput.h @@ -18,6 +18,7 @@ public: ASTPtr format; ASTPtr settings_ast; ASTPtr compression; + ASTPtr compression_level; void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const final; diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index a6af19fc670..79203c6d3d1 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -96,6 +96,14 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserStringLiteral compression; if (!compression.parse(pos, query_with_output.compression, expected)) return false; + + ParserKeyword s_compression_level("LEVEL"); + if (s_compression_level.ignore(pos, expected)) + { + ParserNumber compression_level; + if (!compression_level.parse(pos, query_with_output.compression_level, expected)) + return false; + } } query_with_output.children.push_back(query_with_output.out_file); diff --git a/tests/queries/0_stateless/02353_compression_level.reference b/tests/queries/0_stateless/02353_compression_level.reference new file mode 100644 index 00000000000..0ab28a4efa5 --- /dev/null +++ b/tests/queries/0_stateless/02353_compression_level.reference @@ -0,0 +1,18 @@ +6000 5999 13 +6000 5999 13 +Ok +6000 5999 13 +6000 5999 13 +Ok +6000 5999 13 +6000 5999 13 +Ok +6000 5999 13 +6000 5999 13 +Ok +6000 5999 13 +6000 5999 13 +Ok +6000 5999 13 +6000 5999 13 +Ok diff --git a/tests/queries/0_stateless/02353_compression_level.sh b/tests/queries/0_stateless/02353_compression_level.sh new file mode 100755 index 00000000000..b08dc1e204c --- /dev/null +++ 
b/tests/queries/0_stateless/02353_compression_level.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel +# Tag no-fasttest: depends on brotli and bzip2 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +WORKING_FOLDER_02353="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" + +rm -rf "${WORKING_FOLDER_02353}" +mkdir "${WORKING_FOLDER_02353}" + +for m in gz br xz zst lz4 bz2 +do + ${CLICKHOUSE_CLIENT} --query "SELECT number, 'Hello, world!' FROM numbers(6000) INTO OUTFILE '${WORKING_FOLDER_02353}/${m}_1.${m}' COMPRESSION '${m}' LEVEL 1" + ${CLICKHOUSE_CLIENT} --query "SELECT number, 'Hello, world!' FROM numbers(6000) INTO OUTFILE '${WORKING_FOLDER_02353}/${m}_9.${m}' COMPRESSION '${m}' LEVEL 9" + + ${CLICKHOUSE_CLIENT} --query "SELECT count(), max(x), avg(length(s)) FROM file('${WORKING_FOLDER_02353}/${m}_1.${m}', 'TabSeparated', 'x UInt32, s String')" + ${CLICKHOUSE_CLIENT} --query "SELECT count(), max(x), avg(length(s)) FROM file('${WORKING_FOLDER_02353}/${m}_9.${m}', 'TabSeparated', 'x UInt32, s String')" + + level_1=$(du -b ${WORKING_FOLDER_02353}/${m}_1.${m} | awk '{print $1}') + level_9=$(du -b ${WORKING_FOLDER_02353}/${m}_9.${m} | awk '{print $1}') + + if [ "$level_1" != "$level_9" ]; then + echo "Ok" + else + echo "Failed" + fi + +done + +rm -rf "${WORKING_FOLDER_02353}"