mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge pull request #38907 from evillique/compression_level
Add compression level for data export
This commit is contained in:
commit
74d02aeca7
@ -25,7 +25,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list
|
||||
[LIMIT [n, ]m] [WITH TIES]
|
||||
[SETTINGS ...]
|
||||
[UNION ...]
|
||||
[INTO OUTFILE filename [COMPRESSION type] ]
|
||||
[INTO OUTFILE filename [COMPRESSION type [LEVEL level]] ]
|
||||
[FORMAT format]
|
||||
```
|
||||
|
||||
|
@ -6,16 +6,18 @@ sidebar_label: INTO OUTFILE
|
||||
|
||||
`INTO OUTFILE` clause redirects the result of a `SELECT` query to a file on the **client** side.
|
||||
|
||||
Compressed files are supported. Compression type is detected by the extension of the file name (mode `'auto'` is used by default). Or it can be explicitly specified in a `COMPRESSION` clause.
|
||||
Compressed files are supported. The compression type is detected from the file name extension (mode `'auto'` is used by default), or it can be specified explicitly in a `COMPRESSION` clause. The compression level for a given compression type can be specified in a `LEVEL` clause.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type]
|
||||
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type [LEVEL level]]
|
||||
```
|
||||
|
||||
`file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
|
||||
|
||||
`level` is a numeric literal. Positive integers in the following ranges are supported: `1-12` for the `lz4` type, `1-22` for the `zstd` type, and `1-9` for other compression types.
|
||||
|
||||
## Implementation Details
|
||||
|
||||
- This functionality is available in the [command-line client](../../../interfaces/cli.md) and [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Thus a query sent via [HTTP interface](../../../interfaces/http.md) will fail.
|
||||
|
@ -24,7 +24,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list
|
||||
[LIMIT [n, ]m] [WITH TIES]
|
||||
[SETTINGS ...]
|
||||
[UNION ALL ...]
|
||||
[INTO OUTFILE filename [COMPRESSION type] ]
|
||||
[INTO OUTFILE filename [COMPRESSION type [LEVEL level]] ]
|
||||
[FORMAT format]
|
||||
```
|
||||
|
||||
|
@ -6,16 +6,18 @@ sidebar_label: INTO OUTFILE
|
||||
|
||||
Секция `INTO OUTFILE` перенаправляет результат запроса `SELECT` в файл на стороне **клиента**.
|
||||
|
||||
Поддерживаются сжатые файлы. Формат сжатия определяется по расширению файла (по умолчанию используется режим `'auto'`), либо он может быть задан явно в секции `COMPRESSION`.
|
||||
Поддерживаются сжатые файлы. Формат сжатия определяется по расширению файла (по умолчанию используется режим `'auto'`), либо он может быть задан явно в секции `COMPRESSION`. Уровень сжатия для конкретного алгоритма может быть задан в секции `LEVEL`.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type]
|
||||
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type [LEVEL level]]
|
||||
```
|
||||
|
||||
`file_name` и `type` задаются в виде строковых литералов. Поддерживаются форматы сжатия: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
|
||||
|
||||
`level` задается в виде числового литерала. Поддерживаются положительные значения в следующих диапазонах: `1-12` для формата `lz4`, `1-22` для формата `zstd` и `1-9` для остальных форматов.
|
||||
|
||||
## Детали реализации {#implementation-details}
|
||||
|
||||
- Эта функция доступна только в следующих интерфейсах: [клиент командной строки](../../../interfaces/cli.md) и [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Таким образом, запрос, отправленный через [HTTP интерфейс](../../../interfaces/http.md), вернет ошибку.
|
||||
|
@ -524,17 +524,35 @@ try
|
||||
const auto & out_file_node = query_with_output->out_file->as<ASTLiteral &>();
|
||||
out_file = out_file_node.value.safeGet<std::string>();
|
||||
|
||||
std::string compression_method;
|
||||
std::string compression_method_string;
|
||||
|
||||
if (query_with_output->compression)
|
||||
{
|
||||
const auto & compression_method_node = query_with_output->compression->as<ASTLiteral &>();
|
||||
compression_method = compression_method_node.value.safeGet<std::string>();
|
||||
compression_method_string = compression_method_node.value.safeGet<std::string>();
|
||||
}
|
||||
|
||||
CompressionMethod compression_method = chooseCompressionMethod(out_file, compression_method_string);
|
||||
UInt64 compression_level = 3;
|
||||
|
||||
if (query_with_output->compression_level)
|
||||
{
|
||||
const auto & compression_level_node = query_with_output->compression_level->as<ASTLiteral &>();
|
||||
bool res = compression_level_node.value.tryGet<UInt64>(compression_level);
|
||||
auto range = getCompressionLevelRange(compression_method);
|
||||
|
||||
if (!res || compression_level < range.first || compression_level > range.second)
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"Invalid compression level, must be positive integer in range {}-{}",
|
||||
range.first,
|
||||
range.second);
|
||||
}
|
||||
|
||||
out_file_buf = wrapWriteBufferWithCompressionMethod(
|
||||
std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT),
|
||||
chooseCompressionMethod(out_file, compression_method),
|
||||
/* compression level = */ 3
|
||||
compression_method,
|
||||
compression_level
|
||||
);
|
||||
|
||||
// We are writing to file, so default format is the same as in non-interactive mode.
|
||||
|
@ -98,6 +98,19 @@ CompressionMethod chooseCompressionMethod(const std::string & path, const std::s
|
||||
ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
std::pair<uint64_t, uint64_t> getCompressionLevelRange(const CompressionMethod & method)
|
||||
{
|
||||
switch (method)
|
||||
{
|
||||
case CompressionMethod::Zstd:
|
||||
return {1, 22};
|
||||
case CompressionMethod::Lz4:
|
||||
return {1, 12};
|
||||
default:
|
||||
return {1, 9};
|
||||
}
|
||||
}
|
||||
|
||||
static std::unique_ptr<CompressedReadBufferWrapper> createCompressedWrapper(
|
||||
std::unique_ptr<ReadBuffer> nested, CompressionMethod method, size_t buf_size, char * existing_memory, size_t alignment, int zstd_window_log_max)
|
||||
{
|
||||
|
@ -46,6 +46,9 @@ std::string toContentEncodingName(CompressionMethod method);
|
||||
*/
|
||||
CompressionMethod chooseCompressionMethod(const std::string & path, const std::string & hint);
|
||||
|
||||
/// Get a range of the valid compression levels for the compression method.
/// The result is a pair {lowest level, highest level}; the caller that validates
/// the LEVEL clause rejects values below `first` or above `second`, i.e. both
/// bounds are treated as inclusive.
|
||||
std::pair<uint64_t, uint64_t> getCompressionLevelRange(const CompressionMethod & method);
|
||||
|
||||
std::unique_ptr<ReadBuffer> wrapReadBufferWithCompressionMethod(
|
||||
std::unique_ptr<ReadBuffer> nested,
|
||||
CompressionMethod method,
|
||||
|
@ -18,6 +18,7 @@ public:
|
||||
ASTPtr format;
|
||||
ASTPtr settings_ast;
|
||||
ASTPtr compression;
|
||||
ASTPtr compression_level;
|
||||
|
||||
void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const final;
|
||||
|
||||
|
@ -96,6 +96,14 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
|
||||
ParserStringLiteral compression;
|
||||
if (!compression.parse(pos, query_with_output.compression, expected))
|
||||
return false;
|
||||
|
||||
ParserKeyword s_compression_level("LEVEL");
|
||||
if (s_compression_level.ignore(pos, expected))
|
||||
{
|
||||
ParserNumber compression_level;
|
||||
if (!compression_level.parse(pos, query_with_output.compression_level, expected))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
query_with_output.children.push_back(query_with_output.out_file);
|
||||
|
18
tests/queries/0_stateless/02353_compression_level.reference
Normal file
18
tests/queries/0_stateless/02353_compression_level.reference
Normal file
@ -0,0 +1,18 @@
|
||||
6000 5999 13
|
||||
6000 5999 13
|
||||
Ok
|
||||
6000 5999 13
|
||||
6000 5999 13
|
||||
Ok
|
||||
6000 5999 13
|
||||
6000 5999 13
|
||||
Ok
|
||||
6000 5999 13
|
||||
6000 5999 13
|
||||
Ok
|
||||
6000 5999 13
|
||||
6000 5999 13
|
||||
Ok
|
||||
6000 5999 13
|
||||
6000 5999 13
|
||||
Ok
|
34
tests/queries/0_stateless/02353_compression_level.sh
Executable file
34
tests/queries/0_stateless/02353_compression_level.sh
Executable file
@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env bash
# Tags: no-fasttest, no-parallel
# Tag no-fasttest: depends on brotli and bzip2

# Checks the LEVEL clause of `INTO OUTFILE ... COMPRESSION ...`:
# for each compression type the same result set is exported at level 1 and at
# level 9, both files are read back, and the two file sizes must differ.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Derive the directory that file() resolves relative paths against (presumably
# the server's user_files directory) from the exception raised for a missing file.
# Uses ${CLICKHOUSE_CLIENT} like every other call in this script so the same
# client configuration from shell_config.sh applies.
USER_FILES_PATH=$(${CLICKHOUSE_CLIENT} --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')

# Without this guard an empty USER_FILES_PATH would make the `rm -rf` below
# operate on "/${CLICKHOUSE_DATABASE}" at the filesystem root.
if [ -z "${USER_FILES_PATH}" ]; then
    echo "Cannot detect the user files path" >&2
    exit 1
fi

WORKING_FOLDER_02353="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}"

rm -rf "${WORKING_FOLDER_02353}"
mkdir -p "${WORKING_FOLDER_02353}"

for m in gz br xz zst lz4 bz2
do
    low_file="${WORKING_FOLDER_02353}/${m}_1.${m}"
    high_file="${WORKING_FOLDER_02353}/${m}_9.${m}"

    # Export the same data twice, once per compression level.
    ${CLICKHOUSE_CLIENT} --query "SELECT number, 'Hello, world!' FROM numbers(6000) INTO OUTFILE '${low_file}' COMPRESSION '${m}' LEVEL 1"
    ${CLICKHOUSE_CLIENT} --query "SELECT number, 'Hello, world!' FROM numbers(6000) INTO OUTFILE '${high_file}' COMPRESSION '${m}' LEVEL 9"

    # Both files must decompress to the original data.
    ${CLICKHOUSE_CLIENT} --query "SELECT count(), max(x), avg(length(s)) FROM file('${low_file}', 'TabSeparated', 'x UInt32, s String')"
    ${CLICKHOUSE_CLIENT} --query "SELECT count(), max(x), avg(length(s)) FROM file('${high_file}', 'TabSeparated', 'x UInt32, s String')"

    # Paths are quoted so that whitespace in them cannot split the argument.
    level_1=$(du -b "${low_file}" | awk '{print $1}')
    level_9=$(du -b "${high_file}" | awk '{print $1}')

    # Different sizes show that the requested level reached the compressor.
    if [ "$level_1" != "$level_9" ]; then
        echo "Ok"
    else
        echo "Failed"
    fi

done

rm -rf "${WORKING_FOLDER_02353}"
|
Loading…
Reference in New Issue
Block a user