Merge pull request #49241 from evillique/decompress-filename

Add file name to exception raised during decompression
This commit is contained in:
Alexey Milovidov 2023-05-02 02:16:49 +03:00 committed by GitHub
commit 6f2ccd88b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 106 additions and 23 deletions

View File

@ -3,6 +3,7 @@
#if USE_BROTLI
# include <brotli/decode.h>
# include "BrotliReadBuffer.h"
# include <IO/WithFileName.h>
namespace DB
{
@ -60,7 +61,10 @@ bool BrotliReadBuffer::nextImpl()
if (brotli->result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && (!in_available || in->eof()))
{
throw Exception(ErrorCodes::BROTLI_READ_FAILED, "brotli decode error");
throw Exception(
ErrorCodes::BROTLI_READ_FAILED,
"brotli decode error{}",
getExceptionEntryWithFileName(*in));
}
out_capacity = internal_buffer.size();
@ -83,13 +87,19 @@ bool BrotliReadBuffer::nextImpl()
}
else
{
throw Exception(ErrorCodes::BROTLI_READ_FAILED, "brotli decode error");
throw Exception(
ErrorCodes::BROTLI_READ_FAILED,
"brotli decode error{}",
getExceptionEntryWithFileName(*in));
}
}
if (brotli->result == BROTLI_DECODER_RESULT_ERROR)
{
throw Exception(ErrorCodes::BROTLI_READ_FAILED, "brotli decode error");
throw Exception(
ErrorCodes::BROTLI_READ_FAILED,
"brotli decode error{}",
getExceptionEntryWithFileName(*in));
}
return true;

View File

@ -3,6 +3,7 @@
#if USE_BZIP2
# include <IO/Bzip2ReadBuffer.h>
# include <bzlib.h>
# include <IO/WithFileName.h>
namespace DB
{
@ -118,13 +119,17 @@ bool Bzip2ReadBuffer::nextImpl()
if (ret != BZ_OK)
throw Exception(
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
"bzip2 stream decoder failed: error code: {}",
ret);
"bzip2 stream decoder failed: error code: {}{}",
ret,
getExceptionEntryWithFileName(*in));
if (in->eof())
{
eof_flag = true;
throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "Unexpected end of bzip2 archive");
throw Exception(
ErrorCodes::UNEXPECTED_END_OF_FILE,
"Unexpected end of bzip2 archive{}",
getExceptionEntryWithFileName(*in));
}
return true;

View File

@ -11,6 +11,8 @@
#include "HadoopSnappyReadBuffer.h"
#include <IO/WithFileName.h>
namespace DB
{
namespace ErrorCodes
@ -89,9 +91,8 @@ inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readCompressedLength(siz
{
auto status = readLength(avail_in, next_in, &compressed_length);
if (unlikely(compressed_length > 0 && static_cast<size_t>(compressed_length) > sizeof(buffer)))
throw Exception(ErrorCodes::SNAPPY_UNCOMPRESS_FAILED,
"Too large snappy compressed block. buffer size: {}, compressed block size: {}",
sizeof(buffer), compressed_length);
return Status::TOO_LARGE_COMPRESSED_BLOCK;
return status;
}
return Status::OK;
@ -196,7 +197,11 @@ bool HadoopSnappyReadBuffer::nextImpl()
if (decoder->result == Status::NEEDS_MORE_INPUT && (!in_available || in->eof()))
{
throw Exception(ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, "hadoop snappy decode error: {}", statusToString(decoder->result));
throw Exception(
ErrorCodes::SNAPPY_UNCOMPRESS_FAILED,
"hadoop snappy decode error: {}{}",
statusToString(decoder->result),
getExceptionEntryWithFileName(*in));
}
out_capacity = internal_buffer.size();
@ -219,9 +224,13 @@ bool HadoopSnappyReadBuffer::nextImpl()
}
return true;
}
else if (decoder->result == Status::INVALID_INPUT || decoder->result == Status::BUFFER_TOO_SMALL)
else if (decoder->result != Status::NEEDS_MORE_INPUT)
{
throw Exception(ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, "hadoop snappy decode error: {}", statusToString(decoder->result));
throw Exception(
ErrorCodes::SNAPPY_UNCOMPRESS_FAILED,
"hadoop snappy decode error: {}{}",
statusToString(decoder->result),
getExceptionEntryWithFileName(*in));
}
return true;
}

View File

@ -29,6 +29,7 @@ public:
INVALID_INPUT = 1,
BUFFER_TOO_SMALL = 2,
NEEDS_MORE_INPUT = 3,
TOO_LARGE_COMPRESSED_BLOCK = 4,
};
HadoopSnappyDecoder() = default;
@ -84,6 +85,8 @@ public:
return "BUFFER_TOO_SMALL";
case Status::NEEDS_MORE_INPUT:
return "NEEDS_MORE_INPUT";
case Status::TOO_LARGE_COMPRESSED_BLOCK:
return "TOO_LARGE_COMPRESSED_BLOCK";
}
UNREACHABLE();
}

View File

@ -1,4 +1,5 @@
#include <IO/LZMAInflatingReadBuffer.h>
#include <IO/WithFileName.h>
namespace DB
{
@ -78,18 +79,20 @@ bool LZMAInflatingReadBuffer::nextImpl()
{
throw Exception(
ErrorCodes::LZMA_STREAM_DECODER_FAILED,
"lzma decoder finished, but input stream has not exceeded: error code: {}; lzma version: {}",
"lzma decoder finished, but input stream has not exceeded: error code: {}; lzma version: {}{}",
ret,
LZMA_VERSION_STRING);
LZMA_VERSION_STRING,
getExceptionEntryWithFileName(*in));
}
}
if (ret != LZMA_OK)
throw Exception(
ErrorCodes::LZMA_STREAM_DECODER_FAILED,
"lzma_stream_decoder failed: error code: error codeL {}; lzma version: {}",
"lzma_stream_decoder failed: error code: error code {}; lzma version: {}{}",
ret,
LZMA_VERSION_STRING);
LZMA_VERSION_STRING,
getExceptionEntryWithFileName(*in));
return true;
}

View File

@ -1,4 +1,5 @@
#include <IO/Lz4InflatingReadBuffer.h>
#include <IO/WithFileName.h>
namespace DB
{
@ -72,9 +73,10 @@ bool Lz4InflatingReadBuffer::nextImpl()
if (LZ4F_isError(ret))
throw Exception(
ErrorCodes::LZ4_DECODER_FAILED,
"LZ4 decompression failed. LZ4F version: {}. Error: {}",
"LZ4 decompression failed. LZ4F version: {}. Error: {}{}",
LZ4F_VERSION,
LZ4F_getErrorName(ret));
LZ4F_getErrorName(ret),
getExceptionEntryWithFileName(*in));
if (in->eof())
{

View File

@ -26,4 +26,14 @@ String getFileNameFromReadBuffer(const ReadBuffer & in)
return getFileName(in);
}
/// Builds the suffix that names the source of `in` for use in exception messages.
/// Yields "; While reading from: <file name>" when getFileNameFromReadBuffer()
/// reports a non-empty name for the buffer, and an empty string otherwise, so the
/// result can be appended to a message unconditionally.
String getExceptionEntryWithFileName(const ReadBuffer & in)
{
    const auto source_name = getFileNameFromReadBuffer(in);

    if (!source_name.empty())
        return fmt::format("; While reading from: {}", source_name);

    /// No file name is known for this buffer: contribute nothing to the message.
    return {};
}
}

View File

@ -14,5 +14,6 @@ public:
};
String getFileNameFromReadBuffer(const ReadBuffer & in);
String getExceptionEntryWithFileName(const ReadBuffer & in);
}

View File

@ -1,5 +1,5 @@
#include <IO/ZlibInflatingReadBuffer.h>
#include <IO/WithFileName.h>
namespace DB
{
@ -99,14 +99,22 @@ bool ZlibInflatingReadBuffer::nextImpl()
{
rc = inflateReset(&zstr);
if (rc != Z_OK)
throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateReset failed: {}", zError(rc));
throw Exception(
ErrorCodes::ZLIB_INFLATE_FAILED,
"inflateReset failed: {}{}",
zError(rc),
getExceptionEntryWithFileName(*in));
return true;
}
}
/// If it is not end and not OK, something went wrong, throw exception
if (rc != Z_OK)
throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflate failed: {}", zError(rc));
throw Exception(
ErrorCodes::ZLIB_INFLATE_FAILED,
"inflate failed: {}{}",
zError(rc),
getExceptionEntryWithFileName(*in));
}
while (working_buffer.empty());

View File

@ -1,4 +1,5 @@
#include <IO/ZstdInflatingReadBuffer.h>
#include <IO/WithFileName.h>
#include <zstd_errors.h>
@ -61,12 +62,13 @@ bool ZstdInflatingReadBuffer::nextImpl()
{
throw Exception(
ErrorCodes::ZSTD_DECODER_FAILED,
"ZSTD stream decoding failed: error '{}'{}; ZSTD version: {}",
"ZSTD stream decoding failed: error '{}'{}; ZSTD version: {}{}",
ZSTD_getErrorName(ret),
ZSTD_error_frameParameter_windowTooLarge == ret
? ". You can increase the maximum window size with the 'zstd_window_log_max' setting in ClickHouse. Example: 'SET zstd_window_log_max = 31'"
: "",
ZSTD_VERSION_STRING);
ZSTD_VERSION_STRING,
getExceptionEntryWithFileName(*in));
}
/// Check that something has changed after decompress (input or output position)

View File

@ -0,0 +1,8 @@
Ok
Ok
Ok
Ok
Ok
Ok
Ok
Ok

View File

@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Tags: no-fasttest, no-parallel

# Checks that decompression errors mention the file being read.
# A file that is not valid compressed data is read through the file() table
# function once per compression method; each resulting error message is expected
# to contain "While reading from: <path>". Prints one 'Ok' or 'Fail' per method.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Derive the directory used by file() from the error raised for a missing file:
# the 9th whitespace-separated field of the exception line is taken as the full
# path, and the file name is stripped from it.
USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')

FILENAME="${USER_FILES_PATH}/corrupted_file.tsv.xx"

echo 'corrupted file' > "$FILENAME"

# Same methods, in the same order, as the reference output expects.
for method in gzip deflate br xz zstd lz4 bz2 snappy
do
    # -F: match the path literally, so '.' in the file name is not a regex wildcard.
    if $CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', '${method}')" 2>&1 | grep -qF "While reading from: $FILENAME"
    then
        echo 'Ok'
    else
        echo 'Fail'
    fi
done

rm -f -- "$FILENAME"