mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Merge pull request #42046 from evillique/fix-bz2-decoding
Fix bzip2 decoding issue
This commit is contained in:
commit
16a8145deb
@ -35,6 +35,33 @@ public:
|
||||
BZ2_bzDecompressEnd(&stream);
|
||||
}
|
||||
|
||||
void reinitialize()
|
||||
{
|
||||
auto avail_out = stream.avail_out;
|
||||
auto * next_out = stream.next_out;
|
||||
|
||||
int ret = BZ2_bzDecompressEnd(&stream);
|
||||
|
||||
if (ret != BZ_OK)
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
||||
"bzip2 stream encoder reinit decompress end failed: error code: {}",
|
||||
ret);
|
||||
|
||||
memset(&stream, 0, sizeof(bz->stream));
|
||||
|
||||
ret = BZ2_bzDecompressInit(&stream, 0, 0);
|
||||
|
||||
if (ret != BZ_OK)
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
||||
"bzip2 stream encoder reinit failed: error code: {}",
|
||||
ret);
|
||||
|
||||
stream.avail_out = avail_out;
|
||||
stream.next_out = next_out;
|
||||
}
|
||||
|
||||
bz_stream stream;
|
||||
};
|
||||
|
||||
@ -68,24 +95,24 @@ bool Bzip2ReadBuffer::nextImpl()
|
||||
ret = BZ2_bzDecompress(&bz->stream);
|
||||
|
||||
in->position() = in->buffer().end() - bz->stream.avail_in;
|
||||
|
||||
if (ret == BZ_STREAM_END && !in->eof())
|
||||
{
|
||||
bz->reinitialize();
|
||||
bz->stream.avail_in = in->buffer().end() - in->position();
|
||||
bz->stream.next_in = in->position();
|
||||
|
||||
ret = BZ_OK;
|
||||
}
|
||||
}
|
||||
while (bz->stream.avail_out == internal_buffer.size() && ret == BZ_OK && !in->eof());
|
||||
|
||||
working_buffer.resize(internal_buffer.size() - bz->stream.avail_out);
|
||||
|
||||
if (ret == BZ_STREAM_END)
|
||||
if (ret == BZ_STREAM_END && in->eof())
|
||||
{
|
||||
if (in->eof())
|
||||
{
|
||||
eof_flag = true;
|
||||
return !working_buffer.empty();
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
||||
"bzip2 decoder finished, but input stream has not exceeded: error code: {}", ret);
|
||||
}
|
||||
eof_flag = true;
|
||||
return !working_buffer.empty();
|
||||
}
|
||||
|
||||
if (ret != BZ_OK)
|
||||
|
@ -123,3 +123,4 @@ Hello, world
|
||||
Hello, world
|
||||
0
|
||||
Part1 Part2
|
||||
Part1 Part2
|
||||
|
@ -51,5 +51,6 @@ echo "'Hello, world'" | bzip2 -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'C
|
||||
|
||||
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 0' | wc -c;
|
||||
|
||||
# POST multiple concatenated gzip streams.
|
||||
# POST multiple concatenated gzip and bzip2 streams.
|
||||
(echo -n "SELECT 'Part1" | gzip -c; echo " Part2'" | gzip -c) | ${CLICKHOUSE_CURL} -sS -H 'Content-Encoding: gzip' "${CLICKHOUSE_URL}" --data-binary @-
|
||||
(echo -n "SELECT 'Part1" | bzip2 -c; echo " Part2'" | bzip2 -c) | ${CLICKHOUSE_CURL} -sS -H 'Content-Encoding: bz2' "${CLICKHOUSE_URL}" --data-binary @-
|
||||
|
@ -0,0 +1,4 @@
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
21
tests/queries/0_stateless/02457_bz2_concatenated.sh
Executable file
21
tests/queries/0_stateless/02457_bz2_concatenated.sh
Executable file
@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash
# Tags: no-fasttest
# Tag no-fasttest: depends on bzip2

# Writes two .bz2 files, concatenates them byte-wise and reads the result back
# as one file through the file() table function.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Discover the directory file() resolves paths against: query a file that does not
# exist, take the 9th whitespace-separated field of the Exception line and strip
# the "/nonexist.txt" suffix from it.
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
WORKING_FOLDER_02457="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}"

# Start from an empty per-database working folder.
rm -rf "${WORKING_FOLDER_02457}"
mkdir "${WORKING_FOLDER_02457}"


${CLICKHOUSE_CLIENT} --query "SELECT * FROM numbers(0, 2) INTO OUTFILE '${WORKING_FOLDER_02457}/file_1.bz2'"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM numbers(2, 2) INTO OUTFILE '${WORKING_FOLDER_02457}/file_2.bz2'"
# Paths are quoted so that a folder name containing spaces or glob characters
# is passed to cat (and to the redirection) as a single word.
cat "${WORKING_FOLDER_02457}/file_1.bz2" "${WORKING_FOLDER_02457}/file_2.bz2" > "${WORKING_FOLDER_02457}/concatenated.bz2"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM file('${WORKING_FOLDER_02457}/concatenated.bz2', 'TabSeparated', 'col Int64')"

# Clean up everything this test created.
rm -rf "${WORKING_FOLDER_02457}"
|
Loading…
Reference in New Issue
Block a user