mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Merge pull request #42046 from evillique/fix-bz2-decoding
Fix bzip2 decoding issue
This commit is contained in:
commit
16a8145deb
@ -35,6 +35,33 @@ public:
|
|||||||
BZ2_bzDecompressEnd(&stream);
|
BZ2_bzDecompressEnd(&stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void reinitialize()
|
||||||
|
{
|
||||||
|
auto avail_out = stream.avail_out;
|
||||||
|
auto * next_out = stream.next_out;
|
||||||
|
|
||||||
|
int ret = BZ2_bzDecompressEnd(&stream);
|
||||||
|
|
||||||
|
if (ret != BZ_OK)
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
||||||
|
"bzip2 stream encoder reinit decompress end failed: error code: {}",
|
||||||
|
ret);
|
||||||
|
|
||||||
|
memset(&stream, 0, sizeof(bz->stream));
|
||||||
|
|
||||||
|
ret = BZ2_bzDecompressInit(&stream, 0, 0);
|
||||||
|
|
||||||
|
if (ret != BZ_OK)
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
||||||
|
"bzip2 stream encoder reinit failed: error code: {}",
|
||||||
|
ret);
|
||||||
|
|
||||||
|
stream.avail_out = avail_out;
|
||||||
|
stream.next_out = next_out;
|
||||||
|
}
|
||||||
|
|
||||||
bz_stream stream;
|
bz_stream stream;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -68,24 +95,24 @@ bool Bzip2ReadBuffer::nextImpl()
|
|||||||
ret = BZ2_bzDecompress(&bz->stream);
|
ret = BZ2_bzDecompress(&bz->stream);
|
||||||
|
|
||||||
in->position() = in->buffer().end() - bz->stream.avail_in;
|
in->position() = in->buffer().end() - bz->stream.avail_in;
|
||||||
|
|
||||||
|
if (ret == BZ_STREAM_END && !in->eof())
|
||||||
|
{
|
||||||
|
bz->reinitialize();
|
||||||
|
bz->stream.avail_in = in->buffer().end() - in->position();
|
||||||
|
bz->stream.next_in = in->position();
|
||||||
|
|
||||||
|
ret = BZ_OK;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
while (bz->stream.avail_out == internal_buffer.size() && ret == BZ_OK && !in->eof());
|
while (bz->stream.avail_out == internal_buffer.size() && ret == BZ_OK && !in->eof());
|
||||||
|
|
||||||
working_buffer.resize(internal_buffer.size() - bz->stream.avail_out);
|
working_buffer.resize(internal_buffer.size() - bz->stream.avail_out);
|
||||||
|
|
||||||
if (ret == BZ_STREAM_END)
|
if (ret == BZ_STREAM_END && in->eof())
|
||||||
{
|
{
|
||||||
if (in->eof())
|
eof_flag = true;
|
||||||
{
|
return !working_buffer.empty();
|
||||||
eof_flag = true;
|
|
||||||
return !working_buffer.empty();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
throw Exception(
|
|
||||||
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
|
||||||
"bzip2 decoder finished, but input stream has not exceeded: error code: {}", ret);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret != BZ_OK)
|
if (ret != BZ_OK)
|
||||||
|
@ -123,3 +123,4 @@ Hello, world
|
|||||||
Hello, world
|
Hello, world
|
||||||
0
|
0
|
||||||
Part1 Part2
|
Part1 Part2
|
||||||
|
Part1 Part2
|
||||||
|
@ -51,5 +51,6 @@ echo "'Hello, world'" | bzip2 -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'C
|
|||||||
|
|
||||||
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 0' | wc -c;
|
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 0' | wc -c;
|
||||||
|
|
||||||
# POST multiple concatenated gzip streams.
|
# POST multiple concatenated gzip and bzip2 streams.
|
||||||
(echo -n "SELECT 'Part1" | gzip -c; echo " Part2'" | gzip -c) | ${CLICKHOUSE_CURL} -sS -H 'Content-Encoding: gzip' "${CLICKHOUSE_URL}" --data-binary @-
|
(echo -n "SELECT 'Part1" | gzip -c; echo " Part2'" | gzip -c) | ${CLICKHOUSE_CURL} -sS -H 'Content-Encoding: gzip' "${CLICKHOUSE_URL}" --data-binary @-
|
||||||
|
(echo -n "SELECT 'Part1" | bzip2 -c; echo " Part2'" | bzip2 -c) | ${CLICKHOUSE_CURL} -sS -H 'Content-Encoding: bz2' "${CLICKHOUSE_URL}" --data-binary @-
|
||||||
|
@ -0,0 +1,4 @@
|
|||||||
|
0
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
21
tests/queries/0_stateless/02457_bz2_concatenated.sh
Executable file
21
tests/queries/0_stateless/02457_bz2_concatenated.sh
Executable file
@ -0,0 +1,21 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Tags: no-fasttest
|
||||||
|
# Tag no-fasttest: depends on bzip2
|
||||||
|
|
||||||
|
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
# shellcheck source=../shell_config.sh
|
||||||
|
. "$CURDIR"/../shell_config.sh
|
||||||
|
|
||||||
|
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
|
||||||
|
WORKING_FOLDER_02457="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}"
|
||||||
|
|
||||||
|
rm -rf "${WORKING_FOLDER_02457}"
|
||||||
|
mkdir "${WORKING_FOLDER_02457}"
|
||||||
|
|
||||||
|
|
||||||
|
${CLICKHOUSE_CLIENT} --query "SELECT * FROM numbers(0, 2) INTO OUTFILE '${WORKING_FOLDER_02457}/file_1.bz2'"
|
||||||
|
${CLICKHOUSE_CLIENT} --query "SELECT * FROM numbers(2, 2) INTO OUTFILE '${WORKING_FOLDER_02457}/file_2.bz2'"
|
||||||
|
cat ${WORKING_FOLDER_02457}/file_1.bz2 ${WORKING_FOLDER_02457}/file_2.bz2 > ${WORKING_FOLDER_02457}/concatenated.bz2
|
||||||
|
${CLICKHOUSE_CLIENT} --query "SELECT * FROM file('${WORKING_FOLDER_02457}/concatenated.bz2', 'TabSeparated', 'col Int64')"
|
||||||
|
|
||||||
|
rm -rf "${WORKING_FOLDER_02457}"
|
Loading…
Reference in New Issue
Block a user