mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-18 21:51:57 +00:00
Fix single carriage return processing in TSV file segmentation engine
This commit is contained in:
parent
47b551180b
commit
4c12914406
@ -411,11 +411,6 @@ static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer
|
||||
continue;
|
||||
}
|
||||
|
||||
++number_of_rows;
|
||||
if ((number_of_rows >= min_rows)
|
||||
&& ((memory.size() + static_cast<size_t>(pos - in.position()) >= min_bytes) || (number_of_rows == max_rows)))
|
||||
need_more_data = false;
|
||||
|
||||
if (*pos == '\n')
|
||||
{
|
||||
++pos;
|
||||
@ -427,7 +422,14 @@ static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer
|
||||
++pos;
|
||||
if (loadAtPosition(in, memory, pos) && *pos == '\n')
|
||||
++pos;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
|
||||
++number_of_rows;
|
||||
if ((number_of_rows >= min_rows)
|
||||
&& ((memory.size() + static_cast<size_t>(pos - in.position()) >= min_bytes) || (number_of_rows == max_rows)))
|
||||
need_more_data = false;
|
||||
}
|
||||
|
||||
saveUpToPosition(in, memory, pos);
|
||||
|
@ -0,0 +1 @@
|
||||
1000000
|
10
tests/queries/0_stateless/02844_tsv_carriage_return_parallel_parsing.sh
Executable file
10
tests/queries/0_stateless/02844_tsv_carriage_return_parallel_parsing.sh
Executable file
@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Absolute path of the directory containing this script, independent of the caller's working directory.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
# Source the shared test configuration that lives one directory above this script.
# NOTE(review): presumably this is what defines $CLICKHOUSE_LOCAL and $CLICKHOUSE_TEST_UNIQUE_NAME used below — confirm.
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
# Write 1,000,000 TSVRaw rows of 'Hello\rWorld' (a value with an embedded '\r' escape) to a scratch file.
# The redirect target is quoted so the file name is never word-split or glob-expanded.
# NOTE(review): $CLICKHOUSE_LOCAL is left unquoted on purpose; it presumably may expand to a command plus arguments — confirm.
$CLICKHOUSE_LOCAL -q "select 'Hello\rWorld' from numbers(1000000) format TSVRaw" > "$CLICKHOUSE_TEST_UNIQUE_NAME.tsv"
|
||||
# Read the scratch file back through the file() table function and print the row count.
$CLICKHOUSE_LOCAL -q "select count() from file('${CLICKHOUSE_TEST_UNIQUE_NAME}.tsv')"
|
||||
# Remove the scratch file. The operand is quoted, and `--` ends option parsing so an unusual name cannot be read as a flag.
rm -- "$CLICKHOUSE_TEST_UNIQUE_NAME.tsv"
|
||||
|
Loading…
Reference in New Issue
Block a user