// mirror of https://github.com/ClickHouse/ClickHouse.git
// synced 2024-12-04 21:42:39 +00:00
// 70d1adfe4b
//   * save format string for NetException
//   * format exceptions
//   * format exceptions 2
//   * format exceptions 3
//   * format exceptions 4
//   * format exceptions 5
//   * format exceptions 6
//   * fix
//   * format exceptions 7
//   * format exceptions 8
//   * Update MergeTreeIndexGin.cpp
//   * Update AggregateFunctionMap.cpp
//   * Update AggregateFunctionMap.cpp
//   * fix
// 51 lines, 1.3 KiB, C++
#include <Formats/newLineSegmentationEngine.h>
|
|
#include <IO/ReadHelpers.h>
|
|
#include <base/find_symbols.h>
|
|
|
|
namespace DB
|
|
{
|
|
|
|
/// Error codes referenced in this file. Only declared here (`extern`); no value is assigned in this file.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}
|
|
|
|
std::pair<bool, size_t> newLineFileSegmentationEngine(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
|
|
{
|
|
char * pos = in.position();
|
|
bool need_more_data = true;
|
|
size_t number_of_rows = 0;
|
|
|
|
while (loadAtPosition(in, memory, pos) && need_more_data)
|
|
{
|
|
pos = find_first_symbols<'\r', '\n'>(pos, in.buffer().end());
|
|
if (pos > in.buffer().end())
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug.");
|
|
else if (pos == in.buffer().end())
|
|
continue;
|
|
|
|
++number_of_rows;
|
|
if ((memory.size() + static_cast<size_t>(pos - in.position()) >= min_bytes) || (number_of_rows == max_rows))
|
|
need_more_data = false;
|
|
|
|
if (*pos == '\n')
|
|
{
|
|
++pos;
|
|
if (loadAtPosition(in, memory, pos) && *pos == '\r')
|
|
++pos;
|
|
}
|
|
else if (*pos == '\r')
|
|
{
|
|
++pos;
|
|
if (loadAtPosition(in, memory, pos) && *pos == '\n')
|
|
++pos;
|
|
}
|
|
}
|
|
|
|
saveUpToPosition(in, memory, pos);
|
|
|
|
return {loadAtPosition(in, memory, pos), number_of_rows};
|
|
}
|
|
|
|
}
|