mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 00:22:29 +00:00
Consider last CRLF as part of boundary (#24399)
* Consider last CRLF as part of boundary
* Update HTMLForm.cpp
* Don't use xxd
This commit is contained in:
parent
678a16b5dc
commit
ce2a809773
@ -28,7 +28,7 @@ bool BinaryRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
|
||||
|
||||
void BinaryRowInputFormat::readPrefix()
|
||||
{
|
||||
/// NOTE The header is completely ignored. This can be easily improved.
|
||||
/// NOTE: The header is completely ignored. This can be easily improved.
|
||||
|
||||
UInt64 columns = 0;
|
||||
String tmp;
|
||||
|
@ -244,7 +244,7 @@ bool HTMLForm::MultipartReadBuffer::skipToNextBoundary()
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
auto line = readLine();
|
||||
auto line = readLine(true);
|
||||
if (startsWith(line, boundary))
|
||||
{
|
||||
set(in.position(), 0);
|
||||
@ -256,29 +256,36 @@ bool HTMLForm::MultipartReadBuffer::skipToNextBoundary()
|
||||
throw Poco::Net::HTMLFormException("No boundary line found");
|
||||
}
|
||||
|
||||
std::string HTMLForm::MultipartReadBuffer::readLine(bool strict)
|
||||
std::string HTMLForm::MultipartReadBuffer::readLine(bool append_crlf)
|
||||
{
|
||||
std::string line;
|
||||
char ch = 0; // silence "uninitialized" warning from gcc-*
|
||||
|
||||
while (in.read(ch) && ch != '\r' && ch != '\n')
|
||||
/// If we don't append CRLF, it means that we may have to prepend CRLF from previous content line, which wasn't the boundary.
|
||||
if (in.read(ch))
|
||||
line += ch;
|
||||
|
||||
if (in.eof())
|
||||
{
|
||||
if (strict)
|
||||
throw Poco::Net::HTMLFormException("Unexpected end of message");
|
||||
if (in.read(ch))
|
||||
line += ch;
|
||||
if (append_crlf && line == "\r\n")
|
||||
return line;
|
||||
}
|
||||
|
||||
line += ch;
|
||||
|
||||
if (ch == '\r')
|
||||
while (!in.eof())
|
||||
{
|
||||
if (!in.read(ch) || ch != '\n')
|
||||
throw Poco::Net::HTMLFormException("No CRLF found");
|
||||
else
|
||||
while (in.read(ch) && ch != '\r')
|
||||
line += ch;
|
||||
|
||||
if (in.eof()) break;
|
||||
|
||||
assert(ch == '\r');
|
||||
|
||||
if (in.peek(ch) && ch == '\n')
|
||||
{
|
||||
in.ignore();
|
||||
if (append_crlf) line += "\r\n";
|
||||
break;
|
||||
}
|
||||
|
||||
line += ch;
|
||||
}
|
||||
|
||||
return line;
|
||||
@ -300,19 +307,12 @@ bool HTMLForm::MultipartReadBuffer::nextImpl()
|
||||
/// FIXME: there is an extra copy because we cannot traverse PeekableBuffer from checkpoint to position()
|
||||
/// since it may store different data parts in different sub-buffers,
|
||||
/// anyway calling makeContinuousMemoryFromCheckpointToPos() will also make an extra copy.
|
||||
std::string line = readLine(false);
|
||||
|
||||
/// According to RFC2046 the preceding CRLF is a part of boundary line.
|
||||
if (line == "\r\n")
|
||||
{
|
||||
line = readLine(false);
|
||||
boundary_hit = startsWith(line, boundary);
|
||||
if (!boundary_hit) line = "\r\n";
|
||||
}
|
||||
else
|
||||
boundary_hit = startsWith(line, boundary);
|
||||
std::string line = readLine(false);
|
||||
boundary_hit = startsWith(line, "\r\n" + boundary);
|
||||
bool has_next = !boundary_hit && !line.empty();
|
||||
|
||||
if (!line.empty())
|
||||
if (has_next)
|
||||
/// If we don't make sure that the memory is contiguous, a situation may arise where part of the line is inside internal memory
|
||||
/// and the other part is inside a sub-buffer, so we would be unable to set up our working buffer properly.
|
||||
in.makeContinuousMemoryFromCheckpointToPos();
|
||||
@ -323,7 +323,7 @@ bool HTMLForm::MultipartReadBuffer::nextImpl()
|
||||
/// Limit readable data to a single line.
|
||||
BufferBase::set(in.position(), line.size(), 0);
|
||||
|
||||
return !boundary_hit && !line.empty();
|
||||
return has_next;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -118,7 +118,7 @@ private:
|
||||
const std::string boundary;
|
||||
bool boundary_hit = true;
|
||||
|
||||
std::string readLine(bool strict = true);
|
||||
std::string readLine(bool append_crlf);
|
||||
|
||||
bool nextImpl() override;
|
||||
};
|
||||
|
@ -2,3 +2,5 @@
|
||||
2 World
|
||||
1 Hello
|
||||
2 World
|
||||
1
|
||||
2
|
||||
|
@ -6,3 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
|
||||
echo -ne '1,Hello\n2,World\n' | ${CLICKHOUSE_CURL} -sSF 'file=@-' "${CLICKHOUSE_URL}&query=SELECT+*+FROM+file&file_format=CSV&file_types=UInt8,String";
|
||||
echo -ne '1@Hello\n2@World\n' | ${CLICKHOUSE_CURL} -sSF 'file=@-' "${CLICKHOUSE_URL}&query=SELECT+*+FROM+file&file_format=CSV&file_types=UInt8,String&format_csv_delimiter=@";
|
||||
echo -ne '\x01\x00\x00\x00\x02\x00\x00\x00' | ${CLICKHOUSE_CURL} -sSF "tmp=@-" "${CLICKHOUSE_URL}&query=SELECT+*+FROM+tmp&tmp_structure=TaskID+UInt32&tmp_format=RowBinary";
|
||||
|
Loading…
Reference in New Issue
Block a user