Consider last CRLF as part of boundary (#24399)

* Consider last CRLF as part of boundary
* Update HTMLForm.cpp
* Don't use xxd
This commit is contained in:
Ivan 2021-05-28 14:59:11 +03:00 committed by GitHub
parent 678a16b5dc
commit ce2a809773
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 32 additions and 29 deletions

View File

@ -28,7 +28,7 @@ bool BinaryRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
void BinaryRowInputFormat::readPrefix()
{
/// NOTE The header is completely ignored. This can be easily improved.
/// NOTE: The header is completely ignored. This can be easily improved.
UInt64 columns = 0;
String tmp;

View File

@ -244,7 +244,7 @@ bool HTMLForm::MultipartReadBuffer::skipToNextBoundary()
while (!in.eof())
{
auto line = readLine();
auto line = readLine(true);
if (startsWith(line, boundary))
{
set(in.position(), 0);
@ -256,29 +256,36 @@ bool HTMLForm::MultipartReadBuffer::skipToNextBoundary()
throw Poco::Net::HTMLFormException("No boundary line found");
}
std::string HTMLForm::MultipartReadBuffer::readLine(bool strict)
std::string HTMLForm::MultipartReadBuffer::readLine(bool append_crlf)
{
std::string line;
char ch = 0; // silence "uninitialized" warning from gcc-*
while (in.read(ch) && ch != '\r' && ch != '\n')
/// When append_crlf is false, the returned line may begin with the CRLF that ended the previous content line (one that was not a boundary), so that CRLF is kept at the front of the line.
if (in.read(ch))
line += ch;
if (in.eof())
{
if (strict)
throw Poco::Net::HTMLFormException("Unexpected end of message");
if (in.read(ch))
line += ch;
if (append_crlf && line == "\r\n")
return line;
}
line += ch;
if (ch == '\r')
while (!in.eof())
{
if (!in.read(ch) || ch != '\n')
throw Poco::Net::HTMLFormException("No CRLF found");
else
while (in.read(ch) && ch != '\r')
line += ch;
if (in.eof()) break;
assert(ch == '\r');
if (in.peek(ch) && ch == '\n')
{
in.ignore();
if (append_crlf) line += "\r\n";
break;
}
line += ch;
}
return line;
@ -300,19 +307,12 @@ bool HTMLForm::MultipartReadBuffer::nextImpl()
/// FIXME: there is an extra copy because we cannot traverse PeekableBuffer from checkpoint to position()
/// since it may store different data parts in different sub-buffers,
/// anyway calling makeContinuousMemoryFromCheckpointToPos() will also make an extra copy.
std::string line = readLine(false);
/// According to RFC2046 the preceding CRLF is a part of boundary line.
if (line == "\r\n")
{
line = readLine(false);
boundary_hit = startsWith(line, boundary);
if (!boundary_hit) line = "\r\n";
}
else
boundary_hit = startsWith(line, boundary);
std::string line = readLine(false);
boundary_hit = startsWith(line, "\r\n" + boundary);
bool has_next = !boundary_hit && !line.empty();
if (!line.empty())
if (has_next)
/// If we don't make sure that memory is contiguous, part of the line may end up inside internal memory
/// while the other part is inside a sub-buffer, and then we would be unable to set up our working buffer properly.
in.makeContinuousMemoryFromCheckpointToPos();
@ -323,7 +323,7 @@ bool HTMLForm::MultipartReadBuffer::nextImpl()
/// Limit readable data to a single line.
BufferBase::set(in.position(), line.size(), 0);
return !boundary_hit && !line.empty();
return has_next;
}
}

View File

@ -118,7 +118,7 @@ private:
const std::string boundary;
bool boundary_hit = true;
std::string readLine(bool strict = true);
std::string readLine(bool append_crlf);
bool nextImpl() override;
};

View File

@ -2,3 +2,5 @@
2 World
1 Hello
2 World
1
2

View File

@ -6,3 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
echo -ne '1,Hello\n2,World\n' | ${CLICKHOUSE_CURL} -sSF 'file=@-' "${CLICKHOUSE_URL}&query=SELECT+*+FROM+file&file_format=CSV&file_types=UInt8,String";
echo -ne '1@Hello\n2@World\n' | ${CLICKHOUSE_CURL} -sSF 'file=@-' "${CLICKHOUSE_URL}&query=SELECT+*+FROM+file&file_format=CSV&file_types=UInt8,String&format_csv_delimiter=@";
echo -ne '\x01\x00\x00\x00\x02\x00\x00\x00' | ${CLICKHOUSE_CURL} -sSF "tmp=@-" "${CLICKHOUSE_URL}&query=SELECT+*+FROM+tmp&tmp_structure=TaskID+UInt32&tmp_format=RowBinary";