Handle incomplete sequences at end of input

This commit is contained in:
Matt Woenker 2024-08-05 14:55:33 -04:00
parent c64f060496
commit 704ec8dea6

View File

@ -149,9 +149,27 @@ void WriteBufferValidUTF8::finalizeImpl()
/// Write all complete sequences from buffer.
nextImpl();
/// If unfinished sequence at end, then write replacement.
if (working_buffer.begin() != memory.data())
putReplacement();
/// Handle the bytes still left in the buffer after nextImpl() has written all complete sequences.
/// The guard is the same condition the previous code used to detect an unfinished sequence at the end.
/// NOTE(review): presumably nextImpl() moves the start of working_buffer past memory.data()
/// when it keeps a tail - confirm against nextImpl().
if (working_buffer.begin() != memory.data()) {
/// Scan the range [memory.data(), pos) one sequence (or one byte) at a time.
char * p = memory.data();
while (p < pos) {
/// Expected sequence length, looked up by the lead byte. The cast to unsigned char
/// keeps the table index non-negative on platforms where plain char is signed.
UInt8 len = length_of_utf8_sequence[static_cast<unsigned char>(*p)];
if (p + len > pos) {
/// The sequence announced by the lead byte would run past pos: it is incomplete.
/// Emit one replacement and advance a single byte, so the following bytes
/// are examined on their own in later iterations.
putReplacement();
++p;
} else if (Poco::UTF8Encoding::isLegal(reinterpret_cast<unsigned char *>(p), len)) {
/// All len bytes are present and form a legal sequence: write it through unchanged.
putValid(p, len);
p += len;
} else {
/// All len bytes are present but isLegal rejected them.
/// Emit one replacement and skip only the first byte.
putReplacement();
++p;
}
}
}
}
}