diff --git a/src/IO/WriteBufferValidUTF8.cpp b/src/IO/WriteBufferValidUTF8.cpp index d611befac37..426f302cb02 100644 --- a/src/IO/WriteBufferValidUTF8.cpp +++ b/src/IO/WriteBufferValidUTF8.cpp @@ -149,9 +149,27 @@ void WriteBufferValidUTF8::finalizeImpl() /// Write all complete sequences from buffer. nextImpl(); - /// If unfinished sequence at end, then write replacement. - if (working_buffer.begin() != memory.data()) - putReplacement(); + /// Handle remaining bytes if we have an incomplete sequence + if (working_buffer.begin() != memory.data()) { + char * p = memory.data(); + + while (p < pos) { + UInt8 len = length_of_utf8_sequence[static_cast(*p)]; + if (p + len > pos) { + // Incomplete sequence. Skip one byte. + putReplacement(); + ++p; + } else if (Poco::UTF8Encoding::isLegal(reinterpret_cast(p), len)) { + // Valid sequence + putValid(p, len); + p += len; + } else { + // Invalid sequence, skip first byte. + putReplacement(); + ++p; + } + } + } } }