mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-09 17:14:47 +00:00
Handle incomplete sequences at end of input
This commit is contained in:
parent
c64f060496
commit
704ec8dea6
@ -149,9 +149,27 @@ void WriteBufferValidUTF8::finalizeImpl()
|
||||
/// Write all complete sequences from buffer.
|
||||
nextImpl();
|
||||
|
||||
/// If unfinished sequence at end, then write replacement.
|
||||
if (working_buffer.begin() != memory.data())
|
||||
putReplacement();
|
||||
/// Handle remaining bytes if we have an incomplete sequence
|
||||
if (working_buffer.begin() != memory.data()) {
|
||||
char * p = memory.data();
|
||||
|
||||
while (p < pos) {
|
||||
UInt8 len = length_of_utf8_sequence[static_cast<unsigned char>(*p)];
|
||||
if (p + len > pos) {
|
||||
// Incomplete sequence. Skip one byte.
|
||||
putReplacement();
|
||||
++p;
|
||||
} else if (Poco::UTF8Encoding::isLegal(reinterpret_cast<unsigned char *>(p), len)) {
|
||||
// Valid sequence
|
||||
putValid(p, len);
|
||||
p += len;
|
||||
} else {
|
||||
// Invalid sequence, skip first byte.
|
||||
putReplacement();
|
||||
++p;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user