mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-18 20:32:43 +00:00
Handle incomplete sequences at end of input
This commit is contained in:
parent
c64f060496
commit
704ec8dea6
@@ -149,9 +149,27 @@ void WriteBufferValidUTF8::finalizeImpl()
|
|||||||
/// Write all complete sequences from buffer.
|
/// Write all complete sequences from buffer.
|
||||||
nextImpl();
|
nextImpl();
|
||||||
|
|
||||||
/// If unfinished sequence at end, then write replacement.
|
/// Handle remaining bytes if we have an incomplete sequence
|
||||||
if (working_buffer.begin() != memory.data())
|
if (working_buffer.begin() != memory.data()) {
|
||||||
|
char * p = memory.data();
|
||||||
|
|
||||||
|
while (p < pos) {
|
||||||
|
UInt8 len = length_of_utf8_sequence[static_cast<unsigned char>(*p)];
|
||||||
|
if (p + len > pos) {
|
||||||
|
// Incomplete sequence. Skip one byte.
|
||||||
putReplacement();
|
putReplacement();
|
||||||
|
++p;
|
||||||
|
} else if (Poco::UTF8Encoding::isLegal(reinterpret_cast<unsigned char *>(p), len)) {
|
||||||
|
// Valid sequence
|
||||||
|
putValid(p, len);
|
||||||
|
p += len;
|
||||||
|
} else {
|
||||||
|
// Invalid sequence, skip first byte.
|
||||||
|
putReplacement();
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user