mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Merge pull request #67938 from mwoenker/incomplete-utf8-sequence
Handle incomplete sequences at end of input
This commit is contained in:
commit
a34a544f4a
@ -54,7 +54,7 @@ inline void WriteBufferValidUTF8::putReplacement()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline void WriteBufferValidUTF8::putValid(char *data, size_t len)
|
inline void WriteBufferValidUTF8::putValid(const char *data, size_t len)
|
||||||
{
|
{
|
||||||
if (len == 0)
|
if (len == 0)
|
||||||
return;
|
return;
|
||||||
@ -149,9 +149,34 @@ void WriteBufferValidUTF8::finalizeImpl()
|
|||||||
/// Write all complete sequences from buffer.
|
/// Write all complete sequences from buffer.
|
||||||
nextImpl();
|
nextImpl();
|
||||||
|
|
||||||
/// If unfinished sequence at end, then write replacement.
|
/// Handle remaining bytes if we have an incomplete sequence
|
||||||
if (working_buffer.begin() != memory.data())
|
if (working_buffer.begin() != memory.data())
|
||||||
putReplacement();
|
{
|
||||||
|
const char * p = memory.data();
|
||||||
|
|
||||||
|
while (p < pos)
|
||||||
|
{
|
||||||
|
UInt8 len = length_of_utf8_sequence[static_cast<const unsigned char>(*p)];
|
||||||
|
if (p + len > pos)
|
||||||
|
{
|
||||||
|
/// Incomplete sequence. Skip one byte.
|
||||||
|
putReplacement();
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
else if (Poco::UTF8Encoding::isLegal(reinterpret_cast<const unsigned char *>(p), len))
|
||||||
|
{
|
||||||
|
/// Valid sequence
|
||||||
|
putValid(p, len);
|
||||||
|
p += len;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/// Invalid sequence, skip first byte.
|
||||||
|
putReplacement();
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,7 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
void putReplacement();
|
void putReplacement();
|
||||||
void putValid(char * data, size_t len);
|
void putValid(const char * data, size_t len);
|
||||||
|
|
||||||
void nextImpl() override;
|
void nextImpl() override;
|
||||||
void finalizeImpl() override;
|
void finalizeImpl() override;
|
||||||
|
@ -0,0 +1,16 @@
|
|||||||
|
{
|
||||||
|
"meta":
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "unhex('f0')",
|
||||||
|
"type": "String"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
|
||||||
|
"data":
|
||||||
|
[
|
||||||
|
["<22>"]
|
||||||
|
],
|
||||||
|
|
||||||
|
"rows": 1
|
||||||
|
}
|
@ -0,0 +1,2 @@
|
|||||||
|
SET output_format_write_statistics = 0;
|
||||||
|
SELECT unhex('f0') FORMAT JSONCompact;
|
Loading…
Reference in New Issue
Block a user