mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Merge pull request #64756 from Blargian/utf8_compute_fix
`UTF8::computeWidth` should skip ANSI escape sequences (fix)
This commit is contained in:
commit
e421c741bf
@ -140,6 +140,18 @@ inline bool isPrintableASCII(char c)
|
|||||||
return uc >= 32 && uc <= 126; /// 127 is ASCII DEL.
|
return uc >= 32 && uc <= 126; /// 127 is ASCII DEL.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when `c` falls in the byte range 0x30–0x3F, i.e. it is one of 0–9:;<=>?
/// Going by the name, this is the "parameter byte" class of a CSI escape sequence.
inline bool isCSIParameterByte(char c)
|
||||||
|
{
|
||||||
|
uint8_t uc = c; /// compare as an unsigned byte so the range check does not depend on the signedness of char
|
||||||
|
return uc >= 0x30 && uc <= 0x3F; /// ASCII 0–9:;<=>?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true when `c` falls in the byte range 0x20–0x2F, i.e. it is a space or one of !"#$%&'()*+,-./
/// Going by the name, this is the "intermediate byte" class of a CSI escape sequence.
inline bool isCSIIntermediateByte(char c)
|
||||||
|
{
|
||||||
|
uint8_t uc = c; /// compare as an unsigned byte so the range check does not depend on the signedness of char
|
||||||
|
return uc >= 0x20 && uc <= 0x2F; /// ASCII space (0x20) and !"#$%&'()*+,-./
|
||||||
|
}
|
||||||
|
|
||||||
inline bool isCSIFinalByte(char c)
|
inline bool isCSIFinalByte(char c)
|
||||||
{
|
{
|
||||||
uint8_t uc = c;
|
uint8_t uc = c;
|
||||||
|
@ -103,7 +103,7 @@ template <ComputeWidthMode mode>
|
|||||||
size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
|
size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
|
||||||
{
|
{
|
||||||
UTF8Decoder decoder;
|
UTF8Decoder decoder;
|
||||||
int isEscapeSequence = false;
|
bool is_escape_sequence = false;
|
||||||
size_t width = 0;
|
size_t width = 0;
|
||||||
size_t rollback = 0;
|
size_t rollback = 0;
|
||||||
for (size_t i = 0; i < size; ++i)
|
for (size_t i = 0; i < size; ++i)
|
||||||
@ -116,6 +116,9 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
|
|||||||
|
|
||||||
while (i + 15 < size)
|
while (i + 15 < size)
|
||||||
{
|
{
|
||||||
|
if (is_escape_sequence)
|
||||||
|
break;
|
||||||
|
|
||||||
__m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&data[i]));
|
__m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&data[i]));
|
||||||
|
|
||||||
const uint16_t non_regular_width_mask = _mm_movemask_epi8(
|
const uint16_t non_regular_width_mask = _mm_movemask_epi8(
|
||||||
@ -131,26 +134,29 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
|
||||||
if (isEscapeSequence)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
i += 16;
|
i += 16;
|
||||||
width += 16;
|
width += 16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
while (i < size && isPrintableASCII(data[i]))
|
while (i < size && isPrintableASCII(data[i]))
|
||||||
{
|
{
|
||||||
if (!isEscapeSequence)
|
bool ignore_width = is_escape_sequence && (isCSIParameterByte(data[i]) || isCSIIntermediateByte(data[i]));
|
||||||
|
|
||||||
|
if (ignore_width || (data[i] == '[' && is_escape_sequence))
|
||||||
|
{
|
||||||
|
/// don't count the width
|
||||||
|
}
|
||||||
|
else if (is_escape_sequence && isCSIFinalByte(data[i]))
|
||||||
|
{
|
||||||
|
is_escape_sequence = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
++width;
|
++width;
|
||||||
else if (isCSIFinalByte(data[i]) && data[i - 1] != '\x1b')
|
}
|
||||||
isEscapeSequence = false; /// end of CSI escape sequence reached
|
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -178,7 +184,7 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
|
|||||||
// special treatment for '\t' and for ESC
|
// special treatment for '\t' and for ESC
|
||||||
size_t next_width = width;
|
size_t next_width = width;
|
||||||
if (decoder.codepoint == '\x1b')
|
if (decoder.codepoint == '\x1b')
|
||||||
isEscapeSequence = true;
|
is_escape_sequence = true;
|
||||||
else if (decoder.codepoint == '\t')
|
else if (decoder.codepoint == '\t')
|
||||||
next_width += 8 - (prefix + width) % 8;
|
next_width += 8 - (prefix + width) % 8;
|
||||||
else
|
else
|
||||||
|
@ -1,5 +1,10 @@
|
|||||||
┏━━━┓
|
┏━━━┓
|
||||||
┃ x ┃
|
┃ x ┃
|
||||||
┡━━━┩
|
┡━━━┩
|
||||||
1. │ [38;2;255;128;0m█[0m │
|
1. │ [38;2;255;128;128m█[0m │
|
||||||
└───┘
|
└───┘
|
||||||
|
┏━━━━━━━━━┳━━━━━━━━━━┓
|
||||||
|
┃ 'Hello' ┃ x ┃
|
||||||
|
┡━━━━━━━━━╇━━━━━━━━━━┩
|
||||||
|
1. │ Hello │ [38;2;255;128;128m█[0m test [38;2;255;128;128m█[0m │
|
||||||
|
└─────────┴──────────┘
|
||||||
|
@ -1 +1,2 @@
|
|||||||
SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 0) AS x FORMAT Pretty;
|
SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x FORMAT Pretty;
|
||||||
|
SELECT 'Hello', format('\x1b[38;2;{0};{1};{2}m█\x1b[0m test \x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x FORMAT Pretty;
|
||||||
|
Loading…
Reference in New Issue
Block a user