mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Merge pull request #64756 from Blargian/utf8_compute_fix
`UTF8::computeWidth` should skip ANSI escape sequences (fix)
This commit is contained in:
commit
e421c741bf
@ -140,6 +140,18 @@ inline bool isPrintableASCII(char c)
|
||||
return uc >= 32 && uc <= 126; /// 127 is ASCII DEL.
|
||||
}
|
||||
|
||||
/// Returns true if `c` is a CSI "parameter byte": a byte in the range
/// 0x30..0x3F, i.e. one of the ASCII characters 0-9:;<=>?
inline bool isCSIParameterByte(char c)
{
    /// Compare on the unsigned byte value.
    uint8_t uc = c;
    return uc >= 0x30 && uc <= 0x3F;
}
|
||||
|
||||
/// Returns true if `c` is a CSI "intermediate byte": a byte in the range
/// 0x20..0x2F, i.e. the space character or one of !"#$%&'()*+,-./
inline bool isCSIIntermediateByte(char c)
{
    /// Compare on the unsigned byte value.
    uint8_t uc = c;
    return uc >= 0x20 && uc <= 0x2F;
}
|
||||
|
||||
inline bool isCSIFinalByte(char c)
|
||||
{
|
||||
uint8_t uc = c;
|
||||
|
@ -103,7 +103,7 @@ template <ComputeWidthMode mode>
|
||||
size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
|
||||
{
|
||||
UTF8Decoder decoder;
|
||||
int isEscapeSequence = false;
|
||||
bool is_escape_sequence = false;
|
||||
size_t width = 0;
|
||||
size_t rollback = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
@ -116,6 +116,9 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
|
||||
|
||||
while (i + 15 < size)
|
||||
{
|
||||
if (is_escape_sequence)
|
||||
break;
|
||||
|
||||
__m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&data[i]));
|
||||
|
||||
const uint16_t non_regular_width_mask = _mm_movemask_epi8(
|
||||
@ -132,25 +135,28 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
|
||||
}
|
||||
else
|
||||
{
|
||||
if (isEscapeSequence)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
i += 16;
|
||||
width += 16;
|
||||
}
|
||||
i += 16;
|
||||
width += 16;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
while (i < size && isPrintableASCII(data[i]))
|
||||
{
|
||||
if (!isEscapeSequence)
|
||||
bool ignore_width = is_escape_sequence && (isCSIParameterByte(data[i]) || isCSIIntermediateByte(data[i]));
|
||||
|
||||
if (ignore_width || (data[i] == '[' && is_escape_sequence))
|
||||
{
|
||||
/// don't count the width
|
||||
}
|
||||
else if (is_escape_sequence && isCSIFinalByte(data[i]))
|
||||
{
|
||||
is_escape_sequence = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
++width;
|
||||
else if (isCSIFinalByte(data[i]) && data[i - 1] != '\x1b')
|
||||
isEscapeSequence = false; /// end of CSI escape sequence reached
|
||||
}
|
||||
++i;
|
||||
}
|
||||
|
||||
@ -178,7 +184,7 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
|
||||
// special treatment for '\t' and for ESC
|
||||
size_t next_width = width;
|
||||
if (decoder.codepoint == '\x1b')
|
||||
isEscapeSequence = true;
|
||||
is_escape_sequence = true;
|
||||
else if (decoder.codepoint == '\t')
|
||||
next_width += 8 - (prefix + width) % 8;
|
||||
else
|
||||
|
@ -1,5 +1,10 @@
|
||||
┏━━━┓
|
||||
┃ x ┃
|
||||
┡━━━┩
|
||||
1. │ [38;2;255;128;0m█[0m │
|
||||
1. │ [38;2;255;128;128m█[0m │
|
||||
└───┘
|
||||
┏━━━━━━━━━┳━━━━━━━━━━┓
|
||||
┃ 'Hello' ┃ x ┃
|
||||
┡━━━━━━━━━╇━━━━━━━━━━┩
|
||||
1. │ Hello │ [38;2;255;128;128m█[0m test [38;2;255;128;128m█[0m │
|
||||
└─────────┴──────────┘
|
||||
|
@ -1 +1,2 @@
|
||||
SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 0) AS x FORMAT Pretty;
|
||||
SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x FORMAT Pretty;
|
||||
SELECT 'Hello', format('\x1b[38;2;{0};{1};{2}m█\x1b[0m test \x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x FORMAT Pretty;
|
||||
|
Loading…
Reference in New Issue
Block a user