mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Merge pull request #63270 from Blargian/59020_computeWidth_skip_ANSI
`UTF8::computeWidth` skip ANSI escape sequences
This commit is contained in:
commit
21cbbdd983
@ -138,6 +138,12 @@ inline bool isPrintableASCII(char c)
|
||||
return uc >= 32 && uc <= 126; /// 127 is ASCII DEL.
|
||||
}
|
||||
|
||||
inline bool isCSIFinalByte(char c)
|
||||
{
|
||||
uint8_t uc = c;
|
||||
return uc >= 0x40 && uc <= 0x7E; /// ASCII @A–Z[\]^_`a–z{|}~
|
||||
}
|
||||
|
||||
inline bool isPunctuationASCII(char c)
|
||||
{
|
||||
uint8_t uc = c;
|
||||
|
@ -97,13 +97,14 @@ namespace
|
||||
enum ComputeWidthMode
|
||||
{
|
||||
Width, /// Calculate and return visible width
|
||||
BytesBeforLimit /// Calculate and return the maximum number of bytes when substring fits in visible width.
|
||||
BytesBeforeLimit /// Calculate and return the maximum number of bytes when substring fits in visible width.
|
||||
};
|
||||
|
||||
template <ComputeWidthMode mode>
|
||||
size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
|
||||
{
|
||||
UTF8Decoder decoder;
|
||||
int isEscapeSequence = false;
|
||||
size_t width = 0;
|
||||
size_t rollback = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
@ -132,21 +133,32 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
|
||||
}
|
||||
else
|
||||
{
|
||||
i += 16;
|
||||
width += 16;
|
||||
if (isEscapeSequence)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
i += 16;
|
||||
width += 16;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
while (i < size && isPrintableASCII(data[i]))
|
||||
{
|
||||
++width;
|
||||
if (!isEscapeSequence)
|
||||
++width;
|
||||
else if (isCSIFinalByte(data[i]) && data[i - 1] != '\x1b')
|
||||
isEscapeSequence = false; /// end of CSI escape sequence reached
|
||||
++i;
|
||||
}
|
||||
|
||||
/// Now i points to position in bytes after regular ASCII sequence
|
||||
/// and if width > limit, then (width - limit) is the number of extra ASCII characters after width limit.
|
||||
if (mode == BytesBeforLimit && width > limit)
|
||||
|
||||
if (mode == BytesBeforeLimit && width > limit)
|
||||
return i - (width - limit);
|
||||
|
||||
switch (decoder.decode(data[i]))
|
||||
@ -162,20 +174,18 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
|
||||
}
|
||||
case UTF8Decoder::ACCEPT:
|
||||
{
|
||||
// there are special control characters that manipulate the terminal output.
|
||||
// (`0x08`, `0x09`, `0x0a`, `0x0b`, `0x0c`, `0x0d`, `0x1b`)
|
||||
// Since we don't touch the original column data, there is no easy way to escape them.
|
||||
// TODO: escape control characters
|
||||
// TODO: multiline support for '\n'
|
||||
|
||||
// special treatment for '\t'
|
||||
// special treatment for '\t' and for ESC
|
||||
size_t next_width = width;
|
||||
if (decoder.codepoint == '\t')
|
||||
if (decoder.codepoint == '\x1b')
|
||||
isEscapeSequence = true;
|
||||
else if (decoder.codepoint == '\t')
|
||||
next_width += 8 - (prefix + width) % 8;
|
||||
else
|
||||
next_width += wcwidth(decoder.codepoint);
|
||||
|
||||
if (mode == BytesBeforLimit && next_width > limit)
|
||||
if (mode == BytesBeforeLimit && next_width > limit)
|
||||
return i - rollback;
|
||||
width = next_width;
|
||||
|
||||
@ -189,7 +199,7 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
|
||||
}
|
||||
|
||||
// no need to handle trailing sequence as they have zero width
|
||||
return (mode == BytesBeforLimit) ? size : width;
|
||||
return (mode == BytesBeforeLimit) ? size : width;
|
||||
}
|
||||
|
||||
}
|
||||
@ -202,7 +212,7 @@ size_t computeWidth(const UInt8 * data, size_t size, size_t prefix) noexcept
|
||||
|
||||
size_t computeBytesBeforeWidth(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
|
||||
{
|
||||
return computeWidthImpl<BytesBeforLimit>(data, size, prefix, limit);
|
||||
return computeWidthImpl<BytesBeforeLimit>(data, size, prefix, limit);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,5 @@
|
||||
┏━━━┓
|
||||
┃ x ┃
|
||||
┡━━━┩
|
||||
1. │ [38;2;255;128;0m█[0m │
|
||||
└───┘
|
@ -0,0 +1 @@
|
||||
SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 0) AS x FORMAT Pretty;
|
Loading…
Reference in New Issue
Block a user