Merge pull request #63270 from Blargian/59020_computeWidth_skip_ANSI

`UTF8::computeWidth` skip ANSI escape sequences
This commit is contained in:
Alexey Milovidov 2024-05-07 09:05:34 +00:00 committed by GitHub
commit 21cbbdd983
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 36 additions and 14 deletions

View File

@ -138,6 +138,12 @@ inline bool isPrintableASCII(char c)
return uc >= 32 && uc <= 126; /// 127 is ASCII DEL.
}
inline bool isCSIFinalByte(char c)
{
uint8_t uc = c;
return uc >= 0x40 && uc <= 0x7E; /// ASCII @AZ[\]^_`az{|}~
}
inline bool isPunctuationASCII(char c)
{
uint8_t uc = c;

View File

@ -97,13 +97,14 @@ namespace
enum ComputeWidthMode
{
Width, /// Calculate and return visible width
BytesBeforLimit /// Calculate and return the maximum number of bytes when substring fits in visible width.
BytesBeforeLimit /// Calculate and return the maximum number of bytes when substring fits in visible width.
};
template <ComputeWidthMode mode>
size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
{
UTF8Decoder decoder;
int isEscapeSequence = false;
size_t width = 0;
size_t rollback = 0;
for (size_t i = 0; i < size; ++i)
@ -132,21 +133,32 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
}
else
{
i += 16;
width += 16;
if (isEscapeSequence)
{
break;
}
else
{
i += 16;
width += 16;
}
}
}
#endif
while (i < size && isPrintableASCII(data[i]))
{
++width;
if (!isEscapeSequence)
++width;
else if (isCSIFinalByte(data[i]) && data[i - 1] != '\x1b')
isEscapeSequence = false; /// end of CSI escape sequence reached
++i;
}
/// Now i points to position in bytes after regular ASCII sequence
/// and if width > limit, then (width - limit) is the number of extra ASCII characters after width limit.
if (mode == BytesBeforLimit && width > limit)
if (mode == BytesBeforeLimit && width > limit)
return i - (width - limit);
switch (decoder.decode(data[i]))
@ -162,20 +174,18 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
}
case UTF8Decoder::ACCEPT:
{
// there are special control characters that manipulate the terminal output.
// (`0x08`, `0x09`, `0x0a`, `0x0b`, `0x0c`, `0x0d`, `0x1b`)
// Since we don't touch the original column data, there is no easy way to escape them.
// TODO: escape control characters
// TODO: multiline support for '\n'
// special treatment for '\t'
// special treatment for '\t' and for ESC
size_t next_width = width;
if (decoder.codepoint == '\t')
if (decoder.codepoint == '\x1b')
isEscapeSequence = true;
else if (decoder.codepoint == '\t')
next_width += 8 - (prefix + width) % 8;
else
next_width += wcwidth(decoder.codepoint);
if (mode == BytesBeforLimit && next_width > limit)
if (mode == BytesBeforeLimit && next_width > limit)
return i - rollback;
width = next_width;
@ -189,7 +199,7 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
}
// no need to handle trailing sequence as they have zero width
return (mode == BytesBeforLimit) ? size : width;
return (mode == BytesBeforeLimit) ? size : width;
}
}
@ -202,7 +212,7 @@ size_t computeWidth(const UInt8 * data, size_t size, size_t prefix) noexcept
size_t computeBytesBeforeWidth(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
{
return computeWidthImpl<BytesBeforLimit>(data, size, prefix, limit);
return computeWidthImpl<BytesBeforeLimit>(data, size, prefix, limit);
}
}

View File

@ -0,0 +1,5 @@
┏━━━┓
┃ x ┃
┡━━━┩
1. │ █ │
└───┘

View File

@ -0,0 +1 @@
SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 0) AS x FORMAT Pretty;