Merge branch 'improve-performance-of-client-interactive-mode-2' into improve-performance-of-client-interactive-mode

This commit is contained in:
Alexey Milovidov 2020-05-31 23:03:21 +03:00
commit 4c2950b438

View File

@ -89,7 +89,18 @@ static int wcwidth(wchar_t wc)
}
}
size_t computeWidth(const UInt8 * data, size_t size, size_t prefix) noexcept
namespace
{
/// Selects which quantity the width computation reports to its caller.
enum ComputeWidthMode
{
    /// Calculate and return the visible width.
    Width = 0,
    /// Calculate and return the maximum number of bytes such that the substring still fits in the visible width.
    BytesBeforLimit = 1
};
template <ComputeWidthMode mode>
static size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
{
UTF8Decoder decoder;
size_t width = 0;
@ -97,22 +108,57 @@ size_t computeWidth(const UInt8 * data, size_t size, size_t prefix) noexcept
for (size_t i = 0; i < size; ++i)
{
/// Quickly skip regular ASCII
#if defined(__SSE2__)
const auto lower_bound = _mm_set1_epi8(32);
const auto upper_bound = _mm_set1_epi8(126);
while (i + 15 < size)
{
__m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&data[i]));
const uint16_t non_regular_width_mask = _mm_movemask_epi8(
_mm_or_si128(
_mm_cmplt_epi8(bytes, lower_bound),
_mm_cmpgt_epi8(bytes, upper_bound)));
if (non_regular_width_mask)
{
auto num_regular_chars = __builtin_ctz(non_regular_width_mask);
width += num_regular_chars;
i += num_regular_chars;
break;
}
else
{
i += 16;
width += 16;
}
}
#endif
while (i < size && isPrintableASCII(data[i]))
{
++width;
++i;
}
if (mode == BytesBeforLimit && width >= limit)
return i - (width - limit);
switch (decoder.decode(data[i]))
{
case UTF8Decoder::REJECT:
{
decoder.reset();
// invalid sequences seem to have zero width in modern terminals
// tested in libvte-based, alacritty, urxvt and xterm
i -= rollback;
rollback = 0;
break;
}
case UTF8Decoder::ACCEPT:
{
// there are special control characters that manipulate the terminal output.
// (`0x08`, `0x09`, `0x0a`, `0x0b`, `0x0c`, `0x0d`, `0x1b`)
// Since we don't touch the original column data, there is no easy way to escape them.
@ -120,12 +166,19 @@ size_t computeWidth(const UInt8 * data, size_t size, size_t prefix) noexcept
// TODO: multiline support for '\n'
// special treatment for '\t'
size_t next_width = width;
if (decoder.codepoint == '\t')
width += 8 - (prefix + width) % 8;
next_width += 8 - (prefix + width) % 8;
else
width += wcwidth(decoder.codepoint);
next_width += wcwidth(decoder.codepoint);
if (mode == BytesBeforLimit && next_width >= limit)
return i - rollback;
width = next_width;
rollback = 0;
break;
}
// continue if we meet other values here
default:
++rollback;
@ -133,7 +186,21 @@ size_t computeWidth(const UInt8 * data, size_t size, size_t prefix) noexcept
}
// no need to handle trailing sequences, as they have zero width
return width;
}
return (mode == BytesBeforLimit) ? size : width;
}
}
/// Returns the visible width of the `size` bytes starting at `data`.
/// `prefix` is the width already occupied before the string; it is added to the running width when a tab is expanded.
size_t computeWidth(const UInt8 * data, size_t size, size_t prefix) noexcept
{
    /// Placeholder: the limit is only meaningful in BytesBeforLimit mode.
    constexpr size_t unused_limit = 0;
    return computeWidthImpl<Width>(data, size, prefix, unused_limit);
}
/// Returns the number of leading bytes of `data` (at most `size`) whose rendering fits in visible width `limit`.
/// `prefix` is the width already occupied before the string; it is added to the running width when a tab is expanded.
size_t computeBytesBeforeWidth(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
{
    const size_t bytes_that_fit = computeWidthImpl<BytesBeforLimit>(data, size, prefix, limit);
    return bytes_that_fit;
}
}
}