diff --git a/src/Common/UTF8Helpers.h b/src/Common/UTF8Helpers.h index 926bb6a7f92..129a745afe2 100644 --- a/src/Common/UTF8Helpers.h +++ b/src/Common/UTF8Helpers.h @@ -42,7 +42,7 @@ inline void syncForward(const UInt8 * & s, const UInt8 * const end) /// returns UTF-8 code point sequence length judging by it's first octet inline size_t seqLength(const UInt8 first_octet) { - if (first_octet < 0x80u) + if (first_octet < 0x80 || first_octet >= 0xF8) /// Octets 0xF8..0xFF never begin a valid UTF-8 sequence; count each as one byte. return 1; const size_t bits = 8; diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.reference b/tests/queries/0_stateless/01431_utf8_ubsan.reference new file mode 100644 index 00000000000..c98c950d535 --- /dev/null +++ b/tests/queries/0_stateless/01431_utf8_ubsan.reference @@ -0,0 +1,2 @@ +FF +FF diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.sql b/tests/queries/0_stateless/01431_utf8_ubsan.sql new file mode 100644 index 00000000000..d6a299225b1 --- /dev/null +++ b/tests/queries/0_stateless/01431_utf8_ubsan.sql @@ -0,0 +1,2 @@ +SELECT hex(lowerUTF8('\xFF')); +SELECT hex(upperUTF8('\xFF'));