diff --git a/base/common/StringRef.h b/base/common/StringRef.h index df659f50123..2247f0de2ed 100644 --- a/base/common/StringRef.h +++ b/base/common/StringRef.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -27,7 +28,11 @@ struct StringRef size_t size = 0; template > - constexpr StringRef(const CharT * data_, size_t size_) : data(reinterpret_cast(data_)), size(size_) {} + constexpr StringRef(const CharT * data_, size_t size_) : data(reinterpret_cast(data_)), size(size_) + { + /// Sanity check for overflowed values. + assert(size < 0x8000000000000000ULL); + } StringRef(const std::string & s) : data(s.data()), size(s.size()) {} constexpr explicit StringRef(const std::string_view & s) : data(s.data()), size(s.size()) {} diff --git a/src/Functions/URL/netloc.h b/src/Functions/URL/netloc.h index 443ef7f9003..ac1e57a884a 100644 --- a/src/Functions/URL/netloc.h +++ b/src/Functions/URL/netloc.h @@ -20,7 +20,8 @@ struct ExtractNetloc Pos pos = data; Pos end = data + size; - if (*pos == '/' && *(pos + 1) == '/') + /// Skip scheme. + if (pos + 2 < end && pos[0] == '/' && pos[1] == '/') { pos += 2; } @@ -55,16 +56,18 @@ struct ExtractNetloc case '&': return StringRef{}; default: - goto exloop; + pos = scheme_end; /// exit from the loop } } } -exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') - pos += 3; - else - pos = data; + if (pos + 2 < scheme_end && pos[0] == ':' && pos[1] == '/' && pos[2] == '/') + pos += 3; + else + pos = data; } + /// Now pos points to the first byte after scheme (if there is). + bool has_identification = false; Pos question_mark_pos = end; Pos slash_pos = end; @@ -106,7 +109,9 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos case ';': case '=': case '&': - return StringRef(start_of_host, std::min(std::min(pos - 1, question_mark_pos), slash_pos) - start_of_host); + return pos > start_of_host + ? StringRef(start_of_host, std::min(std::min(pos - 1, question_mark_pos), slash_pos) - start_of_host) + : StringRef{}; } } diff --git a/tests/queries/0_stateless/01434_netloc_fuzz.reference b/tests/queries/0_stateless/01434_netloc_fuzz.reference new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/tests/queries/0_stateless/01434_netloc_fuzz.reference @@ -0,0 +1 @@ + diff --git a/tests/queries/0_stateless/01434_netloc_fuzz.sql b/tests/queries/0_stateless/01434_netloc_fuzz.sql new file mode 100644 index 00000000000..a409add313f --- /dev/null +++ b/tests/queries/0_stateless/01434_netloc_fuzz.sql @@ -0,0 +1 @@ +SELECT netloc('<\'[%UzO');