diff --git a/base/common/StringRef.h b/base/common/StringRef.h
index ac9d7c47b72..55df3be0588 100644
--- a/base/common/StringRef.h
+++ b/base/common/StringRef.h
@@ -96,6 +96,34 @@ inline bool compareSSE2x4(const char * p1, const char * p2)
 
 inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size)
 {
+    if (size <= 16)
+    {
+        if (size >= 8)
+        {
+            /// Chunks of 8..16 bytes.
+            return unalignedLoad<uint64_t>(p1) == unalignedLoad<uint64_t>(p2)
+                && unalignedLoad<uint64_t>(p1 + size - 8) == unalignedLoad<uint64_t>(p2 + size - 8);
+        }
+        else if (size >= 4)
+        {
+            /// Chunks of 4..7 bytes.
+            return unalignedLoad<uint32_t>(p1) == unalignedLoad<uint32_t>(p2)
+                && unalignedLoad<uint32_t>(p1 + size - 4) == unalignedLoad<uint32_t>(p2 + size - 4);
+        }
+        else if (size >= 2)
+        {
+            /// Chunks of 2..3 bytes.
+            return unalignedLoad<uint16_t>(p1) == unalignedLoad<uint16_t>(p2)
+                && unalignedLoad<uint16_t>(p1 + size - 2) == unalignedLoad<uint16_t>(p2 + size - 2);
+        }
+        else if (size >= 1)
+        {
+            /// A single byte.
+            return *p1 == *p2;
+        }
+        return true;
+    }
+
     while (size >= 64)
     {
         if (compareSSE2x4(p1, p2))
@@ -108,39 +136,15 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size)
             return false;
     }
 
-    switch ((size % 64) / 16)
+    switch (size / 16)
     {
         case 3: if (!compareSSE2(p1 + 32, p2 + 32)) return false; [[fallthrough]];
         case 2: if (!compareSSE2(p1 + 16, p2 + 16)) return false; [[fallthrough]];
-        case 1: if (!compareSSE2(p1     , p2     )) return false; [[fallthrough]];
-        case 0: break;
+        case 1: if (!compareSSE2(p1, p2)) return false; [[fallthrough]];
+        case 0: return compareSSE2(p1 + size - 16, p2 + size - 16);
     }
 
-    p1 += (size % 64) / 16 * 16;
-    p2 += (size % 64) / 16 * 16;
-
-    switch (size % 16)
-    {
-        case 15: if (p1[14] != p2[14]) return false; [[fallthrough]];
-        case 14: if (p1[13] != p2[13]) return false; [[fallthrough]];
-        case 13: if (p1[12] != p2[12]) return false; [[fallthrough]];
-        case 12: if (unalignedLoad<uint32_t>(p1 + 8) == unalignedLoad<uint32_t>(p2 + 8)) goto l8; else return false;
-        case 11: if (p1[10] != p2[10]) return false; [[fallthrough]];
-        case 10: if (p1[9] != p2[9]) return false; [[fallthrough]];
-        case 9:  if (p1[8] != p2[8]) return false;
-        l8: [[fallthrough]];
-        case 8:  return unalignedLoad<uint64_t>(p1) == unalignedLoad<uint64_t>(p2);
-        case 7:  if (p1[6] != p2[6]) return false; [[fallthrough]];
-        case 6:  if (p1[5] != p2[5]) return false; [[fallthrough]];
-        case 5:  if (p1[4] != p2[4]) return false; [[fallthrough]];
-        case 4:  return unalignedLoad<uint32_t>(p1) == unalignedLoad<uint32_t>(p2);
-        case 3:  if (p1[2] != p2[2]) return false; [[fallthrough]];
-        case 2:  return unalignedLoad<uint16_t>(p1) == unalignedLoad<uint16_t>(p2);
-        case 1:  if (p1[0] != p2[0]) return false; [[fallthrough]];
-        case 0:  break;
-    }
-
-    return true;
+    __builtin_unreachable();
 }
 
 #endif