Merge pull request #59737 from Algunenano/more_asserts_memory

Add assertions around FixedString code
This commit is contained in:
Raúl Marín 2024-02-12 16:01:02 +01:00 committed by GitHub
commit 407b7a4651
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 65 additions and 9 deletions

View File

@ -203,6 +203,7 @@ void ColumnFixedString::updatePermutation(IColumn::PermutationSortDirection dire
void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{ {
const ColumnFixedString & src_concrete = assert_cast<const ColumnFixedString &>(src); const ColumnFixedString & src_concrete = assert_cast<const ColumnFixedString &>(src);
chassert(this->n == src_concrete.n);
if (start + length > src_concrete.size()) if (start + length > src_concrete.size())
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameters start = {}, length = {} are out of bound " throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameters start = {}, length = {} are out of bound "

View File

@ -130,15 +130,21 @@ public:
int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override
{ {
const ColumnFixedString & rhs = assert_cast<const ColumnFixedString &>(rhs_); const ColumnFixedString & rhs = assert_cast<const ColumnFixedString &>(rhs_);
chassert(this->n == rhs.n);
return memcmpSmallAllowOverflow15(chars.data() + p1 * n, rhs.chars.data() + p2 * n, n); return memcmpSmallAllowOverflow15(chars.data() + p1 * n, rhs.chars.data() + p2 * n, n);
} }
void compareColumn(const IColumn & rhs, size_t rhs_row_num, void compareColumn(
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results, const IColumn & rhs_,
int direction, int nan_direction_hint) const override size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes,
PaddedPODArray<Int8> & compare_results,
int direction,
int nan_direction_hint) const override
{ {
return doCompareColumn<ColumnFixedString>(assert_cast<const ColumnFixedString &>(rhs), rhs_row_num, row_indexes, const ColumnFixedString & rhs = assert_cast<const ColumnFixedString &>(rhs_);
compare_results, direction, nan_direction_hint); chassert(this->n == rhs.n);
return doCompareColumn<ColumnFixedString>(rhs, rhs_row_num, row_indexes, compare_results, direction, nan_direction_hint);
} }
bool hasEqualValues() const override bool hasEqualValues() const override

View File

@ -13,6 +13,9 @@
#undef __msan_unpoison_string #undef __msan_unpoison_string
#define __msan_unpoison(X, Y) /// NOLINT #define __msan_unpoison(X, Y) /// NOLINT
/// Given a pointer and **its size**, unpoisons 15 bytes **at the end**
/// See memcmpSmall.h / memcpySmall.h
#define __msan_unpoison_overflow_15(X, Y) /// NOLINT
#define __msan_test_shadow(X, Y) (false) /// NOLINT #define __msan_test_shadow(X, Y) (false) /// NOLINT
#define __msan_print_shadow(X, Y) /// NOLINT #define __msan_print_shadow(X, Y) /// NOLINT
#define __msan_unpoison_string(X) /// NOLINT #define __msan_unpoison_string(X) /// NOLINT
@ -24,6 +27,8 @@
# undef __msan_print_shadow # undef __msan_print_shadow
# undef __msan_unpoison_string # undef __msan_unpoison_string
# include <sanitizer/msan_interface.h> # include <sanitizer/msan_interface.h>
# undef __msan_unpoison_overflow_15
# define __msan_unpoison_overflow_15(PTR, PTR_SIZE) __msan_unpoison(&(PTR)[(PTR_SIZE)], 15)
# endif # endif
#endif #endif

View File

@ -7,6 +7,7 @@
#include <base/simd.h> #include <base/simd.h>
#include <Core/Defines.h> #include <Core/Defines.h>
#include <Common/MemorySanitizer.h>
namespace detail namespace detail
@ -26,9 +27,8 @@ inline int cmp(T a, T b)
/// We can process uninitialized memory in the functions below. /// We can process uninitialized memory in the functions below.
/// Results don't depend on the values inside uninitialized memory but Memory Sanitizer cannot see it. /// Results don't depend on the values inside uninitialized memory
/// Disable optimized functions if compile with Memory Sanitizer. #if defined(__AVX512BW__) && defined(__AVX512VL__)
#if defined(__AVX512BW__) && defined(__AVX512VL__) && !defined(MEMORY_SANITIZER)
# include <immintrin.h> # include <immintrin.h>
@ -42,6 +42,9 @@ inline int cmp(T a, T b)
template <typename Char> template <typename Char>
inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size) inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size)
{ {
__msan_unpoison_overflow_15(a, a_size);
__msan_unpoison_overflow_15(b, b_size);
size_t min_size = std::min(a_size, b_size); size_t min_size = std::min(a_size, b_size);
for (size_t offset = 0; offset < min_size; offset += 16) for (size_t offset = 0; offset < min_size; offset += 16)
@ -74,6 +77,9 @@ inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char
template <typename Char> template <typename Char>
inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size) inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size)
{ {
__msan_unpoison_overflow_15(a, a_size);
__msan_unpoison_overflow_15(b, b_size);
size_t min_size = std::min(a_size, b_size); size_t min_size = std::min(a_size, b_size);
for (size_t offset = 0; offset < min_size; offset += 16) for (size_t offset = 0; offset < min_size; offset += 16)
@ -144,6 +150,9 @@ inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_siz
template <typename Char> template <typename Char>
inline int memcmpSmallAllowOverflow15(const Char * a, const Char * b, size_t size) inline int memcmpSmallAllowOverflow15(const Char * a, const Char * b, size_t size)
{ {
__msan_unpoison_overflow_15(a, size);
__msan_unpoison_overflow_15(b, size);
for (size_t offset = 0; offset < size; offset += 16) for (size_t offset = 0; offset < size; offset += 16)
{ {
uint16_t mask = _mm_cmp_epi8_mask( uint16_t mask = _mm_cmp_epi8_mask(
@ -174,6 +183,9 @@ inline bool memequalSmallAllowOverflow15(const Char * a, size_t a_size, const Ch
if (a_size != b_size) if (a_size != b_size)
return false; return false;
__msan_unpoison_overflow_15(a, a_size);
__msan_unpoison_overflow_15(b, b_size);
for (size_t offset = 0; offset < a_size; offset += 16) for (size_t offset = 0; offset < a_size; offset += 16)
{ {
uint16_t mask = _mm_cmp_epi8_mask( uint16_t mask = _mm_cmp_epi8_mask(
@ -246,6 +258,7 @@ inline bool memequal16(const void * a, const void * b)
/** Compare memory region to zero */ /** Compare memory region to zero */
inline bool memoryIsZeroSmallAllowOverflow15(const void * data, size_t size) inline bool memoryIsZeroSmallAllowOverflow15(const void * data, size_t size)
{ {
__msan_unpoison_overflow_15(reinterpret_cast<const char *>(data), size);
const __m128i zero16 = _mm_setzero_si128(); const __m128i zero16 = _mm_setzero_si128();
for (size_t offset = 0; offset < size; offset += 16) for (size_t offset = 0; offset < size; offset += 16)
@ -263,7 +276,7 @@ inline bool memoryIsZeroSmallAllowOverflow15(const void * data, size_t size)
return true; return true;
} }
#elif defined(__SSE2__) && !defined(MEMORY_SANITIZER) #elif defined(__SSE2__)
# include <emmintrin.h> # include <emmintrin.h>
@ -277,6 +290,9 @@ inline bool memoryIsZeroSmallAllowOverflow15(const void * data, size_t size)
template <typename Char> template <typename Char>
inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size) inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size)
{ {
__msan_unpoison_overflow_15(a, a_size);
__msan_unpoison_overflow_15(b, b_size);
size_t min_size = std::min(a_size, b_size); size_t min_size = std::min(a_size, b_size);
for (size_t offset = 0; offset < min_size; offset += 16) for (size_t offset = 0; offset < min_size; offset += 16)
@ -309,6 +325,9 @@ inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char
template <typename Char> template <typename Char>
inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size) inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size)
{ {
__msan_unpoison_overflow_15(a, a_size);
__msan_unpoison_overflow_15(b, b_size);
size_t min_size = std::min(a_size, b_size); size_t min_size = std::min(a_size, b_size);
for (size_t offset = 0; offset < min_size; offset += 16) for (size_t offset = 0; offset < min_size; offset += 16)
@ -380,6 +399,9 @@ inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_siz
template <typename Char> template <typename Char>
inline int memcmpSmallAllowOverflow15(const Char * a, const Char * b, size_t size) inline int memcmpSmallAllowOverflow15(const Char * a, const Char * b, size_t size)
{ {
__msan_unpoison_overflow_15(a, size);
__msan_unpoison_overflow_15(b, size);
for (size_t offset = 0; offset < size; offset += 16) for (size_t offset = 0; offset < size; offset += 16)
{ {
uint16_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8( uint16_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
@ -410,6 +432,9 @@ inline bool memequalSmallAllowOverflow15(const Char * a, size_t a_size, const Ch
if (a_size != b_size) if (a_size != b_size)
return false; return false;
__msan_unpoison_overflow_15(a, a_size);
__msan_unpoison_overflow_15(b, b_size);
for (size_t offset = 0; offset < a_size; offset += 16) for (size_t offset = 0; offset < a_size; offset += 16)
{ {
uint16_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8( uint16_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
@ -483,6 +508,8 @@ inline bool memequal16(const void * a, const void * b)
/** Compare memory region to zero */ /** Compare memory region to zero */
inline bool memoryIsZeroSmallAllowOverflow15(const void * data, size_t size) inline bool memoryIsZeroSmallAllowOverflow15(const void * data, size_t size)
{ {
__msan_unpoison_overflow_15(reinterpret_cast<const char *>(data), size);
const __m128i zero16 = _mm_setzero_si128(); const __m128i zero16 = _mm_setzero_si128();
for (size_t offset = 0; offset < size; offset += 16) for (size_t offset = 0; offset < size; offset += 16)
@ -509,6 +536,9 @@ inline bool memoryIsZeroSmallAllowOverflow15(const void * data, size_t size)
template <typename Char> template <typename Char>
inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size) inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size)
{ {
__msan_unpoison_overflow_15(a, a_size);
__msan_unpoison_overflow_15(b, b_size);
size_t min_size = std::min(a_size, b_size); size_t min_size = std::min(a_size, b_size);
for (size_t offset = 0; offset < min_size; offset += 16) for (size_t offset = 0; offset < min_size; offset += 16)
@ -534,6 +564,9 @@ inline int memcmpSmallAllowOverflow15(const Char * a, size_t a_size, const Char
template <typename Char> template <typename Char>
inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size) inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_size, const Char * b, size_t b_size)
{ {
__msan_unpoison_overflow_15(a, a_size);
__msan_unpoison_overflow_15(b, b_size);
size_t min_size = std::min(a_size, b_size); size_t min_size = std::min(a_size, b_size);
for (size_t offset = 0; offset < min_size; offset += 16) for (size_t offset = 0; offset < min_size; offset += 16)
@ -599,6 +632,9 @@ inline int memcmpSmallLikeZeroPaddedAllowOverflow15(const Char * a, size_t a_siz
template <typename Char> template <typename Char>
inline int memcmpSmallAllowOverflow15(const Char * a, const Char * b, size_t size) inline int memcmpSmallAllowOverflow15(const Char * a, const Char * b, size_t size)
{ {
__msan_unpoison_overflow_15(a, size);
__msan_unpoison_overflow_15(b, size);
for (size_t offset = 0; offset < size; offset += 16) for (size_t offset = 0; offset < size; offset += 16)
{ {
uint64_t mask = getNibbleMask(vceqq_u8( uint64_t mask = getNibbleMask(vceqq_u8(
@ -625,6 +661,9 @@ inline bool memequalSmallAllowOverflow15(const Char * a, size_t a_size, const Ch
if (a_size != b_size) if (a_size != b_size)
return false; return false;
__msan_unpoison_overflow_15(a, a_size);
__msan_unpoison_overflow_15(b, b_size);
for (size_t offset = 0; offset < a_size; offset += 16) for (size_t offset = 0; offset < a_size; offset += 16)
{ {
uint64_t mask = getNibbleMask(vceqq_u8( uint64_t mask = getNibbleMask(vceqq_u8(
@ -683,6 +722,7 @@ inline bool memequal16(const void * a, const void * b)
inline bool memoryIsZeroSmallAllowOverflow15(const void * data, size_t size) inline bool memoryIsZeroSmallAllowOverflow15(const void * data, size_t size)
{ {
__msan_unpoison_overflow_15(reinterpret_cast<const char *>(data), size);
for (size_t offset = 0; offset < size; offset += 16) for (size_t offset = 0; offset < size; offset += 16)
{ {
uint64_t mask = getNibbleMask(vceqzq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(data) + offset))); uint64_t mask = getNibbleMask(vceqzq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(data) + offset)));

View File

@ -1,5 +1,7 @@
#pragma once #pragma once
#include <Common/MemorySanitizer.h>
#include <cstring> #include <cstring>
#include <sys/types.h> /// ssize_t #include <sys/types.h> /// ssize_t
@ -38,6 +40,7 @@ namespace detail
{ {
inline void memcpySmallAllowReadWriteOverflow15Impl(char * __restrict dst, const char * __restrict src, ssize_t n) inline void memcpySmallAllowReadWriteOverflow15Impl(char * __restrict dst, const char * __restrict src, ssize_t n)
{ {
__msan_unpoison_overflow_15(src, n);
while (n > 0) while (n > 0)
{ {
_mm_storeu_si128(reinterpret_cast<__m128i *>(dst), _mm_storeu_si128(reinterpret_cast<__m128i *>(dst),
@ -64,6 +67,7 @@ namespace detail
{ {
inline void memcpySmallAllowReadWriteOverflow15Impl(char * __restrict dst, const char * __restrict src, ssize_t n) inline void memcpySmallAllowReadWriteOverflow15Impl(char * __restrict dst, const char * __restrict src, ssize_t n)
{ {
__msan_unpoison_overflow_15(src, n);
while (n > 0) while (n > 0)
{ {
vst1q_s8(reinterpret_cast<signed char *>(dst), vld1q_s8(reinterpret_cast<const signed char *>(src))); vst1q_s8(reinterpret_cast<signed char *>(dst), vld1q_s8(reinterpret_cast<const signed char *>(src)));