diff --git a/dbms/src/AggregateFunctions/QuantileTDigest.h b/dbms/src/AggregateFunctions/QuantileTDigest.h index ca7d4f2fb1a..c4ee76b6eed 100644 --- a/dbms/src/AggregateFunctions/QuantileTDigest.h +++ b/dbms/src/AggregateFunctions/QuantileTDigest.h @@ -85,7 +85,7 @@ class QuantileTDigest Params params; - /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes. - static constexpr size_t bytes_in_arena = 64 - sizeof(PODArray) - sizeof(Count) - sizeof(UInt32); + /// The memory will be allocated to several elements at once, so that the state occupies 128 bytes. + static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray) - sizeof(Count) - sizeof(UInt32); using Summary = PODArray, bytes_in_arena>>; diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index 69bcdac2ab7..4652e4a08c8 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -255,6 +255,11 @@ size_t ColumnAggregateFunction::allocatedBytes() const return res; } +void ColumnAggregateFunction::protect() +{ + data.protect(); +} + MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const { return create(func, Arenas(1, std::make_shared())); diff --git a/dbms/src/Columns/ColumnAggregateFunction.h b/dbms/src/Columns/ColumnAggregateFunction.h index 3fc76b4c047..a028a95d68c 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.h +++ b/dbms/src/Columns/ColumnAggregateFunction.h @@ -157,6 +157,8 @@ public: size_t allocatedBytes() const override; + void protect() override; + void insertRangeFrom(const IColumn & from, size_t start, size_t length) override; void popBack(size_t n) override; diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index 4ceda666db7..eeb06b64f49 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -311,6 +311,13 @@ size_t ColumnArray::allocatedBytes() const } +void ColumnArray::protect() +{ + getData().protect(); + getOffsets().protect(); +} + + bool 
ColumnArray::hasEqualOffsets(const ColumnArray & other) const { if (offsets == other.offsets) diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index 3e1b586e755..d58dfba025a 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -78,6 +78,7 @@ public: void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; + void protect() override; ColumnPtr replicate(const Offsets & replicate_offsets) const override; ColumnPtr convertToFullColumnIfConst() const override; void getExtremes(Field & min, Field & max) const override; diff --git a/dbms/src/Columns/ColumnDecimal.h b/dbms/src/Columns/ColumnDecimal.h index 50a6d9d67fb..372b0c245c0 100644 --- a/dbms/src/Columns/ColumnDecimal.h +++ b/dbms/src/Columns/ColumnDecimal.h @@ -87,6 +87,7 @@ public: size_t size() const override { return data.size(); } size_t byteSize() const override { return data.size() * sizeof(data[0]); } size_t allocatedBytes() const override { return data.allocated_bytes(); } + void protect() override { data.protect(); } void reserve(size_t n) override { data.reserve(n); } void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } diff --git a/dbms/src/Columns/ColumnFixedString.h b/dbms/src/Columns/ColumnFixedString.h index 941314b8888..b773d7c8eb4 100644 --- a/dbms/src/Columns/ColumnFixedString.h +++ b/dbms/src/Columns/ColumnFixedString.h @@ -57,6 +57,11 @@ public: return chars.allocated_bytes() + sizeof(n); } + void protect() override + { + chars.protect(); + } + Field operator[](size_t index) const override { return String(reinterpret_cast(&chars[n * index]), n); diff --git a/dbms/src/Columns/ColumnLowCardinality.cpp b/dbms/src/Columns/ColumnLowCardinality.cpp index c919116112c..c9a475fd8a6 100644 --- a/dbms/src/Columns/ColumnLowCardinality.cpp +++ b/dbms/src/Columns/ColumnLowCardinality.cpp @@ -363,7 +363,6 @@ ColumnPtr ColumnLowCardinality::countKeys() 
const } - ColumnLowCardinality::Index::Index() : positions(ColumnUInt8::create()), size_of_type(sizeof(UInt8)) {} ColumnLowCardinality::Index::Index(MutableColumnPtr && positions) : positions(std::move(positions)) diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index b88cf60581b..d9a8ea4f825 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -291,6 +291,12 @@ size_t ColumnNullable::allocatedBytes() const return getNestedColumn().allocatedBytes() + getNullMapColumn().allocatedBytes(); } +void ColumnNullable::protect() +{ + getNestedColumn().protect(); + getNullMapColumn().protect(); +} + namespace { diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h index c8453a29689..8012d03b0e8 100644 --- a/dbms/src/Columns/ColumnNullable.h +++ b/dbms/src/Columns/ColumnNullable.h @@ -71,6 +71,7 @@ public: void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; + void protect() override; ColumnPtr replicate(const Offsets & replicate_offsets) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; void getExtremes(Field & min, Field & max) const override; diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 1717c02f1df..1443283783a 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -412,4 +412,11 @@ void ColumnString::getPermutationWithCollation(const Collator & collator, bool r } } + +void ColumnString::protect() +{ + getChars().protect(); + getOffsets().protect(); +} + } diff --git a/dbms/src/Columns/ColumnString.h b/dbms/src/Columns/ColumnString.h index 5ca05079bd5..a30a4ceb5a1 100644 --- a/dbms/src/Columns/ColumnString.h +++ b/dbms/src/Columns/ColumnString.h @@ -68,6 +68,8 @@ public: return chars.allocated_bytes() + offsets.allocated_bytes(); } + void protect() override; + MutableColumnPtr cloneResized(size_t 
to_size) const override; Field operator[](size_t n) const override diff --git a/dbms/src/Columns/ColumnTuple.cpp b/dbms/src/Columns/ColumnTuple.cpp index c235cd07c31..ec0bcc1f5b5 100644 --- a/dbms/src/Columns/ColumnTuple.cpp +++ b/dbms/src/Columns/ColumnTuple.cpp @@ -315,6 +315,12 @@ size_t ColumnTuple::allocatedBytes() const return res; } +void ColumnTuple::protect() +{ + for (auto & column : columns) + column->assumeMutableRef().protect(); +} + void ColumnTuple::getExtremes(Field & min, Field & max) const { const size_t tuple_size = columns.size(); diff --git a/dbms/src/Columns/ColumnTuple.h b/dbms/src/Columns/ColumnTuple.h index d146c8bff6c..c39a92e3c8c 100644 --- a/dbms/src/Columns/ColumnTuple.h +++ b/dbms/src/Columns/ColumnTuple.h @@ -71,6 +71,7 @@ public: void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; + void protect() override; void forEachSubcolumn(ColumnCallback callback) override; size_t tupleSize() const { return columns.size(); } diff --git a/dbms/src/Columns/ColumnUnique.h b/dbms/src/Columns/ColumnUnique.h index 85a9c498a94..5eee80dc9d8 100644 --- a/dbms/src/Columns/ColumnUnique.h +++ b/dbms/src/Columns/ColumnUnique.h @@ -80,6 +80,7 @@ public: bool isNumeric() const override { return column_holder->isNumeric(); } size_t byteSize() const override { return column_holder->byteSize(); } + void protect() override { column_holder->assumeMutableRef().protect(); } size_t allocatedBytes() const override { return column_holder->allocatedBytes() diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h index 1c5a45ef6ad..9de84f95b4a 100644 --- a/dbms/src/Columns/ColumnVector.h +++ b/dbms/src/Columns/ColumnVector.h @@ -163,6 +163,11 @@ public: return data.allocated_bytes(); } + void protect() override + { + data.protect(); + } + void insertValue(const T value) { data.push_back(value); diff --git a/dbms/src/Columns/ColumnVectorHelper.h b/dbms/src/Columns/ColumnVectorHelper.h index 
8a25812ffe7..d805f44218c 100644 --- a/dbms/src/Columns/ColumnVectorHelper.h +++ b/dbms/src/Columns/ColumnVectorHelper.h @@ -24,9 +24,10 @@ namespace DB class ColumnVectorHelper : public IColumn { public: + template const char * getRawDataBegin() const { - return *reinterpret_cast(reinterpret_cast(this) + sizeof(*this)); + return reinterpret_cast, 15, 16> *>(reinterpret_cast(this) + sizeof(*this))->raw_data(); } template diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 2560b9639ad..86a1097d368 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -253,6 +253,10 @@ public: /// Zero, if could be determined. virtual size_t allocatedBytes() const = 0; + /// Make memory region readonly with mprotect if it is large enough. + /// The operation is slow and performed only for debug builds. + virtual void protect() {} + /// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them. /// Shallow: doesn't do recursive calls; don't do call for itself. using ColumnCallback = std::function; diff --git a/dbms/src/Common/Allocator.cpp b/dbms/src/Common/Allocator.cpp index ba0c7820187..92ff10eafb7 100644 --- a/dbms/src/Common/Allocator.cpp +++ b/dbms/src/Common/Allocator.cpp @@ -43,11 +43,30 @@ namespace ErrorCodes * * PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB. */ -static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20); +#ifdef NDEBUG + static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20); +#else + /// In debug build, use small mmap threshold to reproduce more memory stomping bugs. + /// Along with ASLR it will hopefully detect more issues than ASan. + /// The program may fail due to the limit on number of memory mappings. 
+ static constexpr size_t MMAP_THRESHOLD = 4096; +#endif + static constexpr size_t MMAP_MIN_ALIGNMENT = 4096; static constexpr size_t MALLOC_MIN_ALIGNMENT = 8; +template +void * Allocator::mmap_hint() +{ +#if ALLOCATOR_ASLR + return reinterpret_cast(std::uniform_int_distribution(0x100000000000UL, 0x700000000000UL)(rng)); +#else + return nullptr; +#endif +} + + template void * Allocator::alloc(size_t size, size_t alignment) { @@ -61,7 +80,7 @@ void * Allocator::alloc(size_t size, size_t alignment) throw DB::Exception("Too large alignment " + formatReadableSizeWithBinarySuffix(alignment) + ": more than page size when allocating " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::BAD_ARGUMENTS); - buf = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + buf = mmap(mmap_hint(), size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == buf) DB::throwFromErrno("Allocator: Cannot mmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); diff --git a/dbms/src/Common/Allocator.h b/dbms/src/Common/Allocator.h index 9a2ab0b975c..d2a81f77b62 100644 --- a/dbms/src/Common/Allocator.h +++ b/dbms/src/Common/Allocator.h @@ -2,6 +2,19 @@ #include +#ifdef NDEBUG + /// If set to 1 - randomize memory mappings manually (address space layout randomization) to reproduce more memory stomping bugs. + /// Note that Linux doesn't do it by default. This may lead to worse TLB performance. + #define ALLOCATOR_ASLR 0 +#else + #define ALLOCATOR_ASLR 1 +#endif + +#if ALLOCATOR_ASLR + #include + #include +#endif + /** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena. * Also used in hash tables. 
@@ -14,6 +27,12 @@ template class Allocator { +#if ALLOCATOR_ASLR +private: + pcg64 rng{randomSeed()}; +#endif + void * mmap_hint(); + protected: static constexpr bool clear_memory = clear_memory_; diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index d3401427037..f974b2bdaf6 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -419,6 +419,7 @@ namespace ErrorCodes extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE = 442; extern const int NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA = 443; extern const int UNKNOWN_PROTOBUF_FORMAT = 444; + extern const int CANNOT_MPROTECT = 445; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Common/PODArray.h b/dbms/src/Common/PODArray.h index 462842f8236..a7b8b02bb98 100644 --- a/dbms/src/Common/PODArray.h +++ b/dbms/src/Common/PODArray.h @@ -17,10 +17,19 @@ #include #include +#ifndef NDEBUG + #include +#endif + namespace DB { +namespace ErrorCodes +{ + extern const int CANNOT_MPROTECT; +} + inline constexpr size_t integerRoundUp(size_t value, size_t dividend) { return ((value + dividend - 1) / dividend) * dividend; @@ -108,6 +117,8 @@ protected: if (c_start == null) return; + unprotect(); + TAllocator::free(c_start - pad_left, allocated_bytes()); } @@ -120,6 +131,8 @@ protected: return; } + unprotect(); + ptrdiff_t end_diff = c_end - c_start; c_start = reinterpret_cast( @@ -155,6 +168,28 @@ protected: realloc(allocated_bytes() * 2, std::forward(allocator_params)...); } +#ifndef NDEBUG + /// Make memory region readonly with mprotect if it is large enough. + /// The operation is slow and performed only for debug builds. 
+ void protectImpl(int prot) + { + static constexpr size_t PAGE_SIZE = 4096; + + char * left_rounded_up = reinterpret_cast((reinterpret_cast(c_start) - pad_left + PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE); + char * right_rounded_down = reinterpret_cast((reinterpret_cast(c_end_of_storage) + pad_right) / PAGE_SIZE * PAGE_SIZE); + + if (right_rounded_down > left_rounded_up) + { + size_t length = right_rounded_down - left_rounded_up; + if (0 != mprotect(left_rounded_up, length, prot)) + throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT); + } + } + + /// Restore memory protection in destructor or realloc for further reuse by allocator. + bool mprotected = false; +#endif + public: bool empty() const { return c_end == c_start; } size_t size() const { return (c_end - c_start) / ELEMENT_SIZE; } @@ -199,6 +234,23 @@ public: c_end += byte_size(1); } + void protect() + { +#ifndef NDEBUG + protectImpl(PROT_READ); + mprotected = true; +#endif + } + + void unprotect() + { +#ifndef NDEBUG + if (mprotected) + protectImpl(PROT_READ | PROT_WRITE); /// PROT_WRITE alone does not guarantee read access. + mprotected = false; +#endif + } + ~PODArrayBase() { dealloc(); @@ -402,6 +454,11 @@ public: void swap(PODArray & rhs) { +#ifndef NDEBUG + this->unprotect(); + rhs.unprotect(); +#endif + /// Swap two PODArray objects, arr1 and arr2, that satisfy the following conditions: /// - The elements of arr1 are stored on stack. /// - The elements of arr2 are stored on heap. 
@@ -450,7 +507,9 @@ public: }; if (!this->isInitialized() && !rhs.isInitialized()) + { return; + } else if (!this->isInitialized() && rhs.isInitialized()) { do_move(rhs, *this); @@ -494,9 +553,13 @@ public: rhs.c_end = rhs.c_start + this->byte_size(lhs_size); } else if (this->isAllocatedFromStack() && !rhs.isAllocatedFromStack()) + { swap_stack_heap(*this, rhs); + } else if (!this->isAllocatedFromStack() && rhs.isAllocatedFromStack()) + { swap_stack_heap(rhs, *this); + } else { std::swap(this->c_start, rhs.c_start); diff --git a/dbms/src/Interpreters/AggregationCommon.h b/dbms/src/Interpreters/AggregationCommon.h index 12c2d53819b..74836d4463d 100644 --- a/dbms/src/Interpreters/AggregationCommon.h +++ b/dbms/src/Interpreters/AggregationCommon.h @@ -102,23 +102,23 @@ static inline T ALWAYS_INLINE packFixed( switch (key_sizes[j]) { case 1: - memcpy(bytes + offset, static_cast(column)->getRawDataBegin() + index, 1); + memcpy(bytes + offset, static_cast(column)->getRawDataBegin<1>() + index, 1); offset += 1; break; case 2: - memcpy(bytes + offset, static_cast(column)->getRawDataBegin() + index * 2, 2); + memcpy(bytes + offset, static_cast(column)->getRawDataBegin<2>() + index * 2, 2); offset += 2; break; case 4: - memcpy(bytes + offset, static_cast(column)->getRawDataBegin() + index * 4, 4); + memcpy(bytes + offset, static_cast(column)->getRawDataBegin<4>() + index * 4, 4); offset += 4; break; case 8: - memcpy(bytes + offset, static_cast(column)->getRawDataBegin() + index * 8, 8); + memcpy(bytes + offset, static_cast(column)->getRawDataBegin<8>() + index * 8, 8); offset += 8; break; default: - memcpy(bytes + offset, static_cast(column)->getRawDataBegin() + index * key_sizes[j], key_sizes[j]); + memcpy(bytes + offset, static_cast(column)->getRawDataBegin<1>() + index * key_sizes[j], key_sizes[j]); offset += key_sizes[j]; } } @@ -168,23 +168,23 @@ static inline T ALWAYS_INLINE packFixed( switch (key_sizes[j]) { case 1: - memcpy(bytes + offset, 
static_cast(key_columns[j])->getRawDataBegin() + i, 1); + memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<1>() + i, 1); offset += 1; break; case 2: - memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin() + i * 2, 2); + memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<2>() + i * 2, 2); offset += 2; break; case 4: - memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin() + i * 4, 4); + memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<4>() + i * 4, 4); offset += 4; break; case 8: - memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin() + i * 8, 8); + memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<8>() + i * 8, 8); offset += 8; break; default: - memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin() + i * key_sizes[j], key_sizes[j]); + memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<1>() + i * key_sizes[j], key_sizes[j]); offset += key_sizes[j]; } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index bf9c5b3409d..01ff4c4cdac 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -513,13 +513,16 @@ void MergeTreeDataPart::loadIndex() for (size_t i = 0; i < marks_count; ++i) //-V756 for (size_t j = 0; j < key_size; ++j) - storage.primary_key_data_types[j]->deserializeBinary(*loaded_index[j].get(), index_file); + storage.primary_key_data_types[j]->deserializeBinary(*loaded_index[j], index_file); for (size_t i = 0; i < key_size; ++i) + { + loaded_index[i]->protect(); if (loaded_index[i]->size() != marks_count) throw Exception("Cannot read all data from index file " + index_path + "(expected size: " + toString(marks_count) + ", read: " + toString(loaded_index[i]->size()) + ")", ErrorCodes::CANNOT_READ_ALL_DATA); + } if (!index_file.eof()) throw Exception("Index file " + index_path + " 
is unexpectedly long", ErrorCodes::EXPECTED_END_OF_FILE); diff --git a/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp index 9091228d80a..89f5aaeafd5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -132,6 +132,7 @@ void MergeTreeReaderStream::loadMarks() if (buffer.eof() || buffer.buffer().size() != file_size) throw Exception("Cannot read all marks from file " + mrk_path, ErrorCodes::CANNOT_READ_ALL_DATA); + res->protect(); return res; }; diff --git a/libs/libcommon/include/common/mremap.h b/libs/libcommon/include/common/mremap.h index f569ff05d4e..31ca74da827 100644 --- a/libs/libcommon/include/common/mremap.h +++ b/libs/libcommon/include/common/mremap.h @@ -12,7 +12,8 @@ #define MREMAP_MAYMOVE 1 -void * mremap(void * old_address, +void * mremap( + void * old_address, size_t old_size, size_t new_size, int flags = 0, @@ -23,7 +24,8 @@ void * mremap(void * old_address, #endif -inline void * clickhouse_mremap(void * old_address, +inline void * clickhouse_mremap( + void * old_address, size_t old_size, size_t new_size, int flags = 0, @@ -32,7 +34,8 @@ inline void * clickhouse_mremap(void * old_address, [[maybe_unused]] int mmap_fd = -1, [[maybe_unused]] off_t mmap_offset = 0) { - return mremap(old_address, + return mremap( + old_address, old_size, new_size, flags