Merge pull request #13500 from ClickHouse/hardening-and-better-error-messages

Hardening and better error messages
This commit is contained in:
alexey-milovidov 2020-08-08 17:13:43 +03:00 committed by GitHub
commit beed3c8244
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 56 additions and 11 deletions

View File

@ -42,11 +42,12 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr &&
if (!offsets_concrete)
throw Exception("offsets_column must be a ColumnUInt64", ErrorCodes::LOGICAL_ERROR);
size_t size = offsets_concrete->size();
if (size != 0 && nested_column)
if (!offsets_concrete->empty() && nested_column)
{
Offset last_offset = offsets_concrete->getData().back();
/// This will also prevent possible overflow in offset.
if (nested_column->size() != offsets_concrete->getData()[size - 1])
if (nested_column->size() != last_offset)
throw Exception("offsets_column has data inconsistent with nested_column", ErrorCodes::LOGICAL_ERROR);
}

View File

@ -22,6 +22,22 @@ namespace ErrorCodes
}
ColumnString::ColumnString(const ColumnString & src)
: COWHelper<IColumn, ColumnString>(src),
offsets(src.offsets.begin(), src.offsets.end()),
chars(src.chars.begin(), src.chars.end())
{
if (!offsets.empty())
{
Offset last_offset = offsets.back();
/// This will also prevent possible overflow in offset.
if (chars.size() != last_offset)
throw Exception("String offsets has data inconsistent with chars array", ErrorCodes::LOGICAL_ERROR);
}
}
MutableColumnPtr ColumnString::cloneResized(size_t to_size) const
{
auto res = ColumnString::create();

View File

@ -49,10 +49,7 @@ private:
struct lessWithCollation;
ColumnString() = default;
ColumnString(const ColumnString & src)
: offsets(src.offsets.begin(), src.offsets.end()),
chars(src.chars.begin(), src.chars.end()) {}
ColumnString(const ColumnString & src);
public:
const char * getFamilyName() const override { return "String"; }

View File

@ -70,6 +70,7 @@ namespace ErrorCodes
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int CANNOT_MUNMAP;
extern const int CANNOT_MREMAP;
extern const int LOGICAL_ERROR;
}
}
@ -90,6 +91,7 @@ public:
/// Allocate memory range.
void * alloc(size_t size, size_t alignment = 0)
{
checkSize(size);
CurrentMemoryTracker::alloc(size);
return allocNoTrack(size, alignment);
}
@ -97,6 +99,7 @@ public:
/// Free memory range.
void free(void * buf, size_t size)
{
checkSize(size);
freeNoTrack(buf, size);
CurrentMemoryTracker::free(size);
}
@ -107,6 +110,8 @@ public:
*/
void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0)
{
checkSize(new_size);
if (old_size == new_size)
{
/// nothing to do.
@ -244,6 +249,13 @@ private:
}
}
void checkSize(size_t size)
{
/// More obvious exception in case of possible overflow (instead of just "Cannot mmap").
if (size >= 0x8000000000000000ULL)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
}
#ifndef NDEBUG
/// In debug builds, request mmap() at random addresses (a kind of ASLR), to
/// reproduce more memory stomping bugs. Note that Linux doesn't do it by

View File

@ -18,6 +18,7 @@ namespace DB
namespace ErrorCodes
{
extern const int MEMORY_LIMIT_EXCEEDED;
extern const int LOGICAL_ERROR;
}
}
@ -62,6 +63,9 @@ void MemoryTracker::logMemoryUsage(Int64 current) const
void MemoryTracker::alloc(Int64 size)
{
if (size < 0)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Negative size ({}) is passed to MemoryTracker. It is a bug.", size);
if (blocker.isCancelled())
return;

View File

@ -28,6 +28,14 @@ struct RepeatImpl
ErrorCodes::TOO_LARGE_STRING_SIZE);
}
static inline void checkStringSize(UInt64 size)
{
static constexpr UInt64 max_string_size = 1 << 30;
if (size > max_string_size)
throw Exception("Too large string size (" + std::to_string(size) + ") in function repeat, maximum is: " + std::to_string(max_string_size),
ErrorCodes::TOO_LARGE_STRING_SIZE);
}
static void vectorStrConstRepeat(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
@ -41,7 +49,10 @@ struct RepeatImpl
res_offsets.assign(offsets);
for (UInt64 i = 0; i < offsets.size(); ++i)
{
data_size += (offsets[i] - offsets[i - 1] - 1) * repeat_time + 1; /// Note that accessing -1th element is valid for PaddedPODArray.
/// Note that accessing -1th element is valid for PaddedPODArray.
size_t repeated_size = (offsets[i] - offsets[i - 1] - 1) * repeat_time + 1;
checkStringSize(repeated_size);
data_size += repeated_size;
res_offsets[i] = data_size;
}
res_data.resize(data_size);
@ -63,7 +74,9 @@ struct RepeatImpl
res_offsets.assign(offsets);
for (UInt64 i = 0; i < col_num.size(); ++i)
{
data_size += (offsets[i] - offsets[i - 1] - 1) * col_num[i] + 1;
size_t repeated_size = (offsets[i] - offsets[i - 1] - 1) * col_num[i] + 1;
checkStringSize(repeated_size);
data_size += repeated_size;
res_offsets[i] = data_size;
}
res_data.resize(data_size);
@ -89,7 +102,9 @@ struct RepeatImpl
UInt64 col_size = col_num.size();
for (UInt64 i = 0; i < col_size; ++i)
{
data_size += str_size * col_num[i] + 1;
size_t repeated_size = str_size * col_num[i] + 1;
checkStringSize(repeated_size);
data_size += repeated_size;
res_offsets[i] = data_size;
}
res_data.resize(data_size);

View File

@ -1,4 +1,4 @@
-- repeat() with this length and this number of rows will allocation huge enough region (MSB set),
-- which will cause roundUpToPowerOfTwoOrZero() returns 0 for such allocation (before the fix),
-- and later repeat() will try to use this memory and will got SIGSEGV.
SELECT repeat('0.0001048576', number * (number * (number * 255))) FROM numbers(65535); -- { serverError 173; }
SELECT repeat('0.0001048576', number * (number * (number * 255))) FROM numbers(65535); -- { serverError 131; }