mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #66106 from ClickHouse/fix_unbin
Fix buffer overflow in `unbin`
This commit is contained in:
commit
76119a4567
@ -5,7 +5,7 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size = 2)
|
static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size)
|
||||||
{
|
{
|
||||||
if ((end - pos) & 1)
|
if ((end - pos) & 1)
|
||||||
{
|
{
|
||||||
@ -23,7 +23,7 @@ static void inline hexStringDecode(const char * pos, const char * end, char *& o
|
|||||||
++out;
|
++out;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void inline binStringDecode(const char * pos, const char * end, char *& out)
|
static void inline binStringDecode(const char * pos, const char * end, char *& out, size_t word_size)
|
||||||
{
|
{
|
||||||
if (pos == end)
|
if (pos == end)
|
||||||
{
|
{
|
||||||
@ -53,7 +53,7 @@ static void inline binStringDecode(const char * pos, const char * end, char *& o
|
|||||||
++out;
|
++out;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert((end - pos) % 8 == 0);
|
chassert((end - pos) % word_size == 0);
|
||||||
|
|
||||||
while (end - pos != 0)
|
while (end - pos != 0)
|
||||||
{
|
{
|
||||||
|
@ -3,14 +3,14 @@
|
|||||||
#include <Columns/ColumnString.h>
|
#include <Columns/ColumnString.h>
|
||||||
#include <Columns/ColumnVector.h>
|
#include <Columns/ColumnVector.h>
|
||||||
#include <Columns/ColumnsNumber.h>
|
#include <Columns/ColumnsNumber.h>
|
||||||
#include <Common/BitHelpers.h>
|
|
||||||
#include <Common/BinStringDecodeHelper.h>
|
|
||||||
#include <DataTypes/DataTypeString.h>
|
#include <DataTypes/DataTypeString.h>
|
||||||
#include <Functions/FunctionFactory.h>
|
#include <Functions/FunctionFactory.h>
|
||||||
#include <Functions/IFunction.h>
|
#include <Functions/IFunction.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <Interpreters/Context_fwd.h>
|
#include <Interpreters/Context_fwd.h>
|
||||||
#include <Interpreters/castColumn.h>
|
#include <Interpreters/castColumn.h>
|
||||||
|
#include <Common/BinStringDecodeHelper.h>
|
||||||
|
#include <Common/BitHelpers.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -218,10 +218,7 @@ struct UnbinImpl
|
|||||||
static constexpr auto name = "unbin";
|
static constexpr auto name = "unbin";
|
||||||
static constexpr size_t word_size = 8;
|
static constexpr size_t word_size = 8;
|
||||||
|
|
||||||
static void decode(const char * pos, const char * end, char *& out)
|
static void decode(const char * pos, const char * end, char *& out) { binStringDecode(pos, end, out, word_size); }
|
||||||
{
|
|
||||||
binStringDecode(pos, end, out);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Encode number or string to string with binary or hexadecimal representation
|
/// Encode number or string to string with binary or hexadecimal representation
|
||||||
@ -651,7 +648,15 @@ public:
|
|||||||
|
|
||||||
size_t size = in_offsets.size();
|
size_t size = in_offsets.size();
|
||||||
out_offsets.resize(size);
|
out_offsets.resize(size);
|
||||||
out_vec.resize(in_vec.size() / word_size + size);
|
|
||||||
|
size_t max_out_len = 0;
|
||||||
|
for (size_t i = 0; i < in_offsets.size(); ++i)
|
||||||
|
{
|
||||||
|
const size_t len = in_offsets[i] - (i == 0 ? 0 : in_offsets[i - 1])
|
||||||
|
- /* trailing zero symbol that is always added in ColumnString and that is ignored while decoding */ 1;
|
||||||
|
max_out_len += (len + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1;
|
||||||
|
}
|
||||||
|
out_vec.resize(max_out_len);
|
||||||
|
|
||||||
char * begin = reinterpret_cast<char *>(out_vec.data());
|
char * begin = reinterpret_cast<char *>(out_vec.data());
|
||||||
char * pos = begin;
|
char * pos = begin;
|
||||||
@ -661,6 +666,7 @@ public:
|
|||||||
{
|
{
|
||||||
size_t new_offset = in_offsets[i];
|
size_t new_offset = in_offsets[i];
|
||||||
|
|
||||||
|
/// `new_offset - 1` because in ColumnString each string is stored with trailing zero byte
|
||||||
Impl::decode(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset - 1]), pos);
|
Impl::decode(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset - 1]), pos);
|
||||||
|
|
||||||
out_offsets[i] = pos - begin;
|
out_offsets[i] = pos - begin;
|
||||||
@ -668,6 +674,9 @@ public:
|
|||||||
prev_offset = new_offset;
|
prev_offset = new_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
chassert(
|
||||||
|
static_cast<size_t>(pos - begin) <= out_vec.size(),
|
||||||
|
fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size()));
|
||||||
out_vec.resize(pos - begin);
|
out_vec.resize(pos - begin);
|
||||||
|
|
||||||
return col_res;
|
return col_res;
|
||||||
@ -680,11 +689,11 @@ public:
|
|||||||
ColumnString::Offsets & out_offsets = col_res->getOffsets();
|
ColumnString::Offsets & out_offsets = col_res->getOffsets();
|
||||||
|
|
||||||
const ColumnString::Chars & in_vec = col_fix_string->getChars();
|
const ColumnString::Chars & in_vec = col_fix_string->getChars();
|
||||||
size_t n = col_fix_string->getN();
|
const size_t n = col_fix_string->getN();
|
||||||
|
|
||||||
size_t size = col_fix_string->size();
|
size_t size = col_fix_string->size();
|
||||||
out_offsets.resize(size);
|
out_offsets.resize(size);
|
||||||
out_vec.resize(in_vec.size() / word_size + size);
|
out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * size);
|
||||||
|
|
||||||
char * begin = reinterpret_cast<char *>(out_vec.data());
|
char * begin = reinterpret_cast<char *>(out_vec.data());
|
||||||
char * pos = begin;
|
char * pos = begin;
|
||||||
@ -694,6 +703,7 @@ public:
|
|||||||
{
|
{
|
||||||
size_t new_offset = prev_offset + n;
|
size_t new_offset = prev_offset + n;
|
||||||
|
|
||||||
|
/// here we don't subtract 1 from `new_offset` because in ColumnFixedString strings are stored without trailing zero byte
|
||||||
Impl::decode(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset]), pos);
|
Impl::decode(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset]), pos);
|
||||||
|
|
||||||
out_offsets[i] = pos - begin;
|
out_offsets[i] = pos - begin;
|
||||||
@ -701,6 +711,9 @@ public:
|
|||||||
prev_offset = new_offset;
|
prev_offset = new_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
chassert(
|
||||||
|
static_cast<size_t>(pos - begin) <= out_vec.size(),
|
||||||
|
fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size()));
|
||||||
out_vec.resize(pos - begin);
|
out_vec.resize(pos - begin);
|
||||||
|
|
||||||
return col_res;
|
return col_res;
|
||||||
|
@ -1129,11 +1129,11 @@ inline static bool makeHexOrBinStringLiteral(IParser::Pos & pos, ASTPtr & node,
|
|||||||
|
|
||||||
if (hex)
|
if (hex)
|
||||||
{
|
{
|
||||||
hexStringDecode(str_begin, str_end, res_pos);
|
hexStringDecode(str_begin, str_end, res_pos, word_size);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
binStringDecode(str_begin, str_end, res_pos);
|
binStringDecode(str_begin, str_end, res_pos, word_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
return makeStringLiteral(pos, node, String(reinterpret_cast<char *>(res.data()), (res_pos - res_begin - 1)));
|
return makeStringLiteral(pos, node, String(reinterpret_cast<char *>(res.data()), (res_pos - res_begin - 1)));
|
||||||
|
33
tests/queries/0_stateless/03199_unbin_buffer_overflow.sh
Executable file
33
tests/queries/0_stateless/03199_unbin_buffer_overflow.sh
Executable file
@ -0,0 +1,33 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
# shellcheck source=../shell_config.sh
|
||||||
|
. "$CURDIR"/../shell_config.sh
|
||||||
|
|
||||||
|
|
||||||
|
# check for buffer overflow in unbin (due to not enough memory preallocate for output buffer)
|
||||||
|
# we iterate over all remainders of input string length modulo word_size and check that no assertions are triggered
|
||||||
|
|
||||||
|
word_size=8
|
||||||
|
for i in $(seq 1 $((word_size+1))); do
|
||||||
|
str=$(printf "%${i}s" | tr ' ' 'x')
|
||||||
|
$CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unbin(toFixedString(materialize('$str'), $i)) WITH ROLLUP WITH TOTALS FORMAT NULL"
|
||||||
|
done
|
||||||
|
|
||||||
|
word_size=8
|
||||||
|
for i in $(seq 1 $((word_size+1))); do
|
||||||
|
str=$(printf "%${i}s" | tr ' ' 'x')
|
||||||
|
$CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unbin(materialize('$str')) WITH ROLLUP WITH TOTALS FORMAT NULL"
|
||||||
|
done
|
||||||
|
|
||||||
|
word_size=2
|
||||||
|
for i in $(seq 1 $((word_size+1))); do
|
||||||
|
str=$(printf "%${i}s" | tr ' ' 'x')
|
||||||
|
$CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unhex(toFixedString(materialize('$str'), $i)) WITH ROLLUP WITH TOTALS FORMAT NULL"
|
||||||
|
done
|
||||||
|
|
||||||
|
word_size=2
|
||||||
|
for i in $(seq 1 $((word_size+1))); do
|
||||||
|
str=$(printf "%${i}s" | tr ' ' 'x')
|
||||||
|
$CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unhex(materialize('$str')) WITH ROLLUP WITH TOTALS FORMAT NULL"
|
||||||
|
done
|
Loading…
Reference in New Issue
Block a user