mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 10:02:01 +00:00
Fix Array and Map support with Keyed hashing
When working with materialized key columns and rows containing Arrays or Maps (implemented as Arrays of Tuples) with multiple values, the keyed hash functions erroneously refused to proceed because they misinterpreted the output vector size. Closes #61497, which was reported as a security issue but did not actually have any security impact. The usefulness of keyed hashing over Maps is also questionable, but we support it for completeness.
This commit is contained in:
parent
2c6d0c69ab
commit
5004c22583
@ -49,6 +49,8 @@
|
||||
#include <base/bit_cast.h>
|
||||
#include <base/unaligned.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -75,17 +77,29 @@ namespace impl
|
||||
ColumnPtr key0;
|
||||
ColumnPtr key1;
|
||||
bool is_const;
|
||||
const ColumnArray::Offsets * offsets{};
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
assert(key0 && key1);
|
||||
assert(key0->size() == key1->size());
|
||||
assert(offsets == nullptr || offsets->size() == key0->size());
|
||||
if (offsets != nullptr)
|
||||
return offsets->back();
|
||||
return key0->size();
|
||||
}
|
||||
SipHashKey getKey(size_t i) const
|
||||
{
|
||||
if (is_const)
|
||||
i = 0;
|
||||
if (offsets != nullptr)
|
||||
{
|
||||
const auto begin = offsets->begin();
|
||||
auto upper = std::upper_bound(begin, offsets->end(), i);
|
||||
if (upper == offsets->end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "offset {} not found in function SipHashKeyColumns::getKey", i);
|
||||
i = upper - begin;
|
||||
}
|
||||
const auto & key0data = assert_cast<const ColumnUInt64 &>(*key0).getData();
|
||||
const auto & key1data = assert_cast<const ColumnUInt64 &>(*key1).getData();
|
||||
return {key0data[i], key1data[i]};
|
||||
@ -1112,7 +1126,15 @@ private:
|
||||
|
||||
typename ColumnVector<ToType>::Container vec_temp(nested_size);
|
||||
bool nested_is_first = true;
|
||||
executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first);
|
||||
|
||||
if constexpr (Keyed)
|
||||
{
|
||||
KeyColumnsType key_cols_tmp{key_cols};
|
||||
key_cols_tmp.offsets = &offsets;
|
||||
executeForArgument(key_cols_tmp, nested_type, nested_column, vec_temp, nested_is_first);
|
||||
}
|
||||
else
|
||||
executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first);
|
||||
|
||||
const size_t size = offsets.size();
|
||||
|
||||
|
@ -236,3 +236,6 @@ Check asan bug
|
||||
0
|
||||
Check bug found fuzzing
|
||||
9042C6691B1A75F0EA3314B6F55728BB
|
||||
Check bug 2 found fuzzing
|
||||
608E1FF030C9E206185B112C2A25F1A7
|
||||
ABB65AE97711A2E053E324ED88B1D08B
|
||||
|
@ -338,3 +338,10 @@ SELECT sipHash128((toUInt64(9223372036854775806), 1)) = sipHash128(1) GROUP BY s
|
||||
SELECT 'Check bug found fuzzing';
|
||||
SELECT [(255, 1048575)], sipHash128ReferenceKeyed((toUInt64(2147483646), toUInt64(9223372036854775807)), ([(NULL, 100), (NULL, NULL), (1024, 10)], toUInt64(2), toUInt64(1024)), ''), hex(sipHash128ReferenceKeyed((-9223372036854775807, 1.), '-1', NULL)), ('', toUInt64(65535), [(9223372036854775807, 9223372036854775806)], toUInt64(65536)), arrayJoin((NULL, 65537, 255), [(NULL, NULL)]) GROUP BY tupleElement((NULL, NULL, NULL, -1), toUInt64(2), 2) = NULL; -- { serverError NOT_IMPLEMENTED }
|
||||
SELECT hex(sipHash128ReferenceKeyed((0::UInt64, 0::UInt64), ([1, 1])));
|
||||
|
||||
SELECT 'Check bug 2 found fuzzing';
|
||||
DROP TABLE IF EXISTS sipHashKeyed_keys;
|
||||
CREATE TABLE sipHashKeyed_keys (`a` Map(String, String)) ENGINE = Memory;
|
||||
INSERT INTO sipHashKeyed_keys FORMAT VALUES ({'a':'b', 'c':'d'}), ({'e':'f', 'g':'h'});
|
||||
SELECT hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FROM sipHashKeyed_keys ORDER BY a;
|
||||
DROP TABLE sipHashKeyed_keys;
|
||||
|
Loading…
Reference in New Issue
Block a user