Fix hashing of empty tuples

This commit is contained in:
Alexey Milovidov 2024-07-16 05:07:39 +02:00
parent 085b0c1815
commit 919bbf57a7
3 changed files with 60 additions and 5 deletions

View File

@ -1184,7 +1184,7 @@ private:
if (icolumn->size() != vec_to.size()) if (icolumn->size() != vec_to.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}", throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}",
icolumn->getName(), icolumn->size(), vec_to.size(), getName()); icolumn->getName(), icolumn->size(), vec_to.size(), getName());
if constexpr (Keyed) if constexpr (Keyed)
if (key_cols.size() != vec_to.size() && key_cols.size() != 1) if (key_cols.size() != vec_to.size() && key_cols.size() != 1)
@ -1223,6 +1223,9 @@ private:
else executeGeneric<first>(key_cols, icolumn, vec_to); else executeGeneric<first>(key_cols, icolumn, vec_to);
} }
/// Return a fixed random-looking magic number when input is empty.
static constexpr auto filler = 0xe28dbde7fe22e41c;
void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
{ {
/// Flattening of tuples. /// Flattening of tuples.
@ -1231,6 +1234,11 @@ private:
const auto & tuple_columns = tuple->getColumns(); const auto & tuple_columns = tuple->getColumns();
const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements(); const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements();
size_t tuple_size = tuple_columns.size(); size_t tuple_size = tuple_columns.size();
if (0 == tuple_size && is_first)
for (auto & hash : vec_to)
hash = static_cast<ToType>(filler);
for (size_t i = 0; i < tuple_size; ++i) for (size_t i = 0; i < tuple_size; ++i)
executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first); executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
} }
@ -1239,6 +1247,11 @@ private:
const auto & tuple_columns = tuple_const->getColumns(); const auto & tuple_columns = tuple_const->getColumns();
const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements(); const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements();
size_t tuple_size = tuple_columns.size(); size_t tuple_size = tuple_columns.size();
if (0 == tuple_size && is_first)
for (auto & hash : vec_to)
hash = static_cast<ToType>(filler);
for (size_t i = 0; i < tuple_size; ++i) for (size_t i = 0; i < tuple_size; ++i)
{ {
auto tmp = ColumnConst::create(tuple_columns[i], column->size()); auto tmp = ColumnConst::create(tuple_columns[i], column->size());
@ -1300,10 +1313,7 @@ public:
constexpr size_t first_data_argument = Keyed; constexpr size_t first_data_argument = Keyed;
if (arguments.size() <= first_data_argument) if (arguments.size() <= first_data_argument)
{ vec_to.assign(input_rows_count, static_cast<ToType>(filler));
/// Return a fixed random-looking magic number when input is empty
vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
}
KeyColumnsType key_cols{}; KeyColumnsType key_cols{};
if constexpr (Keyed) if constexpr (Keyed)

View File

@ -0,0 +1,22 @@
16324913028386710556
16324913028386710556
5049034479224883533
7385293435322750976
12248912094175844631
5049034479224883533
5887129541803688833
5887129541803688833
13747979201178469747
5887129541803688833
15520217392480966957
16324913028386710556
16324913028386710556
5049034479224883533
7385293435322750976
12248912094175844631
5049034479224883533
5887129541803688833
5887129541803688833
13747979201178469747
5887129541803688833
15520217392480966957

View File

@ -0,0 +1,23 @@
-- Regression test: sipHash64 over empty tuples `()`.
-- Each statement prints exactly one value, and the expected output holds one
-- line per statement in the same order, so statements must not be reordered,
-- added, or removed without updating the expected output accordingly.

-- Group 1: the empty tuple is written as the literal `()`.
-- Empty tuple as the only argument, then repeated.
SELECT sipHash64(());
SELECT sipHash64((), ());
-- Empty tuple in leading, trailing, middle, and both-ends position around a scalar.
SELECT sipHash64((), 1);
SELECT sipHash64(1, ());
SELECT sipHash64(1, (), 1);
SELECT sipHash64((), 1, ());
-- Empty tuple combined with non-empty tuples.
SELECT sipHash64((), (1, 2));
-- NOTE(review): this statement is identical to the previous one; it still
-- contributes its own line to the expected output.
SELECT sipHash64((), (1, 2));
SELECT sipHash64((1, 2), ());
SELECT sipHash64((), (1, 2), ());
SELECT sipHash64((1, 2), (), (3, 4));
-- Group 2: the same argument shapes with the empty tuple wrapped in materialize().
-- Presumably materialize(()) yields a full (non-constant) column so that the
-- non-constant tuple path is exercised as well -- TODO confirm.
-- The expected values for this group are the same as for group 1.
SELECT sipHash64(materialize(()));
SELECT sipHash64(materialize(()), materialize(()));
SELECT sipHash64(materialize(()), 1);
SELECT sipHash64(1, materialize(()));
SELECT sipHash64(1, materialize(()), 1);
-- Mixed case: literal `()` first, materialized empty tuple last.
SELECT sipHash64((), 1, materialize(()));
SELECT sipHash64(materialize(()), (1, 2));
-- NOTE(review): identical to the previous statement, mirroring the duplicate in group 1.
SELECT sipHash64(materialize(()), (1, 2));
SELECT sipHash64((1, 2), materialize(()));
-- Mixed case: materialized empty tuple first, literal `()` last.
SELECT sipHash64(materialize(()), (1, 2), ());
SELECT sipHash64((1, 2), materialize(()), (3, 4));