2015-10-29 03:41:09 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <city.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Core/Defines.h>
|
|
|
|
#include <Common/SipHash.h>
|
|
|
|
#include <Common/UInt128.h>
|
2019-08-21 02:28:04 +00:00
|
|
|
#include <Common/assert_cast.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Columns/ColumnTuple.h>
|
2015-10-29 03:41:09 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-03-09 00:56:38 +00:00
|
|
|
/** Hashes a set of arguments to the aggregate function
|
|
|
|
* to calculate the number of unique values
|
|
|
|
* and adds them to the set.
|
2015-10-29 03:41:09 +00:00
|
|
|
*
|
2017-03-09 00:56:38 +00:00
|
|
|
* Four options (2 x 2)
|
2015-10-29 03:41:09 +00:00
|
|
|
*
|
2017-03-09 00:56:38 +00:00
|
|
|
* - for approximate calculation, uses a non-cryptographic 64-bit hash function;
|
|
|
|
* - for exact calculation, uses a cryptographic 128-bit hash function;
|
2015-10-29 03:41:09 +00:00
|
|
|
*
|
2017-03-09 00:56:38 +00:00
|
|
|
* - for several arguments passed in the usual way;
|
|
|
|
* - for one argument-tuple.
|
2015-10-29 03:41:09 +00:00
|
|
|
*/
|
|
|
|
|
2015-10-29 04:02:22 +00:00
|
|
|
/// Primary template is declared only; the explicit specializations for each (exact, for_tuple) pair provide apply().
template <bool exact, bool for_tuple> struct UniqVariadicHash;
|
2015-10-29 03:41:09 +00:00
|
|
|
|
|
|
|
|
2018-07-16 03:12:01 +00:00
|
|
|
/// If some arguments are not contiguous in memory, we cannot use the simple hash function,
|
|
|
|
/// because it requires method IColumn::getDataAt to work.
|
|
|
|
/// Note that we treat a single tuple argument in the same way as multiple arguments.
|
|
|
|
/// Only declared here; the definition is not part of this header.
bool isAllArgumentsContiguousInMemory(const DataTypes & argument_types);
|
|
|
|
|
|
|
|
|
2015-10-29 04:02:22 +00:00
|
|
|
template <>
|
|
|
|
struct UniqVariadicHash<false, false>
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num)
|
|
|
|
{
|
|
|
|
UInt64 hash;
|
|
|
|
|
|
|
|
const IColumn ** column = columns;
|
|
|
|
const IColumn ** columns_end = column + num_args;
|
|
|
|
|
|
|
|
{
|
|
|
|
StringRef value = (*column)->getDataAt(row_num);
|
2017-06-21 08:35:38 +00:00
|
|
|
hash = CityHash_v1_0_2::CityHash64(value.data, value.size);
|
2017-04-01 07:20:54 +00:00
|
|
|
++column;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (column < columns_end)
|
|
|
|
{
|
|
|
|
StringRef value = (*column)->getDataAt(row_num);
|
2017-06-21 08:35:38 +00:00
|
|
|
hash = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(CityHash_v1_0_2::CityHash64(value.data, value.size), hash));
|
2017-04-01 07:20:54 +00:00
|
|
|
++column;
|
|
|
|
}
|
|
|
|
|
|
|
|
return hash;
|
|
|
|
}
|
2015-10-29 04:02:22 +00:00
|
|
|
};
|
2015-10-29 03:41:09 +00:00
|
|
|
|
2015-10-29 04:02:22 +00:00
|
|
|
template <>
|
|
|
|
struct UniqVariadicHash<false, true>
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num)
|
|
|
|
{
|
|
|
|
UInt64 hash;
|
|
|
|
|
2019-08-21 02:28:04 +00:00
|
|
|
const auto & tuple_columns = assert_cast<const ColumnTuple *>(columns[0])->getColumns();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-03-25 17:56:18 +00:00
|
|
|
const auto * column = tuple_columns.data();
|
|
|
|
const auto * columns_end = column + num_args;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
{
|
|
|
|
StringRef value = column->get()->getDataAt(row_num);
|
2017-06-21 08:35:38 +00:00
|
|
|
hash = CityHash_v1_0_2::CityHash64(value.data, value.size);
|
2017-04-01 07:20:54 +00:00
|
|
|
++column;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (column < columns_end)
|
|
|
|
{
|
|
|
|
StringRef value = column->get()->getDataAt(row_num);
|
2017-06-21 08:35:38 +00:00
|
|
|
hash = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(CityHash_v1_0_2::CityHash64(value.data, value.size), hash));
|
2017-04-01 07:20:54 +00:00
|
|
|
++column;
|
|
|
|
}
|
|
|
|
|
|
|
|
return hash;
|
|
|
|
}
|
2015-10-29 04:02:22 +00:00
|
|
|
};
|
2015-10-29 03:41:09 +00:00
|
|
|
|
2015-10-29 04:02:22 +00:00
|
|
|
template <>
|
|
|
|
struct UniqVariadicHash<true, false>
|
2015-10-29 03:41:09 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num)
|
|
|
|
{
|
|
|
|
const IColumn ** column = columns;
|
|
|
|
const IColumn ** columns_end = column + num_args;
|
|
|
|
|
|
|
|
SipHash hash;
|
|
|
|
|
|
|
|
while (column < columns_end)
|
|
|
|
{
|
|
|
|
(*column)->updateHashWithValue(row_num, hash);
|
|
|
|
++column;
|
|
|
|
}
|
|
|
|
|
|
|
|
UInt128 key;
|
2017-07-04 16:10:36 +00:00
|
|
|
hash.get128(key.low, key.high);
|
2017-04-01 07:20:54 +00:00
|
|
|
return key;
|
|
|
|
}
|
2015-10-29 04:02:22 +00:00
|
|
|
};
|
2015-10-29 03:41:09 +00:00
|
|
|
|
2015-10-29 04:02:22 +00:00
|
|
|
template <>
|
|
|
|
struct UniqVariadicHash<true, true>
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num)
|
|
|
|
{
|
2019-08-21 02:28:04 +00:00
|
|
|
const auto & tuple_columns = assert_cast<const ColumnTuple *>(columns[0])->getColumns();
|
2015-10-29 03:41:09 +00:00
|
|
|
|
2019-03-25 17:56:18 +00:00
|
|
|
const auto * column = tuple_columns.data();
|
|
|
|
const auto * columns_end = column + num_args;
|
2015-10-29 03:41:09 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
SipHash hash;
|
2016-07-10 15:58:58 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
while (column < columns_end)
|
|
|
|
{
|
|
|
|
(*column)->updateHashWithValue(row_num, hash);
|
|
|
|
++column;
|
|
|
|
}
|
2015-10-29 03:41:09 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
UInt128 key;
|
2017-07-04 16:10:36 +00:00
|
|
|
hash.get128(key.low, key.high);
|
2017-04-01 07:20:54 +00:00
|
|
|
return key;
|
|
|
|
}
|
2015-10-29 04:02:22 +00:00
|
|
|
};
|
2015-10-29 03:41:09 +00:00
|
|
|
|
|
|
|
}
|