2018-02-23 21:22:52 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Common/typeid_cast.h>
|
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
|
|
|
#include <Columns/ColumnsNumber.h>
|
|
|
|
#include <Columns/ColumnConst.h>
|
|
|
|
#include <Functions/IFunction.h>
|
|
|
|
#include <Functions/FunctionHelpers.h>
|
2018-02-26 01:27:33 +00:00
|
|
|
#include <common/likely.h>
|
2018-02-27 00:01:51 +00:00
|
|
|
|
2018-02-23 21:22:52 +00:00
|
|
|
#include <yandex/consistent_hashing.h>
|
2018-02-27 00:01:51 +00:00
|
|
|
#include <mailru/sumbur.h>
|
2018-02-23 21:22:52 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes referenced in this header. These are only declarations (`extern`);
/// the constants themselves are defined elsewhere in the project.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int ILLEGAL_COLUMN;
    /// Thrown below for arguments of a non-integer type; it has to be declared here
    /// so that the header does not depend on another include declaring it first.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int BAD_ARGUMENTS;
}
|
|
|
|
|
|
|
|
|
2018-02-26 01:27:33 +00:00
|
|
|
/// An O(1) time and space consistent hash algorithm by Konstantin Oblakov
|
2018-02-23 21:22:52 +00:00
|
|
|
struct YandexConsistentHashImpl
|
|
|
|
{
|
2018-02-26 01:27:33 +00:00
|
|
|
static constexpr auto name = "yandexConsistentHash";
|
2018-02-23 21:22:52 +00:00
|
|
|
|
2018-02-26 01:27:33 +00:00
|
|
|
using HashType = UInt64;
|
2018-02-23 21:22:52 +00:00
|
|
|
/// Actually it supports UInt64, but it is effective only if n < 65536
|
|
|
|
using ResultType = UInt32;
|
|
|
|
using BucketsCountType = ResultType;
|
|
|
|
|
|
|
|
static inline ResultType apply(UInt64 hash, BucketsCountType n)
|
|
|
|
{
|
|
|
|
return ConsistentHashing(hash, n);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/// Code from https://arxiv.org/pdf/1406.2294.pdf
///
/// Maps `key` to a bucket number. For a positive `num_buckets` the result lies in
/// [0, num_buckets). If `num_buckets` is zero or negative, the loop body never runs
/// and the function returns -1.
static inline int32_t JumpConsistentHash(uint64_t key, int32_t num_buckets)
{
    int64_t bucket = -1;
    int64_t next_jump = 0;

    for (; next_jump < num_buckets;)
    {
        bucket = next_jump;

        /// Advance the pseudo-random sequence seeded by the key.
        key = key * 2862933555777941757ULL + 1;

        /// Scale factor derived from the upper 31 bits of the current state.
        const double scale = double(1LL << 31) / double((key >> 33) + 1);
        next_jump = static_cast<int64_t>((bucket + 1) * scale);
    }

    return static_cast<int32_t>(bucket);
}
|
|
|
|
|
|
|
|
struct JumpConsistentHashImpl
|
|
|
|
{
|
2018-02-26 01:27:33 +00:00
|
|
|
static constexpr auto name = "jumpConsistentHash";
|
2018-02-23 21:22:52 +00:00
|
|
|
|
2018-02-26 01:27:33 +00:00
|
|
|
using HashType = UInt64;
|
2018-02-23 21:22:52 +00:00
|
|
|
using ResultType = Int32;
|
|
|
|
using BucketsCountType = ResultType;
|
|
|
|
|
|
|
|
static inline ResultType apply(UInt64 hash, BucketsCountType n)
|
|
|
|
{
|
|
|
|
return JumpConsistentHash(hash, n);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2018-02-26 01:27:33 +00:00
|
|
|
struct SumburConsistentHashImpl
|
|
|
|
{
|
|
|
|
static constexpr auto name = "sumburConsistentHash";
|
|
|
|
|
|
|
|
using HashType = UInt32;
|
2018-02-27 00:01:51 +00:00
|
|
|
using ResultType = UInt16;
|
2018-02-26 01:27:33 +00:00
|
|
|
using BucketsCountType = ResultType;
|
|
|
|
|
2018-02-27 00:01:51 +00:00
|
|
|
static inline ResultType apply(HashType hash, BucketsCountType n)
|
2018-02-26 01:27:33 +00:00
|
|
|
{
|
2018-02-27 00:01:51 +00:00
|
|
|
return static_cast<ResultType>(sumburConsistentHash(hash, n));
|
2018-02-26 01:27:33 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2018-02-23 21:22:52 +00:00
|
|
|
template <typename Impl>
|
|
|
|
class FunctionConsistentHashImpl : public IFunction
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
|
|
|
|
static constexpr auto name = Impl::name;
|
|
|
|
|
|
|
|
static FunctionPtr create(const Context &) { return std::make_shared<FunctionConsistentHashImpl<Impl>>(); };
|
|
|
|
|
|
|
|
String getName() const override { return name; }
|
|
|
|
|
|
|
|
size_t getNumberOfArguments() const override { return 2; }
|
|
|
|
|
|
|
|
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
|
|
|
{
|
|
|
|
if (!arguments[0]->isInteger())
|
|
|
|
throw Exception("Illegal type " + arguments[0]->getName() + " of the first argument of function " + getName(),
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
2018-02-26 01:27:33 +00:00
|
|
|
if (arguments[0]->getSizeOfValueInMemory() > sizeof(HashType))
|
|
|
|
throw Exception("Function " + getName() + " accepts " + std::to_string(sizeof(HashType) * 8) + "-bit integers at most"
|
|
|
|
+ ", got " + arguments[0]->getName(), ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
|
2018-02-23 21:22:52 +00:00
|
|
|
if (!arguments[1]->isInteger())
|
|
|
|
throw Exception("Illegal type " + arguments[1]->getName() + " of the second argument of function " + getName(),
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
|
|
|
return std::make_shared<DataTypeNumber<ResultType>>();
|
|
|
|
}
|
|
|
|
|
2018-02-26 01:27:33 +00:00
|
|
|
bool useDefaultImplementationForConstants() const override { return true; }
|
|
|
|
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
|
|
|
|
2018-02-23 21:22:52 +00:00
|
|
|
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override
|
|
|
|
{
|
2018-02-26 01:27:33 +00:00
|
|
|
if (block.getByPosition(arguments[1]).column->isColumnConst())
|
|
|
|
executeConstBuckets(block, arguments, result);
|
|
|
|
else
|
2018-02-23 21:22:52 +00:00
|
|
|
throw Exception("The second argument of function " + getName() + " (number of buckets) must be constant", ErrorCodes::BAD_ARGUMENTS);
|
2018-02-26 01:27:33 +00:00
|
|
|
}
|
2018-02-23 21:22:52 +00:00
|
|
|
|
2018-02-26 01:27:33 +00:00
|
|
|
private:
|
2018-02-23 21:22:52 +00:00
|
|
|
|
2018-02-26 01:27:33 +00:00
|
|
|
using HashType = typename Impl::HashType;
|
|
|
|
using ResultType = typename Impl::ResultType;
|
|
|
|
using BucketsType = typename Impl::BucketsCountType;
|
|
|
|
static constexpr auto max_buckets = static_cast<UInt64>(std::numeric_limits<BucketsType>::max());
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
inline BucketsType checkBucketsRange(T buckets)
|
|
|
|
{
|
|
|
|
if (unlikely(buckets <= 0))
|
|
|
|
throw Exception("The second argument of function " + getName() + " (number of buckets) must be positive number",
|
|
|
|
ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
|
|
|
|
if (unlikely(static_cast<UInt64>(buckets) > max_buckets))
|
|
|
|
throw Exception("The value of the second argument of function " + getName() + " (number of buckets) is not fit to " +
|
|
|
|
DataTypeNumber<BucketsType>().getName(), ErrorCodes::BAD_ARGUMENTS);
|
2018-02-23 21:22:52 +00:00
|
|
|
|
2018-02-26 01:27:33 +00:00
|
|
|
return static_cast<BucketsType>(buckets);
|
|
|
|
}
|
2018-02-23 21:22:52 +00:00
|
|
|
|
2018-02-26 01:27:33 +00:00
|
|
|
void executeConstBuckets(Block & block, const ColumnNumbers & arguments, size_t result)
|
|
|
|
{
|
|
|
|
Field buckets_field = (*block.getByPosition(arguments[1]).column)[0];
|
|
|
|
BucketsType num_buckets;
|
2018-02-23 21:22:52 +00:00
|
|
|
|
|
|
|
if (buckets_field.getType() == Field::Types::Int64)
|
2018-02-26 01:27:33 +00:00
|
|
|
num_buckets = checkBucketsRange(buckets_field.get<Int64>());
|
2018-02-23 21:22:52 +00:00
|
|
|
else if (buckets_field.getType() == Field::Types::UInt64)
|
2018-02-26 01:27:33 +00:00
|
|
|
num_buckets = checkBucketsRange(buckets_field.get<UInt64>());
|
2018-02-23 21:22:52 +00:00
|
|
|
else
|
|
|
|
throw Exception("Illegal type " + String(buckets_field.getTypeName()) + " of the second argument of function " + getName(),
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
2018-02-26 01:27:33 +00:00
|
|
|
const auto & hash_col = block.getByPosition(arguments[0]).column;
|
2018-02-23 21:22:52 +00:00
|
|
|
const IDataType * hash_type = block.getByPosition(arguments[0]).type.get();
|
2018-02-26 01:27:33 +00:00
|
|
|
auto res_col = ColumnVector<ResultType>::create();
|
|
|
|
|
|
|
|
if (checkDataType<DataTypeUInt8>(hash_type)) executeType<UInt8>(hash_col, num_buckets, res_col.get());
|
|
|
|
else if (checkDataType<DataTypeUInt16>(hash_type)) executeType<UInt16>(hash_col, num_buckets, res_col.get());
|
|
|
|
else if (checkDataType<DataTypeUInt32>(hash_type)) executeType<UInt32>(hash_col, num_buckets, res_col.get());
|
|
|
|
else if (checkDataType<DataTypeUInt64>(hash_type)) executeType<UInt64>(hash_col, num_buckets, res_col.get());
|
|
|
|
else if (checkDataType<DataTypeInt8>(hash_type)) executeType<Int8>(hash_col, num_buckets, res_col.get());
|
|
|
|
else if (checkDataType<DataTypeInt16>(hash_type)) executeType<Int16>(hash_col, num_buckets, res_col.get());
|
|
|
|
else if (checkDataType<DataTypeInt32>(hash_type)) executeType<Int32>(hash_col, num_buckets, res_col.get());
|
|
|
|
else if (checkDataType<DataTypeInt64>(hash_type)) executeType<Int64>(hash_col, num_buckets, res_col.get());
|
2018-02-23 21:22:52 +00:00
|
|
|
else
|
|
|
|
throw Exception("Illegal type " + hash_type->getName() + " of the first argument of function " + getName(),
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
2018-02-26 01:27:33 +00:00
|
|
|
block.getByPosition(result).column = std::move(res_col);
|
|
|
|
}
|
2018-02-23 21:22:52 +00:00
|
|
|
|
2018-02-26 01:27:33 +00:00
|
|
|
template <typename CurrentHashType>
|
|
|
|
void executeType(const ColumnPtr & col_hash_ptr, BucketsType num_buckets, ColumnVector<ResultType> * col_result)
|
2018-02-23 21:22:52 +00:00
|
|
|
{
|
2018-02-26 01:27:33 +00:00
|
|
|
auto col_hash = checkAndGetColumn<ColumnVector<CurrentHashType>>(col_hash_ptr.get());
|
2018-02-23 21:22:52 +00:00
|
|
|
if (!col_hash)
|
|
|
|
throw Exception("Illegal type of the first argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
2018-02-26 01:27:33 +00:00
|
|
|
|
|
|
|
auto & vec_result = col_result->getData();
|
2018-02-23 21:22:52 +00:00
|
|
|
const auto & vec_hash = col_hash->getData();
|
|
|
|
|
|
|
|
size_t size = vec_hash.size();
|
|
|
|
vec_result.resize(size);
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
2018-02-26 01:27:33 +00:00
|
|
|
vec_result[i] = Impl::apply(static_cast<HashType>(vec_hash[i]), num_buckets);
|
2018-02-23 21:22:52 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/// Function `yandexConsistentHash`: FunctionConsistentHashImpl instantiated with YandexConsistentHashImpl.
using FunctionYandexConsistentHash = FunctionConsistentHashImpl<YandexConsistentHashImpl>;
|
2018-02-26 01:27:33 +00:00
|
|
|
/// Function `jumpConsistentHash`: FunctionConsistentHashImpl instantiated with JumpConsistentHashImpl.
using FunctionJumpConsistentHash = FunctionConsistentHashImpl<JumpConsistentHashImpl>;
|
|
|
|
/// Function `sumburConsistentHash`: FunctionConsistentHashImpl instantiated with SumburConsistentHashImpl.
using FunctionSumburConsistentHash = FunctionConsistentHashImpl<SumburConsistentHashImpl>;
|
2018-02-23 21:22:52 +00:00
|
|
|
|
|
|
|
|
|
|
|
}
|