mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-22 09:40:49 +00:00
renamed anyHash64 to cityHash64, made them identical in unary case. [#METR-11774]
This commit is contained in:
parent
2e1e33da7e
commit
9c7ac77624
@ -36,12 +36,13 @@ namespace DB
|
||||
* Быстрая некриптографическая хэш функция для строк:
|
||||
* cityHash64: String -> UInt64
|
||||
*
|
||||
* Некриптографический хеш от кортежа значений любых типов (использует cityHash64 для строк и intHash64 для чисел):
|
||||
* cityHash64: any* -> UInt64
|
||||
*
|
||||
* Быстрая некриптографическая хэш функция от любого целого числа:
|
||||
* intHash32: number -> UInt32
|
||||
* intHash64: number -> UInt64
|
||||
*
|
||||
* Некриптографический хеш от кортежа значений любых типов (использует cityHash64 для строк и intHash64 для чисел):
|
||||
* anyHash64: any* -> UInt64
|
||||
*/
|
||||
|
||||
struct HalfMD5Impl
|
||||
@ -71,14 +72,6 @@ struct SipHash64Impl
|
||||
}
|
||||
};
|
||||
|
||||
struct CityHash64Impl
|
||||
{
|
||||
static UInt64 apply(const char * begin, size_t size)
|
||||
{
|
||||
return CityHash64(begin, size);
|
||||
}
|
||||
};
|
||||
|
||||
struct IntHash32Impl
|
||||
{
|
||||
typedef UInt32 ReturnType;
|
||||
@ -258,10 +251,10 @@ UInt64 toInteger<Float64>(Float64 x)
|
||||
}
|
||||
|
||||
|
||||
class FunctionAnyHash64 : public IFunction
|
||||
class FunctionCityHash64 : public IFunction
|
||||
{
|
||||
private:
|
||||
template <typename FromType>
|
||||
template <typename FromType, bool first>
|
||||
void executeIntType(const IColumn * column, ColumnUInt64::Container_t & vec_to)
|
||||
{
|
||||
if (const ColumnVector<FromType> * col_from = typeid_cast<const ColumnVector<FromType> *>(column))
|
||||
@ -269,14 +262,27 @@ private:
|
||||
const typename ColumnVector<FromType>::Container_t & vec_from = col_from->getData();
|
||||
size_t size = vec_from.size();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
vec_to[i] = Hash128to64(uint128(vec_to[i], IntHash64Impl::apply(toInteger(vec_from[i]))));
|
||||
{
|
||||
UInt64 h = IntHash64Impl::apply(toInteger(vec_from[i]));
|
||||
if (first)
|
||||
vec_to[i] = h;
|
||||
else
|
||||
vec_to[i] = Hash128to64(uint128(vec_to[i], h));
|
||||
}
|
||||
}
|
||||
else if (const ColumnConst<FromType> * col_from = typeid_cast<const ColumnConst<FromType> *>(column))
|
||||
{
|
||||
UInt64 hash = IntHash64Impl::apply(toInteger(col_from->getData()));
|
||||
size_t size = vec_to.size();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
vec_to[i] = Hash128to64(uint128(vec_to[i], hash));
|
||||
if (first)
|
||||
{
|
||||
vec_to.assign(size, hash);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
vec_to[i] = Hash128to64(uint128(vec_to[i], hash));
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception("Illegal column " + column->getName()
|
||||
@ -284,6 +290,7 @@ private:
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeString(const IColumn * column, ColumnUInt64::Container_t & vec_to)
|
||||
{
|
||||
if (const ColumnString * col_from = typeid_cast<const ColumnString *>(column))
|
||||
@ -293,26 +300,45 @@ private:
|
||||
size_t size = offsets.size();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
vec_to[i] = Hash128to64(uint128(vec_to[i], CityHash64Impl::apply(
|
||||
{
|
||||
UInt64 h = CityHash64(
|
||||
reinterpret_cast<const char *>(&data[i == 0 ? 0 : offsets[i - 1]]),
|
||||
i == 0 ? offsets[i] - 1 : (offsets[i] - 1 - offsets[i - 1]))));
|
||||
i == 0 ? offsets[i] - 1 : (offsets[i] - 1 - offsets[i - 1]));
|
||||
if (first)
|
||||
vec_to[i] = h;
|
||||
else
|
||||
vec_to[i] = Hash128to64(uint128(vec_to[i], h));
|
||||
}
|
||||
}
|
||||
else if (const ColumnFixedString * col_from = typeid_cast<const ColumnFixedString *>(column))
|
||||
{
|
||||
const typename ColumnString::Chars_t & data = col_from->getChars();
|
||||
size_t n = col_from->getN();
|
||||
size_t size = data.size() / n;
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
vec_to[i] = Hash128to64(uint128(vec_to[i], CityHash64Impl::apply(
|
||||
reinterpret_cast<const char *>(&data[i * n]), n)));
|
||||
{
|
||||
UInt64 h = CityHash64(reinterpret_cast<const char *>(&data[i * n]), n);
|
||||
if (first)
|
||||
vec_to[i] = h;
|
||||
else
|
||||
vec_to[i] = Hash128to64(uint128(vec_to[i], h));
|
||||
}
|
||||
}
|
||||
else if (const ColumnConstString * col_from = typeid_cast<const ColumnConstString *>(column))
|
||||
{
|
||||
UInt64 hash = CityHash64Impl::apply(col_from->getData().data(), col_from->getData().size());
|
||||
UInt64 hash = CityHash64(col_from->getData().data(), col_from->getData().size());
|
||||
size_t size = vec_to.size();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
vec_to[i] = Hash128to64(uint128(vec_to[i], hash));
|
||||
if (first)
|
||||
{
|
||||
vec_to.assign(size, hash);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
vec_to[i] = Hash128to64(uint128(vec_to[i], hash));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception("Illegal column " + column->getName()
|
||||
@ -320,6 +346,7 @@ private:
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeArray(const IDataType * type, const IColumn * column, ColumnUInt64::Container_t & vec_to)
|
||||
{
|
||||
const IDataType * nested_type = &*typeid_cast<const DataTypeArray *>(type)->getNestedType();
|
||||
@ -330,8 +357,8 @@ private:
|
||||
const ColumnArray::Offsets_t & offsets = col_from->getOffsets();
|
||||
size_t nested_size = nested_column->size();
|
||||
|
||||
ColumnUInt64::Container_t vec_temp(nested_size, 0);
|
||||
executeAny(nested_type, nested_column, vec_temp);
|
||||
ColumnUInt64::Container_t vec_temp(nested_size);
|
||||
executeAny<true>(nested_type, nested_column, vec_temp);
|
||||
|
||||
size_t size = offsets.size();
|
||||
|
||||
@ -339,7 +366,13 @@ private:
|
||||
{
|
||||
size_t begin = i == 0 ? 0 : offsets[i - 1];
|
||||
size_t end = offsets[i];
|
||||
vec_to[i] = Hash128to64(uint128(vec_to[i], IntHash64Impl::apply(end - begin)));
|
||||
|
||||
UInt64 h = IntHash64Impl::apply(end - begin);
|
||||
if (first)
|
||||
vec_to[i] = h;
|
||||
else
|
||||
vec_to[i] = Hash128to64(uint128(vec_to[i], h));
|
||||
|
||||
for (size_t j = begin; j < end; ++j)
|
||||
vec_to[i] = Hash128to64(uint128(vec_to[i], vec_temp[j]));
|
||||
}
|
||||
@ -348,7 +381,7 @@ private:
|
||||
{
|
||||
/// NOTE: тут, конечно, можно обойтись без материалиации столбца.
|
||||
ColumnPtr full_column = col_from->convertToFullColumn();
|
||||
executeArray(type, &*full_column, vec_to);
|
||||
executeArray<first>(type, &*full_column, vec_to);
|
||||
}
|
||||
else
|
||||
throw Exception("Illegal column " + column->getName()
|
||||
@ -356,23 +389,24 @@ private:
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeAny(const IDataType * from_type, const IColumn * icolumn, ColumnUInt64::Container_t & vec_to)
|
||||
{
|
||||
if (typeid_cast<const DataTypeUInt8 * >(from_type)) executeIntType<UInt8 >(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeUInt16 * >(from_type)) executeIntType<UInt16>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeUInt32 * >(from_type)) executeIntType<UInt32>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeUInt64 * >(from_type)) executeIntType<UInt64>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeInt8 * >(from_type)) executeIntType<Int8 >(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeInt16 * >(from_type)) executeIntType<Int16 >(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeInt32 * >(from_type)) executeIntType<Int32 >(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeInt64 * >(from_type)) executeIntType<Int64 >(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeDate * >(from_type)) executeIntType<UInt16>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeDateTime * >(from_type)) executeIntType<UInt32>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeFloat32 * >(from_type)) executeIntType<Float32>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeFloat64 * >(from_type)) executeIntType<Float64>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeString * >(from_type)) executeString(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeFixedString * >(from_type)) executeString(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeArray * >(from_type)) executeArray(from_type, icolumn, vec_to);
|
||||
if (typeid_cast<const DataTypeUInt8 * >(from_type)) executeIntType<UInt8, first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeUInt16 * >(from_type)) executeIntType<UInt16, first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeUInt32 * >(from_type)) executeIntType<UInt32, first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeUInt64 * >(from_type)) executeIntType<UInt64, first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeInt8 * >(from_type)) executeIntType<Int8, first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeInt16 * >(from_type)) executeIntType<Int16, first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeInt32 * >(from_type)) executeIntType<Int32, first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeInt64 * >(from_type)) executeIntType<Int64, first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeDate * >(from_type)) executeIntType<UInt16, first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeDateTime * >(from_type)) executeIntType<UInt32, first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeFloat32 * >(from_type)) executeIntType<Float32, first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeFloat64 * >(from_type)) executeIntType<Float64, first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeString * >(from_type)) executeString < first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeFixedString *>(from_type)) executeString < first>(icolumn, vec_to);
|
||||
else if (typeid_cast<const DataTypeArray * >(from_type)) executeArray < first>(from_type, icolumn, vec_to);
|
||||
else
|
||||
throw Exception("Unexpected type " + from_type->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -382,7 +416,7 @@ public:
|
||||
/// Получить имя функции.
|
||||
String getName() const
|
||||
{
|
||||
return "anyHash64";
|
||||
return "cityHash64";
|
||||
}
|
||||
|
||||
/// Получить тип результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение.
|
||||
@ -399,15 +433,23 @@ public:
|
||||
block.getByPosition(result).column = col_to;
|
||||
|
||||
ColumnUInt64::Container_t & vec_to = col_to->getData();
|
||||
vec_to.assign(rows, 0ul);
|
||||
|
||||
for (size_t index : arguments)
|
||||
if (arguments.empty())
|
||||
{
|
||||
const ColumnWithNameAndType & column = block.getByPosition(index);
|
||||
/// Случайное число из /dev/urandom используется как хеш пустого кортежа.
|
||||
vec_to.assign(rows, 0xe28dbde7fe22e41c);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < arguments.size(); ++i)
|
||||
{
|
||||
const ColumnWithNameAndType & column = block.getByPosition(arguments[i]);
|
||||
const IDataType * from_type = &*column.type;
|
||||
const IColumn * icolumn = &*column.column;
|
||||
|
||||
executeAny(from_type, icolumn, vec_to);
|
||||
if (i == 0)
|
||||
executeAny<true>(from_type, icolumn, vec_to);
|
||||
else
|
||||
executeAny<false>(from_type, icolumn, vec_to);
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -421,7 +463,6 @@ struct NameIntHash64 { static const char * get() { return "intHash64"; } };
|
||||
|
||||
typedef FunctionStringHash64<HalfMD5Impl, NameHalfMD5> FunctionHalfMD5;
|
||||
typedef FunctionStringHash64<SipHash64Impl, NameSipHash64> FunctionSipHash64;
|
||||
typedef FunctionStringHash64<CityHash64Impl, NameCityHash64> FunctionCityHash64;
|
||||
typedef FunctionIntHash<IntHash32Impl, NameIntHash32> FunctionIntHash32;
|
||||
typedef FunctionIntHash<IntHash64Impl, NameIntHash64> FunctionIntHash64;
|
||||
|
||||
|
@ -149,7 +149,6 @@ FunctionPtr FunctionFactory::get(
|
||||
else if (name == "cityHash64") return new FunctionCityHash64;
|
||||
else if (name == "intHash32") return new FunctionIntHash32;
|
||||
else if (name == "intHash64") return new FunctionIntHash64;
|
||||
else if (name == "anyHash64") return new FunctionAnyHash64;
|
||||
|
||||
else if (name == "IPv4NumToString") return new FunctionIPv4NumToString;
|
||||
else if (name == "IPv4StringToNum") return new FunctionIPv4StringToNum;
|
||||
|
Loading…
Reference in New Issue
Block a user