mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 17:12:03 +00:00
Merge branch 'java-hash' of https://github.com/shangshujie365/ClickHouse into shangshujie365-java-hash
This commit is contained in:
commit
6a779661c8
@ -20,6 +20,8 @@ void registerFunctionsHashing(FunctionFactory & factory)
|
||||
factory.registerFunction<FunctionIntHash32>();
|
||||
factory.registerFunction<FunctionIntHash64>();
|
||||
factory.registerFunction<FunctionURLHash>();
|
||||
factory.registerFunction<FunctionJavaHash>();
|
||||
factory.registerFunction<FunctionHiveHash>();
|
||||
factory.registerFunction<FunctionMurmurHash2_32>();
|
||||
factory.registerFunction<FunctionMurmurHash2_64>();
|
||||
factory.registerFunction<FunctionMurmurHash3_32>();
|
||||
|
@ -956,6 +956,139 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452
|
||||
*/
|
||||
struct JavaHashImpl
|
||||
{
|
||||
static Int32 apply(const char * data, const size_t size)
|
||||
{
|
||||
Int32 h = 0;
|
||||
for (int i = 0; i < (int)size; ++i)
|
||||
{
|
||||
h = 31 * h + data[i];
|
||||
}
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* the java string hash implement,
|
||||
* many system from java world use this string hash function or based it
|
||||
*/
|
||||
class FunctionJavaHash : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "JavaHash";
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionJavaHash>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isVariadic() const override { return true; }
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
const auto arg_count = arguments.size();
|
||||
if (arg_count != 1)
|
||||
throw Exception{"Number of arguments for function " + getName() + " doesn't match: passed " +
|
||||
toString(arg_count) + ", should be 1.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
|
||||
const auto first_arg = arguments.front().get();
|
||||
if (!WhichDataType(first_arg).isString())
|
||||
throw Exception{"Illegal type " + first_arg->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
return std::make_shared<DataTypeInt32>();
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
||||
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
|
||||
{
|
||||
const auto arg_count = arguments.size();
|
||||
|
||||
if (arg_count == 1)
|
||||
{
|
||||
const auto col_untyped = block.getByPosition(arguments.front()).column.get();
|
||||
|
||||
if (const auto col_from = checkAndGetColumn<ColumnString>(col_untyped))
|
||||
{
|
||||
const auto size = col_from->size();
|
||||
auto col_to = ColumnInt32::create(size);
|
||||
|
||||
const auto & chars = col_from->getChars();
|
||||
const auto & offsets = col_from->getOffsets();
|
||||
auto & out = col_to->getData();
|
||||
|
||||
ColumnString::Offset current_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
out[i] = JavaHashImpl::apply(
|
||||
reinterpret_cast<const char *>(&chars[current_offset]),
|
||||
offsets[i] - current_offset - 1);
|
||||
|
||||
current_offset = offsets[i];
|
||||
}
|
||||
|
||||
block.getByPosition(result).column = std::move(col_to);
|
||||
}
|
||||
else
|
||||
throw Exception{"Illegal column " + block.getByPosition(arguments[0]).column->getName() +
|
||||
" of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
|
||||
}
|
||||
else
|
||||
throw Exception{"got into IFunction::execute with unexpected number of arguments", ErrorCodes::LOGICAL_ERROR};
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* this hive function works for hive-version < 3.0,
|
||||
* after 3.0, hive use murmur-hash3
|
||||
*/
|
||||
class FunctionHiveHash : public FunctionJavaHash
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "HiveHash";
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionHiveHash>(); }
|
||||
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
|
||||
{
|
||||
const auto arg_count = arguments.size();
|
||||
|
||||
if (arg_count == 1)
|
||||
{
|
||||
const auto col_untyped = block.getByPosition(arguments.front()).column.get();
|
||||
|
||||
if (const auto col_from = checkAndGetColumn<ColumnString>(col_untyped))
|
||||
{
|
||||
const auto size = col_from->size();
|
||||
auto col_to = ColumnInt32::create(size);
|
||||
|
||||
const auto & chars = col_from->getChars();
|
||||
const auto & offsets = col_from->getOffsets();
|
||||
auto & out = col_to->getData();
|
||||
|
||||
ColumnString::Offset current_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
out[i] = JavaHashImpl::apply(
|
||||
reinterpret_cast<const char *>(&chars[current_offset]),
|
||||
offsets[i] - current_offset - 1) & 0x7fffffff;
|
||||
|
||||
current_offset = offsets[i];
|
||||
}
|
||||
|
||||
block.getByPosition(result).column = std::move(col_to);
|
||||
}
|
||||
else
|
||||
throw Exception{"Illegal column " + block.getByPosition(arguments[0]).column->getName() +
|
||||
" of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
|
||||
}
|
||||
else
|
||||
throw Exception{"got into IFunction::execute with unexpected number of arguments", ErrorCodes::LOGICAL_ERROR};
|
||||
}
|
||||
};
|
||||
|
||||
struct NameIntHash32 { static constexpr auto name = "intHash32"; };
|
||||
struct NameIntHash64 { static constexpr auto name = "intHash64"; };
|
||||
|
@ -0,0 +1,4 @@
|
||||
96354
|
||||
-676697544
|
||||
96354
|
||||
1470786104
|
@ -0,0 +1,4 @@
|
||||
select JavaHash('abc');
|
||||
select JavaHash('874293087');
|
||||
select HiveHash('abc');
|
||||
select HiveHash('874293087');
|
Loading…
Reference in New Issue
Block a user