mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
add hashid support
This commit is contained in:
parent
75fc471cfc
commit
c16ce7657e
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -262,3 +262,6 @@
|
||||
[submodule "contrib/minizip-ng"]
|
||||
path = contrib/minizip-ng
|
||||
url = https://github.com/zlib-ng/minizip-ng
|
||||
[submodule "contrib/hashidsxx"]
|
||||
path = contrib/hashidsxx
|
||||
url = https://github.com/schoentoon/hashidsxx.git
|
||||
|
1
contrib/CMakeLists.txt
vendored
1
contrib/CMakeLists.txt
vendored
@ -139,6 +139,7 @@ add_contrib (libpq-cmake libpq)
|
||||
add_contrib (nuraft-cmake NuRaft)
|
||||
add_contrib (fast_float-cmake fast_float)
|
||||
add_contrib (datasketches-cpp-cmake datasketches-cpp)
|
||||
add_contrib (hashidsxx-cmake hashidsxx)
|
||||
|
||||
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
|
||||
if (ENABLE_NLP)
|
||||
|
1
contrib/hashidsxx
vendored
Submodule
1
contrib/hashidsxx
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee
|
21
contrib/hashidsxx-cmake/CMakeLists.txt
Normal file
21
contrib/hashidsxx-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,21 @@
|
||||
# Build rules for the hashidsxx sources located in contrib/hashidsxx.
# Controlled by ENABLE_HASHIDSXX, whose default follows ENABLE_LIBRARIES.
option(ENABLE_HASHIDSXX "Enable hashidsxx" ${ENABLE_LIBRARIES})

# When the option is off, stop processing this file before any target is defined.
if (NOT ENABLE_HASHIDSXX)
    message(STATUS "Not using hashidsxx")
    return()
endif()

# Root of the library sources inside the ClickHouse tree.
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx")

# The library consists of one translation unit and one public header.
set (SRCS "${LIBRARY_DIR}/hashids.cpp")
set (HDRS "${LIBRARY_DIR}/hashids.h")

# Internal target; the include directory is exported as SYSTEM PUBLIC so that
# dependents can write `#include <hashids.h>`.
add_library(_hashidsxx ${SRCS} ${HDRS})
target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}")

# Name under which the rest of the build refers to the library.
add_library(ch_contrib::hashidsxx ALIAS _hashidsxx)
|
@ -96,6 +96,10 @@ if (TARGET ch_contrib::rapidjson)
|
||||
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::rapidjson)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::hashidsxx)
|
||||
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::hashidsxx)
|
||||
endif()
|
||||
|
||||
add_subdirectory(GatherUtils)
|
||||
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_gatherutils)
|
||||
|
||||
|
13
src/Functions/FunctionHashID.cpp
Normal file
13
src/Functions/FunctionHashID.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
#include "FunctionHashID.h"
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Registers FunctionHashID in the given function factory.
void registerFunctionHashID(FunctionFactory & factory)
{
    factory.registerFunction<FunctionHashID>();
}
|
||||
}
|
153
src/Functions/FunctionHashID.h
Normal file
153
src/Functions/FunctionHashID.h
Normal file
@ -0,0 +1,153 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/config.h>
|
||||
|
||||
#include <hashids.h>
|
||||
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
#include <functional>
|
||||
#include <initializer_list>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
|
||||
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
|
||||
}
|
||||
|
||||
// hashid(string, salt)
|
||||
class FunctionHashID : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "hashid";
|
||||
|
||||
static FunctionPtr create(ContextPtr) {
|
||||
return std::make_shared<FunctionHashID>();
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
bool isVariadic() const override { return true; }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
if (arguments.size() < 1)
|
||||
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least one argument", getName());
|
||||
|
||||
if (!isUnsignedInteger(arguments[0].type))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"First argument of function {} must be unsigned integer, got {}", getName(), arguments[0].type->getName());
|
||||
|
||||
if (arguments.size() > 1)
|
||||
{
|
||||
if (!isString(arguments[1].type))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Second argument of function {} must be String, got {}",
|
||||
getName(), arguments[1].type->getName());
|
||||
}
|
||||
|
||||
if (arguments.size() > 2)
|
||||
{
|
||||
if (!isUInt8(arguments[2].type))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Third argument of function {} must be UInt8, got {}",
|
||||
getName(), arguments[2].type->getName());
|
||||
}
|
||||
|
||||
if (arguments.size() > 3)
|
||||
{
|
||||
if (!isString(arguments[3].type))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Fourth argument of function {} must be String, got {}",
|
||||
getName(), arguments[3].type->getName());
|
||||
}
|
||||
|
||||
if (arguments.size() > 4)
|
||||
{
|
||||
throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
|
||||
"Function {} expect no more than three arguments (integer, salt, optional_alphabet), got {}",
|
||||
getName(), arguments.size());
|
||||
}
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
const auto & numcolumn = arguments[0].column;
|
||||
|
||||
if (
|
||||
checkAndGetColumn<ColumnUInt8>(numcolumn.get())
|
||||
|| checkAndGetColumn<ColumnUInt16>(numcolumn.get())
|
||||
|| checkAndGetColumn<ColumnUInt32>(numcolumn.get())
|
||||
|| checkAndGetColumn<ColumnUInt64>(numcolumn.get())
|
||||
|| checkAndGetColumnConst<ColumnUInt8>(numcolumn.get())
|
||||
|| checkAndGetColumnConst<ColumnUInt16>(numcolumn.get())
|
||||
|| checkAndGetColumnConst<ColumnUInt32>(numcolumn.get())
|
||||
|| checkAndGetColumnConst<ColumnUInt64>(numcolumn.get())
|
||||
)
|
||||
{
|
||||
std::string salt;
|
||||
UInt8 minLength = 0;
|
||||
std::string alphabet(DEFAULT_ALPHABET);
|
||||
|
||||
if (arguments.size() >= 4)
|
||||
{
|
||||
const auto & alphabetcolumn = arguments[3].column;
|
||||
if (auto alpha_col = checkAndGetColumnConst<ColumnString>(alphabetcolumn.get()))
|
||||
alphabet = alpha_col->getValue<String>();
|
||||
}
|
||||
|
||||
if (arguments.size() >= 3)
|
||||
{
|
||||
const auto & minlengthcolumn = arguments[2].column;
|
||||
if (auto min_length_col = checkAndGetColumnConst<ColumnUInt8>(minlengthcolumn.get()))
|
||||
minLength = min_length_col->getValue<UInt8>();
|
||||
}
|
||||
|
||||
if (arguments.size() >= 2)
|
||||
{
|
||||
const auto & saltcolumn = arguments[1].column;
|
||||
if (auto salt_col = checkAndGetColumnConst<ColumnString>(saltcolumn.get()))
|
||||
salt = salt_col->getValue<String>();
|
||||
}
|
||||
|
||||
hashidsxx::Hashids hash(salt, minLength, alphabet);
|
||||
|
||||
auto col_res = ColumnString::create();
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
std::string hashid = hash.encode({ numcolumn->getUInt(i) });
|
||||
col_res->insertDataWithTerminatingZero(hashid.data(), hashid.size() + 1);
|
||||
}
|
||||
|
||||
return col_res;
|
||||
}
|
||||
else
|
||||
throw Exception("Illegal column " + arguments[0].column->getName()
|
||||
+ " of first argument of function hashid",
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -24,6 +24,7 @@ void registerFunctionsEmbeddedDictionaries(FunctionFactory &);
|
||||
void registerFunctionsExternalDictionaries(FunctionFactory &);
|
||||
void registerFunctionsExternalModels(FunctionFactory &);
|
||||
void registerFunctionsFormatting(FunctionFactory &);
|
||||
void registerFunctionHashID(FunctionFactory &);
|
||||
void registerFunctionsHashing(FunctionFactory &);
|
||||
void registerFunctionsHigherOrder(FunctionFactory &);
|
||||
void registerFunctionsLogical(FunctionFactory &);
|
||||
@ -90,6 +91,7 @@ void registerFunctions()
|
||||
registerFunctionsExternalDictionaries(factory);
|
||||
registerFunctionsExternalModels(factory);
|
||||
registerFunctionsFormatting(factory);
|
||||
registerFunctionHashID(factory);
|
||||
registerFunctionsHashing(factory);
|
||||
registerFunctionsHigherOrder(factory);
|
||||
registerFunctionsLogical(factory);
|
||||
|
@ -100,3 +100,6 @@ endif()
|
||||
if (TARGET ch_contrib::jemalloc)
|
||||
set(USE_JEMALLOC 1)
|
||||
endif()
|
||||
if (TARGET ch_contrib::hashidsxx)
|
||||
set(USE_HASHIDSXX 1)
|
||||
endif()
|
||||
|
5
tests/queries/0_stateless/02293_hashid.reference
Normal file
5
tests/queries/0_stateless/02293_hashid.reference
Normal file
@ -0,0 +1,5 @@
|
||||
0 gY
|
||||
1 jR
|
||||
2 k5
|
||||
3 l5
|
||||
4 mO
|
1
tests/queries/0_stateless/02293_hashid.sql
Normal file
1
tests/queries/0_stateless/02293_hashid.sql
Normal file
@ -0,0 +1 @@
|
||||
select number, hashid(number) from system.numbers limit 5;
|
@ -0,0 +1,5 @@
|
||||
0 pbgkmdljlpjoapne
|
||||
1 akemglnjepjpodba
|
||||
2 obmgndljgajpkeao
|
||||
3 dldokmpjpgjgeanb
|
||||
4 nkdlpgajngjnobme
|
1
tests/queries/0_stateless/02293_hashid_arguments.sql
Normal file
1
tests/queries/0_stateless/02293_hashid_arguments.sql
Normal file
@ -0,0 +1 @@
|
||||
select number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') from system.numbers limit 5;
|
1
tests/queries/0_stateless/02293_hashid_const.reference
Normal file
1
tests/queries/0_stateless/02293_hashid_const.reference
Normal file
@ -0,0 +1 @@
|
||||
YQrvD5XGvbx
|
1
tests/queries/0_stateless/02293_hashid_const.sql
Normal file
1
tests/queries/0_stateless/02293_hashid_const.sql
Normal file
@ -0,0 +1 @@
|
||||
select hashid(1234567890123456, 's3cr3t');
|
Loading…
Reference in New Issue
Block a user