mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
add hashid support
This commit is contained in:
parent
75fc471cfc
commit
c16ce7657e
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -262,3 +262,6 @@
|
|||||||
[submodule "contrib/minizip-ng"]
|
[submodule "contrib/minizip-ng"]
|
||||||
path = contrib/minizip-ng
|
path = contrib/minizip-ng
|
||||||
url = https://github.com/zlib-ng/minizip-ng
|
url = https://github.com/zlib-ng/minizip-ng
|
||||||
|
[submodule "contrib/hashidsxx"]
|
||||||
|
path = contrib/hashidsxx
|
||||||
|
url = https://github.com/schoentoon/hashidsxx.git
|
||||||
|
1
contrib/CMakeLists.txt
vendored
1
contrib/CMakeLists.txt
vendored
@ -139,6 +139,7 @@ add_contrib (libpq-cmake libpq)
|
|||||||
add_contrib (nuraft-cmake NuRaft)
|
add_contrib (nuraft-cmake NuRaft)
|
||||||
add_contrib (fast_float-cmake fast_float)
|
add_contrib (fast_float-cmake fast_float)
|
||||||
add_contrib (datasketches-cpp-cmake datasketches-cpp)
|
add_contrib (datasketches-cpp-cmake datasketches-cpp)
|
||||||
|
add_contrib (hashidsxx-cmake hashidsxx)
|
||||||
|
|
||||||
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
|
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
|
||||||
if (ENABLE_NLP)
|
if (ENABLE_NLP)
|
||||||
|
1
contrib/hashidsxx
vendored
Submodule
1
contrib/hashidsxx
vendored
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee
|
21
contrib/hashidsxx-cmake/CMakeLists.txt
Normal file
21
contrib/hashidsxx-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# Build rules for the hashidsxx contrib library.
# The sources are taken from contrib/hashidsxx; this directory only holds the build script.

# ENABLE_HASHIDSXX switches the library on or off; its default follows ENABLE_LIBRARIES.
option(ENABLE_HASHIDSXX "Enable hashidsxx" ${ENABLE_LIBRARIES})

# When disabled, stop here: no target is created, so `if (TARGET ch_contrib::hashidsxx)` is false.
if (NOT ENABLE_HASHIDSXX)
    message(STATUS "Not using hashidsxx")
    return()
endif()

set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx")

# One translation unit plus its header.
add_library(_hashidsxx
    "${LIBRARY_DIR}/hashids.cpp"
    "${LIBRARY_DIR}/hashids.h"
)

# SYSTEM PUBLIC: consumers get the include path too, marked as a system include directory.
target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}")

# Namespaced alias for consumers to link against.
add_library(ch_contrib::hashidsxx ALIAS _hashidsxx)
|
@ -96,6 +96,10 @@ if (TARGET ch_contrib::rapidjson)
|
|||||||
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::rapidjson)
|
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::rapidjson)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (TARGET ch_contrib::hashidsxx)
|
||||||
|
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::hashidsxx)
|
||||||
|
endif()
|
||||||
|
|
||||||
add_subdirectory(GatherUtils)
|
add_subdirectory(GatherUtils)
|
||||||
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_gatherutils)
|
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_gatherutils)
|
||||||
|
|
||||||
|
13
src/Functions/FunctionHashID.cpp
Normal file
13
src/Functions/FunctionHashID.cpp
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#include "FunctionHashID.h"
|
||||||
|
|
||||||
|
#include <Functions/FunctionFactory.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
/// Registers FunctionHashID (SQL name `hashid`) in the given function factory.
void registerFunctionHashID(FunctionFactory & factory)
{
    factory.registerFunction<FunctionHashID>();
}
|
||||||
|
}
|
153
src/Functions/FunctionHashID.h
Normal file
153
src/Functions/FunctionHashID.h
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Common/config.h>
|
||||||
|
|
||||||
|
#include <hashids.h>
|
||||||
|
|
||||||
|
#include <DataTypes/DataTypeString.h>
|
||||||
|
#include <Columns/ColumnsNumber.h>
|
||||||
|
#include <Columns/ColumnString.h>
|
||||||
|
#include <Functions/IFunction.h>
|
||||||
|
#include <Functions/FunctionFactory.h>
|
||||||
|
#include <Functions/FunctionHelpers.h>
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include <initializer_list>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int ILLEGAL_COLUMN;
|
||||||
|
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||||
|
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
|
||||||
|
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
|
||||||
|
}
|
||||||
|
|
||||||
|
// hashid(string, salt)
|
||||||
|
class FunctionHashID : public IFunction
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static constexpr auto name = "hashid";
|
||||||
|
|
||||||
|
static FunctionPtr create(ContextPtr) {
|
||||||
|
return std::make_shared<FunctionHashID>();
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() const override
|
||||||
|
{
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t getNumberOfArguments() const override { return 0; }
|
||||||
|
|
||||||
|
bool isVariadic() const override { return true; }
|
||||||
|
|
||||||
|
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
||||||
|
|
||||||
|
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||||
|
{
|
||||||
|
if (arguments.size() < 1)
|
||||||
|
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least one argument", getName());
|
||||||
|
|
||||||
|
if (!isUnsignedInteger(arguments[0].type))
|
||||||
|
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||||
|
"First argument of function {} must be unsigned integer, got {}", getName(), arguments[0].type->getName());
|
||||||
|
|
||||||
|
if (arguments.size() > 1)
|
||||||
|
{
|
||||||
|
if (!isString(arguments[1].type))
|
||||||
|
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||||
|
"Second argument of function {} must be String, got {}",
|
||||||
|
getName(), arguments[1].type->getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (arguments.size() > 2)
|
||||||
|
{
|
||||||
|
if (!isUInt8(arguments[2].type))
|
||||||
|
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||||
|
"Third argument of function {} must be UInt8, got {}",
|
||||||
|
getName(), arguments[2].type->getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (arguments.size() > 3)
|
||||||
|
{
|
||||||
|
if (!isString(arguments[3].type))
|
||||||
|
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||||
|
"Fourth argument of function {} must be String, got {}",
|
||||||
|
getName(), arguments[3].type->getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (arguments.size() > 4)
|
||||||
|
{
|
||||||
|
throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
|
||||||
|
"Function {} expect no more than three arguments (integer, salt, optional_alphabet), got {}",
|
||||||
|
getName(), arguments.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::make_shared<DataTypeString>();
|
||||||
|
}
|
||||||
|
|
||||||
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||||
|
{
|
||||||
|
const auto & numcolumn = arguments[0].column;
|
||||||
|
|
||||||
|
if (
|
||||||
|
checkAndGetColumn<ColumnUInt8>(numcolumn.get())
|
||||||
|
|| checkAndGetColumn<ColumnUInt16>(numcolumn.get())
|
||||||
|
|| checkAndGetColumn<ColumnUInt32>(numcolumn.get())
|
||||||
|
|| checkAndGetColumn<ColumnUInt64>(numcolumn.get())
|
||||||
|
|| checkAndGetColumnConst<ColumnUInt8>(numcolumn.get())
|
||||||
|
|| checkAndGetColumnConst<ColumnUInt16>(numcolumn.get())
|
||||||
|
|| checkAndGetColumnConst<ColumnUInt32>(numcolumn.get())
|
||||||
|
|| checkAndGetColumnConst<ColumnUInt64>(numcolumn.get())
|
||||||
|
)
|
||||||
|
{
|
||||||
|
std::string salt;
|
||||||
|
UInt8 minLength = 0;
|
||||||
|
std::string alphabet(DEFAULT_ALPHABET);
|
||||||
|
|
||||||
|
if (arguments.size() >= 4)
|
||||||
|
{
|
||||||
|
const auto & alphabetcolumn = arguments[3].column;
|
||||||
|
if (auto alpha_col = checkAndGetColumnConst<ColumnString>(alphabetcolumn.get()))
|
||||||
|
alphabet = alpha_col->getValue<String>();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (arguments.size() >= 3)
|
||||||
|
{
|
||||||
|
const auto & minlengthcolumn = arguments[2].column;
|
||||||
|
if (auto min_length_col = checkAndGetColumnConst<ColumnUInt8>(minlengthcolumn.get()))
|
||||||
|
minLength = min_length_col->getValue<UInt8>();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (arguments.size() >= 2)
|
||||||
|
{
|
||||||
|
const auto & saltcolumn = arguments[1].column;
|
||||||
|
if (auto salt_col = checkAndGetColumnConst<ColumnString>(saltcolumn.get()))
|
||||||
|
salt = salt_col->getValue<String>();
|
||||||
|
}
|
||||||
|
|
||||||
|
hashidsxx::Hashids hash(salt, minLength, alphabet);
|
||||||
|
|
||||||
|
auto col_res = ColumnString::create();
|
||||||
|
|
||||||
|
for (size_t i = 0; i < input_rows_count; ++i)
|
||||||
|
{
|
||||||
|
std::string hashid = hash.encode({ numcolumn->getUInt(i) });
|
||||||
|
col_res->insertDataWithTerminatingZero(hashid.data(), hashid.size() + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return col_res;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
throw Exception("Illegal column " + arguments[0].column->getName()
|
||||||
|
+ " of first argument of function hashid",
|
||||||
|
ErrorCodes::ILLEGAL_COLUMN);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -24,6 +24,7 @@ void registerFunctionsEmbeddedDictionaries(FunctionFactory &);
|
|||||||
void registerFunctionsExternalDictionaries(FunctionFactory &);
|
void registerFunctionsExternalDictionaries(FunctionFactory &);
|
||||||
void registerFunctionsExternalModels(FunctionFactory &);
|
void registerFunctionsExternalModels(FunctionFactory &);
|
||||||
void registerFunctionsFormatting(FunctionFactory &);
|
void registerFunctionsFormatting(FunctionFactory &);
|
||||||
|
void registerFunctionHashID(FunctionFactory &);
|
||||||
void registerFunctionsHashing(FunctionFactory &);
|
void registerFunctionsHashing(FunctionFactory &);
|
||||||
void registerFunctionsHigherOrder(FunctionFactory &);
|
void registerFunctionsHigherOrder(FunctionFactory &);
|
||||||
void registerFunctionsLogical(FunctionFactory &);
|
void registerFunctionsLogical(FunctionFactory &);
|
||||||
@ -90,6 +91,7 @@ void registerFunctions()
|
|||||||
registerFunctionsExternalDictionaries(factory);
|
registerFunctionsExternalDictionaries(factory);
|
||||||
registerFunctionsExternalModels(factory);
|
registerFunctionsExternalModels(factory);
|
||||||
registerFunctionsFormatting(factory);
|
registerFunctionsFormatting(factory);
|
||||||
|
registerFunctionHashID(factory);
|
||||||
registerFunctionsHashing(factory);
|
registerFunctionsHashing(factory);
|
||||||
registerFunctionsHigherOrder(factory);
|
registerFunctionsHigherOrder(factory);
|
||||||
registerFunctionsLogical(factory);
|
registerFunctionsLogical(factory);
|
||||||
|
@ -100,3 +100,6 @@ endif()
|
|||||||
if (TARGET ch_contrib::jemalloc)
|
if (TARGET ch_contrib::jemalloc)
|
||||||
set(USE_JEMALLOC 1)
|
set(USE_JEMALLOC 1)
|
||||||
endif()
|
endif()
|
||||||
|
if (TARGET ch_contrib::hashidsxx)
|
||||||
|
set(USE_HASHIDSXX 1)
|
||||||
|
endif()
|
||||||
|
5
tests/queries/0_stateless/02293_hashid.reference
Normal file
5
tests/queries/0_stateless/02293_hashid.reference
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
0 gY
|
||||||
|
1 jR
|
||||||
|
2 k5
|
||||||
|
3 l5
|
||||||
|
4 mO
|
1
tests/queries/0_stateless/02293_hashid.sql
Normal file
1
tests/queries/0_stateless/02293_hashid.sql
Normal file
@ -0,0 +1 @@
|
|||||||
|
select number, hashid(number) from system.numbers limit 5;
|
@ -0,0 +1,5 @@
|
|||||||
|
0 pbgkmdljlpjoapne
|
||||||
|
1 akemglnjepjpodba
|
||||||
|
2 obmgndljgajpkeao
|
||||||
|
3 dldokmpjpgjgeanb
|
||||||
|
4 nkdlpgajngjnobme
|
1
tests/queries/0_stateless/02293_hashid_arguments.sql
Normal file
1
tests/queries/0_stateless/02293_hashid_arguments.sql
Normal file
@ -0,0 +1 @@
|
|||||||
|
select number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') from system.numbers limit 5;
|
1
tests/queries/0_stateless/02293_hashid_const.reference
Normal file
1
tests/queries/0_stateless/02293_hashid_const.reference
Normal file
@ -0,0 +1 @@
|
|||||||
|
YQrvD5XGvbx
|
1
tests/queries/0_stateless/02293_hashid_const.sql
Normal file
1
tests/queries/0_stateless/02293_hashid_const.sql
Normal file
@ -0,0 +1 @@
|
|||||||
|
select hashid(1234567890123456, 's3cr3t');
|
Loading…
Reference in New Issue
Block a user