add hashid support

This commit is contained in:
Michael Nutt 2022-05-06 21:25:20 -04:00
parent 75fc471cfc
commit c16ce7657e
15 changed files with 215 additions and 0 deletions

3
.gitmodules vendored
View File

@ -262,3 +262,6 @@
[submodule "contrib/minizip-ng"]
path = contrib/minizip-ng
url = https://github.com/zlib-ng/minizip-ng
[submodule "contrib/hashidsxx"]
path = contrib/hashidsxx
url = https://github.com/schoentoon/hashidsxx.git

View File

@ -139,6 +139,7 @@ add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (hashidsxx-cmake hashidsxx)
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP)

1
contrib/hashidsxx vendored Submodule

@ -0,0 +1 @@
Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee

View File

@ -0,0 +1,21 @@
# Build script for the vendored hashidsxx library (sources under contrib/hashidsxx).
# ENABLE_HASHIDSXX defaults to the value of ENABLE_LIBRARIES.
option(ENABLE_HASHIDSXX "Enable hashidsxx" ${ENABLE_LIBRARIES})

# When the option is off, stop here: no target and no alias are created.
if (NOT ENABLE_HASHIDSXX)
    message(STATUS "Not using hashidsxx")
    return()
endif()

set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx")

# One source file and one header make up the whole library.
set (SRCS "${LIBRARY_DIR}/hashids.cpp")
set (HDRS "${LIBRARY_DIR}/hashids.h")

add_library(_hashidsxx ${SRCS} ${HDRS})

# PUBLIC so that targets linking the library can include hashids.h;
# SYSTEM marks the directory as a system include path for them.
target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}")

# Namespaced alias under which the library is referenced elsewhere.
add_library(ch_contrib::hashidsxx ALIAS _hashidsxx)

View File

@ -96,6 +96,10 @@ if (TARGET ch_contrib::rapidjson)
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::rapidjson)
endif()
# Link hashidsxx into clickhouse_functions only when the contrib target exists.
if (TARGET ch_contrib::hashidsxx)
    target_link_libraries(clickhouse_functions PRIVATE ch_contrib::hashidsxx)
endif()
add_subdirectory(GatherUtils)
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_gatherutils)

View File

@ -0,0 +1,13 @@
#include "FunctionHashID.h"
#include <Functions/FunctionFactory.h>
namespace DB
{

/// Registers FunctionHashID with the given function factory.
void registerFunctionHashID(FunctionFactory & factory)
{
    factory.registerFunction<FunctionHashID>();
}

}

View File

@ -0,0 +1,153 @@
#pragma once
#include <Common/config.h>
#include <hashids.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <functional>
#include <initializer_list>
namespace DB
{

namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
    extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
}

/// hashid(number[, salt[, min_length[, alphabet]]])
///
/// Encodes an unsigned integer into a string using hashidsxx::Hashids.
///  - number:     unsigned integer column (UInt8/16/32/64 are handled by executeImpl).
///  - salt:       optional constant String; empty when omitted.
///  - min_length: optional constant UInt8; 0 when omitted.
///  - alphabet:   optional constant String; DEFAULT_ALPHABET (from hashids.h) when omitted.
/// The result is a String column holding one encoded value per input row.
class FunctionHashID : public IFunction
{
public:
    static constexpr auto name = "hashid";

    static FunctionPtr create(ContextPtr)
    {
        return std::make_shared<FunctionHashID>();
    }

    String getName() const override { return name; }

    /// The function takes one to four arguments, so it is declared variadic
    /// and the argument count is validated in getReturnTypeImpl.
    size_t getNumberOfArguments() const override { return 0; }
    bool isVariadic() const override { return true; }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }

    /// Validates the number and the types of the arguments; the result type is always String.
    /// Throws TOO_FEW_ARGUMENTS_FOR_FUNCTION, TOO_MANY_ARGUMENTS_FOR_FUNCTION
    /// or ILLEGAL_TYPE_OF_ARGUMENT on a mismatch.
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        if (arguments.empty())
            throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least one argument", getName());

        if (arguments.size() > 4)
            throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
                "Function {} expect no more than four arguments (integer, salt, min_length, optional_alphabet), got {}",
                getName(), arguments.size());

        if (!isUnsignedInteger(arguments[0].type))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "First argument of function {} must be unsigned integer, got {}", getName(), arguments[0].type->getName());

        if (arguments.size() > 1 && !isString(arguments[1].type))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Second argument of function {} must be String, got {}",
                getName(), arguments[1].type->getName());

        if (arguments.size() > 2 && !isUInt8(arguments[2].type))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Third argument of function {} must be UInt8, got {}",
                getName(), arguments[2].type->getName());

        if (arguments.size() > 3 && !isString(arguments[3].type))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Fourth argument of function {} must be String, got {}",
                getName(), arguments[3].type->getName());

        return std::make_shared<DataTypeString>();
    }

    /// Encodes every row of the first argument with a single Hashids instance.
    /// Throws ILLEGAL_COLUMN when the first argument is not a (possibly constant)
    /// UInt8/16/32/64 column, or when salt, min_length or alphabet is not constant.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        const auto & num_column = arguments[0].column;

        const bool is_supported_number_column =
            checkAndGetColumn<ColumnUInt8>(num_column.get())
            || checkAndGetColumn<ColumnUInt16>(num_column.get())
            || checkAndGetColumn<ColumnUInt32>(num_column.get())
            || checkAndGetColumn<ColumnUInt64>(num_column.get())
            || checkAndGetColumnConst<ColumnUInt8>(num_column.get())
            || checkAndGetColumnConst<ColumnUInt16>(num_column.get())
            || checkAndGetColumnConst<ColumnUInt32>(num_column.get())
            || checkAndGetColumnConst<ColumnUInt64>(num_column.get());

        if (!is_supported_number_column)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN,
                "Illegal column {} of first argument of function {}",
                arguments[0].column->getName(), getName());

        std::string salt;
        UInt8 min_length = 0;
        std::string alphabet(DEFAULT_ALPHABET);

        /// One encoder is built for the whole block, so the optional parameters must be
        /// constants. A non-constant value is rejected rather than silently replaced by
        /// the default, which would yield hashes for a different salt/length/alphabet.
        if (arguments.size() >= 2)
        {
            const auto * salt_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
            if (!salt_col)
                throw Exception(ErrorCodes::ILLEGAL_COLUMN,
                    "Second argument of function {} must be a constant String", getName());
            salt = salt_col->getValue<String>();
        }

        if (arguments.size() >= 3)
        {
            const auto * min_length_col = checkAndGetColumnConst<ColumnUInt8>(arguments[2].column.get());
            if (!min_length_col)
                throw Exception(ErrorCodes::ILLEGAL_COLUMN,
                    "Third argument of function {} must be a constant UInt8", getName());
            min_length = min_length_col->getValue<UInt8>();
        }

        if (arguments.size() >= 4)
        {
            const auto * alphabet_col = checkAndGetColumnConst<ColumnString>(arguments[3].column.get());
            if (!alphabet_col)
                throw Exception(ErrorCodes::ILLEGAL_COLUMN,
                    "Fourth argument of function {} must be a constant String", getName());
            alphabet = alphabet_col->getValue<String>();
        }

        hashidsxx::Hashids hash(salt, min_length, alphabet);

        auto col_res = ColumnString::create();
        for (size_t i = 0; i < input_rows_count; ++i)
        {
            std::string hashid = hash.encode({ num_column->getUInt(i) });
            /// std::string::data() is null-terminated, hence size() + 1 includes the trailing zero.
            col_res->insertDataWithTerminatingZero(hashid.data(), hashid.size() + 1);
        }

        return col_res;
    }
};

}

View File

@ -24,6 +24,7 @@ void registerFunctionsEmbeddedDictionaries(FunctionFactory &);
void registerFunctionsExternalDictionaries(FunctionFactory &);
void registerFunctionsExternalModels(FunctionFactory &);
void registerFunctionsFormatting(FunctionFactory &);
void registerFunctionHashID(FunctionFactory &);
void registerFunctionsHashing(FunctionFactory &);
void registerFunctionsHigherOrder(FunctionFactory &);
void registerFunctionsLogical(FunctionFactory &);
@ -90,6 +91,7 @@ void registerFunctions()
registerFunctionsExternalDictionaries(factory);
registerFunctionsExternalModels(factory);
registerFunctionsFormatting(factory);
registerFunctionHashID(factory);
registerFunctionsHashing(factory);
registerFunctionsHigherOrder(factory);
registerFunctionsLogical(factory);

View File

@ -100,3 +100,6 @@ endif()
if (TARGET ch_contrib::jemalloc)
set(USE_JEMALLOC 1)
endif()
# Record that hashidsxx is available when its contrib target was created.
# NOTE(review): confirm that a configured header actually exposes USE_HASHIDSXX
# and that the code using hashidsxx checks it.
if (TARGET ch_contrib::hashidsxx)
    set(USE_HASHIDSXX 1)
endif()

View File

@ -0,0 +1,5 @@
0 gY
1 jR
2 k5
3 l5
4 mO

View File

@ -0,0 +1 @@
-- hashid with only the number argument.
SELECT number, hashid(number) FROM system.numbers LIMIT 5;

View File

@ -0,0 +1,5 @@
0 pbgkmdljlpjoapne
1 akemglnjepjpodba
2 obmgndljgajpkeao
3 dldokmpjpgjgeanb
4 nkdlpgajngjnobme

View File

@ -0,0 +1 @@
-- hashid with all four arguments: number, salt, minimum length, alphabet.
SELECT number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') FROM system.numbers LIMIT 5;

View File

@ -0,0 +1 @@
YQrvD5XGvbx

View File

@ -0,0 +1 @@
-- hashid of a large constant number with a salt.
SELECT hashid(1234567890123456, 's3cr3t');