Revert "Revert "Add sqid() function""

This reverts commit 8d5d0ef38f.
This commit is contained in:
Robert Schulze 2023-12-05 10:50:30 +00:00
parent 350baef6e3
commit a2cd240317
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
13 changed files with 193 additions and 1 deletions

3
.gitmodules vendored
View File

@ -357,3 +357,6 @@
[submodule "contrib/pocketfft"]
path = contrib/pocketfft
url = https://github.com/mreineck/pocketfft.git
[submodule "contrib/sqids-cpp"]
path = contrib/sqids-cpp
url = https://github.com/sqids/sqids-cpp.git

View File

@ -156,6 +156,7 @@ add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (incbin-cmake incbin)
add_contrib (sqids-cpp-cmake sqids-cpp)
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP)

1
contrib/sqids-cpp vendored Submodule

@ -0,0 +1 @@
Subproject commit 3756e537d4d48cc0dd4176801fe19f99601439b0

View File

@ -0,0 +1,14 @@
# Build glue for the header-only sqids-cpp contrib library.
#
# Exposes the interface target `ch_contrib::sqids`, which carries the library's
# include directory and the `ENABLE_SQIDS` compile definition to its consumers.
# The target is only created when ENABLE_SQIDS is on.
option (ENABLE_SQIDS "Enable sqids support" ${ENABLE_LIBRARIES})

if (NOT ENABLE_SQIDS)
    message (STATUS "Not using sqids")
    return ()
endif ()

set (SQIDS_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/sqids-cpp")
set (SQIDS_INCLUDE_DIR "${SQIDS_SOURCE_DIR}/include")

# Header-only: an INTERFACE library with usage requirements and no sources.
add_library (_sqids INTERFACE)
add_library (ch_contrib::sqids ALIAS _sqids)

target_compile_definitions (_sqids INTERFACE ENABLE_SQIDS)
target_include_directories (_sqids SYSTEM INTERFACE "${SQIDS_INCLUDE_DIR}")

View File

@ -1776,3 +1776,34 @@ Result:
│ (('queries','database','analytical'),('oriented','processing','DBMS')) │
└────────────────────────────────────────────────────────────────────────┘
```
## sqid
Transforms numbers into a YouTube-like short URL hash called a [Sqid](https://sqids.org/).
To use this function, set setting `allow_experimental_hash_functions = 1`.
**Syntax**
```sql
sqid(number1,...)
```
**Arguments**
- A variable number of UInt8, UInt16, UInt32 or UInt64 numbers.
**Returned Value**
A hash ID of type [String](/docs/en/sql-reference/data-types/string.md).
**Example**
```sql
SELECT sqid(1, 2, 3, 4, 5);
```
```response
┌─sqid(1, 2, 3, 4, 5)─┐
│ gXHfJ1C6dN │
└─────────────────────┘
```

View File

@ -27,6 +27,7 @@
#cmakedefine01 USE_H3
#cmakedefine01 USE_S2_GEOMETRY
#cmakedefine01 USE_FASTOPS
#cmakedefine01 USE_SQIDS
#cmakedefine01 USE_NLP
#cmakedefine01 USE_VECTORSCAN
#cmakedefine01 USE_LIBURING

View File

@ -79,6 +79,10 @@ if (ENABLE_NLP)
list (APPEND PRIVATE_LIBS ch_contrib::cld2)
endif()
# Link sqids only when its contrib target exists.
if (TARGET ch_contrib::sqids)
list (APPEND PRIVATE_LIBS ch_contrib::sqids)
endif()
if (TARGET ch_contrib::h3)
list (APPEND PRIVATE_LIBS ch_contrib::h3)
endif()

View File

@ -0,0 +1,97 @@
#include "config.h"
#ifdef ENABLE_SQIDS
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
#include <sqids/sqids.hpp>
namespace DB
{
/// Error codes raised by this translation unit.
namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int SUPPORT_IS_DISABLED;
}
// sqid(number1, ...)
/// Encodes one or more unsigned integers into a single Sqid string using a
/// default-constructed `sqidscxx::Sqids<>` encoder.
///
/// Accepts a variable number (at least one) of UInt8/UInt16/UInt32/UInt64
/// arguments and returns a String. The function is experimental and can only
/// be created when the `allow_experimental_hash_functions` setting is enabled.
class FunctionSqid : public IFunction
{
public:
    static constexpr auto name = "sqid";

    String getName() const override { return name; }
    size_t getNumberOfArguments() const override { return 0; }
    bool isVariadic() const override { return true; }
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }

    /// Factory entry point.
    /// Throws SUPPORT_IS_DISABLED unless `allow_experimental_hash_functions` is set.
    static FunctionPtr create(ContextPtr context)
    {
        if (!context->getSettingsRef().allow_experimental_hash_functions)
            throw Exception(
                ErrorCodes::SUPPORT_IS_DISABLED,
                "Hashing function '{}' is experimental. Set `allow_experimental_hash_functions` setting to enable it",
                name);

        return std::make_shared<FunctionSqid>();
    }

    /// Validates the argument list and returns the result type (String).
    /// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when called without arguments and
    /// ILLEGAL_TYPE_OF_ARGUMENT when any argument is not UInt8/16/32/64.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (arguments.empty())
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName());

        for (size_t i = 0; i < arguments.size(); ++i)
        {
            if (!checkDataTypes<
                    DataTypeUInt8,
                    DataTypeUInt16,
                    DataTypeUInt32,
                    DataTypeUInt64>(arguments[i].get()))
                throw Exception(
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                    "Argument {} for function {} must have datatype UInt*, given type: {}.",
                    i, getName(), arguments[i]->getName());
        }

        return std::make_shared<DataTypeString>();
    }

    /// Produces one Sqid string per input row from that row's argument values.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        const size_t num_args = arguments.size();

        auto col_res = ColumnString::create();
        col_res->reserve(input_rows_count);

        /// Resolve the argument columns once instead of copying a ColumnPtr
        /// (a reference-counted pointer) for every row and every argument.
        std::vector<const IColumn *> arg_columns;
        arg_columns.reserve(num_args);
        for (const auto & arg : arguments)
            arg_columns.push_back(arg.column.get());

        sqidscxx::Sqids<> sqids;
        std::vector<UInt64> numbers(num_args); /// reused across rows

        for (size_t row = 0; row < input_rows_count; ++row)
        {
            for (size_t j = 0; j < num_args; ++j)
                numbers[j] = arg_columns[j]->getUInt(row);

            const auto id = sqids.encode(numbers);

            /// Copy the bytes directly; insert(id) would go through a temporary Field.
            col_res->insertData(id.data(), id.size());
        }

        return col_res;
    }
};
/// Registers FunctionSqid with the function factory.
REGISTER_FUNCTION(Sqid)
{
factory.registerFunction<FunctionSqid>();
}
}
#endif

View File

@ -128,6 +128,9 @@ endif()
if (TARGET ch_contrib::fastops)
set(USE_FASTOPS 1)
endif()
# Set USE_SQIDS when the sqids contrib target exists.
if (TARGET ch_contrib::sqids)
set(USE_SQIDS 1)
endif()
if (TARGET ch_contrib::vectorscan)
set(USE_VECTORSCAN 1)
endif()

View File

@ -2,7 +2,7 @@
-- Please help shorten this list down to zero elements.
SELECT name FROM system.functions WHERE NOT is_aggregate AND origin = 'System' AND alias_to = '' AND length(description) < 10
AND name NOT IN (
'MD4', 'MD5', 'SHA1', 'SHA224', 'SHA256', 'SHA384', 'SHA512', 'halfMD5',
'MD4', 'MD5', 'SHA1', 'SHA224', 'SHA256', 'SHA384', 'SHA512', 'halfMD5', 'sqid',
'aes_decrypt_mysql', 'aes_encrypt_mysql', 'decrypt', 'encrypt',
'base64Decode', 'base64Encode', 'tryBase64Decode',
'convertCharset',

View File

@ -0,0 +1,13 @@
-- negative tests
-- const UInt*
Uk
XMbT
86Rf07
Td1EnWQo
XMbT
-- non-const UInt*
Uk
XMbT
86Rf07
Td1EnWQo
XMbT

View File

@ -0,0 +1,22 @@
-- Tags: no-fasttest
-- Functional test for sqid(): argument validation, then encoding of constant
-- and non-constant (materialized) unsigned integer arguments.
-- sqid() is experimental and must be enabled explicitly.
SET allow_experimental_hash_functions = 1;
-- NOTE(review): presumably needed for the toLowCardinality() calls on integers below; confirm.
SET allow_suspicious_low_cardinality_types = 1;
SELECT '-- negative tests';
-- At least one argument is required.
SELECT sqid(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-- Only UInt8/16/32/64 arguments are accepted.
SELECT sqid('1'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT '-- const UInt*';
SELECT sqid(1);
SELECT sqid(1, 2);
SELECT sqid(1, 2, 3);
SELECT sqid(1::UInt8, 2::UInt16, 3::UInt32, 4::UInt64);
SELECT sqid(toNullable(1), toLowCardinality(2));
-- Same inputs as above, materialized into full columns.
SELECT '-- non-const UInt*';
SELECT sqid(materialize(1));
SELECT sqid(materialize(1), materialize(2));
SELECT sqid(materialize(1), materialize(2), materialize(3));
SELECT sqid(materialize(1::UInt8), materialize(2::UInt16), materialize(3::UInt32), materialize(4::UInt64));
SELECT sqid(toNullable(materialize(1)), toLowCardinality(materialize(2)));

View File

@ -1791,6 +1791,7 @@ logTrace
logagent
loghouse
london
lookups
lowcardinality
lowerUTF
lowercased
@ -2277,6 +2278,7 @@ splitByRegexp
splitByString
splitByWhitespace
splitby
sqid
sql
sqlalchemy
sqlinsert