diff --git a/.gitmodules b/.gitmodules
index af90c788012..53ef899dd99 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -357,3 +357,6 @@
 [submodule "contrib/pocketfft"]
 	path = contrib/pocketfft
 	url = https://github.com/mreineck/pocketfft.git
+[submodule "contrib/sqids-cpp"]
+	path = contrib/sqids-cpp
+	url = https://github.com/sqids/sqids-cpp.git
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index a8f0705df88..3d3b9b72faf 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -156,6 +156,7 @@ add_contrib (nuraft-cmake NuRaft)
 add_contrib (fast_float-cmake fast_float)
 add_contrib (datasketches-cpp-cmake datasketches-cpp)
 add_contrib (incbin-cmake incbin)
+add_contrib (sqids-cpp-cmake sqids-cpp)
 
 option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
 if (ENABLE_NLP)
diff --git a/contrib/sqids-cpp b/contrib/sqids-cpp
new file mode 160000
index 00000000000..3756e537d4d
--- /dev/null
+++ b/contrib/sqids-cpp
@@ -0,0 +1 @@
+Subproject commit 3756e537d4d48cc0dd4176801fe19f99601439b0
diff --git a/contrib/sqids-cpp-cmake/CMakeLists.txt b/contrib/sqids-cpp-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..7eb77c92437
--- /dev/null
+++ b/contrib/sqids-cpp-cmake/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Exposes the sqids-cpp submodule to the rest of the build as ch_contrib::sqids.
+option(ENABLE_SQIDS "Enable sqids support" ${ENABLE_LIBRARIES})
+if (NOT ENABLE_SQIDS)
+    message (STATUS "Not using sqids")
+    return()
+endif()
+
+set (SQIDS_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/sqids-cpp")
+set (SQIDS_INCLUDE_DIR "${SQIDS_SOURCE_DIR}/include")
+
+# INTERFACE target: nothing is compiled here, it only carries usage requirements to its consumers.
+add_library(_sqids INTERFACE)
+target_include_directories(_sqids SYSTEM INTERFACE "${SQIDS_INCLUDE_DIR}")
+# Every target that links ch_contrib::sqids is compiled with the ENABLE_SQIDS preprocessor define.
+target_compile_definitions(_sqids INTERFACE ENABLE_SQIDS)
+
+add_library(ch_contrib::sqids ALIAS _sqids)
diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index 7276437ec82..4816a6f0032 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -1776,3 +1776,34 @@ Result:
 │ (('queries','database','analytical'),('oriented','processing','DBMS')) │
 └────────────────────────────────────────────────────────────────────────┘
 ```
+
+## sqid
+
+Transforms numbers into a YouTube-like short URL hash called [Sqid](https://sqids.org/).
+To use this function, enable the setting `allow_experimental_hash_functions = 1`.
+
+**Syntax**
+
+```sql
+sqid(number1, ...)
+```
+
+**Arguments**
+
+- A variable number (at least one) of UInt8, UInt16, UInt32 or UInt64 numbers.
+
+**Returned Value**
+
+A hash id [String](/docs/en/sql-reference/data-types/string.md).
+
+**Example**
+
+```sql
+SELECT sqid(1, 2, 3, 4, 5);
+```
+
+```response
+┌─sqid(1, 2, 3, 4, 5)─┐
+│ gXHfJ1C6dN          │
+└─────────────────────┘
+```
diff --git a/src/Common/config.h.in b/src/Common/config.h.in
index ea77e2fed2d..f84e28942c5 100644
--- a/src/Common/config.h.in
+++ b/src/Common/config.h.in
@@ -27,6 +27,7 @@
 #cmakedefine01 USE_H3
 #cmakedefine01 USE_S2_GEOMETRY
 #cmakedefine01 USE_FASTOPS
+#cmakedefine01 USE_SQIDS
 #cmakedefine01 USE_NLP
 #cmakedefine01 USE_VECTORSCAN
 #cmakedefine01 USE_LIBURING
diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt
index 4b0d9335198..89676594581 100644
--- a/src/Functions/CMakeLists.txt
+++ b/src/Functions/CMakeLists.txt
@@ -79,6 +79,10 @@ if (ENABLE_NLP)
     list (APPEND PRIVATE_LIBS ch_contrib::cld2)
 endif()
 
+if (TARGET ch_contrib::sqids)
+    list (APPEND PRIVATE_LIBS ch_contrib::sqids)
+endif()
+
 if (TARGET ch_contrib::h3)
     list (APPEND PRIVATE_LIBS ch_contrib::h3)
 endif()
diff --git a/src/Functions/FunctionSqid.cpp b/src/Functions/FunctionSqid.cpp
new file mode 100644
index 00000000000..24868d9a13e
--- /dev/null
+++ b/src/Functions/FunctionSqid.cpp
@@ -0,0 +1,99 @@
+#include "config.h"
+
+#if USE_SQIDS
+
+#include <Columns/ColumnString.h>
+#include <Columns/ColumnsNumber.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+#include <Functions/IFunction.h>
+#include <Interpreters/Context.h>
+
+#include <sqids/sqids.hpp>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int SUPPORT_IS_DISABLED;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+/// sqid(number1, ...)
+/// Encodes the unsigned integer arguments of every row into one Sqid string (https://sqids.org/).
+class FunctionSqid : public IFunction
+{
+public:
+    static constexpr auto name = "sqid";
+
+    String getName() const override { return name; }
+    size_t getNumberOfArguments() const override { return 0; }
+    bool isVariadic() const override { return true; }
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
+
+    /// Experimental: construction throws SUPPORT_IS_DISABLED unless `allow_experimental_hash_functions` is enabled.
+    static FunctionPtr create(ContextPtr context)
+    {
+        if (!context->getSettingsRef().allow_experimental_hash_functions)
+            throw Exception(
+                ErrorCodes::SUPPORT_IS_DISABLED,
+                "Hashing function '{}' is experimental. Set `allow_experimental_hash_functions` setting to enable it",
+                name);
+
+        return std::make_shared<FunctionSqid>();
+    }
+
+    /// Accepts one or more arguments of type UInt8, UInt16, UInt32 or UInt64 and returns String.
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        if (arguments.empty())
+            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName());
+
+        for (size_t i = 0; i < arguments.size(); ++i)
+        {
+            if (!checkDataTypes<
+                    DataTypeUInt8,
+                    DataTypeUInt16,
+                    DataTypeUInt32,
+                    DataTypeUInt64>(arguments[i].get()))
+                throw Exception(
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Argument {} for function {} must have datatype UInt*, given type: {}.",
+                    i, getName(), arguments[i]->getName());
+        }
+
+        return std::make_shared<DataTypeString>();
+    }
+
+    /// Produces one id per row, encoding that row's values from all argument columns together.
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
+    {
+        const size_t num_args = arguments.size();
+        auto col_res = ColumnString::create();
+
+        sqidscxx::Sqids<> sqids;
+        std::vector<UInt64> numbers(num_args);
+        for (size_t i = 0; i < input_rows_count; ++i)
+        {
+            for (size_t j = 0; j < num_args; ++j)
+                numbers[j] = arguments[j].column->getUInt(i);
+
+            const auto id = sqids.encode(numbers);
+            /// Append the bytes directly; no temporary Field is needed.
+            col_res->insertData(id.data(), id.size());
+        }
+        return col_res;
+    }
+};
+
+REGISTER_FUNCTION(Sqid)
+{
+    factory.registerFunction<FunctionSqid>();
+}
+}
+
+#endif
diff --git a/src/configure_config.cmake b/src/configure_config.cmake
index 2980fde4851..c3c6d9be6da 100644
--- a/src/configure_config.cmake
+++ b/src/configure_config.cmake
@@ -128,6 +128,9 @@ endif()
 if (TARGET ch_contrib::fastops)
     set(USE_FASTOPS 1)
 endif()
+if (TARGET ch_contrib::sqids)
+    set(USE_SQIDS 1)
+endif()
 if (TARGET ch_contrib::vectorscan)
     set(USE_VECTORSCAN 1)
 endif()
diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql
index 148ad303bd4..729a0b3995c 100644
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql
@@ -2,7 +2,7 @@
 -- Please help shorten this list down to zero elements.
 SELECT name FROM system.functions WHERE NOT is_aggregate AND origin = 'System' AND alias_to = '' AND length(description) < 10
 AND name NOT IN (
-    'MD4', 'MD5', 'SHA1', 'SHA224', 'SHA256', 'SHA384', 'SHA512', 'halfMD5',
+    'MD4', 'MD5', 'SHA1', 'SHA224', 'SHA256', 'SHA384', 'SHA512', 'halfMD5', 'sqid',
     'aes_decrypt_mysql', 'aes_encrypt_mysql', 'decrypt', 'encrypt',
     'base64Decode', 'base64Encode', 'tryBase64Decode',
     'convertCharset',
diff --git a/tests/queries/0_stateless/02933_sqid.reference b/tests/queries/0_stateless/02933_sqid.reference
new file mode 100644
index 00000000000..4506cc4d01a
--- /dev/null
+++ b/tests/queries/0_stateless/02933_sqid.reference
@@ -0,0 +1,13 @@
+-- negative tests
+-- const UInt*
+Uk
+XMbT
+86Rf07
+Td1EnWQo
+XMbT
+-- non-const UInt*
+Uk
+XMbT
+86Rf07
+Td1EnWQo
+XMbT
diff --git a/tests/queries/0_stateless/02933_sqid.sql b/tests/queries/0_stateless/02933_sqid.sql
new file mode 100644
index 00000000000..db8b2f29ee8
--- /dev/null
+++ b/tests/queries/0_stateless/02933_sqid.sql
@@ -0,0 +1,24 @@
+-- Tags: no-fasttest
+
+SET allow_experimental_hash_functions = 1;
+SET allow_suspicious_low_cardinality_types = 1;
+
+SELECT '-- negative tests';
+SELECT sqid(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT sqid('1'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+
+SELECT '-- const UInt*';
+SELECT sqid(1);
+SELECT sqid(1, 2);
+SELECT sqid(1, 2, 3);
+SELECT sqid(1::UInt8, 2::UInt16, 3::UInt32, 4::UInt64);
+-- Nullable and LowCardinality wrappers are expected to give the same id as sqid(1, 2).
+SELECT sqid(toNullable(1), toLowCardinality(2));
+
+SELECT '-- non-const UInt*';
+SELECT sqid(materialize(1));
+SELECT sqid(materialize(1), materialize(2));
+SELECT sqid(materialize(1), materialize(2), materialize(3));
+SELECT sqid(materialize(1::UInt8), materialize(2::UInt16), materialize(3::UInt32), materialize(4::UInt64));
+-- Same expectation for non-constant columns.
+SELECT sqid(toNullable(materialize(1)), toLowCardinality(materialize(2)));
diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 9f7eed82dde..becd6271b95 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -1791,6 +1791,7 @@ logTrace
 logagent
 loghouse
 london
+lookups
 lowcardinality
 lowerUTF
 lowercased
@@ -2277,6 +2278,7 @@ splitByRegexp
 splitByString
 splitByWhitespace
 splitby
+sqid
 sql
 sqlalchemy
 sqlinsert