From 838acb22b70633f9e65d64bd9d6a14287b08d8e6 Mon Sep 17 00:00:00 2001 From: Aleksandr Musorin Date: Sun, 23 Oct 2022 21:50:30 +0200 Subject: [PATCH 01/42] added num_processed_files and processed_files_size --- src/Backups/BackupImpl.cpp | 30 ++++ src/Backups/BackupImpl.h | 12 +- src/Backups/BackupsWorker.cpp | 19 ++- src/Backups/BackupsWorker.h | 10 +- src/Backups/IBackup.h | 6 + src/Storages/System/StorageSystemBackups.cpp | 6 + .../test_backup_restore_new/test.py | 128 +++++++++++++++++- 7 files changed, 201 insertions(+), 10 deletions(-) diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index ec35b8ed07a..32e0d74b90d 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -271,6 +271,18 @@ size_t BackupImpl::getNumFiles() const return num_files; } +size_t BackupImpl::getNumProcessedFiles() const +{ + std::lock_guard lock{mutex}; + return num_processed_files; +} + +UInt64 BackupImpl::getProcessedFilesSize() const +{ + std::lock_guard lock{mutex}; + return processed_files_size; +} + UInt64 BackupImpl::getUncompressedSize() const { std::lock_guard lock{mutex}; @@ -355,6 +367,7 @@ void BackupImpl::writeBackupMetadata() out->finalize(); increaseUncompressedSize(str.size()); + increaseProcessedSize(str.size()); } @@ -380,6 +393,7 @@ void BackupImpl::readBackupMetadata() String str; readStringUntilEOF(str, *in); increaseUncompressedSize(str.size()); + increaseProcessedSize(str.size()); Poco::XML::DOMParser dom_parser; Poco::AutoPtr config = dom_parser.parseMemory(str.data(), str.size()); const Poco::XML::Node * config_root = getRootNode(config); @@ -598,6 +612,8 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c if (open_mode != OpenMode::READ) throw Exception("Backup is not opened for reading", ErrorCodes::LOGICAL_ERROR); + increaseProcessedSize(size_and_checksum.first); + if (!size_and_checksum.first) { /// Entry's data is empty. 
@@ -761,6 +777,8 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry) .base_checksum = 0, }; + increaseProcessedSize(info); + /// Empty file, nothing to backup if (info.size == 0 && deduplicate_files) { @@ -971,6 +989,18 @@ void BackupImpl::increaseUncompressedSize(const FileInfo & info) increaseUncompressedSize(info.size - info.base_size); } +void BackupImpl::increaseProcessedSize(UInt64 file_size) const +{ + processed_files_size += file_size; + ++num_processed_files; +} + +void BackupImpl::increaseProcessedSize(const FileInfo & info) +{ + if ((info.size > info.base_size) && (info.data_file_name.empty() || (info.data_file_name == info.file_name))) + increaseProcessedSize(info.size - info.base_size); +} + void BackupImpl::setCompressedSize() { if (use_archives) diff --git a/src/Backups/BackupImpl.h b/src/Backups/BackupImpl.h index 9fc881bf680..45c471aa825 100644 --- a/src/Backups/BackupImpl.h +++ b/src/Backups/BackupImpl.h @@ -59,6 +59,8 @@ public: time_t getTimestamp() const override { return timestamp; } UUID getUUID() const override { return *uuid; } size_t getNumFiles() const override; + size_t getNumProcessedFiles() const override; + UInt64 getProcessedFilesSize() const override; UInt64 getUncompressedSize() const override; UInt64 getCompressedSize() const override; Strings listFiles(const String & directory, bool recursive) const override; @@ -101,10 +103,16 @@ private: std::shared_ptr getArchiveReader(const String & suffix) const; std::shared_ptr getArchiveWriter(const String & suffix); - /// Increases `uncompressed_size` by a specific value and `num_files` by 1. + /// Increases `uncompressed_size` by a specific value, + /// also increases `num_files` by 1. void increaseUncompressedSize(UInt64 file_size); void increaseUncompressedSize(const FileInfo & info); + /// Increases `processed_files_size` by a specific value, + /// also increases `num_processed_files` by 1. 
+ void increaseProcessedSize(UInt64 file_size) const; + void increaseProcessedSize(const FileInfo & info); + /// Calculates and sets `compressed_size`. void setCompressedSize(); @@ -121,6 +129,8 @@ private: std::optional uuid; time_t timestamp = 0; size_t num_files = 0; + mutable size_t num_processed_files = 0; + mutable UInt64 processed_files_size = 0; UInt64 uncompressed_size = 0; UInt64 compressed_size = 0; int version; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 53bebaf06d7..2c012464ca6 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -341,16 +341,20 @@ void BackupsWorker::doBackup( } size_t num_files = 0; + size_t num_processed_files = 0; UInt64 uncompressed_size = 0; UInt64 compressed_size = 0; + UInt64 processed_files_size = 0; /// Finalize backup (write its metadata). if (!backup_settings.internal) { backup->finalizeWriting(); num_files = backup->getNumFiles(); + num_processed_files = backup->getNumProcessedFiles(); uncompressed_size = backup->getUncompressedSize(); compressed_size = backup->getCompressedSize(); + processed_files_size = backup->getProcessedFilesSize(); } /// Close the backup. @@ -358,7 +362,7 @@ void BackupsWorker::doBackup( LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_name_for_logging); setStatus(backup_id, BackupStatus::BACKUP_CREATED); - setNumFilesAndSize(backup_id, num_files, uncompressed_size, compressed_size); + setNumFilesAndSize(backup_id, num_files, num_processed_files, processed_files_size, uncompressed_size, compressed_size); } catch (...) 
{ @@ -501,8 +505,6 @@ void BackupsWorker::doRestore( backup_open_params.password = restore_settings.password; BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params); - setNumFilesAndSize(restore_id, backup->getNumFiles(), backup->getUncompressedSize(), backup->getCompressedSize()); - String current_database = context->getCurrentDatabase(); /// Checks access rights if this is ON CLUSTER query. @@ -583,6 +585,13 @@ void BackupsWorker::doRestore( LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging); setStatus(restore_id, BackupStatus::RESTORED); + setNumFilesAndSize( + restore_id, + backup->getNumFiles(), + backup->getNumProcessedFiles(), + backup->getProcessedFilesSize(), + backup->getUncompressedSize(), + backup->getCompressedSize()); } catch (...) { @@ -663,7 +672,7 @@ void BackupsWorker::setStatus(const String & id, BackupStatus status, bool throw } -void BackupsWorker::setNumFilesAndSize(const String & id, size_t num_files, UInt64 uncompressed_size, UInt64 compressed_size) +void BackupsWorker::setNumFilesAndSize(const String & id, size_t num_files, size_t num_processed_files, UInt64 processed_files_size, UInt64 uncompressed_size, UInt64 compressed_size) { std::lock_guard lock{infos_mutex}; auto it = infos.find(id); @@ -672,6 +681,8 @@ void BackupsWorker::setNumFilesAndSize(const String & id, size_t num_files, UInt auto & info = it->second; info.num_files = num_files; + info.num_processed_files = num_processed_files; + info.processed_files_size = processed_files_size; info.uncompressed_size = uncompressed_size; info.compressed_size = compressed_size; } diff --git a/src/Backups/BackupsWorker.h b/src/Backups/BackupsWorker.h index b6d9729833e..19d2e4d0d1d 100644 --- a/src/Backups/BackupsWorker.h +++ b/src/Backups/BackupsWorker.h @@ -56,6 +56,14 @@ public: /// Number of files in the backup (including backup's metadata; only unique files are counted). 
size_t num_files = 0; + /// Number of processed files during backup or restore process + /// For restore it includes files from base backups + size_t num_processed_files = 0; + + /// Size of processed files during backup or restore + /// For restore it includes sizes from base backups + UInt64 processed_files_size = 0; + /// Size of all files in the backup (including backup's metadata; only unique files are counted). UInt64 uncompressed_size = 0; @@ -102,7 +110,7 @@ private: void addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status); void setStatus(const OperationID & id, BackupStatus status, bool throw_if_error = true); void setStatusSafe(const String & id, BackupStatus status) { setStatus(id, status, false); } - void setNumFilesAndSize(const OperationID & id, size_t num_files, UInt64 uncompressed_size, UInt64 compressed_size); + void setNumFilesAndSize(const OperationID & id, size_t num_files, size_t num_processed_files, UInt64 processed_files_size, UInt64 uncompressed_size, UInt64 compressed_size); std::vector getAllActiveBackupInfos() const; std::vector getAllActiveRestoreInfos() const; diff --git a/src/Backups/IBackup.h b/src/Backups/IBackup.h index 43763c5bfde..13c21fb0287 100644 --- a/src/Backups/IBackup.h +++ b/src/Backups/IBackup.h @@ -40,6 +40,12 @@ public: /// Returns the number of unique files in the backup. virtual size_t getNumFiles() const = 0; + /// Returns the number of files that were processed for backup or restore + virtual size_t getNumProcessedFiles() const = 0; + + /// Returns the total size of processed files for backup or restore + virtual UInt64 getProcessedFilesSize() const = 0; + /// Returns the total size of unique files in the backup. 
virtual UInt64 getUncompressedSize() const = 0; diff --git a/src/Storages/System/StorageSystemBackups.cpp b/src/Storages/System/StorageSystemBackups.cpp index 52a26fe0cd6..268cc9d0963 100644 --- a/src/Storages/System/StorageSystemBackups.cpp +++ b/src/Storages/System/StorageSystemBackups.cpp @@ -19,6 +19,8 @@ NamesAndTypesList StorageSystemBackups::getNamesAndTypes() {"name", std::make_shared()}, {"status", std::make_shared(getBackupStatusEnumValues())}, {"num_files", std::make_shared()}, + {"num_processed_files", std::make_shared()}, + {"processed_files_size", std::make_shared()}, {"uncompressed_size", std::make_shared()}, {"compressed_size", std::make_shared()}, {"error", std::make_shared()}, @@ -36,6 +38,8 @@ void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr con auto & column_name = assert_cast(*res_columns[column_index++]); auto & column_status = assert_cast(*res_columns[column_index++]); auto & column_num_files = assert_cast(*res_columns[column_index++]); + auto & column_num_processed_files = assert_cast(*res_columns[column_index++]); + auto & column_processed_files_size = assert_cast(*res_columns[column_index++]); auto & column_uncompressed_size = assert_cast(*res_columns[column_index++]); auto & column_compressed_size = assert_cast(*res_columns[column_index++]); auto & column_error = assert_cast(*res_columns[column_index++]); @@ -48,6 +52,8 @@ void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr con column_name.insertData(info.name.data(), info.name.size()); column_status.insertValue(static_cast(info.status)); column_num_files.insertValue(info.num_files); + column_num_processed_files.insertValue(info.num_processed_files); + column_processed_files_size.insertValue(info.processed_files_size); column_uncompressed_size.insertValue(info.uncompressed_size); column_compressed_size.insertValue(info.compressed_size); column_error.insertData(info.error_message.data(), info.error_message.size()); diff --git 
a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 90e8acc702d..b1fab60f33b 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -192,6 +192,101 @@ def test_incremental_backup(): assert instance.query("SELECT count(), sum(x) FROM test.table2") == "102\t5081\n" +def test_increment_backup_without_changes(): + backup_name = new_backup_name() + incremental_backup_name = new_backup_name() + create_and_fill_table(n=1) + + system_backup_qry = "SELECT status, num_files, num_processed_files, processed_files_size, uncompressed_size, compressed_size, error FROM system.backups WHERE id='{id_backup}'" + + assert instance.query("SELECT count(), sum(x) FROM test.table") == TSV([["1", "0"]]) + + # prepare first backup without base_backup + (id_backup, status) = instance.query( + f"BACKUP TABLE test.table TO {backup_name}" + ).split("\t") + + ( + backup_status, + num_files, + num_processed_files, + processed_files_size, + uncompressed_size, + compressed_size, + error, + ) = ( + instance.query(system_backup_qry.format(id_backup=id_backup)) + .strip("\n") + .split("\t") + ) + + assert backup_status == "BACKUP_CREATED" + assert num_files == "11" + assert int(uncompressed_size) > 0 + assert int(compressed_size) > 0 + assert error == "" + + # create second backup without changes based on the first one + (id_backup_wo_changes, status_backup_wo_changes) = instance.query( + f"BACKUP TABLE test.table TO {incremental_backup_name} SETTINGS base_backup = {backup_name}" + ).split("\t") + + ( + backup_status_wo_changes, + num_files_backup_wo_changes, + num_processed_files_backup_wo_changes, + processed_files_size_backup_wo_changes, + uncompressed_size_backup_wo_changes, + compressed_size_backup_wo_changes, + error_snd, + ) = ( + instance.query(system_backup_qry.format(id_backup=id_backup_wo_changes)) + .strip("\n") + .split("\t") + ) + + assert backup_status_wo_changes 
== "BACKUP_CREATED" + assert num_files_backup_wo_changes == "1" + assert num_processed_files_backup_wo_changes == "11" + assert int(processed_files_size_backup_wo_changes) > 0 + assert int(uncompressed_size_backup_wo_changes) > 0 + assert int(compressed_size_backup_wo_changes) > 0 + assert error_snd == "" + + # restore the second backup + # we expect to see all files in the meta info of the restore and a sum of uncompressed and compressed sizes + (id_restore, status_restore) = instance.query( + f"RESTORE TABLE test.table AS test.table2 FROM {incremental_backup_name}" + ).split("\t") + + assert instance.query("SELECT count(), sum(x) FROM test.table2") == TSV( + [["1", "0"]] + ) + + ( + restore_status, + restore_num_files, + restore_num_processed_files, + restore_processed_files_size, + restore_uncompressed_size, + restore_compressed_size, + restore_error, + ) = ( + instance.query(system_backup_qry.format(id_backup=id_restore)) + .strip("\n") + .split("\t") + ) + + assert restore_status == "RESTORED" + assert int(restore_num_files) == 1 + assert int(restore_num_processed_files) == int( + num_processed_files_backup_wo_changes + ) + assert int(restore_uncompressed_size) > 0 + assert int(restore_compressed_size) > 0 + assert restore_error == "" + + def test_incremental_backup_overflow(): backup_name = new_backup_name() incremental_backup_name = new_backup_name() @@ -1089,9 +1184,18 @@ def test_system_backups(): id = instance.query(f"BACKUP TABLE test.table TO {backup_name}").split("\t")[0] - [name, status, num_files, uncompressed_size, compressed_size, error] = ( + [ + name, + status, + num_files, + num_processed_files, + processed_files_size, + uncompressed_size, + compressed_size, + error, + ] = ( instance.query( - f"SELECT name, status, num_files, uncompressed_size, compressed_size, error FROM system.backups WHERE id='{id}'" + f"SELECT name, status, num_files, num_processed_files, processed_files_size, uncompressed_size, compressed_size, error FROM system.backups 
WHERE id='{id}'" ) .strip("\n") .split("\t") @@ -1101,9 +1205,13 @@ def test_system_backups(): num_files = int(num_files) compressed_size = int(compressed_size) uncompressed_size = int(uncompressed_size) + num_processed_files = int(num_processed_files) + processed_files_size = int(processed_files_size) assert name == escaped_backup_name assert status == "BACKUP_CREATED" assert num_files > 1 + assert num_processed_files > 1 + assert processed_files_size > 1 assert uncompressed_size > 1 assert compressed_size == uncompressed_size assert error == "" @@ -1115,9 +1223,17 @@ def test_system_backups(): ) escaped_backup_name = backup_name.replace("'", "\\'") - [status, num_files, uncompressed_size, compressed_size, error] = ( + [ + status, + num_files, + num_processed_files, + processed_files_size, + uncompressed_size, + compressed_size, + error, + ] = ( instance.query( - f"SELECT status, num_files, uncompressed_size, compressed_size, error FROM system.backups WHERE name='{escaped_backup_name}'" + f"SELECT status, num_files, num_processed_files, processed_files_size, uncompressed_size, compressed_size, error FROM system.backups WHERE name='{escaped_backup_name}'" ) .strip("\n") .split("\t") @@ -1126,10 +1242,14 @@ def test_system_backups(): num_files = int(num_files) compressed_size = int(compressed_size) uncompressed_size = int(uncompressed_size) + num_processed_files = int(num_processed_files) + processed_files_size = int(processed_files_size) assert status == "BACKUP_FAILED" assert num_files == 0 assert uncompressed_size == 0 assert compressed_size == 0 + assert num_processed_files == 0 + assert processed_files_size == 0 assert expected_error in error From 206eb4d4461e4aca9410397bdb6e69d53ef48c87 Mon Sep 17 00:00:00 2001 From: Aleksandr Date: Sat, 21 Jan 2023 15:07:25 +0100 Subject: [PATCH 02/42] removed unused if statement for increaseProcessedSize Co-authored-by: Vitaly Baranov --- src/Backups/BackupImpl.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff 
--git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 32e0d74b90d..3e8c9b36886 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -997,8 +997,7 @@ void BackupImpl::increaseProcessedSize(UInt64 file_size) const void BackupImpl::increaseProcessedSize(const FileInfo & info) { - if ((info.size > info.base_size) && (info.data_file_name.empty() || (info.data_file_name == info.file_name))) - increaseProcessedSize(info.size - info.base_size); + increaseProcessedSize(info.size); } void BackupImpl::setCompressedSize() From 82f194fbc679fa6d53534a59f660d34c28003716 Mon Sep 17 00:00:00 2001 From: AVMusorin Date: Mon, 23 Jan 2023 17:15:50 +0100 Subject: [PATCH 03/42] added mutex for increaseProcessedSize --- src/Backups/BackupImpl.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 4159ff8bc96..9c50d0ed1ee 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -991,6 +991,7 @@ void BackupImpl::increaseUncompressedSize(const FileInfo & info) void BackupImpl::increaseProcessedSize(UInt64 file_size) const { + std::lock_guard lock{mutex}; processed_files_size += file_size; ++num_processed_files; } From ec1e0bde6bf1b2a9cba78c1285ad2d64d0924b32 Mon Sep 17 00:00:00 2001 From: bkuschel Date: Mon, 23 Jan 2023 11:15:50 -0500 Subject: [PATCH 04/42] Update krb5 --- contrib/krb5 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/krb5 b/contrib/krb5 index b89e20367b0..f8262a1b548 160000 --- a/contrib/krb5 +++ b/contrib/krb5 @@ -1 +1 @@ -Subproject commit b89e20367b074bd02dd118a6534099b21e88b3c3 +Subproject commit f8262a1b548eb29d97e059260042036255d07f8d From d6d2414ef8d1166379aa0746eba2c85deb3257a8 Mon Sep 17 00:00:00 2001 From: bkuschel Date: Mon, 23 Jan 2023 11:40:55 -0500 Subject: [PATCH 05/42] Remove aes.c and use the one krb5 --- contrib/krb5-cmake/CMakeLists.txt | 6 - contrib/krb5-cmake/aes.c | 302 ------------------------------ 2 files 
changed, 308 deletions(-) delete mode 100644 contrib/krb5-cmake/aes.c diff --git a/contrib/krb5-cmake/CMakeLists.txt b/contrib/krb5-cmake/CMakeLists.txt index 7e184d424aa..214d23bc2a9 100644 --- a/contrib/krb5-cmake/CMakeLists.txt +++ b/contrib/krb5-cmake/CMakeLists.txt @@ -578,12 +578,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") list(APPEND ALL_SRCS "${CMAKE_CURRENT_BINARY_DIR}/include_private/kcmrpc.c") endif() -if (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC) - list(REMOVE_ITEM ALL_SRCS "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/aes.c") - list(APPEND ALL_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/aes.c") -endif () - - target_sources(_krb5 PRIVATE ${ALL_SRCS} ) diff --git a/contrib/krb5-cmake/aes.c b/contrib/krb5-cmake/aes.c deleted file mode 100644 index c0c8c728bff..00000000000 --- a/contrib/krb5-cmake/aes.c +++ /dev/null @@ -1,302 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* lib/crypto/openssl/enc_provider/aes.c */ -/* - * Copyright (C) 2003, 2007, 2008, 2009 by the Massachusetts Institute of Technology. - * All rights reserved. - * - * Export of this software from the United States of America may - * require a specific license from the United States Government. - * It is the responsibility of any person or organization contemplating - * export to obtain such a license before exporting. - * - * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and - * distribute this software and its documentation for any purpose and - * without fee is hereby granted, provided that the above copyright - * notice appear in all copies and that both that copyright notice and - * this permission notice appear in supporting documentation, and that - * the name of M.I.T. not be used in advertising or publicity pertaining - * to distribution of the software without specific, written prior - * permission. 
Furthermore if you modify this software you must label - * your software as modified software and not distribute it in such a - * fashion that it might be confused with the original M.I.T. software. - * M.I.T. makes no representations about the suitability of - * this software for any purpose. It is provided "as is" without express - * or implied warranty. - */ - -#include "crypto_int.h" -#include -#include - -/* proto's */ -static krb5_error_code -cbc_enc(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data); -static krb5_error_code -cbc_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data); -static krb5_error_code -cts_encr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data, size_t dlen); -static krb5_error_code -cts_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data, size_t dlen); - -#define BLOCK_SIZE 16 -#define NUM_BITS 8 -#define IV_CTS_BUF_SIZE 16 /* 16 - hardcoded in CRYPTO_cts128_en/decrypt */ - -static const EVP_CIPHER * -map_mode(unsigned int len) -{ - if (len==16) - return EVP_aes_128_cbc(); - if (len==32) - return EVP_aes_256_cbc(); - else - return NULL; -} - -/* Encrypt one block using CBC. */ -static krb5_error_code -cbc_enc(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data) -{ - int ret, olen = BLOCK_SIZE; - unsigned char iblock[BLOCK_SIZE], oblock[BLOCK_SIZE]; - EVP_CIPHER_CTX *ctx; - struct iov_cursor cursor; - - ctx = EVP_CIPHER_CTX_new(); - if (ctx == NULL) - return ENOMEM; - - ret = EVP_EncryptInit_ex(ctx, map_mode(key->keyblock.length), - NULL, key->keyblock.contents, (ivec) ? 
(unsigned char*)ivec->data : NULL); - if (ret == 0) { - EVP_CIPHER_CTX_free(ctx); - return KRB5_CRYPTO_INTERNAL; - } - - k5_iov_cursor_init(&cursor, data, num_data, BLOCK_SIZE, FALSE); - k5_iov_cursor_get(&cursor, iblock); - EVP_CIPHER_CTX_set_padding(ctx,0); - ret = EVP_EncryptUpdate(ctx, oblock, &olen, iblock, BLOCK_SIZE); - if (ret == 1) - k5_iov_cursor_put(&cursor, oblock); - EVP_CIPHER_CTX_free(ctx); - - zap(iblock, BLOCK_SIZE); - zap(oblock, BLOCK_SIZE); - return (ret == 1) ? 0 : KRB5_CRYPTO_INTERNAL; -} - -/* Decrypt one block using CBC. */ -static krb5_error_code -cbc_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data) -{ - int ret = 0, olen = BLOCK_SIZE; - unsigned char iblock[BLOCK_SIZE], oblock[BLOCK_SIZE]; - EVP_CIPHER_CTX *ctx; - struct iov_cursor cursor; - - ctx = EVP_CIPHER_CTX_new(); - if (ctx == NULL) - return ENOMEM; - - ret = EVP_DecryptInit_ex(ctx, map_mode(key->keyblock.length), - NULL, key->keyblock.contents, (ivec) ? (unsigned char*)ivec->data : NULL); - if (ret == 0) { - EVP_CIPHER_CTX_free(ctx); - return KRB5_CRYPTO_INTERNAL; - } - - k5_iov_cursor_init(&cursor, data, num_data, BLOCK_SIZE, FALSE); - k5_iov_cursor_get(&cursor, iblock); - EVP_CIPHER_CTX_set_padding(ctx,0); - ret = EVP_DecryptUpdate(ctx, oblock, &olen, iblock, BLOCK_SIZE); - if (ret == 1) - k5_iov_cursor_put(&cursor, oblock); - EVP_CIPHER_CTX_free(ctx); - - zap(iblock, BLOCK_SIZE); - zap(oblock, BLOCK_SIZE); - return (ret == 1) ? 
0 : KRB5_CRYPTO_INTERNAL; -} - -static krb5_error_code -cts_encr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data, size_t dlen) -{ - int ret = 0; - size_t size = 0; - unsigned char *oblock = NULL, *dbuf = NULL; - unsigned char iv_cts[IV_CTS_BUF_SIZE]; - struct iov_cursor cursor; - AES_KEY enck; - - memset(iv_cts,0,sizeof(iv_cts)); - if (ivec && ivec->data){ - if (ivec->length != sizeof(iv_cts)) - return KRB5_CRYPTO_INTERNAL; - memcpy(iv_cts, ivec->data,ivec->length); - } - - oblock = OPENSSL_malloc(dlen); - if (!oblock){ - return ENOMEM; - } - dbuf = OPENSSL_malloc(dlen); - if (!dbuf){ - OPENSSL_free(oblock); - return ENOMEM; - } - - k5_iov_cursor_init(&cursor, data, num_data, dlen, FALSE); - k5_iov_cursor_get(&cursor, dbuf); - - AES_set_encrypt_key(key->keyblock.contents, - NUM_BITS * key->keyblock.length, &enck); - - size = CRYPTO_cts128_encrypt((unsigned char *)dbuf, oblock, dlen, &enck, - iv_cts, AES_cbc_encrypt); - if (size <= 0) - ret = KRB5_CRYPTO_INTERNAL; - else - k5_iov_cursor_put(&cursor, oblock); - - if (!ret && ivec && ivec->data) - memcpy(ivec->data, iv_cts, sizeof(iv_cts)); - - zap(oblock, dlen); - zap(dbuf, dlen); - OPENSSL_free(oblock); - OPENSSL_free(dbuf); - - return ret; -} - -static krb5_error_code -cts_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data, size_t dlen) -{ - int ret = 0; - size_t size = 0; - unsigned char *oblock = NULL; - unsigned char *dbuf = NULL; - unsigned char iv_cts[IV_CTS_BUF_SIZE]; - struct iov_cursor cursor; - AES_KEY deck; - - memset(iv_cts,0,sizeof(iv_cts)); - if (ivec && ivec->data){ - if (ivec->length != sizeof(iv_cts)) - return KRB5_CRYPTO_INTERNAL; - memcpy(iv_cts, ivec->data,ivec->length); - } - - oblock = OPENSSL_malloc(dlen); - if (!oblock) - return ENOMEM; - dbuf = OPENSSL_malloc(dlen); - if (!dbuf){ - OPENSSL_free(oblock); - return ENOMEM; - } - - AES_set_decrypt_key(key->keyblock.contents, - NUM_BITS * key->keyblock.length, &deck); - - 
k5_iov_cursor_init(&cursor, data, num_data, dlen, FALSE); - k5_iov_cursor_get(&cursor, dbuf); - - size = CRYPTO_cts128_decrypt((unsigned char *)dbuf, oblock, - dlen, &deck, - iv_cts, AES_cbc_encrypt); - if (size <= 0) - ret = KRB5_CRYPTO_INTERNAL; - else - k5_iov_cursor_put(&cursor, oblock); - - if (!ret && ivec && ivec->data) - memcpy(ivec->data, iv_cts, sizeof(iv_cts)); - - zap(oblock, dlen); - zap(dbuf, dlen); - OPENSSL_free(oblock); - OPENSSL_free(dbuf); - - return ret; -} - -krb5_error_code -krb5int_aes_encrypt(krb5_key key, const krb5_data *ivec, - krb5_crypto_iov *data, size_t num_data) -{ - int ret = 0; - size_t input_length, nblocks; - - input_length = iov_total_length(data, num_data, FALSE); - nblocks = (input_length + BLOCK_SIZE - 1) / BLOCK_SIZE; - if (nblocks == 1) { - if (input_length != BLOCK_SIZE) - return KRB5_BAD_MSIZE; - ret = cbc_enc(key, ivec, data, num_data); - } else if (nblocks > 1) { - ret = cts_encr(key, ivec, data, num_data, input_length); - } - - return ret; -} - -krb5_error_code -krb5int_aes_decrypt(krb5_key key, const krb5_data *ivec, - krb5_crypto_iov *data, size_t num_data) -{ - int ret = 0; - size_t input_length, nblocks; - - input_length = iov_total_length(data, num_data, FALSE); - nblocks = (input_length + BLOCK_SIZE - 1) / BLOCK_SIZE; - if (nblocks == 1) { - if (input_length != BLOCK_SIZE) - return KRB5_BAD_MSIZE; - ret = cbc_decr(key, ivec, data, num_data); - } else if (nblocks > 1) { - ret = cts_decr(key, ivec, data, num_data, input_length); - } - - return ret; -} - -static krb5_error_code -krb5int_aes_init_state (const krb5_keyblock *key, krb5_keyusage usage, - krb5_data *state) -{ - state->length = 16; - state->data = (void *) malloc(16); - if (state->data == NULL) - return ENOMEM; - memset(state->data, 0, state->length); - return 0; -} -const struct krb5_enc_provider krb5int_enc_aes128 = { - 16, - 16, 16, - krb5int_aes_encrypt, - krb5int_aes_decrypt, - NULL, - krb5int_aes_init_state, - krb5int_default_free_state -}; - -const 
struct krb5_enc_provider krb5int_enc_aes256 = { - 16, - 32, 32, - krb5int_aes_encrypt, - krb5int_aes_decrypt, - NULL, - krb5int_aes_init_state, - krb5int_default_free_state -}; From 748c3ad0159f830fa6d8d81dad03a469bbe8be26 Mon Sep 17 00:00:00 2001 From: bkuschel Date: Wed, 25 Jan 2023 13:35:31 -0500 Subject: [PATCH 06/42] Fix build error --- contrib/krb5-cmake/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/contrib/krb5-cmake/CMakeLists.txt b/contrib/krb5-cmake/CMakeLists.txt index 214d23bc2a9..b2407e5b500 100644 --- a/contrib/krb5-cmake/CMakeLists.txt +++ b/contrib/krb5-cmake/CMakeLists.txt @@ -15,6 +15,10 @@ if(NOT AWK_PROGRAM) message(FATAL_ERROR "You need the awk program to build ClickHouse with krb5 enabled.") endif() +if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)) + set(USE_BORINGSSL 1) +endif () + set(KRB5_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/krb5/src") set(KRB5_ET_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/include_private") From 5aa3c10c3f1cfb13e33f9487a13a1c0cb4573d9c Mon Sep 17 00:00:00 2001 From: bkuschel Date: Wed, 25 Jan 2023 17:58:09 -0500 Subject: [PATCH 07/42] Fix Cmake 2 --- contrib/krb5-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/krb5-cmake/CMakeLists.txt b/contrib/krb5-cmake/CMakeLists.txt index b2407e5b500..ceaa270ad85 100644 --- a/contrib/krb5-cmake/CMakeLists.txt +++ b/contrib/krb5-cmake/CMakeLists.txt @@ -16,7 +16,7 @@ if(NOT AWK_PROGRAM) endif() if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)) - set(USE_BORINGSSL 1) + add_compile_definitions(USE_BORINGSSL=1) endif () set(KRB5_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/krb5/src") From 15b1f3f6a163bacbc18a4853fdd726d400fe5eb9 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Mon, 23 Jan 2023 15:46:27 +0100 Subject: [PATCH 08/42] doc: functions: siphash: use correct value in example --- docs/en/sql-reference/functions/hash-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 730b494fcb5..2baee5b892d 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -84,7 +84,7 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00 ```response ┌──────────────SipHash─┬─type───┐ -│ 13726873534472839665 │ UInt64 │ +│ 11400366955626497465 │ UInt64 │ └──────────────────────┴────────┘ ``` From dfd6dfc1558ca1940dc8026e947e7155437c0791 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Wed, 25 Jan 2023 15:07:10 +0100 Subject: [PATCH 09/42] doc: functions: siphash: fix broken links --- docs/en/sql-reference/functions/hash-functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 2baee5b892d..d7d3390066b 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -53,7 +53,7 @@ If you want to get the same result as output by the md5sum utility, use lower(he ## sipHash64 -Produces a 64-bit [SipHash](https://131002.net/siphash/) hash value. +Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. ```sql sipHash64(par1,...) @@ -90,7 +90,7 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00 ## sipHash128 -Produces a 128-bit [SipHash](https://131002.net/siphash/) hash value. Differs from [sipHash64](#hash_functions-siphash64) in that the final xor-folding state is done up to 128 bits. +Produces a 128-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. Differs from [sipHash64](#hash_functions-siphash64) in that the final xor-folding state is done up to 128 bits. 
**Syntax** From 2e72e272066a2ab74f3471a3e82f02c01eafee18 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Mon, 23 Jan 2023 15:48:10 +0100 Subject: [PATCH 10/42] common: siphash: add support for custom keys --- src/Common/SipHash.h | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h index 96b095724c2..1d602a3b191 100644 --- a/src/Common/SipHash.h +++ b/src/Common/SipHash.h @@ -78,13 +78,13 @@ private: public: /// Arguments - seed. - SipHash(UInt64 k0 = 0, UInt64 k1 = 0) /// NOLINT + SipHash(UInt64 key0 = 0, UInt64 key1 = 0) /// NOLINT { /// Initialize the state with some random bytes and seed. - v0 = 0x736f6d6570736575ULL ^ k0; - v1 = 0x646f72616e646f6dULL ^ k1; - v2 = 0x6c7967656e657261ULL ^ k0; - v3 = 0x7465646279746573ULL ^ k1; + v0 = 0x736f6d6570736575ULL ^ key0; + v1 = 0x646f72616e646f6dULL ^ key1; + v2 = 0x6c7967656e657261ULL ^ key0; + v3 = 0x7465646279746573ULL ^ key1; cnt = 0; current_word = 0; @@ -216,20 +216,30 @@ inline void sipHash128(const char * data, const size_t size, char * out) hash.get128(out); } -inline UInt128 sipHash128(const char * data, const size_t size) +inline UInt128 sipHash128Keyed(UInt64 key0, UInt64 key1, const char * data, const size_t size) { - SipHash hash; + SipHash hash(key0, key1); hash.update(data, size); return hash.get128(); } -inline UInt64 sipHash64(const char * data, const size_t size) +inline UInt128 sipHash128(const char * data, const size_t size) { - SipHash hash; + return sipHash128Keyed(0, 0, data, size); +} + +inline UInt64 sipHash64Keyed(UInt64 key0, UInt64 key1, const char * data, const size_t size) +{ + SipHash hash(key0, key1); hash.update(data, size); return hash.get64(); } +inline UInt64 sipHash64(const char * data, const size_t size) +{ + return sipHash64Keyed(0, 0, data, size); +} + template UInt64 sipHash64(const T & x) { From 31e4d042f8dfaeeb1c77a741b5f452b971a96b48 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca 
Date: Mon, 23 Jan 2023 15:51:25 +0100 Subject: [PATCH 11/42] function: hashing: add generic support for keyed algos --- src/Functions/FunctionsHashing.h | 181 ++++++++++++++++++------------- 1 file changed, 103 insertions(+), 78 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 239e497c7d6..718cd2cf543 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -896,7 +896,7 @@ private: DECLARE_MULTITARGET_CODE( -template +template class FunctionAnyHash : public IFunction { public: @@ -906,7 +906,7 @@ private: using ToType = typename Impl::ReturnType; template - void executeIntType(const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeIntType(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const { using ColVecType = ColumnVectorOrDecimal; @@ -930,13 +930,13 @@ private: if (std::is_same_v) h = JavaHashImpl::apply(vec_from[i]); else - h = Impl::apply(reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); + h = apply(key, reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); } if constexpr (first) vec_to[i] = h; else - vec_to[i] = Impl::combineHashes(vec_to[i], h); + vec_to[i] = combineHashes(key, vec_to[i], h); } } else if (auto col_from_const = checkAndGetColumnConst(column)) @@ -956,7 +956,7 @@ private: else { for (size_t i = 0; i < size; ++i) - vec_to[i] = Impl::combineHashes(vec_to[i], hash); + vec_to[i] = combineHashes(key, vec_to[i], hash); } } else @@ -965,7 +965,7 @@ private: } template - void executeBigIntType(const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeBigIntType(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const { using ColVecType = ColumnVectorOrDecimal; @@ -975,19 +975,19 @@ private: size_t size = vec_from.size(); for (size_t i = 0; i < size; ++i) { - ToType h = Impl::apply(reinterpret_cast(&vec_from[i]), 
sizeof(vec_from[i])); + ToType h = apply(key, reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); if constexpr (first) vec_to[i] = h; else - vec_to[i] = Impl::combineHashes(vec_to[i], h); + vec_to[i] = combineHashes(key, vec_to[i], h); } } else if (auto col_from_const = checkAndGetColumnConst(column)) { auto value = col_from_const->template getValue(); - ToType h = Impl::apply(reinterpret_cast(&value), sizeof(value)); + ToType h = apply(key, reinterpret_cast(&value), sizeof(value)); size_t size = vec_to.size(); if constexpr (first) @@ -997,7 +997,7 @@ private: else { for (size_t i = 0; i < size; ++i) - vec_to[i] = Impl::combineHashes(vec_to[i], h); + vec_to[i] = combineHashes(key, vec_to[i], h); } } else @@ -1006,21 +1006,21 @@ private: } template - void executeGeneric(const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeGeneric(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const { for (size_t i = 0, size = column->size(); i < size; ++i) { StringRef bytes = column->getDataAt(i); - const ToType h = Impl::apply(bytes.data, bytes.size); + const ToType h = apply(key, bytes.data, bytes.size); if constexpr (first) vec_to[i] = h; else - vec_to[i] = Impl::combineHashes(vec_to[i], h); + vec_to[i] = combineHashes(key, vec_to[i], h); } } template - void executeString(const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeString(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const { if (const ColumnString * col_from = checkAndGetColumn(column)) { @@ -1031,14 +1031,14 @@ private: ColumnString::Offset current_offset = 0; for (size_t i = 0; i < size; ++i) { - const ToType h = Impl::apply( + const ToType h = apply(key, reinterpret_cast(&data[current_offset]), offsets[i] - current_offset - 1); if constexpr (first) vec_to[i] = h; else - vec_to[i] = Impl::combineHashes(vec_to[i], h); + vec_to[i] = combineHashes(key, vec_to[i], h); 
current_offset = offsets[i]; } @@ -1051,17 +1051,17 @@ private: for (size_t i = 0; i < size; ++i) { - const ToType h = Impl::apply(reinterpret_cast(&data[i * n]), n); + const ToType h = apply(key, reinterpret_cast(&data[i * n]), n); if constexpr (first) vec_to[i] = h; else - vec_to[i] = Impl::combineHashes(vec_to[i], h); + vec_to[i] = combineHashes(key, vec_to[i], h); } } else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column)) { String value = col_from_const->getValue(); - const ToType hash = Impl::apply(value.data(), value.size()); + const ToType hash = apply(key, value.data(), value.size()); const size_t size = vec_to.size(); if constexpr (first) @@ -1072,7 +1072,7 @@ private: { for (size_t i = 0; i < size; ++i) { - vec_to[i] = Impl::combineHashes(vec_to[i], hash); + vec_to[i] = combineHashes(key, vec_to[i], hash); } } } @@ -1082,7 +1082,7 @@ private: } template - void executeArray(const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeArray(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to) const { const IDataType * nested_type = typeid_cast(type)->getNestedType().get(); @@ -1094,7 +1094,7 @@ private: typename ColumnVector::Container vec_temp(nested_size); bool nested_is_first = true; - executeForArgument(nested_type, nested_column, vec_temp, nested_is_first); + executeForArgument(key, nested_type, nested_column, vec_temp, nested_is_first); const size_t size = offsets.size(); @@ -1112,10 +1112,10 @@ private: if constexpr (first) vec_to[i] = h; else - vec_to[i] = Impl::combineHashes(vec_to[i], h); + vec_to[i] = combineHashes(key, vec_to[i], h); for (size_t j = current_offset; j < next_offset; ++j) - vec_to[i] = Impl::combineHashes(vec_to[i], vec_temp[j]); + vec_to[i] = combineHashes(key, vec_to[i], vec_temp[j]); current_offset = offsets[i]; } @@ -1124,7 +1124,7 @@ private: { /// NOTE: here, of course, you 
can do without the materialization of the column. ColumnPtr full_column = col_from_const->convertToFullColumn(); - executeArray(type, &*full_column, vec_to); + executeArray(key, type, &*full_column, vec_to); } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", @@ -1132,44 +1132,44 @@ private: } template - void executeAny(const IDataType * from_type, const IColumn * icolumn, typename ColumnVector::Container & vec_to) const + void executeAny(const KeyType & key, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector::Container & vec_to) const { WhichDataType which(from_type); - if (which.isUInt8()) executeIntType(icolumn, vec_to); - else if (which.isUInt16()) executeIntType(icolumn, vec_to); - else if (which.isUInt32()) executeIntType(icolumn, vec_to); - else if (which.isUInt64()) executeIntType(icolumn, vec_to); - else if (which.isUInt128()) executeBigIntType(icolumn, vec_to); - else if (which.isUInt256()) executeBigIntType(icolumn, vec_to); - else if (which.isInt8()) executeIntType(icolumn, vec_to); - else if (which.isInt16()) executeIntType(icolumn, vec_to); - else if (which.isInt32()) executeIntType(icolumn, vec_to); - else if (which.isInt64()) executeIntType(icolumn, vec_to); - else if (which.isInt128()) executeBigIntType(icolumn, vec_to); - else if (which.isInt256()) executeBigIntType(icolumn, vec_to); - else if (which.isUUID()) executeBigIntType(icolumn, vec_to); - else if (which.isIPv4()) executeIntType(icolumn, vec_to); - else if (which.isIPv6()) executeBigIntType(icolumn, vec_to); - else if (which.isEnum8()) executeIntType(icolumn, vec_to); - else if (which.isEnum16()) executeIntType(icolumn, vec_to); - else if (which.isDate()) executeIntType(icolumn, vec_to); - else if (which.isDate32()) executeIntType(icolumn, vec_to); - else if (which.isDateTime()) executeIntType(icolumn, vec_to); + if (which.isUInt8()) executeIntType(key, icolumn, vec_to); + else if (which.isUInt16()) 
executeIntType(key, icolumn, vec_to); + else if (which.isUInt32()) executeIntType(key, icolumn, vec_to); + else if (which.isUInt64()) executeIntType(key, icolumn, vec_to); + else if (which.isUInt128()) executeBigIntType(key, icolumn, vec_to); + else if (which.isUInt256()) executeBigIntType(key, icolumn, vec_to); + else if (which.isInt8()) executeIntType(key, icolumn, vec_to); + else if (which.isInt16()) executeIntType(key, icolumn, vec_to); + else if (which.isInt32()) executeIntType(key, icolumn, vec_to); + else if (which.isInt64()) executeIntType(key, icolumn, vec_to); + else if (which.isInt128()) executeBigIntType(key, icolumn, vec_to); + else if (which.isInt256()) executeBigIntType(key, icolumn, vec_to); + else if (which.isUUID()) executeBigIntType(key, icolumn, vec_to); + else if (which.isIPv4()) executeIntType(key, icolumn, vec_to); + else if (which.isIPv6()) executeBigIntType(key, icolumn, vec_to); + else if (which.isEnum8()) executeIntType(key, icolumn, vec_to); + else if (which.isEnum16()) executeIntType(key, icolumn, vec_to); + else if (which.isDate()) executeIntType(key, icolumn, vec_to); + else if (which.isDate32()) executeIntType(key, icolumn, vec_to); + else if (which.isDateTime()) executeIntType(key, icolumn, vec_to); /// TODO: executeIntType() for Decimal32/64 leads to incompatible result - else if (which.isDecimal32()) executeBigIntType(icolumn, vec_to); - else if (which.isDecimal64()) executeBigIntType(icolumn, vec_to); - else if (which.isDecimal128()) executeBigIntType(icolumn, vec_to); - else if (which.isDecimal256()) executeBigIntType(icolumn, vec_to); - else if (which.isFloat32()) executeIntType(icolumn, vec_to); - else if (which.isFloat64()) executeIntType(icolumn, vec_to); - else if (which.isString()) executeString(icolumn, vec_to); - else if (which.isFixedString()) executeString(icolumn, vec_to); - else if (which.isArray()) executeArray(from_type, icolumn, vec_to); - else executeGeneric(icolumn, vec_to); + else if (which.isDecimal32()) 
executeBigIntType(key, icolumn, vec_to); + else if (which.isDecimal64()) executeBigIntType(key, icolumn, vec_to); + else if (which.isDecimal128()) executeBigIntType(key, icolumn, vec_to); + else if (which.isDecimal256()) executeBigIntType(key, icolumn, vec_to); + else if (which.isFloat32()) executeIntType(key, icolumn, vec_to); + else if (which.isFloat64()) executeIntType(key, icolumn, vec_to); + else if (which.isString()) executeString(key, icolumn, vec_to); + else if (which.isFixedString()) executeString(key, icolumn, vec_to); + else if (which.isArray()) executeArray(key, from_type, icolumn, vec_to); + else executeGeneric(key, icolumn, vec_to); } - void executeForArgument(const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first) const + void executeForArgument(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first) const { /// Flattening of tuples. if (const ColumnTuple * tuple = typeid_cast(column)) @@ -1178,7 +1178,7 @@ private: const DataTypes & tuple_types = typeid_cast(*type).getElements(); size_t tuple_size = tuple_columns.size(); for (size_t i = 0; i < tuple_size; ++i) - executeForArgument(tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first); + executeForArgument(key, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first); } else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData(column)) { @@ -1188,25 +1188,25 @@ private: for (size_t i = 0; i < tuple_size; ++i) { auto tmp = ColumnConst::create(tuple_columns[i], column->size()); - executeForArgument(tuple_types[i].get(), tmp.get(), vec_to, is_first); + executeForArgument(key, tuple_types[i].get(), tmp.get(), vec_to, is_first); } } else if (const auto * map = checkAndGetColumn(column)) { const auto & type_map = assert_cast(*type); - executeForArgument(type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); + 
executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); } else if (const auto * const_map = checkAndGetColumnConstData(column)) { const auto & type_map = assert_cast(*type); - executeForArgument(type_map.getNestedType().get(), const_map->getNestedColumnPtr().get(), vec_to, is_first); + executeForArgument(key, type_map.getNestedType().get(), const_map->getNestedColumnPtr().get(), vec_to, is_first); } else { if (is_first) - executeAny(type, column, vec_to); + executeAny(key, type, column, vec_to); else - executeAny(type, column, vec_to); + executeAny(key, type, column, vec_to); } is_first = false; @@ -1240,17 +1240,29 @@ public: typename ColumnVector::Container & vec_to = col_to->getData(); - if (arguments.empty()) + /// If using a "keyed" algorithm, the first argument is the key and + /// the data starts from the second argument. + /// Otherwise there is no key and all arguments are interpreted as data. + constexpr size_t first_data_argument = Keyed; + + if (arguments.size() <= first_data_argument) { - /// Constant random number from /dev/urandom is used as a hash value of empty list of arguments. + /// Return a fixed random-looking magic number when input is empty vec_to.assign(rows, static_cast(0xe28dbde7fe22e41c)); } - /// The function supports arbitrary number of arguments of arbitrary types. + KeyType key{}; + if constexpr (Keyed) + if (!arguments.empty()) + key = Impl::parseKey(arguments[0]); + /// The function supports arbitrary number of arguments of arbitrary types. 
bool is_first_argument = true; - for (const auto & col : arguments) - executeForArgument(col.type.get(), col.column.get(), vec_to, is_first_argument); + for (size_t i = first_data_argument; i < arguments.size(); ++i) + { + const auto & col = arguments[i]; + executeForArgument(key, col.type.get(), col.column.get(), vec_to, is_first_argument); + } if constexpr (std::is_same_v) /// backward-compatible { @@ -1261,25 +1273,38 @@ public: return col_to; } + + static ToType apply(const KeyType & key, const char * begin, size_t size) + { + if constexpr (Keyed) + return Impl::applyKeyed(key, begin, size); + else + return Impl::apply(begin, size); + } + + static ToType combineHashes(const KeyType & key, ToType h1, ToType h2) + { + if constexpr (Keyed) + return Impl::combineHashesKeyed(key, h1, h2); + else + return Impl::combineHashes(h1, h2); + } }; ) // DECLARE_MULTITARGET_CODE -template -class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash +template +class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash { public: explicit FunctionAnyHash(ContextPtr context) : selector(context) { - selector.registerImplementation>(); + selector.registerImplementation>(); - #if USE_MULTITARGET_CODE - selector.registerImplementation>(); - selector.registerImplementation>(); - #endif +#if USE_MULTITARGET_CODE + selector.registerImplementation>(); + selector.registerImplementation>(); +#endif } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override From 59c17a771914f83ddd16193a0234b7738eea0a7c Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Mon, 23 Jan 2023 15:52:08 +0100 Subject: [PATCH 12/42] functions: hashing: add sipHash{64,128}Keyed --- src/Functions/FunctionsHashing.h | 72 ++++++++++++++++++++++++++ src/Functions/FunctionsHashingMisc.cpp | 2 + 2 files changed, 74 insertions(+) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 
718cd2cf543..031d6e3b586 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -71,6 +71,38 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; } +namespace impl +{ + struct SipHashKey + { + UInt64 key0 = 0; + UInt64 key1 = 0; + }; + + static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key) + { + SipHashKey ret; + + const auto * tuple = checkAndGetColumn(key.column.get()); + if (!tuple) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple"); + + if (tuple->tupleSize() != 2) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64"); + + if (const auto * key0col = checkAndGetColumn(&(tuple->getColumn(0)))) + ret.key0 = key0col->get64(0); + else + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64"); + + if (const auto * key1col = checkAndGetColumn(&(tuple->getColumn(1)))) + ret.key1 = key1col->get64(0); + else + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64"); + + return ret; + } +} /** Hashing functions. 
* @@ -274,6 +306,25 @@ struct SipHash64Impl static constexpr bool use_int_hash_for_pods = false; }; +struct SipHash64KeyedImpl +{ + static constexpr auto name = "sipHash64Keyed"; + using ReturnType = UInt64; + using Key = impl::SipHashKey; + + static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); } + + static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); } + + static UInt64 combineHashesKeyed(const Key & key, UInt64 h1, UInt64 h2) + { + UInt64 hashes[] = {h1, h2}; + return applyKeyed(key, reinterpret_cast(hashes), 2 * sizeof(UInt64)); + } + + static constexpr bool use_int_hash_for_pods = false; +}; + struct SipHash128Impl { static constexpr auto name = "sipHash128"; @@ -293,6 +344,25 @@ struct SipHash128Impl static constexpr bool use_int_hash_for_pods = false; }; +struct SipHash128KeyedImpl +{ + static constexpr auto name = "sipHash128Keyed"; + using ReturnType = UInt128; + using Key = impl::SipHashKey; + + static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); } + + static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash128Keyed(key.key0, key.key1, begin, size); } + + static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2) + { + UInt128 hashes[] = {h1, h2}; + return applyKeyed(key, reinterpret_cast(hashes), 2 * sizeof(UInt128)); + } + + static constexpr bool use_int_hash_for_pods = false; +}; + /** Why we need MurmurHash2? * MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash. * Usually there is no reason to use MurmurHash. 
@@ -1539,6 +1609,7 @@ struct NameIntHash32 { static constexpr auto name = "intHash32"; }; struct NameIntHash64 { static constexpr auto name = "intHash64"; }; using FunctionSipHash64 = FunctionAnyHash; +using FunctionSipHash64Keyed = FunctionAnyHash; using FunctionIntHash32 = FunctionIntHash; using FunctionIntHash64 = FunctionIntHash; #if USE_SSL @@ -1552,6 +1623,7 @@ using FunctionSHA384 = FunctionStringHashFixedString; using FunctionSHA512 = FunctionStringHashFixedString; #endif using FunctionSipHash128 = FunctionAnyHash; +using FunctionSipHash128Keyed = FunctionAnyHash; using FunctionCityHash64 = FunctionAnyHash; using FunctionFarmFingerprint64 = FunctionAnyHash; using FunctionFarmHash64 = FunctionAnyHash; diff --git a/src/Functions/FunctionsHashingMisc.cpp b/src/Functions/FunctionsHashingMisc.cpp index b33d9366094..2a705e87a1e 100644 --- a/src/Functions/FunctionsHashingMisc.cpp +++ b/src/Functions/FunctionsHashingMisc.cpp @@ -12,7 +12,9 @@ namespace DB REGISTER_FUNCTION(Hashing) { factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); From 464ecf50efb76d301eb16650a8080a7529082430 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Wed, 25 Jan 2023 15:05:09 +0100 Subject: [PATCH 13/42] doc: functions: hash: add sipHash{64,128}Keyed --- .../sql-reference/functions/hash-functions.md | 67 +++++++++++++++++++ ...new_functions_must_be_documented.reference | 2 + 2 files changed, 69 insertions(+) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index d7d3390066b..937390dbe8b 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -88,6 +88,38 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00 └──────────────────────┴────────┘ ``` +## sipHash64Keyed + +Produces a 
64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. Differs from [sipHash64](#hash_functions-siphash64) in that it takes the key as an argument instead of using a fixed value. + +**Syntax** + +```sql +sipHash64Keyed((k0, k1), par1,...) +``` + +**Arguments** + +Same as [sipHash64](#hash_functions-siphash64), but the first argument is a tuple of two UInt64 values representing the key. + +**Returned value** + +A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. + +**Example** + +Query: + +```sql +SELECT sipHash64Keyed((506097522914230528, 1084818905618843912), array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS SipHash, toTypeName(SipHash) AS type; +``` + +```response +┌─────────────SipHash─┬─type───┐ +│ 8017656310194184311 │ UInt64 │ +└─────────────────────┴────────┘ +``` + ## sipHash128 Produces a 128-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. Differs from [sipHash64](#hash_functions-siphash64) in that the final xor-folding state is done up to 128 bits. @@ -124,6 +156,41 @@ Result: └──────────────────────────────────┘ ``` +## sipHash128Keyed + +Produces a 128-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. +Differs from [sipHash128](#hash_functions-siphash128) in that it takes the key as an argument instead of using a fixed value. + +**Syntax** + +```sql +sipHash128Keyed((k0, k1), par1,...) +``` + +**Arguments** + +Same as [sipHash128](#hash_functions-siphash128), but the first argument is a tuple of two UInt64 values representing the key. + +**Returned value** + +A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md).
+ +**Example** + +Query: + +```sql +SELECT hex(sipHash128Keyed((506097522914230528, 1084818905618843912),'foo', '\x01', 3)); +``` + +Result: + +```response +┌─hex(sipHash128Keyed((506097522914230528, 1084818905618843912), 'foo', '', 3))─┐ +│ B8467F65C8B4CFD9A5F8BD733917D9BF │ +└───────────────────────────────────────────────────────────────────────────────┘ +``` + ## cityHash64 Produces a 64-bit [CityHash](https://github.com/google/cityhash) hash value. diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index d225cf5f332..e41249af54c 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -646,7 +646,9 @@ simpleJSONHas sin sinh sipHash128 +sipHash128Keyed sipHash64 +sipHash64Keyed sleep sleepEachRow snowflakeToDateTime From c9411759d208638ab77b2644936f1135bd6c9953 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Mon, 23 Jan 2023 15:53:51 +0100 Subject: [PATCH 14/42] tests: queries: add tests for sipHash{64,128}Keyed --- .../025334_keyed_siphash.reference | 196 +++++++++++++ .../0_stateless/025334_keyed_siphash.sql | 274 ++++++++++++++++++ 2 files changed, 470 insertions(+) create mode 100644 tests/queries/0_stateless/025334_keyed_siphash.reference create mode 100644 tests/queries/0_stateless/025334_keyed_siphash.sql diff --git a/tests/queries/0_stateless/025334_keyed_siphash.reference b/tests/queries/0_stateless/025334_keyed_siphash.reference new file mode 100644 index 00000000000..52e92f37720 --- /dev/null +++ b/tests/queries/0_stateless/025334_keyed_siphash.reference @@ -0,0 +1,196 @@ +726FDB47DD0E0E31 +74F839C593DC67FD +0D6C8009D9A94F5A +85676696D7FB7E2D +CF2794E0277187B7 +18765564CD99A68D +CBC9466E58FEE3CE +AB0200F58B01D137 +93F5F5799A932462 +9E0082DF0BA9E4B0 +7A5DBBC594DDB9F3 +F4B32F46226BADA7 +751E8FBC860EE5FB 
+14EA5627C0843D90 +F723CA908E7AF2EE +A129CA6149BE45E5 +3F2ACC7F57C29BDB +699AE9F52CBE4794 +4BC1B3F0968DD39C +BB6DC91DA77961BD +BED65CF21AA2EE98 +D0F2CBB02E3B67C7 +93536795E3A33E88 +A80C038CCD5CCEC8 +B8AD50C6F649AF94 +BCE192DE8A85B8EA +17D835B85BBB15F3 +2F2E6163076BCFAD +DE4DAAACA71DC9A5 +A6A2506687956571 +AD87A3535C49EF28 +32D892FAD841C342 +7127512F72F27CCE +A7F32346F95978E3 +12E0B01ABB051238 +15E034D40FA197AE +314DFFBE0815A3B4 +027990F029623981 +CADCD4E59EF40C4D +9ABFD8766A33735C +0E3EA96B5304A7D0 +AD0C42D6FC585992 +187306C89BC215A9 +D4A60ABCF3792B95 +F935451DE4F21DF2 +A9538F0419755787 +DB9ACDDFF56CA510 +D06C98CD5C0975EB +E612A3CB9ECBA951 +C766E62CFCADAF96 +EE64435A9752FE72 +A192D576B245165A +0A8787BF8ECB74B2 +81B3E73D20B49B6F +7FA8220BA3B2ECEA +245731C13CA42499 +B78DBFAF3A8D83BD +EA1AD565322A1A0B +60E61C23A3795013 +6606D7E446282B93 +6CA4ECB15C5F91E1 +9F626DA15C9625F3 +E51B38608EF25F57 +958A324CEB064572 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +E28DBDE7FE22E41C +1CE422FEE7BD8DE20000000000000000 +E28DBDE7FE22E41C +1CE422FEE7BD8DE20000000000000000 diff --git a/tests/queries/0_stateless/025334_keyed_siphash.sql b/tests/queries/0_stateless/025334_keyed_siphash.sql new file mode 100644 index 00000000000..3c41efd7d58 --- /dev/null +++ b/tests/queries/0_stateless/025334_keyed_siphash.sql @@ -0,0 +1,274 @@ +-- Test Vectors from the SipHash reference C implementation: +-- Written in 2012 by +-- Jean-Philippe Aumasson +-- Daniel J. 
Bernstein +-- Released under CC0 + +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + '')); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), 
toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56))); +select 
hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62))); + +-- CH tests 
+select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0)) == sipHash64(char(0)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1)) == sipHash64(char(0, 1)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2)) == sipHash64(char(0, 1, 2)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3)) == sipHash64(char(0, 1, 2, 3)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4)) == sipHash64(char(0, 1, 2, 3, 4)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5)) == sipHash64(char(0, 1, 2, 3, 4, 5)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)) == 
sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)) == sipHash64(char(0, 1, 2, 3, 
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)) == sipHash64(char(0, 1, 2, 3, 4, 5, 
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 
33, 34, 35, 36, 37, 38, 39, 40, 41, 42)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)); 
+select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)); +select 
sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 
54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0)) == sipHash128(char(0)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1)) == sipHash128(char(0, 1)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2)) == sipHash128(char(0, 1, 2)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3)) == sipHash128(char(0, 1, 2, 3)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4)) == sipHash128(char(0, 1, 2, 3, 4)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5)) == sipHash128(char(0, 1, 2, 3, 4, 5)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)) == sipHash128(char(0, 1, 2, 3, 
4, 5, 6, 7, 8, 9, 10, 11, 12, 13)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)) 
== sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)) == sipHash128(char(0, 1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 
37, 38, 39, 40, 41)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); + +select sipHash64Keyed((0, 0), '1'); -- { serverError 48 } +select sipHash128Keyed((0, 0), '1'); -- { serverError 48 } +select sipHash64Keyed(toUInt64(0), '1'); -- { serverError 48 } +select sipHash128Keyed(toUInt64(0), '1'); -- { serverError 48 } + +select hex(sipHash64()); +select hex(sipHash128()); +select hex(sipHash64Keyed()); +select hex(sipHash128Keyed()); From 5edf32192797832c4942c010ea8bd627ea331abc Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 26 Jan 2023 13:58:02 +0100 Subject: [PATCH 15/42] Some docs fixes --- .../sql-reference/functions/hash-functions.md | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 937390dbe8b..ae6cdb7052d 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -45,13 +45,13 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00') Calculates the MD4 from a string and returns the resulting set of bytes as FixedString(16). -## MD5 +## MD5 {#hash_functions-md5} Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16). 
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead. If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))). -## sipHash64 +## sipHash64 {#hash_functions-siphash64} Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. @@ -59,23 +59,24 @@ Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. sipHash64(par1,...) ``` -This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) function. +This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) hash function. -Function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm: +The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: -1. After hashing all the input parameters, the function gets the array of hashes. -2. Function takes the first and the second elements and calculates a hash for the array of them. -3. Then the function takes the hash value, calculated at the previous step, and the third element of the initial hash array, and calculates a hash for the array of them. -4. The previous step is repeated for all the remaining elements of the initial hash array. +1. The first and the second hash value are concatenated to an array which is hashed. +2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. +3. This calculation is repeated for all remaining hash values of the original input. 
**Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters of any of the [supported data types](/docs/en/sql-reference/data-types/index.md). **Returned Value** A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +Note that the calculated hash values may be equal for the same input values of different argument types. This affects for example integer types of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data. + **Example** ```sql @@ -90,7 +91,7 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00 ## sipHash64Keyed -Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. Differs from [sipHash64](#hash_functions-siphash64) in that it takes the key as an argument instead of using a fixed value. +Same as [sipHash64](#hash_functions-siphash64) but additionally takes an explicit key argument instead of using a fixed key. **Syntax** @@ -122,7 +123,7 @@ SELECT sipHash64Keyed((506097522914230528, 1084818905618843912), array('e','x',' ## sipHash128 -Produces a 128-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. Differs from [sipHash64](#hash_functions-siphash64) in that the final xor-folding state is done up to 128 bits. +Like [sipHash64](#hash_functions-siphash64) but produces a 128-bit hash value, i.e. the final xor-folding state is done up to 128 bits. **Syntax** @@ -132,13 +133,11 @@ sipHash128(par1,...) 
**Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +Same as for [sipHash64](#hash_functions-siphash64). **Returned value** -A 128-bit `SipHash` hash value. - -Type: [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). **Example** @@ -158,8 +157,7 @@ Result: ## sipHash128Keyed -Produces a 128-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. -Differs from [sipHash128](#hash_functions-siphash128) in that it takes the key as an argument instead of using a fixed value. +Same as [sipHash128](#hash_functions-siphash128) but additionally takes an explicit key argument instead of using a fixed key. 
**Syntax** From 4adb6288eb0b5ae1b33705624489dbb5436e8475 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Wed, 11 Jan 2023 22:06:02 +0000 Subject: [PATCH 16/42] Add arrayShuffle function --- .../functions/array-functions.md | 25 ++ src/Functions/array/arrayShuffle.cpp | 310 ++++++++++++++++++ .../0_stateless/02523_array_shuffle.reference | 18 + .../0_stateless/02523_array_shuffle.sql | 22 ++ 4 files changed, 375 insertions(+) create mode 100644 src/Functions/array/arrayShuffle.cpp create mode 100644 tests/queries/0_stateless/02523_array_shuffle.reference create mode 100644 tests/queries/0_stateless/02523_array_shuffle.sql diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 9d2f89c1837..dd4b7abfbeb 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1102,6 +1102,31 @@ SELECT arrayReverse([1, 2, 3]) Synonym for [“arrayReverse”](#arrayreverse) +## arrayShuffle(arr [, random_seed]) + +Returns an array of the same size as the original array containing the elements in shuffled order. Elements are reordered in such a way that each possible permutation of those elements has equal probability of appearance. + +**Arguments** + +- `arr` — Input array. [Array](../data-types/array.md). +- `random_seed` — Random seed manual override to produce stable results. Optional. [64 bit integer](../data-types/int-uint.md) + +**Example** + +Query: + +``` sql +SELECT arrayShuffle([1, 2, 3, 4], 41) +``` + +Result: + +``` text +┌─arrayShuffle([1, 2, 3, 4], 41)─┐ +│ [3,2,1,4] │ +└────────────────────────────────┘ +``` + ## arrayFlatten Converts an array of arrays to a flat array. 
diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp new file mode 100644 index 00000000000..00f8727db8d --- /dev/null +++ b/src/Functions/array/arrayShuffle.cpp @@ -0,0 +1,310 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +/** Shuffle array elements; the result has the same array type as the input. + * arrayShuffle(arr) - the generator is seeded from randomSeed() + * arrayShuffle(arr, seed) - the generator is seeded with the given integer (row 0 of the seed column) + */ +class FunctionArrayShuffle : public IFunction +{ +public: + static constexpr auto name = "arrayShuffle"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + bool isVariadic() const override { return true; } /* 1 or 2 arguments, validated in getReturnTypeImpl */ + size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() > 2 || arguments.empty()) + { + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs 1..2 arguments; passed {}.", getName(), arguments.size()); + } + + const DataTypeArray * array_type = checkAndGetDataType(arguments[0].get()); + if (!array_type) + throw Exception("Argument for function " + getName() + " must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (arguments.size() == 2) /* the optional seed must be a signed or unsigned integer */ + { + WhichDataType which(arguments[1]); + if (!which.isUInt() && !which.isInt()) + throw Exception{ + "Illegal type " + arguments[1]->getName() + " of argument of function " + getName() + " (must be UInt or Int)", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + } + + return arguments[0]; /* the result type is the type of the input array */ + } + + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + 
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override; + +private: /* each helper returns false when src_data is not of the column kind it handles */ + template + static bool executeNumber(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data, pcg64_fast &); + static bool executeFixedString(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data, pcg64_fast & rng); + static bool executeString(const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, IColumn & res_data, pcg64_fast & rng); + static bool executeGeneric(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data, pcg64_fast &); +}; + +ColumnPtr FunctionArrayShuffle::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const +{ + const ColumnArray * array = checkAndGetColumn(arguments[0].column.get()); + if (!array) + throw Exception( + "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + + auto res_ptr = array->cloneEmpty(); + ColumnArray & res = assert_cast(*res_ptr); + res.getOffsetsPtr() = array->getOffsetsPtr(); /* the result reuses the offsets of the source array column */ + + const IColumn & src_data = array->getData(); + const ColumnArray::Offsets & offsets = array->getOffsets(); + + IColumn & res_data = res.getData(); + + const ColumnNullable * src_nullable_col = typeid_cast(&src_data); /* null unless the nested column is Nullable */ + ColumnNullable * res_nullable_col = typeid_cast(&res_data); + + const IColumn * src_inner_col = src_nullable_col ? &src_nullable_col->getNestedColumn() : &src_data; + IColumn * res_inner_col = res_nullable_col ? 
&res_nullable_col->getNestedColumn() : &res_data; + + const auto seed = [&]() -> uint64_t + { + if (arguments.size() == 1) + return randomSeed(); + const auto * val = arguments[1].column.get(); + return val->getUInt(0); + }(); + pcg64_fast rng(seed); + + false // NOLINT + || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) + || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) + || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) + || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) + || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) + || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) + || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) + || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) + || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) + || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) + || executeString(*src_inner_col, offsets, *res_inner_col, rng) + || executeFixedString(*src_inner_col, offsets, *res_inner_col, rng) + || executeGeneric(*src_inner_col, offsets, *res_inner_col, rng); + + if (src_nullable_col) + { + rng.seed(seed); + if (!executeNumber(src_nullable_col->getNullMapColumn(), offsets, res_nullable_col->getNullMapColumn(), rng)) + throw Exception( + "Illegal column " + src_nullable_col->getNullMapColumn().getName() + " of null map of the first argument of function " + + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + + return res_ptr; +} + +bool FunctionArrayShuffle::executeGeneric(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data, pcg64_fast & rng) +{ /* Last alternative tried by executeImpl; always returns true. Shuffles each array through a per-array index permutation. */ + size_t size = src_data.size(); /* one insertFrom per nested element, so reserve by element count, not by number of arrays */ + res_data.reserve(size); + + IColumn::Permutation permutation; + ColumnArray::Offset prev_off = 0; + for (size_t i = 0; i < src_offsets.size(); ++i) + { + ColumnArray::Offset off = src_offsets[i]; + size_t count = off - prev_off; + + permutation.resize(count); + for (size_t idx = 0; idx < count; ++idx) + 
permutation[idx] = idx; + + std::shuffle(std::begin(permutation), std::end(permutation), rng); + + for (size_t unshuffled_idx = 0; unshuffled_idx != count; ++unshuffled_idx) + { + auto shuffled_idx = permutation[unshuffled_idx]; + res_data.insertFrom(src_data, prev_off + shuffled_idx); /* permutation holds 0..count-1 relative to this array; src_data is addressed through the offsets, so rebase by prev_off */ + } + + prev_off = src_offsets[i]; + } + + return true; +} + +template +bool FunctionArrayShuffle::executeNumber(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data, pcg64_fast & rng) +{ + if (const ColumnVector * src_data_concrete = checkAndGetColumn>(&src_data)) + { + const PaddedPODArray & src_vec = src_data_concrete->getData(); + PaddedPODArray & res_vec = typeid_cast &>(res_data).getData(); + res_vec.resize(src_data.size()); + + ColumnArray::Offset prev_off = 0; + for (size_t i = 0; i < src_offsets.size(); ++i) + { + ColumnArray::Offset off = src_offsets[i]; + + // [prev_off, off) + const auto * src = &src_vec[prev_off]; + const auto * src_end = &src_vec[off]; + + if (src == src_end) + continue; + + auto * dst = &res_vec[prev_off]; + + size_t count = off - prev_off; + + memcpy(dst, src, count * sizeof(T)); + std::shuffle(dst, dst + count, rng); + + prev_off = off; + } + + return true; + } + else + return false; +} + +bool FunctionArrayShuffle::executeFixedString(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data, pcg64_fast & rng) +{ + if (const ColumnFixedString * src_data_concrete = checkAndGetColumn(&src_data)) + { + const size_t n = src_data_concrete->getN(); + const ColumnFixedString::Chars & src_data_chars = src_data_concrete->getChars(); + ColumnFixedString::Chars & res_chars = typeid_cast(res_data).getChars(); + res_chars.resize(src_data_chars.size()); + + IColumn::Permutation permutation; + + ColumnArray::Offset prev_off = 0; + for (size_t i = 0; i < src_offsets.size(); ++i) + { + ColumnArray::Offset off = src_offsets[i]; + + const UInt8 * src = &src_data_chars[prev_off * n]; + size_t count = off - prev_off; + 
+ if (count == 0) + continue; + + UInt8 * dst = &res_chars[prev_off * n]; + + + permutation.resize(count); + for (size_t idx = 0; idx < count; ++idx) + permutation[idx] = idx; + + std::shuffle(std::begin(permutation), std::end(permutation), rng); + + for (size_t unshuffled_idx = 0; unshuffled_idx != count; ++unshuffled_idx) + { + auto shuffled_idx = permutation[unshuffled_idx]; + memcpy(dst + unshuffled_idx * n, src + shuffled_idx * n, n); + } + + prev_off = off; + } + return true; + } + else + return false; +} + +bool FunctionArrayShuffle::executeString(const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, IColumn & res_data, pcg64_fast & rng) +{ + if (const ColumnString * src_data_concrete = checkAndGetColumn(&src_data)) + { + const ColumnString::Offsets & src_string_offsets = src_data_concrete->getOffsets(); + ColumnString::Offsets & res_string_offsets = typeid_cast(res_data).getOffsets(); + + const ColumnString::Chars & src_data_chars = src_data_concrete->getChars(); + ColumnString::Chars & res_chars = typeid_cast(res_data).getChars(); + + res_string_offsets.resize(src_string_offsets.size()); + res_chars.resize(src_data_chars.size()); + + IColumn::Permutation permutation; + + ColumnArray::Offset arr_prev_off = 0; + ColumnString::Offset string_prev_off = 0; + + for (size_t i = 0; i < src_array_offsets.size(); ++i) + { + ColumnArray::Offset arr_off = src_array_offsets[i]; + + if (arr_off != arr_prev_off) + { + size_t string_count = arr_off - arr_prev_off; + + permutation.resize(string_count); + for (size_t idx = 0; idx < string_count; ++idx) + permutation[idx] = idx; + + std::shuffle(std::begin(permutation), std::end(permutation), rng); + + for (size_t unshuffled_idx = 0; unshuffled_idx < string_count; ++unshuffled_idx) + { + auto shuffled_idx = permutation[unshuffled_idx]; + + auto src_pos = src_string_offsets[arr_prev_off + shuffled_idx - 1]; + + size_t string_size = src_string_offsets[arr_prev_off + shuffled_idx] - src_pos; + + 
memcpySmallAllowReadWriteOverflow15(&res_chars[string_prev_off], &src_data_chars[src_pos], string_size); + + string_prev_off += string_size; + res_string_offsets[arr_prev_off + unshuffled_idx] = string_prev_off; + } + } + + arr_prev_off = arr_off; + } + + return true; + } + else + return false; +} + +REGISTER_FUNCTION(ArrayShuffle) +{ + factory.registerFunction(); +} + +} diff --git a/tests/queries/0_stateless/02523_array_shuffle.reference b/tests/queries/0_stateless/02523_array_shuffle.reference new file mode 100644 index 00000000000..a84be39b50a --- /dev/null +++ b/tests/queries/0_stateless/02523_array_shuffle.reference @@ -0,0 +1,18 @@ +[] +[] +[9223372036854775808] +[9223372036854775808] +[10,9,4,2,5,6,7,1,8,3] +[10.1,9,4,2,5,6,7,1,8,3] +[9223372036854775808,9,4,2,5,6,7,1,8,3] +[NULL,9,4,2,5,6,7,1,8,3] +['789','123','ABC','000','456'] +['789','123','ABC',NULL,'456'] +['imposter','storage','sensation','uniform','tiger','terminal'] +[NULL,'storage','sensation','uniform','tiger','terminal'] +[NULL] +[NULL,NULL] +[[10,20,30,40],[1,2,3,4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64],[-1,-2,-3,-4]] +[[10,20,30,40],[1,2,3,4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64],[NULL,-2,-3,-4]] +[10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,5,48,67,90,20,27,38,19,54,21,83,84,1,22,56,81,91,77,36,63,33,39,24,40,4,99,14,23,94,29,26,96,2,28,31,57,42,88,12,47,58,8,37,82,92,34,6,60,25,43,50,74,70,52,55,62,17,79,65,93,86,7,16,41,59,75,80,45,69,89,85,87,95,64,61,98,49,78,66,15] +[10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,5,48,67,90,20,27,38,19,54,21,83,84,1,22,56,81,91,77,36,63,33,39,24,40,4,99,14,23,94,29,26,96,2,28,31,57,42,88,12,47,58,8,37,82,92,34,6,60,25,43,50,74,70,52,55,62,17,79,65,93,86,7,16,41,59,75,80,45,69,89,85,87,95,64,61,98,49,78,66,15] diff --git a/tests/queries/0_stateless/02523_array_shuffle.sql b/tests/queries/0_stateless/02523_array_shuffle.sql new file mode 100644 index 00000000000..46bb95fdcec --- /dev/null +++ 
b/tests/queries/0_stateless/02523_array_shuffle.sql @@ -0,0 +1,22 @@ +SELECT arrayShuffle([]); +SELECT arrayShuffle([], 0xbad_cafe); +SELECT arrayShuffle([9223372036854775808]); +SELECT arrayShuffle([9223372036854775808], 0xbad_cafe); +SELECT arrayShuffle([1,2,3,4,5,6,7,8,9,10], 0xbad_cafe); +SELECT arrayShuffle([1,2,3,4,5,6,7,8,9,10.1], 0xbad_cafe); +SELECT arrayShuffle([1,2,3,4,5,6,7,8,9,9223372036854775808], 0xbad_cafe); +SELECT arrayShuffle([1,2,3,4,5,6,7,8,9,NULL], 0xbad_cafe); +SELECT arrayShuffle([toFixedString('123', 3), toFixedString('456', 3), toFixedString('789', 3), toFixedString('ABC', 3), toFixedString('000', 3)], 0xbad_cafe); +SELECT arrayShuffle([toFixedString('123', 3), toFixedString('456', 3), toFixedString('789', 3), toFixedString('ABC', 3), NULL], 0xbad_cafe); +SELECT arrayShuffle(['storage','tiger','imposter','terminal','uniform','sensation'], 0xbad_cafe); +SELECT arrayShuffle(['storage','tiger',NULL,'terminal','uniform','sensation'], 0xbad_cafe); +SELECT arrayShuffle([NULL]); +SELECT arrayShuffle([NULL,NULL]); +SELECT arrayShuffle([[1,2,3,4],[-1,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 0xbad_cafe); +SELECT arrayShuffle([[1,2,3,4],[NULL,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 0xbad_cafe); +SELECT arrayShuffle(groupArray(x),0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); +SELECT arrayShuffle(groupArray(toUInt64(x)),0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); +SELECT arrayShuffle(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayShuffle([1], 'a'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayShuffle([1], 1.1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayShuffle([1], 0xcafe, 1); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } \ No newline at end of file From 2355780737e6fa9a753bba2f5235d182c9ddc5cf Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Thu, 12 Jan 2023 20:47:53 +0000 
Subject: [PATCH 17/42] Minor formatting --- .../functions/array-functions.md | 6 ++--- src/Functions/array/arrayShuffle.cpp | 23 +------------------ 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index dd4b7abfbeb..8888a2f9256 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1102,14 +1102,14 @@ SELECT arrayReverse([1, 2, 3]) Synonym for [“arrayReverse”](#arrayreverse) -## arrayShuffle(arr [, random_seed]) +## arrayShuffle(arr[, random_seed]) -Returns an array of the same size as the original array containing the elements in shuffled order. Elements are being reordered in such a way that each possible permutation of those elements has equal probability of appearance. +Returns an array of the same size as the original array containing the elements in shuffled order. Elements are being reordered in such a way that each possible permutation of those elements has equal probability of appearance. **Arguments** - `[arr]` — Input array. [Array](../data-types/array.md). -- 'random_seed` — Random seed manual override to produce stable results. Optional. [64 bit integer](../data-types/int-uint.md) +- 'random_seed` — Random seed manual override to produce stable results. Optional. [64 bit integer](../data-types/int-uint.md). 
**Example** diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index 00f8727db8d..9d2a1c416a2 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -149,15 +149,12 @@ bool FunctionArrayShuffle::executeGeneric(const IColumn & src_data, const Column permutation.resize(count); for (size_t idx = 0; idx < count; ++idx) permutation[idx] = idx; - std::shuffle(std::begin(permutation), std::end(permutation), rng); - for (size_t unshuffled_idx = 0; unshuffled_idx != count; ++unshuffled_idx) { auto shuffled_idx = permutation[unshuffled_idx]; res_data.insertFrom(src_data, shuffled_idx); } - prev_off = src_offsets[i]; } @@ -178,23 +175,18 @@ bool FunctionArrayShuffle::executeNumber(const IColumn & src_data, const ColumnA { ColumnArray::Offset off = src_offsets[i]; - // [prev_off, off) const auto * src = &src_vec[prev_off]; const auto * src_end = &src_vec[off]; - if (src == src_end) continue; - auto * dst = &res_vec[prev_off]; - size_t count = off - prev_off; - memcpy(dst, src, count * sizeof(T)); + std::shuffle(dst, dst + count, rng); prev_off = off; } - return true; } else @@ -211,7 +203,6 @@ bool FunctionArrayShuffle::executeFixedString(const IColumn & src_data, const Co res_chars.resize(src_data_chars.size()); IColumn::Permutation permutation; - ColumnArray::Offset prev_off = 0; for (size_t i = 0; i < src_offsets.size(); ++i) { @@ -225,11 +216,9 @@ bool FunctionArrayShuffle::executeFixedString(const IColumn & src_data, const Co UInt8 * dst = &res_chars[prev_off * n]; - permutation.resize(count); for (size_t idx = 0; idx < count; ++idx) permutation[idx] = idx; - std::shuffle(std::begin(permutation), std::end(permutation), rng); for (size_t unshuffled_idx = 0; unshuffled_idx != count; ++unshuffled_idx) @@ -237,7 +226,6 @@ bool FunctionArrayShuffle::executeFixedString(const IColumn & src_data, const Co auto shuffled_idx = permutation[unshuffled_idx]; memcpy(dst + unshuffled_idx * n, src + 
shuffled_idx * n, n); } - prev_off = off; } return true; @@ -260,14 +248,11 @@ bool FunctionArrayShuffle::executeString(const IColumn & src_data, const ColumnA res_chars.resize(src_data_chars.size()); IColumn::Permutation permutation; - ColumnArray::Offset arr_prev_off = 0; ColumnString::Offset string_prev_off = 0; - for (size_t i = 0; i < src_array_offsets.size(); ++i) { ColumnArray::Offset arr_off = src_array_offsets[i]; - if (arr_off != arr_prev_off) { size_t string_count = arr_off - arr_prev_off; @@ -275,27 +260,21 @@ bool FunctionArrayShuffle::executeString(const IColumn & src_data, const ColumnA permutation.resize(string_count); for (size_t idx = 0; idx < string_count; ++idx) permutation[idx] = idx; - std::shuffle(std::begin(permutation), std::end(permutation), rng); for (size_t unshuffled_idx = 0; unshuffled_idx < string_count; ++unshuffled_idx) { auto shuffled_idx = permutation[unshuffled_idx]; - auto src_pos = src_string_offsets[arr_prev_off + shuffled_idx - 1]; - size_t string_size = src_string_offsets[arr_prev_off + shuffled_idx] - src_pos; - memcpySmallAllowReadWriteOverflow15(&res_chars[string_prev_off], &src_data_chars[src_pos], string_size); string_prev_off += string_size; res_string_offsets[arr_prev_off + unshuffled_idx] = string_prev_off; } } - arr_prev_off = arr_off; } - return true; } else From bc97dcb763a09e714ec4e8be5c7bb1371c03289c Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Sat, 14 Jan 2023 11:21:12 +0000 Subject: [PATCH 18/42] Fix typo --- docs/en/sql-reference/functions/array-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 8888a2f9256..2c9fc601f06 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1109,7 +1109,7 @@ Returns an array of the same size as the original array containing the elements **Arguments** - `[arr]` — 
Input array. [Array](../data-types/array.md). -- 'random_seed` — Random seed manual override to produce stable results. Optional. [64 bit integer](../data-types/int-uint.md). +- `random_seed` — Random seed manual override to produce stable results. Optional. [64 bit integer](../data-types/int-uint.md). **Example** From 3c360fe96395421972700cb935322da14b587d3a Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Sat, 14 Jan 2023 21:01:40 +0000 Subject: [PATCH 19/42] FIXUP - function is documented test --- .../02415_all_new_functions_must_be_documented.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index d225cf5f332..3a7f3006d62 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -130,6 +130,7 @@ arrayReverse arrayReverseFill arrayReverseSort arrayReverseSplit +arrayShuffle arraySlice arraySort arraySplit From 01624e2f23a41ab52ea9f4a74e4fea1c01ac956d Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Sat, 14 Jan 2023 21:30:20 +0000 Subject: [PATCH 20/42] FIXUP: style --- src/Functions/array/arrayShuffle.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index 9d2a1c416a2..79ffb0f41c5 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -54,9 +54,9 @@ public: { WhichDataType which(arguments[1]); if (!which.isUInt() && !which.isInt()) - throw Exception{ + throw Exception( "Illegal type " + arguments[1]->getName() + " of argument of function " + getName() + " (must be UInt or Int)", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } return arguments[0]; From 8d8d1bb8878122538e77e8329fbdf33163f9d034 Mon 
Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Sun, 15 Jan 2023 10:38:44 +0000 Subject: [PATCH 21/42] FIXUP: make clang-tidy happier --- src/Functions/array/arrayShuffle.cpp | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index 79ffb0f41c5..9d0bf8d0706 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -141,9 +141,8 @@ bool FunctionArrayShuffle::executeGeneric(const IColumn & src_data, const Column IColumn::Permutation permutation; ColumnArray::Offset prev_off = 0; - for (size_t i = 0; i < src_offsets.size(); ++i) + for (auto off: src_offsets) { - ColumnArray::Offset off = src_offsets[i]; size_t count = off - prev_off; permutation.resize(count); @@ -155,7 +154,7 @@ bool FunctionArrayShuffle::executeGeneric(const IColumn & src_data, const Column auto shuffled_idx = permutation[unshuffled_idx]; res_data.insertFrom(src_data, shuffled_idx); } - prev_off = src_offsets[i]; + prev_off = off; } return true; @@ -171,10 +170,8 @@ bool FunctionArrayShuffle::executeNumber(const IColumn & src_data, const ColumnA res_vec.resize(src_data.size()); ColumnArray::Offset prev_off = 0; - for (size_t i = 0; i < src_offsets.size(); ++i) + for (auto off: src_offsets) { - ColumnArray::Offset off = src_offsets[i]; - const auto * src = &src_vec[prev_off]; const auto * src_end = &src_vec[off]; if (src == src_end) @@ -204,10 +201,8 @@ bool FunctionArrayShuffle::executeFixedString(const IColumn & src_data, const Co IColumn::Permutation permutation; ColumnArray::Offset prev_off = 0; - for (size_t i = 0; i < src_offsets.size(); ++i) + for (auto off: src_offsets) { - ColumnArray::Offset off = src_offsets[i]; - const UInt8 * src = &src_data_chars[prev_off * n]; size_t count = off - prev_off; @@ -250,9 +245,8 @@ bool FunctionArrayShuffle::executeString(const IColumn & src_data, const ColumnA IColumn::Permutation permutation; ColumnArray::Offset 
arr_prev_off = 0; ColumnString::Offset string_prev_off = 0; - for (size_t i = 0; i < src_array_offsets.size(); ++i) + for (auto arr_off: src_array_offsets) { - ColumnArray::Offset arr_off = src_array_offsets[i]; if (arr_off != arr_prev_off) { size_t string_count = arr_off - arr_prev_off; From 000c19f05b97df45636a7623ef5ca4d646d5efd8 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Mon, 16 Jan 2023 18:46:02 +0000 Subject: [PATCH 22/42] FIXUP: more tests - array of tuples --- tests/queries/0_stateless/02523_array_shuffle.reference | 2 ++ tests/queries/0_stateless/02523_array_shuffle.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02523_array_shuffle.reference b/tests/queries/0_stateless/02523_array_shuffle.reference index a84be39b50a..a92ad2a05c6 100644 --- a/tests/queries/0_stateless/02523_array_shuffle.reference +++ b/tests/queries/0_stateless/02523_array_shuffle.reference @@ -16,3 +16,5 @@ [[10,20,30,40],[1,2,3,4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64],[NULL,-2,-3,-4]] [10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,5,48,67,90,20,27,38,19,54,21,83,84,1,22,56,81,91,77,36,63,33,39,24,40,4,99,14,23,94,29,26,96,2,28,31,57,42,88,12,47,58,8,37,82,92,34,6,60,25,43,50,74,70,52,55,62,17,79,65,93,86,7,16,41,59,75,80,45,69,89,85,87,95,64,61,98,49,78,66,15] [10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,5,48,67,90,20,27,38,19,54,21,83,84,1,22,56,81,91,77,36,63,33,39,24,40,4,99,14,23,94,29,26,96,2,28,31,57,42,88,12,47,58,8,37,82,92,34,6,60,25,43,50,74,70,52,55,62,17,79,65,93,86,7,16,41,59,75,80,45,69,89,85,87,95,64,61,98,49,78,66,15] +[(3,-3),(1,-1),(99999999,-99999999)] +[(3,'A'),(1,NULL),(2,'a')] diff --git a/tests/queries/0_stateless/02523_array_shuffle.sql b/tests/queries/0_stateless/02523_array_shuffle.sql index 46bb95fdcec..ecbc9e649d4 100644 --- a/tests/queries/0_stateless/02523_array_shuffle.sql +++ b/tests/queries/0_stateless/02523_array_shuffle.sql @@ -16,6 +16,8 @@ SELECT 
arrayShuffle([[1,2,3,4],[-1,-2,-3,-4],[10,20,30,40],[100,200,300,400,500, SELECT arrayShuffle([[1,2,3,4],[NULL,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 0xbad_cafe); SELECT arrayShuffle(groupArray(x),0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); SELECT arrayShuffle(groupArray(toUInt64(x)),0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); +SELECT arrayShuffle([tuple(1, -1), tuple(99999999, -99999999), tuple(3, -3)], 0xbad_cafe); +SELECT arrayShuffle([tuple(1, NULL), tuple(2, 'a'), tuple(3, 'A')], 0xbad_cafe); SELECT arrayShuffle(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT arrayShuffle([1], 'a'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT arrayShuffle([1], 1.1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } From c09a4dd132f1120f85e19ebc692ca02531bfb57b Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Mon, 16 Jan 2023 18:56:47 +0000 Subject: [PATCH 23/42] FIXUP: Docs into code --- .../functions/array-functions.md | 25 ------------------- src/Functions/array/arrayShuffle.cpp | 19 +++++++++++++- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 2c9fc601f06..9d2f89c1837 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1102,31 +1102,6 @@ SELECT arrayReverse([1, 2, 3]) Synonym for [“arrayReverse”](#arrayreverse) -## arrayShuffle(arr[, random_seed]) - -Returns an array of the same size as the original array containing the elements in shuffled order. Elements are being reordered in such a way that each possible permutation of those elements has equal probability of appearance. - -**Arguments** - -- `[arr]` — Input array. [Array](../data-types/array.md). -- `random_seed` — Random seed manual override to produce stable results. Optional. [64 bit integer](../data-types/int-uint.md). 
- -**Example** - -Query: - -``` sql -SELECT arrayShuffle([1, 2, 3, 4], 41) -``` - -Result: - -``` text -┌─arrayShuffle([1, 2, 3, 4], 41)─┐ -│ [3,2,1,4] │ -└────────────────────────────────┘ -``` - ## arrayFlatten Converts an array of arrays to a flat array. diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index 9d0bf8d0706..a0daa9c08ee 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -277,7 +277,24 @@ bool FunctionArrayShuffle::executeString(const IColumn & src_data, const ColumnA REGISTER_FUNCTION(ArrayShuffle) { - factory.registerFunction(); + factory.registerFunction( + { + R"( +Returns an array of the same size as the original array containing the elements in shuffled order. +Elements are being reordered in such a way that each possible permutation of those elements has equal probability of appearance. + +If no seed is provided a random one will be used: +[example:random_seed] + +It is possible to override the seed to produce stable results: +[example:explicit_seed] +)", + Documentation::Examples{ + {"random_seed", "SELECT arrayShuffle([1, 2, 3, 4])"}, + {"explicit_seed", "SELECT arrayShuffle([1, 2, 3, 4], 41)"}}, + Documentation::Categories{"Array"} + }, + FunctionFactory::CaseInsensitive); } } From a65b2cf8615d64de82aa2b3d84522e23f77a8956 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Mon, 16 Jan 2023 19:16:32 +0000 Subject: [PATCH 24/42] FIXUP: Simplify logic by using permute function --- src/Functions/array/arrayShuffle.cpp | 197 +++------------------------ 1 file changed, 16 insertions(+), 181 deletions(-) diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index a0daa9c08ee..8326ec1c196 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -12,6 +12,7 @@ #include #include +#include namespace DB { @@ -68,11 +69,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, 
const DataTypePtr & result_type, size_t) const override; private: - template - static bool executeNumber(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data, pcg64_fast &); - static bool executeFixedString(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data, pcg64_fast & rng); - static bool executeString(const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, IColumn & res_data, pcg64_fast & rng); - static bool executeGeneric(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data, pcg64_fast &); + static ColumnPtr executeGeneric(const ColumnArray & array, ColumnPtr mapped, pcg64_fast & rng); }; ColumnPtr FunctionArrayShuffle::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const @@ -82,21 +79,6 @@ ColumnPtr FunctionArrayShuffle::executeImpl(const ColumnsWithTypeAndName & argum throw Exception( "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - auto res_ptr = array->cloneEmpty(); - ColumnArray & res = assert_cast(*res_ptr); - res.getOffsetsPtr() = array->getOffsetsPtr(); - - const IColumn & src_data = array->getData(); - const ColumnArray::Offsets & offsets = array->getOffsets(); - - IColumn & res_data = res.getData(); - - const ColumnNullable * src_nullable_col = typeid_cast(&src_data); - ColumnNullable * res_nullable_col = typeid_cast(&res_data); - - const IColumn * src_inner_col = src_nullable_col ? &src_nullable_col->getNestedColumn() : &src_data; - IColumn * res_inner_col = res_nullable_col ? 
&res_nullable_col->getNestedColumn() : &res_data; - const auto seed = [&]() -> uint64_t { if (arguments.size() == 1) @@ -106,173 +88,26 @@ ColumnPtr FunctionArrayShuffle::executeImpl(const ColumnsWithTypeAndName & argum }(); pcg64_fast rng(seed); - false // NOLINT - || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) - || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) - || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) - || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) - || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) - || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) - || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) - || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) - || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) - || executeNumber(*src_inner_col, offsets, *res_inner_col, rng) - || executeString(*src_inner_col, offsets, *res_inner_col, rng) - || executeFixedString(*src_inner_col, offsets, *res_inner_col, rng) - || executeGeneric(*src_inner_col, offsets, *res_inner_col, rng); - - if (src_nullable_col) - { - rng.seed(seed); - if (!executeNumber(src_nullable_col->getNullMapColumn(), offsets, res_nullable_col->getNullMapColumn(), rng)) - throw Exception( - "Illegal column " + src_nullable_col->getNullMapColumn().getName() + " of null map of the first argument of function " - + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - - return res_ptr; + return executeGeneric(*array, array->getDataPtr(), rng); } -bool FunctionArrayShuffle::executeGeneric(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data, pcg64_fast & rng) +ColumnPtr FunctionArrayShuffle::executeGeneric(const ColumnArray & array, ColumnPtr /*mapped*/, pcg64_fast & rng) { - size_t size = src_offsets.size(); - res_data.reserve(size); + const ColumnArray::Offsets & offsets = array.getOffsets(); - IColumn::Permutation permutation; - ColumnArray::Offset 
prev_off = 0; - for (auto off: src_offsets) + size_t size = offsets.size(); + size_t nested_size = array.getData().size(); + IColumn::Permutation permutation(nested_size); + std::iota(std::begin(permutation), std::end(permutation), 0); + + ColumnArray::Offset current_offset = 0; + for (size_t i = 0; i < size; ++i) { - size_t count = off - prev_off; - - permutation.resize(count); - for (size_t idx = 0; idx < count; ++idx) - permutation[idx] = idx; - std::shuffle(std::begin(permutation), std::end(permutation), rng); - for (size_t unshuffled_idx = 0; unshuffled_idx != count; ++unshuffled_idx) - { - auto shuffled_idx = permutation[unshuffled_idx]; - res_data.insertFrom(src_data, shuffled_idx); - } - prev_off = off; + auto next_offset = offsets[i]; + std::shuffle(&permutation[current_offset], &permutation[next_offset], rng); + current_offset = next_offset; } - - return true; -} - -template -bool FunctionArrayShuffle::executeNumber(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data, pcg64_fast & rng) -{ - if (const ColumnVector * src_data_concrete = checkAndGetColumn>(&src_data)) - { - const PaddedPODArray & src_vec = src_data_concrete->getData(); - PaddedPODArray & res_vec = typeid_cast &>(res_data).getData(); - res_vec.resize(src_data.size()); - - ColumnArray::Offset prev_off = 0; - for (auto off: src_offsets) - { - const auto * src = &src_vec[prev_off]; - const auto * src_end = &src_vec[off]; - if (src == src_end) - continue; - auto * dst = &res_vec[prev_off]; - size_t count = off - prev_off; - memcpy(dst, src, count * sizeof(T)); - - std::shuffle(dst, dst + count, rng); - - prev_off = off; - } - return true; - } - else - return false; -} - -bool FunctionArrayShuffle::executeFixedString(const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data, pcg64_fast & rng) -{ - if (const ColumnFixedString * src_data_concrete = checkAndGetColumn(&src_data)) - { - const size_t n = src_data_concrete->getN(); - const 
ColumnFixedString::Chars & src_data_chars = src_data_concrete->getChars(); - ColumnFixedString::Chars & res_chars = typeid_cast(res_data).getChars(); - res_chars.resize(src_data_chars.size()); - - IColumn::Permutation permutation; - ColumnArray::Offset prev_off = 0; - for (auto off: src_offsets) - { - const UInt8 * src = &src_data_chars[prev_off * n]; - size_t count = off - prev_off; - - if (count == 0) - continue; - - UInt8 * dst = &res_chars[prev_off * n]; - - permutation.resize(count); - for (size_t idx = 0; idx < count; ++idx) - permutation[idx] = idx; - std::shuffle(std::begin(permutation), std::end(permutation), rng); - - for (size_t unshuffled_idx = 0; unshuffled_idx != count; ++unshuffled_idx) - { - auto shuffled_idx = permutation[unshuffled_idx]; - memcpy(dst + unshuffled_idx * n, src + shuffled_idx * n, n); - } - prev_off = off; - } - return true; - } - else - return false; -} - -bool FunctionArrayShuffle::executeString(const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, IColumn & res_data, pcg64_fast & rng) -{ - if (const ColumnString * src_data_concrete = checkAndGetColumn(&src_data)) - { - const ColumnString::Offsets & src_string_offsets = src_data_concrete->getOffsets(); - ColumnString::Offsets & res_string_offsets = typeid_cast(res_data).getOffsets(); - - const ColumnString::Chars & src_data_chars = src_data_concrete->getChars(); - ColumnString::Chars & res_chars = typeid_cast(res_data).getChars(); - - res_string_offsets.resize(src_string_offsets.size()); - res_chars.resize(src_data_chars.size()); - - IColumn::Permutation permutation; - ColumnArray::Offset arr_prev_off = 0; - ColumnString::Offset string_prev_off = 0; - for (auto arr_off: src_array_offsets) - { - if (arr_off != arr_prev_off) - { - size_t string_count = arr_off - arr_prev_off; - - permutation.resize(string_count); - for (size_t idx = 0; idx < string_count; ++idx) - permutation[idx] = idx; - std::shuffle(std::begin(permutation), std::end(permutation), rng); - - for 
(size_t unshuffled_idx = 0; unshuffled_idx < string_count; ++unshuffled_idx) - { - auto shuffled_idx = permutation[unshuffled_idx]; - auto src_pos = src_string_offsets[arr_prev_off + shuffled_idx - 1]; - size_t string_size = src_string_offsets[arr_prev_off + shuffled_idx] - src_pos; - memcpySmallAllowReadWriteOverflow15(&res_chars[string_prev_off], &src_data_chars[src_pos], string_size); - - string_prev_off += string_size; - res_string_offsets[arr_prev_off + unshuffled_idx] = string_prev_off; - } - } - arr_prev_off = arr_off; - } - return true; - } - else - return false; + return ColumnArray::create(array.getData().permute(permutation, 0), array.getOffsetsPtr()); } REGISTER_FUNCTION(ArrayShuffle) From b9bd0ed4f6985c7d5e941f854d4bafc2c6010249 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Tue, 17 Jan 2023 19:51:29 +0000 Subject: [PATCH 25/42] FIXUP: after in-code documentation update --- .../02415_all_new_functions_must_be_documented.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 3a7f3006d62..d225cf5f332 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -130,7 +130,6 @@ arrayReverse arrayReverseFill arrayReverseSort arrayReverseSplit -arrayShuffle arraySlice arraySort arraySplit From 09789b027f3ccaf09566a34f1780427bf0f5aa99 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Tue, 17 Jan 2023 19:51:53 +0000 Subject: [PATCH 26/42] FIXUP: PR comments --- src/Functions/array/arrayShuffle.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index 8326ec1c196..3941eb7271d 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ 
-38,18 +38,19 @@ public: String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() > 2 || arguments.empty()) { throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs 1..2 arguments; passed {}.", getName(), arguments.size()); + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function '{}' needs 1 or 2 arguments, passed {}.", getName(), arguments.size()); } const DataTypeArray * array_type = checkAndGetDataType(arguments[0].get()); if (!array_type) - throw Exception("Argument for function " + getName() + " must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument of function '{}' must be array", getName()); if (arguments.size() == 2) { From 31eb936457902c91cdf1beb2273e667a9a1f7b4c Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Fri, 20 Jan 2023 18:40:00 +0000 Subject: [PATCH 27/42] Added Fisher-Yates shuffle and partial-shuffle --- src/Common/shuffle.h | 46 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 src/Common/shuffle.h diff --git a/src/Common/shuffle.h b/src/Common/shuffle.h new file mode 100644 index 00000000000..f2477db0352 --- /dev/null +++ b/src/Common/shuffle.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include + +/* Reorders the elements in the given range [first, last) such that each + * possible permutation of those elements has equal probability of appearance. 
+ */ +template +void shuffle(Iter first, Iter last, Rng && rng) +{ + using diff_t = typename std::iterator_traits::difference_type; + using distr_t = std::uniform_int_distribution; + using param_t = typename distr_t::param_type; + distr_t d; + diff_t n = last - first; + for (ssize_t i = 0; i < n - 1; ++i) + { + using std::swap; + auto j = d(rng, param_t(i, n - 1)); + swap(first[i], first[j]); + } +} + + +/* Partially shuffle elements in range [first, last) in such a way that + * [first, first + limit) is a random subset of the original range. + * [first + limit, last) shall contain the elements not in [first, first + limit) + * in undefined order. + */ +template +void partial_shuffle(Iter first, Iter last, size_t limit, Rng && rng) +{ + using diff_t = typename std::iterator_traits::difference_type; + using distr_t = std::uniform_int_distribution; + using param_t = typename distr_t::param_type; + distr_t d; + diff_t n = last - first; + for (size_t i = 0; i < limit; ++i) + { + using std::swap; + auto j = d(rng, param_t(i, n - 1)); + swap(first[i], first[j]); + } +} From a8b78abc543863ee404395c9b24145a28c6aee57 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Fri, 20 Jan 2023 20:39:33 +0000 Subject: [PATCH 28/42] Added arrayPartialShuffle function --- src/Functions/array/arrayShuffle.cpp | 125 ++++++++++++++---- .../0_stateless/02523_array_shuffle.reference | 42 ++++++ .../0_stateless/02523_array_shuffle.sql | 42 ++++++ 3 files changed, 186 insertions(+), 23 deletions(-) diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index 3941eb7271d..47608a8524e 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -6,11 +6,13 @@ #include #include #include -#include #include #include +#include #include +#include + #include #include @@ -28,52 +30,83 @@ namespace ErrorCodes * arrayShuffle(arr) * arrayShuffle(arr, seed) */ -class FunctionArrayShuffle : public IFunction +struct 
FunctionArrayShuffleTraits +{ + static constexpr auto name = "arrayShuffle"; + static constexpr auto has_limit = false; // Permute the whole array + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } + static constexpr auto max_num_params = 2; // array[, seed] + static constexpr auto seed_param_idx = 1; +}; + +/** Partial shuffle array elements + * arrayPartialShuffle(arr) + * arrayPartialShuffle(arr, limit) + * arrayPartialShuffle(arr, limit, seed) + */ +struct FunctionArrayPartialShuffleTraits +{ + static constexpr auto name = "arrayPartialShuffle"; + static constexpr auto has_limit = true; + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1, 2}; } + static constexpr auto max_num_params = 3; // array[, limit[, seed]] + static constexpr auto seed_param_idx = 2; +}; + +template +class FunctionArrayShuffleImpl : public IFunction { public: - static constexpr auto name = "arrayShuffle"; - - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static constexpr auto name = Traits::name; String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return Traits::getArgumentsThatAreAlwaysConstant(); } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + static FunctionPtr create(ContextPtr) { return std::make_shared>(); } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (arguments.size() > 2 || arguments.empty()) + if (arguments.size() > Traits::max_num_params || arguments.empty()) { throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function '{}' needs 1 or 2 
arguments, passed {}.", getName(), arguments.size()); + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function '{}' needs from 1 to {} arguments, passed {}.", + getName(), + Traits::max_num_params, + arguments.size()); } const DataTypeArray * array_type = checkAndGetDataType(arguments[0].get()); if (!array_type) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument of function '{}' must be array", getName()); - if (arguments.size() == 2) + auto check_is_integral = [&](auto param_idx) { - WhichDataType which(arguments[1]); + WhichDataType which(arguments[param_idx]); if (!which.isUInt() && !which.isInt()) throw Exception( - "Illegal type " + arguments[1]->getName() + " of argument of function " + getName() + " (must be UInt or Int)", + "Illegal type " + arguments[param_idx]->getName() + " of argument of function " + getName() + " (must be UInt or Int)", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } + }; + + for (size_t idx = 1; idx < arguments.size(); ++idx) + check_is_integral(idx); return arguments[0]; } - bool useDefaultImplementationForConstants() const override { return true; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override; private: - static ColumnPtr executeGeneric(const ColumnArray & array, ColumnPtr mapped, pcg64_fast & rng); + static ColumnPtr executeGeneric(const ColumnArray & array, pcg64_fast & rng, size_t limit); }; -ColumnPtr FunctionArrayShuffle::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const +template +ColumnPtr FunctionArrayShuffleImpl::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const { const ColumnArray * array = checkAndGetColumn(arguments[0].column.get()); if (!array) @@ -82,17 +115,32 @@ ColumnPtr FunctionArrayShuffle::executeImpl(const 
ColumnsWithTypeAndName & argum const auto seed = [&]() -> uint64_t { - if (arguments.size() == 1) + // If present, seed comes as the last argument + if (arguments.size() != Traits::max_num_params) return randomSeed(); - const auto * val = arguments[1].column.get(); + const auto * val = arguments[Traits::seed_param_idx].column.get(); return val->getUInt(0); }(); pcg64_fast rng(seed); - return executeGeneric(*array, array->getDataPtr(), rng); + size_t limit = [&] + { + if constexpr (Traits::has_limit) + { + if (arguments.size() > 1) + { + const auto * val = arguments[1].column.get(); + return val->getUInt(0); + } + } + return static_cast(0); + }(); + + return executeGeneric(*array, rng, limit); } -ColumnPtr FunctionArrayShuffle::executeGeneric(const ColumnArray & array, ColumnPtr /*mapped*/, pcg64_fast & rng) +template +ColumnPtr FunctionArrayShuffleImpl::executeGeneric(const ColumnArray & array, pcg64_fast & rng, size_t limit [[maybe_unused]]) { const ColumnArray::Offsets & offsets = array.getOffsets(); @@ -105,7 +153,15 @@ ColumnPtr FunctionArrayShuffle::executeGeneric(const ColumnArray & array, Column for (size_t i = 0; i < size; ++i) { auto next_offset = offsets[i]; - std::shuffle(&permutation[current_offset], &permutation[next_offset], rng); + if constexpr (Traits::has_limit) + { + if (limit && next_offset > limit) + { + partial_shuffle(&permutation[current_offset], &permutation[next_offset], limit, rng); + break; + } + } + shuffle(&permutation[current_offset], &permutation[next_offset], rng); current_offset = next_offset; } return ColumnArray::create(array.getData().permute(permutation, 0), array.getOffsetsPtr()); @@ -113,7 +169,7 @@ ColumnPtr FunctionArrayShuffle::executeGeneric(const ColumnArray & array, Column REGISTER_FUNCTION(ArrayShuffle) { - factory.registerFunction( + factory.registerFunction>( { R"( Returns an array of the same size as the original array containing the elements in shuffled order. 
@@ -131,6 +187,29 @@ It is possible to override the seed to produce stable results: Documentation::Categories{"Array"} }, FunctionFactory::CaseInsensitive); + factory.registerFunction>( + { + R"( +Returns an array of the same size as the original array where elements in range [0..limit) are a random +subset of the original array. Remaining [limit..n) shall contain the elements not in [0..limit) range in undefined order. +Value of limit shall be in range [0..n]. Values outside of that range are equivalent to performing full arrayShuffle: +[example:no_limit1] +[example:no_limit2] + +If no seed is provided a random one will be used: +[example:random_seed] + +It is possible to override the seed to produce stable results: +[example:explicit_seed] +)", + Documentation::Examples{ + {"no_limit1", "SELECT arrayPartialShuffle([1, 2, 3, 4], 0)"}, + {"no_limit2", "SELECT arrayPartialShuffle([1, 2, 3, 4])"}, + {"random_seed", "SELECT arrayPartialShuffle([1, 2, 3, 4], 2)"}, + {"explicit_seed", "SELECT arrayShuffle([1, 2, 3, 4], 2, 41)"}}, + Documentation::Categories{"Array"} + }, + FunctionFactory::CaseInsensitive); } } diff --git a/tests/queries/0_stateless/02523_array_shuffle.reference b/tests/queries/0_stateless/02523_array_shuffle.reference index a92ad2a05c6..2263f8dc92a 100644 --- a/tests/queries/0_stateless/02523_array_shuffle.reference +++ b/tests/queries/0_stateless/02523_array_shuffle.reference @@ -18,3 +18,45 @@ [10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,5,48,67,90,20,27,38,19,54,21,83,84,1,22,56,81,91,77,36,63,33,39,24,40,4,99,14,23,94,29,26,96,2,28,31,57,42,88,12,47,58,8,37,82,92,34,6,60,25,43,50,74,70,52,55,62,17,79,65,93,86,7,16,41,59,75,80,45,69,89,85,87,95,64,61,98,49,78,66,15] [(3,-3),(1,-1),(99999999,-99999999)] [(3,'A'),(1,NULL),(2,'a')] +[] +[] +[] +[9223372036854775808] +[9223372036854775808] +[9223372036854775808] +[10,9,4,2,5,6,7,1,8,3] +[10.1,9,4,2,5,6,7,1,8,3] +[9223372036854775808,9,4,2,5,6,7,1,8,3] +[NULL,9,4,2,5,6,7,1,8,3] 
+['789','123','ABC','000','456'] +['789','123','ABC',NULL,'456'] +['imposter','storage','sensation','uniform','tiger','terminal'] +[NULL,'storage','sensation','uniform','tiger','terminal'] +[NULL] +[NULL,NULL] +[[10,20,30,40],[1,2,3,4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64],[-1,-2,-3,-4]] +[[10,20,30,40],[1,2,3,4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64],[NULL,-2,-3,-4]] +[10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,5,48,67,90,20,27,38,19,54,21,83,84,1,22,56,81,91,77,36,63,33,39,24,40,4,99,14,23,94,29,26,96,2,28,31,57,42,88,12,47,58,8,37,82,92,34,6,60,25,43,50,74,70,52,55,62,17,79,65,93,86,7,16,41,59,75,80,45,69,89,85,87,95,64,61,98,49,78,66,15] +[10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,5,48,67,90,20,27,38,19,54,21,83,84,1,22,56,81,91,77,36,63,33,39,24,40,4,99,14,23,94,29,26,96,2,28,31,57,42,88,12,47,58,8,37,82,92,34,6,60,25,43,50,74,70,52,55,62,17,79,65,93,86,7,16,41,59,75,80,45,69,89,85,87,95,64,61,98,49,78,66,15] +[(3,-3),(1,-1),(99999999,-99999999)] +[(3,'A'),(1,NULL),(2,'a')] +[NULL,NULL,NULL] +[10,2,3,4,5,6,7,8,9,1] +[10,9,3,4,5,6,7,8,2,1] +[10,9,4,2,5,6,7,8,3,1] +[10,9,4,2,5,6,7,1,3,8] +[10,9,4,2,5,6,7,1,8,3] +[10,9,4,2,5,6,7,1,8,3] +[10.1,9,4,2,5,6,7,8,3,1] +[9223372036854775808,9,4,2,5,6,7,8,3,1] +[NULL,9,4,2,5,6,7,8,3,1] +['789','123','ABC','456','000'] +['789','123','ABC','456',NULL] +['imposter','storage','sensation','terminal','uniform','tiger'] +[NULL,'storage','sensation','terminal','uniform','tiger'] +[[10,20,30,40],[1,2,3,4],[-1,-2,-3,-4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]] +[[10,20,30,40],[1,2,3,4],[NULL,-2,-3,-4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]] +[10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,20,21,22,23,24,25,26,27,28,29,17,31,15,33,34,2,36,37,38,39,40,41,42,43,8,45,6,47,48,49,50,16,52,14,54,55,56,57,58,59,60,61,62,63,64,65,66,67,19,69,70,7,1,4,74,75,5,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,12,98,99] 
+[10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,20,21,22,23,24,25,26,27,28,29,17,31,15,33,34,2,36,37,38,39,40,41,42,43,8,45,6,47,48,49,50,16,52,14,54,55,56,57,58,59,60,61,62,63,64,65,66,67,19,69,70,7,1,4,74,75,5,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,12,98,99] +[(3,-3),(1,-1),(99999999,-99999999)] +[(3,'A'),(1,NULL),(2,'a')] diff --git a/tests/queries/0_stateless/02523_array_shuffle.sql b/tests/queries/0_stateless/02523_array_shuffle.sql index ecbc9e649d4..dfeb75e01c5 100644 --- a/tests/queries/0_stateless/02523_array_shuffle.sql +++ b/tests/queries/0_stateless/02523_array_shuffle.sql @@ -18,6 +18,48 @@ SELECT arrayShuffle(groupArray(x),0xbad_cafe) FROM (SELECT number as x from syst SELECT arrayShuffle(groupArray(toUInt64(x)),0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); SELECT arrayShuffle([tuple(1, -1), tuple(99999999, -99999999), tuple(3, -3)], 0xbad_cafe); SELECT arrayShuffle([tuple(1, NULL), tuple(2, 'a'), tuple(3, 'A')], 0xbad_cafe); +SELECT arrayPartialShuffle([]); -- trivial cases (equivalent to arrayShuffle) +SELECT arrayPartialShuffle([], 0); +SELECT arrayPartialShuffle([], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([9223372036854775808]); +SELECT arrayPartialShuffle([9223372036854775808], 0); +SELECT arrayPartialShuffle([9223372036854775808], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10.1], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,9223372036854775808], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,NULL], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([toFixedString('123', 3), toFixedString('456', 3), toFixedString('789', 3), toFixedString('ABC', 3), toFixedString('000', 3)], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([toFixedString('123', 3), toFixedString('456', 3), toFixedString('789', 3), toFixedString('ABC', 3), NULL], 0, 0xbad_cafe); +SELECT 
arrayPartialShuffle(['storage','tiger','imposter','terminal','uniform','sensation'], 0, 0xbad_cafe); +SELECT arrayPartialShuffle(['storage','tiger',NULL,'terminal','uniform','sensation'], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([NULL]); +SELECT arrayPartialShuffle([NULL,NULL]); +SELECT arrayPartialShuffle([[1,2,3,4],[-1,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([[1,2,3,4],[NULL,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 0, 0xbad_cafe); +SELECT arrayPartialShuffle(groupArray(x),0,0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); +SELECT arrayPartialShuffle(groupArray(toUInt64(x)),0,0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); +SELECT arrayPartialShuffle([tuple(1, -1), tuple(99999999, -99999999), tuple(3, -3)], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([tuple(1, NULL), tuple(2, 'a'), tuple(3, 'A')], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([NULL,NULL,NULL], 2); -- other, mostly non-trivial cases +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 1, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 2, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 4, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 8, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 9, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 10, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10.1], 4, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,9223372036854775808], 4, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,NULL], 4, 0xbad_cafe); +SELECT arrayPartialShuffle([toFixedString('123', 3), toFixedString('456', 3), toFixedString('789', 3), toFixedString('ABC', 3), toFixedString('000', 3)], 3, 0xbad_cafe); +SELECT arrayPartialShuffle([toFixedString('123', 3), toFixedString('456', 3), toFixedString('789', 3), 
toFixedString('ABC', 3), NULL], 3, 0xbad_cafe); +SELECT arrayPartialShuffle(['storage','tiger','imposter','terminal','uniform','sensation'], 3, 0xbad_cafe); +SELECT arrayPartialShuffle(['storage','tiger',NULL,'terminal','uniform','sensation'], 3, 0xbad_cafe); +SELECT arrayPartialShuffle([[1,2,3,4],[-1,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 2, 0xbad_cafe); +SELECT arrayPartialShuffle([[1,2,3,4],[NULL,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 2, 0xbad_cafe); +SELECT arrayPartialShuffle(groupArray(x),20,0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); +SELECT arrayPartialShuffle(groupArray(toUInt64(x)),20,0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); +SELECT arrayPartialShuffle([tuple(1, -1), tuple(99999999, -99999999), tuple(3, -3)], 2, 0xbad_cafe); +SELECT arrayPartialShuffle([tuple(1, NULL), tuple(2, 'a'), tuple(3, 'A')], 2, 0xbad_cafe); SELECT arrayShuffle(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT arrayShuffle([1], 'a'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT arrayShuffle([1], 1.1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } From d378e453c14987a1297b5b3f11038ab785b3c751 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Sat, 21 Jan 2023 11:20:56 +0000 Subject: [PATCH 29/42] FIXUP: fix in arrayPartialShuffle --- src/Functions/array/arrayShuffle.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index 47608a8524e..9f95c7f67f1 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -155,13 +155,13 @@ ColumnPtr FunctionArrayShuffleImpl::executeGeneric(const ColumnArray & a auto next_offset = offsets[i]; if constexpr (Traits::has_limit) { - if (limit && next_offset > limit) - { + if (limit) partial_shuffle(&permutation[current_offset], &permutation[next_offset], limit, rng); - 
break; - } + else + shuffle(&permutation[current_offset], &permutation[next_offset], rng); } - shuffle(&permutation[current_offset], &permutation[next_offset], rng); + else + shuffle(&permutation[current_offset], &permutation[next_offset], rng); current_offset = next_offset; } return ColumnArray::create(array.getData().permute(permutation, 0), array.getOffsetsPtr()); From b0ba8c02bef755e22e37c3eca192381607461fe2 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Sat, 21 Jan 2023 11:35:17 +0000 Subject: [PATCH 30/42] FIXUP: Darwin compilation issue --- src/Functions/array/arrayShuffle.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index 9f95c7f67f1..b2432e650c4 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -123,7 +123,7 @@ ColumnPtr FunctionArrayShuffleImpl::executeImpl(const ColumnsWithTypeAnd }(); pcg64_fast rng(seed); - size_t limit = [&] + size_t limit = [&]() -> size_t { if constexpr (Traits::has_limit) { @@ -133,7 +133,7 @@ ColumnPtr FunctionArrayShuffleImpl::executeImpl(const ColumnsWithTypeAnd return val->getUInt(0); } } - return static_cast(0); + return 0; }(); return executeGeneric(*array, rng, limit); From e1d281206f3696768fd89afb31196c522c2104db Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Sat, 21 Jan 2023 14:08:30 +0000 Subject: [PATCH 31/42] Clamp the limit for arrayPartialShuffle --- src/Functions/array/arrayShuffle.cpp | 5 ++++- tests/queries/0_stateless/02523_array_shuffle.reference | 1 + tests/queries/0_stateless/02523_array_shuffle.sql | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index b2432e650c4..b9c16fc9a07 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -156,7 +156,10 @@ ColumnPtr FunctionArrayShuffleImpl::executeGeneric(const ColumnArray & a 
if constexpr (Traits::has_limit) { if (limit) - partial_shuffle(&permutation[current_offset], &permutation[next_offset], limit, rng); + { + const auto effective_limit = std::min(limit, next_offset - current_offset); + partial_shuffle(&permutation[current_offset], &permutation[next_offset], effective_limit, rng); + } else shuffle(&permutation[current_offset], &permutation[next_offset], rng); } diff --git a/tests/queries/0_stateless/02523_array_shuffle.reference b/tests/queries/0_stateless/02523_array_shuffle.reference index 2263f8dc92a..db5d1b06342 100644 --- a/tests/queries/0_stateless/02523_array_shuffle.reference +++ b/tests/queries/0_stateless/02523_array_shuffle.reference @@ -47,6 +47,7 @@ [10,9,4,2,5,6,7,1,3,8] [10,9,4,2,5,6,7,1,8,3] [10,9,4,2,5,6,7,1,8,3] +[10,9,4,2,5,6,7,1,8,3] [10.1,9,4,2,5,6,7,8,3,1] [9223372036854775808,9,4,2,5,6,7,8,3,1] [NULL,9,4,2,5,6,7,8,3,1] diff --git a/tests/queries/0_stateless/02523_array_shuffle.sql b/tests/queries/0_stateless/02523_array_shuffle.sql index dfeb75e01c5..cec0b3fbd29 100644 --- a/tests/queries/0_stateless/02523_array_shuffle.sql +++ b/tests/queries/0_stateless/02523_array_shuffle.sql @@ -47,6 +47,7 @@ SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 4, 0xbad_cafe); SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 8, 0xbad_cafe); SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 9, 0xbad_cafe); SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 10, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 100, 0xbad_cafe); SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10.1], 4, 0xbad_cafe); SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,9223372036854775808], 4, 0xbad_cafe); SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,NULL], 4, 0xbad_cafe); From 3b472eb2dd18ef707fce0dfafaa2790f57158e7b Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Sun, 22 Jan 2023 13:38:48 +0000 Subject: [PATCH 32/42] FIXUP: Darwin compilation issue --- src/Functions/array/arrayShuffle.cpp | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index b9c16fc9a07..6532fb89ae8 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -157,7 +157,7 @@ ColumnPtr FunctionArrayShuffleImpl::executeGeneric(const ColumnArray & a { if (limit) { - const auto effective_limit = std::min(limit, next_offset - current_offset); + const auto effective_limit = std::min(limit, next_offset - current_offset); partial_shuffle(&permutation[current_offset], &permutation[next_offset], effective_limit, rng); } else From 8791a44e01d96ac5bbbacb9097eb49064724e926 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Fri, 27 Jan 2023 18:01:54 +0000 Subject: [PATCH 33/42] FIXUP: Added info and test for materialized array --- src/Functions/array/arrayShuffle.cpp | 18 ++++++++++----- .../0_stateless/02523_array_shuffle.reference | 22 +++++++++++++++++++ .../0_stateless/02523_array_shuffle.sql | 4 ++++ 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index 6532fb89ae8..0c5696d1d37 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -178,6 +178,9 @@ REGISTER_FUNCTION(ArrayShuffle) Returns an array of the same size as the original array containing the elements in shuffled order. Elements are being reordered in such a way that each possible permutation of those elements has equal probability of appearance. 
+Note: this function will not materialize constants: +[example:materialize] + If no seed is provided a random one will be used: [example:random_seed] @@ -186,19 +189,23 @@ It is possible to override the seed to produce stable results: )", Documentation::Examples{ {"random_seed", "SELECT arrayShuffle([1, 2, 3, 4])"}, - {"explicit_seed", "SELECT arrayShuffle([1, 2, 3, 4], 41)"}}, + {"explicit_seed", "SELECT arrayShuffle([1, 2, 3, 4], 41)"}, + {"materialize", "SELECT arrayShuffle(materialize([1, 2, 3]), 42), arrayShuffle([1, 2, 3], 42) FROM numbers(10)"}}, Documentation::Categories{"Array"} }, FunctionFactory::CaseInsensitive); factory.registerFunction>( { R"( -Returns an array of the same size as the original array where elements in range [0..limit) are a random -subset of the original array. Remaining [limit..n) shall contain the elements not in [0..limit) range in undefined order. -Value of limit shall be in range [0..n]. Values outside of that range are equivalent to performing full arrayShuffle: +Returns an array of the same size as the original array where elements in range [1..limit] are a random +subset of the original array. Remaining (limit..n] shall contain the elements not in [1..limit] range in undefined order. +Value of limit shall be in range [1..n]. 
Values outside of that range are equivalent to performing full arrayShuffle: [example:no_limit1] [example:no_limit2] +Note: this function will not materialize constants: +[example:materialize] + If no seed is provided a random one will be used: [example:random_seed] @@ -209,7 +216,8 @@ It is possible to override the seed to produce stable results: {"no_limit1", "SELECT arrayPartialShuffle([1, 2, 3, 4], 0)"}, {"no_limit2", "SELECT arrayPartialShuffle([1, 2, 3, 4])"}, {"random_seed", "SELECT arrayPartialShuffle([1, 2, 3, 4], 2)"}, - {"explicit_seed", "SELECT arrayShuffle([1, 2, 3, 4], 2, 41)"}}, + {"explicit_seed", "SELECT arrayPartialShuffle([1, 2, 3, 4], 2, 41)"}, + {"materialize", "SELECT arrayPartialShuffle(materialize([1, 2, 3, 4]), 2, 42), arrayPartialShuffle([1, 2, 3], 2, 42) FROM numbers(10)"}}, Documentation::Categories{"Array"} }, FunctionFactory::CaseInsensitive); diff --git a/tests/queries/0_stateless/02523_array_shuffle.reference b/tests/queries/0_stateless/02523_array_shuffle.reference index db5d1b06342..0504da61f9d 100644 --- a/tests/queries/0_stateless/02523_array_shuffle.reference +++ b/tests/queries/0_stateless/02523_array_shuffle.reference @@ -3,6 +3,7 @@ [9223372036854775808] [9223372036854775808] [10,9,4,2,5,6,7,1,8,3] +[10,9,4,2,5,6,7,1,8,3] [10.1,9,4,2,5,6,7,1,8,3] [9223372036854775808,9,4,2,5,6,7,1,8,3] [NULL,9,4,2,5,6,7,1,8,3] @@ -13,6 +14,7 @@ [NULL] [NULL,NULL] [[10,20,30,40],[1,2,3,4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64],[-1,-2,-3,-4]] +[[10,20,30,40],[1,2,3,4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64],[-1,-2,-3,-4]] [[10,20,30,40],[1,2,3,4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64],[NULL,-2,-3,-4]] [10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,5,48,67,90,20,27,38,19,54,21,83,84,1,22,56,81,91,77,36,63,33,39,24,40,4,99,14,23,94,29,26,96,2,28,31,57,42,88,12,47,58,8,37,82,92,34,6,60,25,43,50,74,70,52,55,62,17,79,65,93,86,7,16,41,59,75,80,45,69,89,85,87,95,64,61,98,49,78,66,15] 
[10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,5,48,67,90,20,27,38,19,54,21,83,84,1,22,56,81,91,77,36,63,33,39,24,40,4,99,14,23,94,29,26,96,2,28,31,57,42,88,12,47,58,8,37,82,92,34,6,60,25,43,50,74,70,52,55,62,17,79,65,93,86,7,16,41,59,75,80,45,69,89,85,87,95,64,61,98,49,78,66,15] @@ -61,3 +63,23 @@ [10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,20,21,22,23,24,25,26,27,28,29,17,31,15,33,34,2,36,37,38,39,40,41,42,43,8,45,6,47,48,49,50,16,52,14,54,55,56,57,58,59,60,61,62,63,64,65,66,67,19,69,70,7,1,4,74,75,5,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,12,98,99] [(3,-3),(1,-1),(99999999,-99999999)] [(3,'A'),(1,NULL),(2,'a')] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,1,2] +[1,3,2] +[2,1,3] +[3,2,1] +[3,2,1] +[1,2,3] +[3,2,1] +[3,2,1] +[2,1,3] diff --git a/tests/queries/0_stateless/02523_array_shuffle.sql b/tests/queries/0_stateless/02523_array_shuffle.sql index cec0b3fbd29..9138657c842 100644 --- a/tests/queries/0_stateless/02523_array_shuffle.sql +++ b/tests/queries/0_stateless/02523_array_shuffle.sql @@ -3,6 +3,7 @@ SELECT arrayShuffle([], 0xbad_cafe); SELECT arrayShuffle([9223372036854775808]); SELECT arrayShuffle([9223372036854775808], 0xbad_cafe); SELECT arrayShuffle([1,2,3,4,5,6,7,8,9,10], 0xbad_cafe); +SELECT arrayShuffle(materialize([1,2,3,4,5,6,7,8,9,10]), 0xbad_cafe); SELECT arrayShuffle([1,2,3,4,5,6,7,8,9,10.1], 0xbad_cafe); SELECT arrayShuffle([1,2,3,4,5,6,7,8,9,9223372036854775808], 0xbad_cafe); SELECT arrayShuffle([1,2,3,4,5,6,7,8,9,NULL], 0xbad_cafe); @@ -13,6 +14,7 @@ SELECT arrayShuffle(['storage','tiger',NULL,'terminal','uniform','sensation'], 0 SELECT arrayShuffle([NULL]); SELECT arrayShuffle([NULL,NULL]); SELECT arrayShuffle([[1,2,3,4],[-1,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 0xbad_cafe); +SELECT 
arrayShuffle(materialize([[1,2,3,4],[-1,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]]), 0xbad_cafe); SELECT arrayShuffle([[1,2,3,4],[NULL,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 0xbad_cafe); SELECT arrayShuffle(groupArray(x),0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); SELECT arrayShuffle(groupArray(toUInt64(x)),0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); @@ -61,6 +63,8 @@ SELECT arrayPartialShuffle(groupArray(x),20,0xbad_cafe) FROM (SELECT number as x SELECT arrayPartialShuffle(groupArray(toUInt64(x)),20,0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); SELECT arrayPartialShuffle([tuple(1, -1), tuple(99999999, -99999999), tuple(3, -3)], 2, 0xbad_cafe); SELECT arrayPartialShuffle([tuple(1, NULL), tuple(2, 'a'), tuple(3, 'A')], 2, 0xbad_cafe); +SELECT arrayShuffle([1, 2, 3], 42) FROM numbers(10); -- for constant array we do not materialize it and each row gets the same permutation +SELECT arrayShuffle(materialize([1, 2, 3]), 42) FROM numbers(10); SELECT arrayShuffle(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT arrayShuffle([1], 'a'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT arrayShuffle([1], 1.1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } From 9a559b5475f4c7a89b422e0f3c420291b2d048a0 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Fri, 27 Jan 2023 18:03:13 +0000 Subject: [PATCH 34/42] FIXUP: More comments about shuffle --- src/Common/shuffle.h | 8 ++++++++ src/Functions/array/arrayShuffle.cpp | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Common/shuffle.h b/src/Common/shuffle.h index f2477db0352..c21a3e4ea33 100644 --- a/src/Common/shuffle.h +++ b/src/Common/shuffle.h @@ -6,6 +6,10 @@ /* Reorders the elements in the given range [first, last) such that each * possible permutation of those elements has equal probability of appearance.
+ * + * for i ∈ [0, n-2]: + * j ← random ∈ [i, n) + * swap arr[i] ↔ arr[j] */ template void shuffle(Iter first, Iter last, Rng && rng) @@ -28,6 +32,10 @@ void shuffle(Iter first, Iter last, Rng && rng) * [first, first + limit) is a random subset of the original range. * [first + limit, last) shall contain the elements not in [first, first + limit) * in undefined order. + * + * for i ∈ [0, limit): + * j ← random ∈ [i, n) + * swap arr[i] ↔ arr[j] */ template void partial_shuffle(Iter first, Iter last, size_t limit, Rng && rng) diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index 0c5696d1d37..d78024236bf 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -36,7 +36,7 @@ struct FunctionArrayShuffleTraits static constexpr auto has_limit = false; // Permute the whole array static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } static constexpr auto max_num_params = 2; // array[, seed] - static constexpr auto seed_param_idx = 1; + static constexpr auto seed_param_idx = 1; // --------^^^^ }; /** Partial shuffle array elements @@ -50,7 +50,7 @@ struct FunctionArrayPartialShuffleTraits static constexpr auto has_limit = true; static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1, 2}; } static constexpr auto max_num_params = 3; // array[, limit[, seed]] - static constexpr auto seed_param_idx = 2; + static constexpr auto seed_param_idx = 2; // ----------------^^^^ }; template From 67377dc81d2e6c04879b2cd54521b2ca64050002 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Fri, 27 Jan 2023 18:04:31 +0000 Subject: [PATCH 35/42] FIXUP: Add arrayShuffle and arrayPartialShuffle to fuzzer corpus --- tests/fuzz/all.dict | 2 ++ tests/fuzz/dictionaries/functions.dict | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index 7977cb9ed21..17ef7d2ab1e 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -72,6
+72,7 @@ "arrayMap" "arrayMax" "arrayMin" +"arrayPartialShuffle" "arrayPopBack" "arrayPopFront" "arrayProduct" @@ -84,6 +85,7 @@ "arrayReverseFill" "arrayReverseSort" "arrayReverseSplit" +"arrayShuffle" "arraySlice" "arraySort" "arraySplit" diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index a07841f733e..e77a2a779fd 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -872,6 +872,8 @@ "nullIn" "MONTH" "arrayReverse" +"arrayShuffle" +"arrayPartialShuffle" "now64" "DATE" "addressToLine" From df030a56f0b3ad4a8c08371ea04e30cd2e39fdd9 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Fri, 27 Jan 2023 21:27:09 +0000 Subject: [PATCH 36/42] FIXUP: Remove use of legacy exceptions api --- src/Functions/array/arrayShuffle.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index d78024236bf..9cf3ac8f3fe 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ b/src/Functions/array/arrayShuffle.cpp @@ -89,8 +89,10 @@ public: WhichDataType which(arguments[param_idx]); if (!which.isUInt() && !which.isInt()) throw Exception( - "Illegal type " + arguments[param_idx]->getName() + " of argument of function " + getName() + " (must be UInt or Int)", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of arguments of function {} (must be UInt or Int)", + arguments[param_idx]->getName(), + getName()); }; for (size_t idx = 1; idx < arguments.size(); ++idx) @@ -111,7 +113,7 @@ ColumnPtr FunctionArrayShuffleImpl::executeImpl(const ColumnsWithTypeAnd const ColumnArray * array = checkAndGetColumn(arguments[0].column.get()); if (!array) throw Exception( - "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first 
argument of function {}", arguments[0].column->getName(), getName()); const auto seed = [&]() -> uint64_t { @@ -187,12 +189,11 @@ If no seed is provided a random one will be used: It is possible to override the seed to produce stable results: [example:explicit_seed] )", - Documentation::Examples{ + Documentation::Examples{ {"random_seed", "SELECT arrayShuffle([1, 2, 3, 4])"}, {"explicit_seed", "SELECT arrayShuffle([1, 2, 3, 4], 41)"}, {"materialize", "SELECT arrayShuffle(materialize([1, 2, 3]), 42), arrayShuffle([1, 2, 3], 42) FROM numbers(10)"}}, - Documentation::Categories{"Array"} - }, + Documentation::Categories{"Array"}}, FunctionFactory::CaseInsensitive); factory.registerFunction>( { @@ -212,14 +213,14 @@ If no seed is provided a random one will be used: It is possible to override the seed to produce stable results: [example:explicit_seed] )", - Documentation::Examples{ + Documentation::Examples{ {"no_limit1", "SELECT arrayPartialShuffle([1, 2, 3, 4], 0)"}, {"no_limit2", "SELECT arrayPartialShuffle([1, 2, 3, 4])"}, {"random_seed", "SELECT arrayPartialShuffle([1, 2, 3, 4], 2)"}, {"explicit_seed", "SELECT arrayPartialShuffle([1, 2, 3, 4], 2, 41)"}, - {"materialize", "SELECT arrayPartialShuffle(materialize([1, 2, 3, 4]), 2, 42), arrayPartialShuffle([1, 2, 3], 2, 42) FROM numbers(10)"}}, - Documentation::Categories{"Array"} - }, + {"materialize", + "SELECT arrayPartialShuffle(materialize([1, 2, 3, 4]), 2, 42), arrayPartialShuffle([1, 2, 3], 2, 42) FROM numbers(10)"}}, + Documentation::Categories{"Array"}}, FunctionFactory::CaseInsensitive); } From 326f4d2a4fd91098ee609dd34819ee5752603c3e Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 29 Jan 2023 17:50:53 +0100 Subject: [PATCH 37/42] Fix using mutex for increaseProcessSize --- src/Backups/BackupImpl.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 9c50d0ed1ee..e657c1a92c7 100644 --- a/src/Backups/BackupImpl.cpp +++ 
b/src/Backups/BackupImpl.cpp @@ -777,7 +777,10 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry) .base_checksum = 0, }; - increaseProcessedSize(info); + { + std::lock_guard lock{mutex}; + increaseProcessedSize(info); + } /// Empty file, nothing to backup if (info.size == 0 && deduplicate_files) @@ -991,7 +994,6 @@ void BackupImpl::increaseUncompressedSize(const FileInfo & info) void BackupImpl::increaseProcessedSize(UInt64 file_size) const { - std::lock_guard lock{mutex}; processed_files_size += file_size; ++num_processed_files; } From e88aa18a8cff8742e45193ebf263769708cfcff3 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 29 Jan 2023 20:01:30 +0000 Subject: [PATCH 38/42] Fix typo --- src/Functions/formatDateTime.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index ea8a490f279..630add20835 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -478,7 +478,7 @@ private: return res.size(); } - static size_t jodaCentryOfEra(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + static size_t jodaCenturyOfEra(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { auto year = static_cast(ToYearImpl::execute(source, timezone)); year = (year < 0 ? -year : year); @@ -1137,7 +1137,7 @@ public: reserve_size += repetitions <= 3 ? 
2 : 13; break; case 'C': - instructions.emplace_back(std::bind_front(&Action::jodaCentryOfEra, repetitions)); + instructions.emplace_back(std::bind_front(&Action::jodaCenturyOfEra, repetitions)); /// Year range [1900, 2299] reserve_size += std::max(repetitions, 2); break; From 49b7c45686e87daf35677af517fbc94ae2740c97 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 13 Dec 2022 08:01:27 +0000 Subject: [PATCH 39/42] Add note about OpenSSL --- contrib/openssl-cmake/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/contrib/openssl-cmake/CMakeLists.txt b/contrib/openssl-cmake/CMakeLists.txt index dff5dff0936..92739ff3608 100644 --- a/contrib/openssl-cmake/CMakeLists.txt +++ b/contrib/openssl-cmake/CMakeLists.txt @@ -1,3 +1,9 @@ +# Note: ClickHouse uses BoringSSL. The presence of OpenSSL is only due to IBM's port of ClickHouse to s390x. BoringSSL does not support +# s390x, also FIPS validation provided by the OS vendor (Red Hat, Ubuntu) requires (preferably dynamic) linking with OS packages which +# ClickHouse generally avoids. +# +# Furthermore, the in-source OpenSSL dump in this directory exists for development purposes and is not FIPS-compliant. + if(ENABLE_OPENSSL_DYNAMIC OR ENABLE_OPENSSL) set(ENABLE_SSL 1 CACHE INTERNAL "") set(OPENSSL_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/openssl) From b1bc3b6b4346dd2b41ba04854899652abdebc6e0 Mon Sep 17 00:00:00 2001 From: "Mikhail f.
Shiryaev" Date: Mon, 30 Jan 2023 10:27:50 +0100 Subject: [PATCH 40/42] Add troubleshooting for wrong RPM repos signature --- docs/en/operations/_troubleshooting.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/en/operations/_troubleshooting.md b/docs/en/operations/_troubleshooting.md index aed63ec4d0f..a5c07ed18bd 100644 --- a/docs/en/operations/_troubleshooting.md +++ b/docs/en/operations/_troubleshooting.md @@ -56,6 +56,19 @@ sudo apt-get clean sudo apt-get autoclean ``` +### You Can't Get Packages With Yum Because Of Wrong Signature + +Possible issue: the cache is wrong; it may have been broken after the GPG key was updated in 2022-09. + +The solution is to clean out the cache and lib directory for yum: + +``` +sudo find /var/lib/yum/repos/ /var/cache/yum/ -name 'clickhouse-*' -type d -exec rm -rf {} + +sudo rm -f /etc/yum.repos.d/clickhouse.repo +``` + +After that, follow the [install guide](../getting-started/install.md#from-rpm-packages). + ## Connecting to the Server {#troubleshooting-accepts-no-connections} Possible issues: From 692b7ab211af9a9763a1bf6aa5ac42178fc8ecb4 Mon Sep 17 00:00:00 2001 From: Derek Chia Date: Mon, 30 Jan 2023 17:56:17 +0800 Subject: [PATCH 41/42] Update dictionary.md Remove "statement:" from a query --- docs/en/sql-reference/statements/create/dictionary.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/dictionary.md b/docs/en/sql-reference/statements/create/dictionary.md index a470b071971..e789dd9257f 100644 --- a/docs/en/sql-reference/statements/create/dictionary.md +++ b/docs/en/sql-reference/statements/create/dictionary.md @@ -110,7 +110,7 @@ LIFETIME(MIN 0 MAX 1000) ### Create a dictionary from a file available by HTTP(S) ```sql -statement: CREATE DICTIONARY default.taxi_zone_dictionary +CREATE DICTIONARY default.taxi_zone_dictionary ( `LocationID` UInt16 DEFAULT 0, `Borough` String, From 9124e56e2a8baacbcb1a19053e5668969242b7e2 Mon Sep 17 00:00:00 2001 From:
Robert Schulze Date: Mon, 30 Jan 2023 11:47:58 +0000 Subject: [PATCH 42/42] Docs: Remove non existing function --- .../en/engines/table-engines/mergetree-family/invertedindexes.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md index 4d7a0050c76..b59240cb6d2 100644 --- a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md @@ -62,7 +62,6 @@ SELECT * from tab WHERE s IN (‘Hello’, ‘World’); SELECT * from tab WHERE s LIKE ‘%Hello%’; SELECT * from tab WHERE multiSearchAny(s, ‘Hello’, ‘World’); SELECT * from tab WHERE hasToken(s, ‘Hello’); -SELECT * from tab WHERE multiSearchAll(s, [‘Hello’, ‘World’]); ``` The inverted index also works on columns of type `Array(String)`, `Array(FixedString)`, `Map(String)` and `Map(String)`.