Merge branch 'master' into ch_canh_fix_rankcorr

This commit is contained in:
Nikolay Degterinsky 2022-10-03 20:19:01 +02:00 committed by GitHub
commit 58d644441f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
39 changed files with 700 additions and 41 deletions

3
.gitmodules vendored
View File

@ -284,3 +284,6 @@
[submodule "contrib/llvm-project"]
path = contrib/llvm-project
url = https://github.com/ClickHouse/llvm-project.git
[submodule "contrib/corrosion"]
path = contrib/corrosion
url = https://github.com/corrosion-rs/corrosion.git

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.15)
cmake_minimum_required(VERSION 3.20)
project(ClickHouse LANGUAGES C CXX ASM)
@ -557,9 +557,9 @@ macro (clickhouse_add_executable target)
endif()
endmacro()
# With cross-compiling, all targets are built for the target platform which usually different from the host
# platform. This is problematic if a build artifact X (e.g. a file or an executable) is generated by running
# another executable Y previously produced in the build. This is solved by compiling and running Y for/on
# With cross-compiling, all targets are built for the target platform which usually different from the host
# platform. This is problematic if a build artifact X (e.g. a file or an executable) is generated by running
# another executable Y previously produced in the build. This is solved by compiling and running Y for/on
# the host platform. Add target to the list:
# add_native_target(<target> ...)
set_property (GLOBAL PROPERTY NATIVE_BUILD_TARGETS)
@ -574,6 +574,10 @@ include_directories(${ConfigIncludePath})
include (cmake/warnings.cmake)
include (cmake/print_flags.cmake)
if (ENABLE_RUST)
add_subdirectory (rust)
endif()
add_subdirectory (base)
add_subdirectory (src)
add_subdirectory (programs)
@ -584,7 +588,7 @@ include (cmake/sanitize_target_link_libraries.cmake)
# Build native targets if necessary
get_property(NATIVE_BUILD_TARGETS GLOBAL PROPERTY NATIVE_BUILD_TARGETS)
if (NATIVE_BUILD_TARGETS
if (NATIVE_BUILD_TARGETS
AND NOT(
CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME
AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR

View File

@ -176,6 +176,249 @@ void __explicit_bzero_chk(void * buf, size_t len, size_t unused)
}
#include <unistd.h>
#include "syscall.h"
ssize_t copy_file_range(int fd_in, off_t *off_in, int fd_out, off_t *off_out, size_t len, unsigned flags)
{
return syscall(SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len, flags);
}
long splice(int fd_in, off_t *off_in, int fd_out, off_t *off_out, size_t len, unsigned flags)
{
return syscall(SYS_splice, fd_in, off_in, fd_out, off_out, len, flags);
}
#define _BSD_SOURCE
#include <sys/stat.h>
#include <stdint.h>
#if !defined(__aarch64__)
struct statx {
uint32_t stx_mask;
uint32_t stx_blksize;
uint64_t stx_attributes;
uint32_t stx_nlink;
uint32_t stx_uid;
uint32_t stx_gid;
uint16_t stx_mode;
uint16_t pad1;
uint64_t stx_ino;
uint64_t stx_size;
uint64_t stx_blocks;
uint64_t stx_attributes_mask;
struct {
int64_t tv_sec;
uint32_t tv_nsec;
int32_t pad;
} stx_atime, stx_btime, stx_ctime, stx_mtime;
uint32_t stx_rdev_major;
uint32_t stx_rdev_minor;
uint32_t stx_dev_major;
uint32_t stx_dev_minor;
uint64_t spare[14];
};
#endif
int statx(int fd, const char *restrict path, int flag,
unsigned int mask, struct statx *restrict statxbuf)
{
return syscall(SYS_statx, fd, path, flag, mask, statxbuf);
}
#include <syscall.h>
ssize_t getrandom(void *buf, size_t buflen, unsigned flags)
{
/// There was cancellable syscall (syscall_cp), but I don't care too.
return syscall(SYS_getrandom, buf, buflen, flags);
}
#include <errno.h>
#include <limits.h>
#define ALIGN (sizeof(size_t))
#define ONES ((size_t)-1/UCHAR_MAX)
#define HIGHS (ONES * (UCHAR_MAX/2+1))
#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
char *__strchrnul(const char *s, int c)
{
c = (unsigned char)c;
if (!c) return (char *)s + strlen(s);
#ifdef __GNUC__
typedef size_t __attribute__((__may_alias__)) word;
const word *w;
for (; (uintptr_t)s % ALIGN; s++)
if (!*s || *(unsigned char *)s == c) return (char *)s;
size_t k = ONES * c;
for (w = (void *)s; !HASZERO(*w) && !HASZERO(*w^k); w++);
s = (void *)w;
#endif
for (; *s && *(unsigned char *)s != c; s++);
return (char *)s;
}
int __execvpe(const char *file, char *const argv[], char *const envp[])
{
const char *p, *z, *path = getenv("PATH");
size_t l, k;
int seen_eacces = 0;
errno = ENOENT;
if (!*file) return -1;
if (strchr(file, '/'))
return execve(file, argv, envp);
if (!path) path = "/usr/local/bin:/bin:/usr/bin";
k = strnlen(file, NAME_MAX+1);
if (k > NAME_MAX) {
errno = ENAMETOOLONG;
return -1;
}
l = strnlen(path, PATH_MAX-1)+1;
for(p=path; ; p=z) {
char b[l+k+1];
z = __strchrnul(p, ':');
if (z-p >= l) {
if (!*z++) break;
continue;
}
memcpy(b, p, z-p);
b[z-p] = '/';
memcpy(b+(z-p)+(z>p), file, k+1);
execve(b, argv, envp);
switch (errno) {
case EACCES:
seen_eacces = 1;
case ENOENT:
case ENOTDIR:
break;
default:
return -1;
}
if (!*z++) break;
}
if (seen_eacces) errno = EACCES;
return -1;
}
#include "spawn.h"
int posix_spawnp(pid_t *restrict res, const char *restrict file,
const posix_spawn_file_actions_t *fa,
const posix_spawnattr_t *restrict attr,
char *const argv[restrict], char *const envp[restrict])
{
posix_spawnattr_t spawnp_attr = { 0 };
if (attr) spawnp_attr = *attr;
spawnp_attr.__fn = (void *)__execvpe;
return posix_spawn(res, file, fa, &spawnp_attr, argv, envp);
}
#define FDOP_CLOSE 1
#define FDOP_DUP2 2
#define FDOP_OPEN 3
#define FDOP_CHDIR 4
#define FDOP_FCHDIR 5
#define ENOMEM 12
#define EBADF 9
struct fdop {
struct fdop *next, *prev;
int cmd, fd, srcfd, oflag;
mode_t mode;
char path[];
};
int posix_spawn_file_actions_init(posix_spawn_file_actions_t *fa) {
fa->__actions = 0;
return 0;
}
int posix_spawn_file_actions_addchdir_np(posix_spawn_file_actions_t *restrict fa, const char *restrict path) {
struct fdop *op = malloc(sizeof *op + strlen(path) + 1);
if (!op) return ENOMEM;
op->cmd = FDOP_CHDIR;
op->fd = -1;
strcpy(op->path, path);
if ((op->next = fa->__actions)) op->next->prev = op;
op->prev = 0;
fa->__actions = op;
return 0;
}
int posix_spawn_file_actions_addclose(posix_spawn_file_actions_t *fa, int fd) {
if (fd < 0) return EBADF;
struct fdop *op = malloc(sizeof *op);
if (!op) return ENOMEM;
op->cmd = FDOP_CLOSE;
op->fd = fd;
if ((op->next = fa->__actions)) op->next->prev = op;
op->prev = 0;
fa->__actions = op;
return 0;
}
int posix_spawn_file_actions_adddup2(posix_spawn_file_actions_t *fa, int srcfd, int fd) {
if (srcfd < 0 || fd < 0) return EBADF;
struct fdop *op = malloc(sizeof *op);
if (!op) return ENOMEM;
op->cmd = FDOP_DUP2;
op->srcfd = srcfd;
op->fd = fd;
if ((op->next = fa->__actions)) op->next->prev = op;
op->prev = 0;
fa->__actions = op;
return 0;
}
int posix_spawn_file_actions_addfchdir_np(posix_spawn_file_actions_t *fa, int fd) {
if (fd < 0) return EBADF;
struct fdop *op = malloc(sizeof *op);
if (!op) return ENOMEM;
op->cmd = FDOP_FCHDIR;
op->fd = fd;
if ((op->next = fa->__actions)) op->next->prev = op;
op->prev = 0;
fa->__actions = op;
return 0;
}
int posix_spawn_file_actions_addopen(posix_spawn_file_actions_t *restrict fa, int fd, const char *restrict path, int flags, mode_t mode) {
if (fd < 0) return EBADF;
struct fdop *op = malloc(sizeof *op + strlen(path) + 1);
if (!op) return ENOMEM;
op->cmd = FDOP_OPEN;
op->fd = fd;
op->oflag = flags;
op->mode = mode;
strcpy(op->path, path);
if ((op->next = fa->__actions)) op->next->prev = op;
op->prev = 0;
fa->__actions = op;
return 0;
}
int posix_spawn_file_actions_destroy(posix_spawn_file_actions_t *fa) {
struct fdop *op = fa->__actions, *next;
while (op) {
next = op->next;
free(op);
op = next;
}
return 0;
}
#if defined (__cplusplus)
}
#endif

View File

@ -0,0 +1,32 @@
#ifndef _SPAWN_H
#define _SPAWN_H
#ifdef __cplusplus
extern "C" {
#endif
#include <features.h>
typedef struct {
int __flags;
pid_t __pgrp;
sigset_t __def, __mask;
int __prio, __pol;
void *__fn;
char __pad[64-sizeof(void *)];
} posix_spawnattr_t;
typedef struct {
int __pad0[2];
void *__actions;
int __pad[16];
} posix_spawn_file_actions_t;
int posix_spawn(pid_t *__restrict, const char *__restrict, const posix_spawn_file_actions_t *,
const posix_spawnattr_t *__restrict, char *const *__restrict, char *const *__restrict);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -92,6 +92,8 @@ add_contrib (openldap-cmake openldap)
add_contrib (grpc-cmake grpc)
add_contrib (msgpack-c-cmake msgpack-c)
add_contrib (corrosion-cmake corrosion)
if (ENABLE_FUZZING)
add_contrib (libprotobuf-mutator-cmake libprotobuf-mutator)
endif()

1
contrib/corrosion vendored Submodule

@ -0,0 +1 @@
Subproject commit d9dfdefaa3d9ec4ba1245c7070727359c65c7869

View File

@ -0,0 +1,46 @@
if (NOT ENABLE_LIBRARIES)
set(DEFAULT_ENABLE_RUST FALSE)
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64"))
message(STATUS "Rust is not available on aarch64-apple-darwin")
set(DEFAULT_ENABLE_RUST FALSE)
else()
list (APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake")
find_package(Rust)
set(DEFAULT_ENABLE_RUST ${Rust_FOUND})
endif()
option(ENABLE_RUST "Enable rust" ${DEFAULT_ENABLE_RUST})
message(STATUS ${ENABLE_RUST})
if(NOT ENABLE_RUST)
message(STATUS "Not using rust")
return()
endif()
message(STATUS "Checking Rust toolchain for current target")
if(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64")
set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu")
endif()
if(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64")
set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu")
endif()
if((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-apple-darwin")
endif()
if((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-unknown-freebsd")
endif()
if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le")
set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu")
endif()
message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}")
# Define function corrosion_import_crate()
include ("${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake/Corrosion.cmake")

View File

@ -3,6 +3,33 @@
ARG FROM_TAG=latest
FROM clickhouse/test-util:$FROM_TAG
# Rust toolchain and libraries
ENV RUSTUP_HOME=/rust/rustup
ENV CARGO_HOME=/rust/cargo
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
RUN chmod 777 -R /rust
ENV PATH="/rust/cargo/env:${PATH}"
ENV PATH="/rust/cargo/bin:${PATH}"
RUN rustup target add aarch64-unknown-linux-gnu && \
rustup target add x86_64-apple-darwin && \
rustup target add x86_64-unknown-freebsd && \
rustup target add aarch64-apple-darwin && \
rustup target add powerpc64le-unknown-linux-gnu
RUN apt-get install \
gcc-aarch64-linux-gnu \
build-essential \
libc6 \
libc6-dev \
libc6-dev-arm64-cross \
--yes
# Install CMake 3.20+ for Rust compilation
# Used https://askubuntu.com/a/1157132 as reference
RUN apt purge cmake --yes
RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null
RUN apt-add-repository 'deb https://apt.kitware.com/ubuntu/ focal main'
RUN apt update && apt install cmake --yes
ENV CC=clang-${LLVM_VERSION}
ENV CXX=clang++-${LLVM_VERSION}

View File

@ -19,6 +19,12 @@ RUN apt-get update \
pv \
--yes --no-install-recommends
# Install CMake 3.20+ for Rust compilation
RUN apt purge cmake --yes
RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null
RUN apt-add-repository 'deb https://apt.kitware.com/ubuntu/ focal main'
RUN apt update && apt install cmake --yes
RUN pip3 install numpy scipy pandas Jinja2
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz"

View File

@ -35,6 +35,8 @@ RUN apt-get update \
tzdata \
vim \
wget \
rustc \
cargo \
&& pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \
&& apt-get purge --yes python3-dev g++ \
&& apt-get autoremove --yes \

View File

@ -35,12 +35,13 @@ RUN apt-get update -y \
tree \
unixodbc \
wget \
rustc \
cargo \
zstd \
file \
pv \
&& apt-get clean
RUN pip3 install numpy scipy pandas Jinja2
RUN mkdir -p /tmp/clickhouse-odbc-tmp \

View File

@ -6,16 +6,32 @@ sidebar_label: MySQL Interface
# MySQL Interface
ClickHouse supports MySQL wire protocol. It can be enabled by [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting in configuration file:
ClickHouse supports MySQL wire protocol. To enable the MySQL wire protocol, add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
``` xml
<mysql_port>9004</mysql_port>
<clickhouse>
<mysql_port>9004</mysql_port>
</clickhouse>
```
Example of connecting using command-line tool `mysql`:
Startup your ClickHouse server and look for a log message similar to the following that mentions Listening for MySQL compatibility protocol:
```
{} <Information> Application: Listening for MySQL compatibility protocol: 127.0.0.1:9004
```
## Connect mysql to ClickHouse
The following command demonstrates how to connect the MySQL client `mysql` to ClickHouse:
```bash
mysql --protocol tcp -h [hostname] -u [username] -P [port_number] [database_name]
```
For example:
``` bash
$ mysql --protocol tcp -u default -P 9004
$ mysql --protocol tcp -h 127.0.0.1 -u default -P 9004 default
```
Output if a connection succeeded:

View File

@ -10,7 +10,7 @@ Creates new [roles](../../../operations/access-rights.md#role-management). Role
Syntax:
``` sql
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 ...]
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```

View File

@ -13,7 +13,7 @@ Creates a new view. Views can be [normal](#normal-view), [materialized](#materia
Syntax:
``` sql
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ...
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] AS SELECT ...
```
Normal views do not store any data. They just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.

View File

@ -430,9 +430,9 @@ FROM
### Cumulative sum.
```sql
CREATE TABLE events
CREATE TABLE warehouse
(
`metric` String,
`item` String,
`ts` DateTime,
`value` Float
)

View File

@ -11,7 +11,7 @@ sidebar_label: "Роль"
Синтаксис:
```sql
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 ...]
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```
@ -47,4 +47,4 @@ SET ROLE accountant;
SELECT * FROM db.*;
```
<!--hide-->
<!--hide-->

View File

@ -11,7 +11,7 @@ sidebar_label: "Представление"
## Обычные представления {#normal}
``` sql
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ...
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] AS SELECT ...
```
Обычные представления не хранят никаких данных, они выполняют чтение данных из другой таблицы при каждом доступе. Другими словами, обычное представление — это не что иное, как сохраненный запрос. При чтении данных из представления этот сохраненный запрос используется как подзапрос в секции [FROM](../../../sql-reference/statements/select/from.md).

View File

@ -13,7 +13,7 @@ sidebar_label: VIEW
语法:
``` sql
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ...
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] AS SELECT ...
```
普通视图不存储任何数据。 他们只是在每次访问时从另一个表执行读取。换句话说,普通视图只不过是一个保存的查询。 从视图中读取时,此保存的查询用作[FROM](../../../sql-reference/statements/select/from.md)子句中的子查询.

View File

@ -927,7 +927,11 @@ namespace
executable.string(), config.string(), pid_file.string());
if (!user.empty())
command = fmt::format("clickhouse su '{}' {}", user, command);
{
/// sudo respects limits in /etc/security/limits.conf e.g. open files,
/// that's why we are using it instead of the 'clickhouse su' tool.
command = fmt::format("sudo -u '{}' {}", user, command);
}
fmt::print("Will run {}\n", command);
executeScript(command, true);

4
rust/BLAKE3/CMakeLists.txt Executable file
View File

@ -0,0 +1,4 @@
corrosion_import_crate(MANIFEST_PATH Cargo.toml NO_STD)
target_include_directories(_ch_rust_blake3 INTERFACE include)
add_library(ch_rust::blake3 ALIAS _ch_rust_blake3)

92
rust/BLAKE3/Cargo.lock generated Normal file
View File

@ -0,0 +1,92 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "_ch_rust_blake3"
version = "0.1.0"
dependencies = [
"blake3",
"libc",
]
[[package]]
name = "arrayref"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
[[package]]
name = "arrayvec"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "blake3"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "526c210b4520e416420759af363083471656e819a75e831b8d2c9d5a584f2413"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if",
"constant_time_eq",
"digest",
]
[[package]]
name = "cc"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "constant_time_eq"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
[[package]]
name = "digest"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
dependencies = [
"generic-array",
]
[[package]]
name = "generic-array"
version = "0.14.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "libc"
version = "0.2.132"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
[[package]]
name = "typenum"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"

13
rust/BLAKE3/Cargo.toml Normal file
View File

@ -0,0 +1,13 @@
[package]
name = "_ch_rust_blake3"
version = "0.1.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
blake3 = "1.2.0"
libc = "0.2.132"
[lib]
crate-type = ["staticlib"]

View File

@ -0,0 +1,17 @@
#ifndef BLAKE3_H
#define BLAKE3_H
#include <cstdint>
extern "C" {
char *blake3_apply_shim(const char *begin, uint32_t _size, uint8_t *out_char_data);
char *blake3_apply_shim_msan_compat(const char *begin, uint32_t size, uint8_t *out_char_data);
void blake3_free_char_pointer(char *ptr_to_free);
} // extern "C"
#endif /* BLAKE3_H */

55
rust/BLAKE3/src/lib.rs Normal file
View File

@ -0,0 +1,55 @@
extern crate blake3;
extern crate libc;
use std::ffi::{CStr, CString};
use std::os::raw::c_char;
use std::mem;
#[no_mangle]
pub unsafe extern "C" fn blake3_apply_shim(
begin: *const c_char,
_size: u32,
out_char_data: *mut u8,
) -> *mut c_char {
if begin.is_null() {
let err_str = CString::new("input was a null pointer").unwrap();
return err_str.into_raw();
}
let mut hasher = blake3::Hasher::new();
let input_bytes = CStr::from_ptr(begin);
let input_res = input_bytes.to_bytes();
hasher.update(input_res);
let mut reader = hasher.finalize_xof();
reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN));
std::ptr::null_mut()
}
#[no_mangle]
pub unsafe extern "C" fn blake3_apply_shim_msan_compat(
mut begin: *const c_char,
size: u32,
out_char_data: *mut u8,
) -> *mut c_char {
if begin.is_null() {
let err_str = CString::new("input was a null pointer").unwrap();
return err_str.into_raw();
}
libc::memset(out_char_data as *mut libc::c_void, 0, mem::size_of::<u8>());
let mut hasher = blake3::Hasher::new();
let mut vec = Vec::<u8>::new();
for _ in 0..size {
vec.push(*begin as u8);
begin = begin.add(1);
}
let input_res = vec.as_mut_slice();
hasher.update(input_res);
let mut reader = hasher.finalize_xof();
reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN));
std::ptr::null_mut()
}
// Freeing memory according to docs: https://doc.rust-lang.org/std/ffi/struct.CString.html#method.into_raw
#[no_mangle]
pub unsafe extern "C" fn blake3_free_char_pointer(ptr_to_free: *mut c_char) {
std::mem::drop(CString::from_raw(ptr_to_free));
}

1
rust/CMakeLists.txt Normal file
View File

@ -0,0 +1 @@
add_subdirectory (BLAKE3)

View File

@ -279,7 +279,7 @@ void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start
size_t end = start + length;
for (size_t i = start; i < end; ++i)
insertFrom(from, i);
insertFromWithOwnership(from, i);
}
else
{
@ -448,7 +448,7 @@ void ColumnAggregateFunction::insertData(const char * pos, size_t /*length*/)
data.push_back(*reinterpret_cast<const AggregateDataPtr *>(pos));
}
void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size_t n)
{
/// Must create new state of aggregate function and take ownership of it,
/// because ownership of states of aggregate function cannot be shared for individual rows,
@ -458,6 +458,11 @@ void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
insertMergeFrom(from, n);
}
void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
{
insertRangeFrom(from, n, 1);
}
void ColumnAggregateFunction::insertFrom(ConstAggregateDataPtr place)
{
ensureOwnership();

View File

@ -98,6 +98,8 @@ private:
ColumnAggregateFunction(const ColumnAggregateFunction & src_);
void insertFromWithOwnership(const IColumn & from, size_t n);
public:
~ColumnAggregateFunction() override;

View File

@ -261,8 +261,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, fallback_to_stale_replicas_for_distributed_queries, true, "Suppose max_replica_delay_for_distributed_queries is set and all replicas for the queried table are stale. If this setting is enabled, the query will be performed anyway, otherwise the error will be reported.", 0) \
M(UInt64, preferred_max_column_in_block_size_bytes, 0, "Limit on max column size in block while reading. Helps to decrease cache misses count. Should be close to L2 cache size.", 0) \
\
M(UInt64, parts_to_delay_insert, 150, "If the destination table contains at least that many active parts in a single partition, artificially slow down insert into table.", 0) \
M(UInt64, parts_to_throw_insert, 300, "If more than this number active parts in a single partition of the destination table, throw 'Too many parts ...' exception.", 0) \
M(UInt64, parts_to_delay_insert, 0, "If the destination table contains at least that many active parts in a single partition, artificially slow down insert into table.", 0) \
M(UInt64, parts_to_throw_insert, 0, "If more than this number active parts in a single partition of the destination table, throw 'Too many parts ...' exception.", 0) \
M(Bool, insert_distributed_sync, false, "If setting is enabled, insert query into distributed waits until data will be sent to all nodes in cluster.", 0) \
M(UInt64, insert_distributed_timeout, 0, "Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no timeout.", 0) \
M(Int64, distributed_ddl_task_timeout, 180, "Timeout for DDL query responses from all hosts in cluster. If a ddl request has not been performed on all hosts, a response will contain a timeout error and a request will be executed in an async mode. Negative value means infinite. Zero means async mode.", 0) \

View File

@ -33,6 +33,13 @@ list (APPEND PRIVATE_LIBS
divide_impl
)
if (TARGET ch_rust::blake3)
list (APPEND PUBLIC_LIBS
ch_rust::blake3
)
endif()
if (TARGET OpenSSL::Crypto)
list (APPEND PUBLIC_LIBS OpenSSL::Crypto)
endif()

View File

@ -41,5 +41,22 @@ REGISTER_FUNCTION(Hashing)
factory.registerFunction<FunctionXxHash64>();
factory.registerFunction<FunctionWyHash64>();
#if USE_BLAKE3
factory.registerFunction<FunctionBLAKE3>(
{
R"(
Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString.
This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust library.
The function is rather fast and shows approximately two times faster performance compared to SHA-2, while generating hashes of the same length as SHA-256.
It returns a BLAKE3 hash as a byte array with type FixedString(32).
)",
Documentation::Examples{
{"hash", "SELECT hex(blake3('ABC'))"}},
Documentation::Categories{"Hash"}
},
FunctionFactory::CaseSensitive);
#endif
}
}

View File

@ -10,6 +10,10 @@
#include "config_functions.h"
#include "config_core.h"
#if USE_BLAKE3
# include <blake3.h>
#endif
#include <Common/SipHash.h>
#include <Common/typeid_cast.h>
#include <Common/HashTable/Hash.h>
@ -615,6 +619,32 @@ struct ImplXxHash64
static constexpr bool use_int_hash_for_pods = false;
};
#if USE_BLAKE3
struct ImplBLAKE3
{
static constexpr auto name = "blake3";
enum { length = 32 };
static void apply(const char * begin, const size_t size, unsigned char* out_char_data)
{
#if defined(MEMORY_SANITIZER)
auto err_msg = blake3_apply_shim_msan_compat(begin, size, out_char_data);
__msan_unpoison(out_char_data, length);
#else
auto err_msg = blake3_apply_shim(begin, size, out_char_data);
#endif
if (err_msg != nullptr)
{
auto err_st = std::string(err_msg);
blake3_free_char_pointer(err_msg);
throw Exception("Function returned error message: " + std::string(err_msg), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}
};
#endif
template <typename Impl>
class FunctionStringHashFixedString : public IFunction
{
@ -1474,4 +1504,8 @@ using FunctionXxHash64 = FunctionAnyHash<ImplXxHash64>;
using FunctionWyHash64 = FunctionAnyHash<ImplWyHash64>;
#if USE_BLAKE3
using FunctionBLAKE3 = FunctionStringHashFixedString<ImplBLAKE3>;
#endif
}

View File

@ -8,5 +8,6 @@
#cmakedefine01 USE_H3
#cmakedefine01 USE_S2_GEOMETRY
#cmakedefine01 USE_FASTOPS
#cmakedefine01 USE_BLAKE3
#cmakedefine01 USE_NLP
#cmakedefine01 USE_VECTORSCAN

View File

@ -3521,8 +3521,8 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, ContextPtr q
k_inactive = static_cast<ssize_t>(inactive_parts_count_in_partition) - static_cast<ssize_t>(settings->inactive_parts_to_delay_insert);
}
auto parts_to_delay_insert = query_settings.parts_to_delay_insert.changed ? query_settings.parts_to_delay_insert : settings->parts_to_delay_insert;
auto parts_to_throw_insert = query_settings.parts_to_throw_insert.changed ? query_settings.parts_to_throw_insert : settings->parts_to_throw_insert;
auto parts_to_delay_insert = query_settings.parts_to_delay_insert ? query_settings.parts_to_delay_insert : settings->parts_to_delay_insert;
auto parts_to_throw_insert = query_settings.parts_to_throw_insert ? query_settings.parts_to_throw_insert : settings->parts_to_throw_insert;
if (parts_count_in_partition >= parts_to_throw_insert)
{

View File

@ -19,6 +19,9 @@ endif()
if (TARGET ch_contrib::rdkafka)
set(USE_RDKAFKA 1)
endif()
if (TARGET ch_rust::blake3)
set(USE_BLAKE3 1)
endif()
if (TARGET OpenSSL::SSL)
set(USE_SSL 1)
endif()

View File

@ -0,0 +1,12 @@
<test>
<query>
WITH
(
SELECT bitmapBuild(groupArray(number))
FROM numbers(3000000)
) AS a,
[a, a, a] AS b
SELECT sum(bitmapCardinality(b[(number % 3) + 1]))
FROM numbers(10000)
</query>
</test>

View File

@ -8,6 +8,7 @@
<value>SHA224</value>
<value>SHA256</value>
<value>halfMD5</value>
<value>blake3</value>
</values>
</substitution>
<substitution>

View File

@ -0,0 +1,3 @@
0C673DA1EF75D2DAA895483138340F041881EA975D57C1435D487F454A111B74
007ED777B7A1CBA08D37BDA339EFABB42FA460D953070779903125B0F4D5FB5F
E25232688E2A4D3A55174DECB33815A27B2A92DC8839E3CDA456105C259BB071

View File

@ -0,0 +1,5 @@
-- Tags: no-fasttest
SELECT hex(blake3('test_1'));
SELECT hex(blake3('test_2'));
SELECT hex(blake3('test_3'));

View File

@ -13,15 +13,15 @@ export CLICKHOUSE_TEST_NAME
export CLICKHOUSE_TEST_ZOOKEEPER_PREFIX="${CLICKHOUSE_TEST_NAME}_${CLICKHOUSE_DATABASE}"
export CLICKHOUSE_TEST_UNIQUE_NAME="${CLICKHOUSE_TEST_NAME}_${CLICKHOUSE_DATABASE}"
[ -v CLICKHOUSE_CONFIG_CLIENT ] && CLICKHOUSE_CLIENT_OPT0+=" --config-file=${CLICKHOUSE_CONFIG_CLIENT} "
[ -v CLICKHOUSE_HOST ] && CLICKHOUSE_CLIENT_OPT0+=" --host=${CLICKHOUSE_HOST} "
[ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
[ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_BENCHMARK_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
[ -v CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} "
[ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_CLIENT_OPT0+=" --log_comment $(printf '%q' ${CLICKHOUSE_LOG_COMMENT}) "
[ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_BENCHMARK_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_BENCHMARK_OPT0+=" --log_comment $(printf '%q' ${CLICKHOUSE_LOG_COMMENT}) "
[ -n "${CLICKHOUSE_CONFIG_CLIENT:-}" ] && CLICKHOUSE_CLIENT_OPT0+=" --config-file=${CLICKHOUSE_CONFIG_CLIENT} "
[ -n "${CLICKHOUSE_HOST:-}" ] && CLICKHOUSE_CLIENT_OPT0+=" --host=${CLICKHOUSE_HOST} "
[ -n "${CLICKHOUSE_PORT_TCP:-}" ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
[ -n "${CLICKHOUSE_PORT_TCP:-}" ] && CLICKHOUSE_BENCHMARK_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} "
[ -n "${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL:-}" ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} "
[ -n "${CLICKHOUSE_DATABASE:-}" ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
[ -n "${CLICKHOUSE_LOG_COMMENT:-}" ] && CLICKHOUSE_CLIENT_OPT0+=" --log_comment $(printf '%q' ${CLICKHOUSE_LOG_COMMENT}) "
[ -n "${CLICKHOUSE_DATABASE:-}" ] && CLICKHOUSE_BENCHMARK_OPT0+=" --database=${CLICKHOUSE_DATABASE} "
[ -n "${CLICKHOUSE_LOG_COMMENT:-}" ] && CLICKHOUSE_BENCHMARK_OPT0+=" --log_comment $(printf '%q' ${CLICKHOUSE_LOG_COMMENT}) "
export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"}
# client
@ -81,20 +81,20 @@ export CLICKHOUSE_PORT_KEEPER=${CLICKHOUSE_PORT_KEEPER:="9181"}
export CLICKHOUSE_CLIENT_SECURE=${CLICKHOUSE_CLIENT_SECURE:=$(echo "${CLICKHOUSE_CLIENT}" | sed 's/--secure //' | sed 's/'"--port=${CLICKHOUSE_PORT_TCP}"'//g; s/$/'"--secure --accept-invalid-certificate --port=${CLICKHOUSE_PORT_TCP_SECURE}"'/g')}
# Add database and log comment to url params
if [ -v CLICKHOUSE_URL_PARAMS ]
if [ -n "${CLICKHOUSE_URL_PARAMS:-}" ]
then
export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&database=${CLICKHOUSE_DATABASE}"
else
export CLICKHOUSE_URL_PARAMS="database=${CLICKHOUSE_DATABASE}"
fi
# Note: missing url encoding of the log comment.
[ -v CLICKHOUSE_LOG_COMMENT ] && export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&log_comment=${CLICKHOUSE_LOG_COMMENT}"
[ -n "${CLICKHOUSE_LOG_COMMENT:-}" ] && export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&log_comment=${CLICKHOUSE_LOG_COMMENT}"
export CLICKHOUSE_URL=${CLICKHOUSE_URL:="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/"}
export CLICKHOUSE_URL_HTTPS=${CLICKHOUSE_URL_HTTPS:="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/"}
# Add url params to url
if [ -v CLICKHOUSE_URL_PARAMS ]
if [ -n "${CLICKHOUSE_URL_PARAMS:-}" ]
then
export CLICKHOUSE_URL="${CLICKHOUSE_URL}?${CLICKHOUSE_URL_PARAMS}"
export CLICKHOUSE_URL_HTTPS="${CLICKHOUSE_URL_HTTPS}?${CLICKHOUSE_URL_PARAMS}"
@ -117,10 +117,10 @@ mkdir -p ${CLICKHOUSE_TMP}
export MYSQL_CLIENT_BINARY=${MYSQL_CLIENT_BINARY:="mysql"}
export MYSQL_CLIENT_CLICKHOUSE_USER=${MYSQL_CLIENT_CLICKHOUSE_USER:="default"}
# Avoids "Can't connect to local MySQL server through socket '/var/run/mysqld/mysqld.sock'" when connecting to localhost
[ -v CLICKHOUSE_HOST ] && MYSQL_CLIENT_OPT0+=" --protocol tcp "
[ -v CLICKHOUSE_HOST ] && MYSQL_CLIENT_OPT0+=" --host ${CLICKHOUSE_HOST} "
[ -v CLICKHOUSE_PORT_MYSQL ] && MYSQL_CLIENT_OPT0+=" --port ${CLICKHOUSE_PORT_MYSQL} "
[ -v CLICKHOUSE_DATABASE ] && MYSQL_CLIENT_OPT0+=" --database ${CLICKHOUSE_DATABASE} "
[ -n "${CLICKHOUSE_HOST:-}" ] && MYSQL_CLIENT_OPT0+=" --protocol tcp "
[ -n "${CLICKHOUSE_HOST:-}" ] && MYSQL_CLIENT_OPT0+=" --host ${CLICKHOUSE_HOST} "
[ -n "${CLICKHOUSE_PORT_MYSQL:-}" ] && MYSQL_CLIENT_OPT0+=" --port ${CLICKHOUSE_PORT_MYSQL} "
[ -n "${CLICKHOUSE_DATABASE:-}" ] && MYSQL_CLIENT_OPT0+=" --database ${CLICKHOUSE_DATABASE} "
MYSQL_CLIENT_OPT0+=" --user ${MYSQL_CLIENT_CLICKHOUSE_USER} "
export MYSQL_CLIENT_OPT="${MYSQL_CLIENT_OPT0:-} ${MYSQL_CLIENT_OPT:-}"
export MYSQL_CLIENT=${MYSQL_CLIENT:="$MYSQL_CLIENT_BINARY ${MYSQL_CLIENT_OPT:-}"}