mirror of https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 16:12:01 +00:00

Merge branch 'master' into custom-key-parallel-replicas

This commit is contained in:
commit c99efa75b7

CHANGELOG.md (2013 changed lines)
File diff suppressed because it is too large. Load Diff

@@ -16,6 +16,5 @@ ClickHouse® is an open-source column-oriented database management system that a

* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.

## Upcoming events

* **Recording available**: [**v22.12 Release Webinar**](https://www.youtube.com/watch?v=sREupr6uc2k) 22.12 is the ClickHouse Christmas release. There are plenty of gifts (a new JOIN algorithm among them) and we adopted something from MongoDB. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup at the CHEQ office in Tel Aviv**](https://www.meetup.com/clickhouse-tel-aviv-user-group/events/289599423/) - Jan 16 - We are very excited to be holding our next in-person ClickHouse meetup at the CHEQ office in Tel Aviv! Hear from CHEQ, ServiceNow and Contentsquare, as well as a deep dive presentation from ClickHouse CTO Alexey Milovidov. Join us for a fun evening of talks, food and discussion!
* [**ClickHouse Meetup at Microsoft Office in Seattle**](https://www.meetup.com/clickhouse-seattle-user-group/events/290310025/) - Jan 18 - Keep an eye on this space as we will be announcing speakers soon!
* **Recording available**: [**v23.1 Release Webinar**](https://www.youtube.com/watch?v=zYSZXBnTMSE) 23.1 is the ClickHouse New Year release. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. Inverted indices, query cache, and so -- very -- much more.
* **Recording available**: [**ClickHouse Meetup at the CHEQ office in Tel Aviv**](https://www.meetup.com/clickhouse-tel-aviv-user-group/events/289599423/) - We are very excited to be holding our next in-person ClickHouse meetup at the CHEQ office in Tel Aviv! Hear from CHEQ, ServiceNow and Contentsquare, as well as a deep dive presentation from ClickHouse CTO Alexey Milovidov. Join us for a fun evening of talks, food and discussion!

contrib/krb5 (vendored, 2 changed lines)

@@ -1 +1 @@
Subproject commit b89e20367b074bd02dd118a6534099b21e88b3c3
Subproject commit f8262a1b548eb29d97e059260042036255d07f8d
|
@@ -15,6 +15,10 @@ if(NOT AWK_PROGRAM)
    message(FATAL_ERROR "You need the awk program to build ClickHouse with krb5 enabled.")
endif()

if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC))
    add_compile_definitions(USE_BORINGSSL=1)
endif ()

set(KRB5_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/krb5/src")
set(KRB5_ET_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/include_private")

@@ -578,12 +582,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
    list(APPEND ALL_SRCS "${CMAKE_CURRENT_BINARY_DIR}/include_private/kcmrpc.c")
endif()

if (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)
    list(REMOVE_ITEM ALL_SRCS "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/aes.c")
    list(APPEND ALL_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/aes.c")
endif ()

target_sources(_krb5 PRIVATE
    ${ALL_SRCS}
)
@ -1,302 +0,0 @@
|
||||
/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
/* lib/crypto/openssl/enc_provider/aes.c */
|
||||
/*
|
||||
* Copyright (C) 2003, 2007, 2008, 2009 by the Massachusetts Institute of Technology.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Export of this software from the United States of America may
|
||||
* require a specific license from the United States Government.
|
||||
* It is the responsibility of any person or organization contemplating
|
||||
* export to obtain such a license before exporting.
|
||||
*
|
||||
* WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
|
||||
* distribute this software and its documentation for any purpose and
|
||||
* without fee is hereby granted, provided that the above copyright
|
||||
* notice appear in all copies and that both that copyright notice and
|
||||
* this permission notice appear in supporting documentation, and that
|
||||
* the name of M.I.T. not be used in advertising or publicity pertaining
|
||||
* to distribution of the software without specific, written prior
|
||||
* permission. Furthermore if you modify this software you must label
|
||||
* your software as modified software and not distribute it in such a
|
||||
* fashion that it might be confused with the original M.I.T. software.
|
||||
* M.I.T. makes no representations about the suitability of
|
||||
* this software for any purpose. It is provided "as is" without express
|
||||
* or implied warranty.
|
||||
*/
|
||||
|
||||
#include "crypto_int.h"
|
||||
#include <openssl/evp.h>
|
||||
#include <openssl/aes.h>
|
||||
|
||||
/* proto's */
|
||||
static krb5_error_code
|
||||
cbc_enc(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
|
||||
size_t num_data);
|
||||
static krb5_error_code
|
||||
cbc_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
|
||||
size_t num_data);
|
||||
static krb5_error_code
|
||||
cts_encr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
|
||||
size_t num_data, size_t dlen);
|
||||
static krb5_error_code
|
||||
cts_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
|
||||
size_t num_data, size_t dlen);
|
||||
|
||||
#define BLOCK_SIZE 16
|
||||
#define NUM_BITS 8
|
||||
#define IV_CTS_BUF_SIZE 16 /* 16 - hardcoded in CRYPTO_cts128_en/decrypt */
|
||||
|
||||
static const EVP_CIPHER *
|
||||
map_mode(unsigned int len)
|
||||
{
|
||||
if (len==16)
|
||||
return EVP_aes_128_cbc();
|
||||
if (len==32)
|
||||
return EVP_aes_256_cbc();
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Encrypt one block using CBC. */
|
||||
static krb5_error_code
|
||||
cbc_enc(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
|
||||
size_t num_data)
|
||||
{
|
||||
int ret, olen = BLOCK_SIZE;
|
||||
unsigned char iblock[BLOCK_SIZE], oblock[BLOCK_SIZE];
|
||||
EVP_CIPHER_CTX *ctx;
|
||||
struct iov_cursor cursor;
|
||||
|
||||
ctx = EVP_CIPHER_CTX_new();
|
||||
if (ctx == NULL)
|
||||
return ENOMEM;
|
||||
|
||||
ret = EVP_EncryptInit_ex(ctx, map_mode(key->keyblock.length),
|
||||
NULL, key->keyblock.contents, (ivec) ? (unsigned char*)ivec->data : NULL);
|
||||
if (ret == 0) {
|
||||
EVP_CIPHER_CTX_free(ctx);
|
||||
return KRB5_CRYPTO_INTERNAL;
|
||||
}
|
||||
|
||||
k5_iov_cursor_init(&cursor, data, num_data, BLOCK_SIZE, FALSE);
|
||||
k5_iov_cursor_get(&cursor, iblock);
|
||||
EVP_CIPHER_CTX_set_padding(ctx,0);
|
||||
ret = EVP_EncryptUpdate(ctx, oblock, &olen, iblock, BLOCK_SIZE);
|
||||
if (ret == 1)
|
||||
k5_iov_cursor_put(&cursor, oblock);
|
||||
EVP_CIPHER_CTX_free(ctx);
|
||||
|
||||
zap(iblock, BLOCK_SIZE);
|
||||
zap(oblock, BLOCK_SIZE);
|
||||
return (ret == 1) ? 0 : KRB5_CRYPTO_INTERNAL;
|
||||
}
|
||||
|
||||
/* Decrypt one block using CBC. */
|
||||
static krb5_error_code
|
||||
cbc_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
|
||||
size_t num_data)
|
||||
{
|
||||
int ret = 0, olen = BLOCK_SIZE;
|
||||
unsigned char iblock[BLOCK_SIZE], oblock[BLOCK_SIZE];
|
||||
EVP_CIPHER_CTX *ctx;
|
||||
struct iov_cursor cursor;
|
||||
|
||||
ctx = EVP_CIPHER_CTX_new();
|
||||
if (ctx == NULL)
|
||||
return ENOMEM;
|
||||
|
||||
ret = EVP_DecryptInit_ex(ctx, map_mode(key->keyblock.length),
|
||||
NULL, key->keyblock.contents, (ivec) ? (unsigned char*)ivec->data : NULL);
|
||||
if (ret == 0) {
|
||||
EVP_CIPHER_CTX_free(ctx);
|
||||
return KRB5_CRYPTO_INTERNAL;
|
||||
}
|
||||
|
||||
k5_iov_cursor_init(&cursor, data, num_data, BLOCK_SIZE, FALSE);
|
||||
k5_iov_cursor_get(&cursor, iblock);
|
||||
EVP_CIPHER_CTX_set_padding(ctx,0);
|
||||
ret = EVP_DecryptUpdate(ctx, oblock, &olen, iblock, BLOCK_SIZE);
|
||||
if (ret == 1)
|
||||
k5_iov_cursor_put(&cursor, oblock);
|
||||
EVP_CIPHER_CTX_free(ctx);
|
||||
|
||||
zap(iblock, BLOCK_SIZE);
|
||||
zap(oblock, BLOCK_SIZE);
|
||||
return (ret == 1) ? 0 : KRB5_CRYPTO_INTERNAL;
|
||||
}
|
||||
|
||||
static krb5_error_code
|
||||
cts_encr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
|
||||
size_t num_data, size_t dlen)
|
||||
{
|
||||
int ret = 0;
|
||||
size_t size = 0;
|
||||
unsigned char *oblock = NULL, *dbuf = NULL;
|
||||
unsigned char iv_cts[IV_CTS_BUF_SIZE];
|
||||
struct iov_cursor cursor;
|
||||
AES_KEY enck;
|
||||
|
||||
memset(iv_cts,0,sizeof(iv_cts));
|
||||
if (ivec && ivec->data){
|
||||
if (ivec->length != sizeof(iv_cts))
|
||||
return KRB5_CRYPTO_INTERNAL;
|
||||
memcpy(iv_cts, ivec->data,ivec->length);
|
||||
}
|
||||
|
||||
oblock = OPENSSL_malloc(dlen);
|
||||
if (!oblock){
|
||||
return ENOMEM;
|
||||
}
|
||||
dbuf = OPENSSL_malloc(dlen);
|
||||
if (!dbuf){
|
||||
OPENSSL_free(oblock);
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
k5_iov_cursor_init(&cursor, data, num_data, dlen, FALSE);
|
||||
k5_iov_cursor_get(&cursor, dbuf);
|
||||
|
||||
AES_set_encrypt_key(key->keyblock.contents,
|
||||
NUM_BITS * key->keyblock.length, &enck);
|
||||
|
||||
size = CRYPTO_cts128_encrypt((unsigned char *)dbuf, oblock, dlen, &enck,
|
||||
iv_cts, AES_cbc_encrypt);
|
||||
if (size <= 0)
|
||||
ret = KRB5_CRYPTO_INTERNAL;
|
||||
else
|
||||
k5_iov_cursor_put(&cursor, oblock);
|
||||
|
||||
if (!ret && ivec && ivec->data)
|
||||
memcpy(ivec->data, iv_cts, sizeof(iv_cts));
|
||||
|
||||
zap(oblock, dlen);
|
||||
zap(dbuf, dlen);
|
||||
OPENSSL_free(oblock);
|
||||
OPENSSL_free(dbuf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static krb5_error_code
|
||||
cts_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
|
||||
size_t num_data, size_t dlen)
|
||||
{
|
||||
int ret = 0;
|
||||
size_t size = 0;
|
||||
unsigned char *oblock = NULL;
|
||||
unsigned char *dbuf = NULL;
|
||||
unsigned char iv_cts[IV_CTS_BUF_SIZE];
|
||||
struct iov_cursor cursor;
|
||||
AES_KEY deck;
|
||||
|
||||
memset(iv_cts,0,sizeof(iv_cts));
|
||||
if (ivec && ivec->data){
|
||||
if (ivec->length != sizeof(iv_cts))
|
||||
return KRB5_CRYPTO_INTERNAL;
|
||||
memcpy(iv_cts, ivec->data,ivec->length);
|
||||
}
|
||||
|
||||
oblock = OPENSSL_malloc(dlen);
|
||||
if (!oblock)
|
||||
return ENOMEM;
|
||||
dbuf = OPENSSL_malloc(dlen);
|
||||
if (!dbuf){
|
||||
OPENSSL_free(oblock);
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
AES_set_decrypt_key(key->keyblock.contents,
|
||||
NUM_BITS * key->keyblock.length, &deck);
|
||||
|
||||
k5_iov_cursor_init(&cursor, data, num_data, dlen, FALSE);
|
||||
k5_iov_cursor_get(&cursor, dbuf);
|
||||
|
||||
size = CRYPTO_cts128_decrypt((unsigned char *)dbuf, oblock,
|
||||
dlen, &deck,
|
||||
iv_cts, AES_cbc_encrypt);
|
||||
if (size <= 0)
|
||||
ret = KRB5_CRYPTO_INTERNAL;
|
||||
else
|
||||
k5_iov_cursor_put(&cursor, oblock);
|
||||
|
||||
if (!ret && ivec && ivec->data)
|
||||
memcpy(ivec->data, iv_cts, sizeof(iv_cts));
|
||||
|
||||
zap(oblock, dlen);
|
||||
zap(dbuf, dlen);
|
||||
OPENSSL_free(oblock);
|
||||
OPENSSL_free(dbuf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
krb5_error_code
|
||||
krb5int_aes_encrypt(krb5_key key, const krb5_data *ivec,
|
||||
krb5_crypto_iov *data, size_t num_data)
|
||||
{
|
||||
int ret = 0;
|
||||
size_t input_length, nblocks;
|
||||
|
||||
input_length = iov_total_length(data, num_data, FALSE);
|
||||
nblocks = (input_length + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
if (nblocks == 1) {
|
||||
if (input_length != BLOCK_SIZE)
|
||||
return KRB5_BAD_MSIZE;
|
||||
ret = cbc_enc(key, ivec, data, num_data);
|
||||
} else if (nblocks > 1) {
|
||||
ret = cts_encr(key, ivec, data, num_data, input_length);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
krb5_error_code
|
||||
krb5int_aes_decrypt(krb5_key key, const krb5_data *ivec,
|
||||
krb5_crypto_iov *data, size_t num_data)
|
||||
{
|
||||
int ret = 0;
|
||||
size_t input_length, nblocks;
|
||||
|
||||
input_length = iov_total_length(data, num_data, FALSE);
|
||||
nblocks = (input_length + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
if (nblocks == 1) {
|
||||
if (input_length != BLOCK_SIZE)
|
||||
return KRB5_BAD_MSIZE;
|
||||
ret = cbc_decr(key, ivec, data, num_data);
|
||||
} else if (nblocks > 1) {
|
||||
ret = cts_decr(key, ivec, data, num_data, input_length);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static krb5_error_code
|
||||
krb5int_aes_init_state (const krb5_keyblock *key, krb5_keyusage usage,
|
||||
krb5_data *state)
|
||||
{
|
||||
state->length = 16;
|
||||
state->data = (void *) malloc(16);
|
||||
if (state->data == NULL)
|
||||
return ENOMEM;
|
||||
memset(state->data, 0, state->length);
|
||||
return 0;
|
||||
}
|
||||
const struct krb5_enc_provider krb5int_enc_aes128 = {
|
||||
16,
|
||||
16, 16,
|
||||
krb5int_aes_encrypt,
|
||||
krb5int_aes_decrypt,
|
||||
NULL,
|
||||
krb5int_aes_init_state,
|
||||
krb5int_default_free_state
|
||||
};
|
||||
|
||||
const struct krb5_enc_provider krb5int_enc_aes256 = {
|
||||
16,
|
||||
32, 32,
|
||||
krb5int_aes_encrypt,
|
||||
krb5int_aes_decrypt,
|
||||
NULL,
|
||||
krb5int_aes_init_state,
|
||||
krb5int_default_free_state
|
||||
};
|
@@ -1,3 +1,9 @@
# Note: ClickHouse uses BoringSSL. The presence of OpenSSL is only due to IBM's port of ClickHouse to s390x. BoringSSL does not support
# s390x, and FIPS validation provided by the OS vendor (Red Hat, Ubuntu) requires (preferably dynamic) linking with OS packages, which
# ClickHouse generally avoids.
#
# Furthermore, the in-source OpenSSL dump in this directory exists for development purposes and is not FIPS-compliant.

if(ENABLE_OPENSSL_DYNAMIC OR ENABLE_OPENSSL)
    set(ENABLE_SSL 1 CACHE INTERNAL "")
    set(OPENSSL_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/openssl)

contrib/poco (vendored, 2 changed lines)

@@ -1 +1 @@
Subproject commit 4b1c8dd9913d2a16db62df0e509fa598da5c8219
Subproject commit 7fefdf30244a9bf8eb58562a9b2a51cc59a8877a
@@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.1.1.3077"
ARG VERSION="23.1.2.9"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

# user/group precreated explicitly with fixed uid/gid on purpose.

@@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.1.1.3077"
ARG VERSION="23.1.2.9"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

# set non-empty deb_location_url url to create a docker image
@@ -138,6 +138,7 @@ function clone_submodules
        contrib/c-ares
        contrib/morton-nd
        contrib/xxHash
        contrib/simdjson
    )

    git submodule sync

@@ -158,6 +159,7 @@ function run_cmake
        "-DENABLE_THINLTO=0"
        "-DUSE_UNWIND=1"
        "-DENABLE_NURAFT=1"
        "-DENABLE_SIMDJSON=1"
        "-DENABLE_JEMALLOC=1"
    )

@@ -234,6 +236,7 @@ function run_tests
        --check-zookeeper-session
        --order random
        --print-time
        --report-logs-stats
        --jobs "${NPROC}"
    )
    time clickhouse-test "${test_opts[@]}" -- "$FASTTEST_FOCUS" 2>&1 \

@@ -528,6 +528,7 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
        -e "MutateFromLogEntryTask" \
        -e "No connection to ZooKeeper, cannot get shared table ID" \
        -e "Session expired" \
        -e "TOO_MANY_PARTS" \
        /var/log/clickhouse-server/clickhouse-server.backward.dirty.log | rg -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
        && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
        || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
docs/changelogs/v22.11.5.15-stable.md (new file, 22 lines)

@@ -0,0 +1,22 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.11.5.15-stable (d763e5a9239) FIXME as compared to v22.11.4.3-stable (7f4cf554f69)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#44999](https://github.com/ClickHouse/ClickHouse/issues/44999): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#45552](https://github.com/ClickHouse/ClickHouse/issues/45552): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
docs/changelogs/v22.8.13.20-lts.md (new file, 24 lines)

@@ -0,0 +1,24 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.8.13.20-lts (e4817946d18) FIXME as compared to v22.8.12.45-lts (86b0ecd5d51)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#45565](https://github.com/ClickHouse/ClickHouse/issues/45565): Fix positional arguments exception Positional argument out of bounds. Closes [#40634](https://github.com/ClickHouse/ClickHouse/issues/40634). [#41189](https://github.com/ClickHouse/ClickHouse/pull/41189) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Backported in [#44997](https://github.com/ClickHouse/ClickHouse/issues/44997): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#45550](https://github.com/ClickHouse/ClickHouse/issues/45550): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Merge pull request [#38262](https://github.com/ClickHouse/ClickHouse/issues/38262) from PolyProgrammist/fix-ordinary-system-un… [#45650](https://github.com/ClickHouse/ClickHouse/pull/45650) ([alesapin](https://github.com/alesapin)).
|
||||
|
docs/changelogs/v23.1.2.9-stable.md (new file, 23 lines)

@@ -0,0 +1,23 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.1.2.9-stable (8dfb1700858) FIXME as compared to v23.1.1.3077-stable (dcaac477025)
|
||||
|
||||
#### Performance Improvement
|
||||
* Backported in [#45705](https://github.com/ClickHouse/ClickHouse/issues/45705): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
#### Bug Fix
|
||||
* Backported in [#45673](https://github.com/ClickHouse/ClickHouse/issues/45673): Fix wiping sensitive info in logs. [#45603](https://github.com/ClickHouse/ClickHouse/pull/45603) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#45730](https://github.com/ClickHouse/ClickHouse/issues/45730): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
@@ -1,15 +1,22 @@
# Inverted indexes [experimental] {#table_engines-ANNIndex}
---
slug: /en/engines/table-engines/mergetree-family/invertedindexes
sidebar_label: Inverted Indexes
description: Quickly find search terms in text.
keywords: [full-text search, text search]
---

Inverted indexes are an experimental type of [secondary indexes](mergetree.md#available-types-of-indices) which provide fast text search
capabilities for [String](../../../sql-reference/data-types/string.md) or [FixedString](../../../sql-reference/data-types/fixedstring.md)
columns. The main idea of an inverted indexes is to store a mapping from "terms" to the rows which contains these terms. "Terms" are
tokenized cells of the string column. For example, string cell "I will be a little late" is by default tokenized into six terms "I", "will",
"be", "a", "little" and "late". Another kind of tokenizer are n-grams. For example, the result of 3-gram tokenization will be 21 terms "I w",
# Inverted indexes [experimental]

Inverted indexes are an experimental type of [secondary indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#available-types-of-indices) which provide fast text search
capabilities for [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md)
columns. The main idea of an inverted index is to store a mapping from "terms" to the rows which contain these terms. "Terms" are
tokenized cells of the string column. For example, the string cell "I will be a little late" is by default tokenized into six terms "I", "will",
"be", "a", "little" and "late". Another kind of tokenizer is n-grams. For example, the result of 3-gram tokenization will be 21 terms "I w",
" wi", "wil", "ill", "ll ", "l b", " be" etc. The more fine-granular the input strings are tokenized, the bigger but also the more
useful the resulting inverted index will be.

:::warning
Inverted indexes are experimental and should not be used in production environment yet. They may change in future in backwards-incompatible
Inverted indexes are experimental and should not be used in production environments yet. They may change in the future in backward-incompatible
ways, for example with respect to their DDL/DQL syntax or performance/compression characteristics.
:::

@@ -24,7 +31,14 @@ SET allow_experimental_inverted_index = true;

An inverted index can be defined on a string column using the following syntax

``` sql
CREATE TABLE tab (key UInt64, str String, INDEX inv_idx(s) TYPE inverted(N) GRANULARITY 1) Engine=MergeTree ORDER BY (k);
CREATE TABLE tab
(
    `key` UInt64,
    `str` String,
    INDEX inv_idx(str) TYPE inverted(0) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY key
```

where `N` specifies the tokenizer:

@@ -32,35 +46,35 @@ where `N` specifies the tokenizer:
- `inverted(0)` (or shorter: `inverted()`) set the tokenizer to "tokens", i.e. split strings along spaces,
- `inverted(N)` with `N` between 2 and 8 sets the tokenizer to "ngrams(N)"

Being a type of skipping indexes, inverted indexes can be dropped or added to a column after table creation:
Being a type of skipping index, inverted indexes can be dropped or added to a column after table creation:

``` sql
ALTER TABLE tbl DROP INDEX inv_idx;
ALTER TABLE tbl ADD INDEX inv_idx(s) TYPE inverted(2) GRANULARITY 1;
ALTER TABLE tab DROP INDEX inv_idx;
ALTER TABLE tab ADD INDEX inv_idx(s) TYPE inverted(2) GRANULARITY 1;
```

To use the index, no special functions or syntax are required. Typical string search predicates automatically leverage the index. As
examples, consider:

```sql
SELECT * from tab WHERE s == 'Hello World;
SELECT * from tab WHERE s IN (‘Hello’, ‘World’);
SELECT * from tab WHERE s LIKE ‘%Hello%’;
SELECT * from tab WHERE multiSearchAny(s, ‘Hello’, ‘World’);
SELECT * from tab WHERE hasToken(s, ‘Hello’);
SELECT * from tab WHERE multiSearchAll(s, [‘Hello’, ‘World’]);
INSERT INTO tab(key, str) values (1, 'Hello World');
SELECT * from tab WHERE str == 'Hello World';
SELECT * from tab WHERE str IN ('Hello', 'World');
SELECT * from tab WHERE str LIKE '%Hello%';
SELECT * from tab WHERE multiSearchAny(str, ['Hello', 'World']);
SELECT * from tab WHERE hasToken(str, 'Hello');
```
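To confirm that such predicates actually hit the inverted index, `EXPLAIN` can be consulted. A hedged sketch, assuming the `tab` table and `inv_idx` index from the examples above (the exact plan layout depends on the ClickHouse version):

```sql
-- Check that the skipping index is picked up for a token search.
EXPLAIN indexes = 1
SELECT count()
FROM tab
WHERE hasToken(str, 'Hello');
-- The plan should contain a skip-index entry named inv_idx together with the
-- number of granules it allowed ClickHouse to drop.
```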

The inverted index also works on columns of type `Array(String)`, `Array(FixedString)`, `Map(String)` and `Map(String)`.

Like for other secondary indices, each column part has its own inverted index. Furthermore, each inverted index is internally divided into
"segments". The existence and size of the segments is generally transparent to users but the segment size determines the memory consumption
"segments". The existence and size of the segments are generally transparent to users but the segment size determines the memory consumption
during index construction (e.g. when two parts are merged). Configuration parameter "max_digestion_size_per_segment" (default: 256 MB)
controls the amount of data read consumed from the underlying column before a new segment is created. Incrementing the parameter raises the
intermediate memory consumption for index constuction but also improves lookup performance since fewer segments need to be checked on
intermediate memory consumption for index construction but also improves lookup performance since fewer segments need to be checked on
average to evaluate a query.

Unlike other secondary indices, inverted indexes (for now) map to row numbers (row ids) instead of granule ids. The reason for this design
is performance. In practice, users often search for multiple terms at once. For example, filter predicate `WHERE s LIKE '%little%' OR s LIKE
'%big%'` can be evaluated directly using an inverted index by forming the union of the rowid lists for terms "little" and "big". This also
means that parameter `GRANULARITY` supplied to index creation has no meaning (it may be removed from the syntax in future).
'%big%'` can be evaluated directly using an inverted index by forming the union of the row id lists for terms "little" and "big". This also
means that the parameter `GRANULARITY` supplied to index creation has no meaning (it may be removed from the syntax in the future).
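Since every column part carries its own inverted index, it can be useful to check how much space the index occupies. A sketch, assuming the `tab` table from above and a server version whose `system.data_skipping_indices` table exposes the size columns:

```sql
-- Inspect the on-disk footprint of the inverted index (assumption: the
-- data_compressed_bytes / data_uncompressed_bytes columns are available).
SELECT
    table,
    name,
    type,
    formatReadableSize(data_compressed_bytes)   AS compressed,
    formatReadableSize(data_uncompressed_bytes) AS uncompressed
FROM system.data_skipping_indices
WHERE table = 'tab';
```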
@@ -923,15 +923,25 @@ Configuration markup:
    <single_read_retries>4</single_read_retries>
    <min_bytes_for_seek>1000</min_bytes_for_seek>
    <metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
    <cache_enabled>true</cache_enabled>
    <cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
    <skip_access_check>false</skip_access_check>
</s3>
<s3_cache>
    <type>cache</type>
    <disk>s3</disk>
    <path>/var/lib/clickhouse/disks/s3_cache/</path>
    <max_size>10Gi</max_size>
</s3_cache>
</disks>
...
</storage_configuration>
```

:::note cache configuration
ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions.
:::

### Configuring the S3 disk

Required parameters:

- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.

@@ -951,8 +961,6 @@ Optional parameters:
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
- `cache_enabled` — Allows to cache mark and index files on local FS. Default value is `true`.
- `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).

@@ -960,6 +968,30 @@ Optional parameters:
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.

### Configuring the cache

This is the cache configuration from above:
```xml
<s3_cache>
    <type>cache</type>
    <disk>s3</disk>
    <path>/var/lib/clickhouse/disks/s3_cache/</path>
    <max_size>10Gi</max_size>
</s3_cache>
```

These parameters define the cache layer:
- `type` — If a disk is of type `cache` it caches mark and index files in memory.
- `disk` — The name of the disk that will be cached.

Cache parameters:
- `path` — The path where metadata for the cache is stored.
- `max_size` — The size (amount of memory) that the cache can grow to.

:::tip
There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details.
:::
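Once the disks are declared, it can be useful to verify that the server actually registered them after the configuration was reloaded. A minimal sketch, assuming the `s3` and `s3_cache` disk names used in the markup above:

```sql
-- List the configured disks; the s3 and s3_cache entries from the markup
-- above should appear here once the server has picked up the configuration.
SELECT name, path
FROM system.disks
WHERE name IN ('s3', 's3_cache');

-- Storage policies referencing these disks can be checked the same way.
SELECT policy_name, volume_name, disks
FROM system.storage_policies;
```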
S3 disk can be configured as `main` or `cold` storage:
``` xml
<storage_configuration>
@@ -19,7 +19,7 @@ ENGINE = GenerateRandom([random_seed] [,max_string_length] [,max_array_length])
```

The `max_array_length` and `max_string_length` parameters specify maximum length of all
array columns and strings correspondingly in generated data.
array or map columns and strings correspondingly in generated data.

Generate table engine supports only `SELECT` queries.
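As an illustration of how the three parameters fit together, here is a small sketch; the table and column names are invented for the example:

```sql
-- random_seed = 1, max_string_length = 10, max_array_length = 2,
-- matching the signature in the hunk header above.
CREATE TABLE generate_demo
(
    `name`   String,
    `values` Array(UInt32)
)
ENGINE = GenerateRandom(1, 10, 2);

-- The engine only supports SELECT; it produces random rows on the fly.
SELECT * FROM generate_demo LIMIT 3;
```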
docs/en/getting-started/example-datasets/laion.md (new file, 259 lines)

@@ -0,0 +1,259 @@
|
||||
# Laion-400M dataset
|
||||
|
||||
The dataset contains 400 million images with English text. For more information follow this [link](https://laion.ai/blog/laion-400-open-dataset/). Laion provides even larger datasets (e.g. [5 billion](https://laion.ai/blog/laion-5b/)). Working with them will be similar.
|
||||
|
||||
The dataset has prepared embeddings for texts and images. This will be used to demonstrate [Approximate nearest neighbor search indexes](../../engines/table-engines/mergetree-family/annindexes.md).
|
||||
|
||||
## Prepare data
|
||||
|
||||
Embeddings are stored in `.npy` files, so we have to read them with python and merge with other data.
|
||||
|
||||
Download data and process it with simple `download.sh` script:
|
||||
|
||||
```bash
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet
|
||||
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy
|
||||
python3 process.py ${1}
|
||||
```
|
||||
|
||||
Where `process.py`:
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import sys
|
||||
|
||||
str_i = str(sys.argv[1])
|
||||
npy_file = "img_emb_" + str_i + '.npy'
|
||||
metadata_file = "metadata_" + str_i + '.parquet'
|
||||
text_npy = "text_emb_" + str_i + '.npy'
|
||||
|
||||
# load all files
|
||||
im_emb = np.load(npy_file)
|
||||
text_emb = np.load(text_npy)
|
||||
data = pd.read_parquet(metadata_file)
|
||||
|
||||
# combine them
|
||||
data = pd.concat([data, pd.DataFrame({"image_embedding" : [*im_emb]}), pd.DataFrame({"text_embedding" : [*text_emb]})], axis=1, copy=False)
|
||||
|
||||
# you can save more columns
|
||||
data = data[['url', 'caption', 'similarity', "image_embedding", "text_embedding"]]
|
||||
|
||||
# transform np.arrays to lists
|
||||
data['image_embedding'] = data['image_embedding'].apply(lambda x: list(x))
|
||||
data['text_embedding'] = data['text_embedding'].apply(lambda x: list(x))
|
||||
|
||||
# this small hack is needed because the caption sometimes contains all kinds of quotes
|
||||
data['caption'] = data['caption'].apply(lambda x: x.replace("'", " ").replace('"', " "))
|
||||
|
||||
# save data to file
|
||||
data.to_csv(str_i + '.csv', header=False)
|
||||
|
||||
# previous files can be removed
|
||||
os.system(f"rm {npy_file} {metadata_file} {text_npy}")
|
||||
```
|
||||
|
||||
You can download data with
|
||||
```bash
|
||||
seq 0 409 | xargs -P100 -I{} bash -c './download.sh {}'
|
||||
```
|
||||
|
||||
The dataset is divided into 409 files. If you want to work only with a certain part of the dataset, just change the limits.
|
||||
|
||||
## Create table for laion
|
||||
|
||||
Without indexes, the table can be created by:
|
||||
|
||||
```sql
|
||||
CREATE TABLE laion_dataset
|
||||
(
|
||||
`id` Int64,
|
||||
`url` String,
|
||||
`caption` String,
|
||||
`similarity` Float32,
|
||||
`image_embedding` Array(Float32),
|
||||
`text_embedding` Array(Float32)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
SETTINGS index_granularity = 8192
|
||||
```
|
||||
|
||||
Fill table with data:
|
||||
|
||||
```sql
|
||||
INSERT INTO laion_dataset FROM INFILE '{path_to_csv_files}/*.csv'
|
||||
```
|
||||
|
||||
## Check data in table without indexes
|
||||
|
||||
Let's check the work of the following query on the part of the dataset (8 million records):
|
||||
|
||||
```sql
|
||||
select url, caption from test_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 30
|
||||
```
|
||||
|
||||
Since the embeddings for images and texts may not match, let's also require a certain threshold of matching accuracy to get images that are more likely to satisfy our queries. The query uses the client parameter `target`, which is an array of 512 elements; see later in this article for a convenient way of obtaining such vectors. I used a random picture of a cat from the Internet as a target vector.
|
||||
|
||||
**The result**
|
||||
|
||||
```
|
||||
┌─url───────────────────────────────────────────────────────────────────────────────────────────────────────────┬─caption────────────────────────────────────────────────────────────────┐
|
||||
│ https://s3.amazonaws.com/filestore.rescuegroups.org/6685/pictures/animals/13884/13884995/63318230_463x463.jpg │ Adoptable Female Domestic Short Hair │
|
||||
│ https://s3.amazonaws.com/pet-uploads.adoptapet.com/8/b/6/239905226.jpg │ Adopt A Pet :: Marzipan - New York, NY │
|
||||
│ http://d1n3ar4lqtlydb.cloudfront.net/9/2/4/248407625.jpg │ Adopt A Pet :: Butterscotch - New Castle, DE │
|
||||
│ https://s3.amazonaws.com/pet-uploads.adoptapet.com/e/e/c/245615237.jpg │ Adopt A Pet :: Tiggy - Chicago, IL │
|
||||
│ http://pawsofcoronado.org/wp-content/uploads/2012/12/rsz_pumpkin.jpg │ Pumpkin an orange tabby kitten for adoption │
|
||||
│ https://s3.amazonaws.com/pet-uploads.adoptapet.com/7/8/3/188700997.jpg │ Adopt A Pet :: Brian the Brad Pitt of cats - Frankfort, IL │
|
||||
│ https://s3.amazonaws.com/pet-uploads.adoptapet.com/8/b/d/191533561.jpg │ Domestic Shorthair Cat for adoption in Mesa, Arizona - Charlie │
|
||||
│ https://s3.amazonaws.com/pet-uploads.adoptapet.com/0/1/2/221698235.jpg │ Domestic Shorthair Cat for adoption in Marietta, Ohio - Daisy (Spayed) │
|
||||
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
8 rows in set. Elapsed: 6.432 sec. Processed 19.65 million rows, 43.96 GB (3.06 million rows/s., 6.84 GB/s.)
|
||||
```
|
||||
|
||||
## Add indexes
|
||||
|
||||
Create a new table or follow instructions from [alter documentation](../../sql-reference/statements/alter/skipping-index.md).
|
||||
|
||||
```sql
|
||||
CREATE TABLE laion_dataset
|
||||
(
|
||||
`id` Int64,
|
||||
`url` String,
|
||||
`caption` String,
|
||||
`similarity` Float32,
|
||||
`image_embedding` Array(Float32),
|
||||
`text_embedding` Array(Float32),
|
||||
INDEX annoy_image image_embedding TYPE annoy(1000) GRANULARITY 1000,
|
||||
INDEX annoy_text text_embedding TYPE annoy(1000) GRANULARITY 1000
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
SETTINGS index_granularity = 8192
|
||||
```
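If the table already exists, the same Annoy indexes can presumably be added afterwards instead of recreating the table, following the [alter documentation](../../sql-reference/statements/alter/skipping-index.md) linked above. A sketch (index names and parameters mirror the `CREATE TABLE` statement):

```sql
-- Add the same indexes to an existing table.
ALTER TABLE laion_dataset ADD INDEX annoy_image image_embedding TYPE annoy(1000) GRANULARITY 1000;
ALTER TABLE laion_dataset ADD INDEX annoy_text text_embedding TYPE annoy(1000) GRANULARITY 1000;

-- MATERIALIZE builds the index for data that was inserted before the ALTER.
ALTER TABLE laion_dataset MATERIALIZE INDEX annoy_image;
ALTER TABLE laion_dataset MATERIALIZE INDEX annoy_text;
```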
When created, the index will be built using L2Distance. You can read more about the parameters in the [annoy documentation](../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy). It makes sense to build indexes for a large number of granules. If you need good speed, then GRANULARITY should be several times larger than the expected number of results in the search.
|
||||
Now let's check again with the same query:
|
||||
|
||||
```sql
|
||||
select url, caption from test_indexes_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 8
|
||||
```
|
||||
|
||||
**Result**
|
||||
|
||||
```
|
||||
┌─url──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─caption──────────────────────────────────────────────────────────────┐
|
||||
│ http://tse1.mm.bing.net/th?id=OIP.R1CUoYp_4hbeFSHBaaB5-gHaFj │ bed bugs and pets can cats carry bed bugs pets adviser │
|
||||
│ http://pet-uploads.adoptapet.com/1/9/c/1963194.jpg?336w │ Domestic Longhair Cat for adoption in Quincy, Massachusetts - Ashley │
|
||||
│ https://thumbs.dreamstime.com/t/cat-bed-12591021.jpg │ Cat on bed Stock Image │
|
||||
│ https://us.123rf.com/450wm/penta/penta1105/penta110500004/9658511-portrait-of-british-short-hair-kitten-lieing-at-sofa-on-sun.jpg │ Portrait of british short hair kitten lieing at sofa on sun. │
|
||||
│ https://www.easypetmd.com/sites/default/files/Wirehaired%20Vizsla%20(2).jpg │ Vizsla (Wirehaired) image 3 │
|
||||
│ https://images.ctfassets.net/yixw23k2v6vo/0000000200009b8800000000/7950f4e1c1db335ef91bb2bc34428de9/dog-cat-flickr-Impatience_1.jpg?w=600&h=400&fm=jpg&fit=thumb&q=65&fl=progressive │ dog and cat image │
|
||||
│ https://i1.wallbox.ru/wallpapers/small/201523/eaa582ee76a31fd.jpg │ cats, kittens, faces, tonkinese │
|
||||
│ https://www.baxterboo.com/images/breeds/medium/cairn-terrier.jpg │ Cairn Terrier Photo │
|
||||
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
8 rows in set. Elapsed: 0.641 sec. Processed 22.06 thousand rows, 49.36 MB (91.53 thousand rows/s., 204.81 MB/s.)
|
||||
```
|
||||
|
||||
The speed has increased significantly. But now, the results sometimes differ from what you are looking for. This is due to the approximation of the search and the quality of the constructed embedding. Note that the example was given for picture embeddings, but there are also text embeddings in the dataset, which can also be used for searching.
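As a sketch of that last idea (not part of the original walkthrough), the same query shape can be pointed at the `text_embedding` column, which is covered by the `annoy_text` index defined above:

```sql
-- Same nearest-neighbour search, but against the text embeddings.
SELECT url, caption
FROM test_indexes_laion
ORDER BY L2Distance(text_embedding, {target:Array(Float32)})
LIMIT 8;
```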
|
||||
|
||||
## Scripts for embeddings
|
||||
|
||||
Usually, we do not want to get embeddings from existing data, but to get them for new data and look for similar ones in old data. We can use [UDF](../../sql-reference/functions/index.md#sql-user-defined-functions) for this purpose. They will allow you to set the `target` vector without leaving the client. All of the following scripts will be written for the `ViT-B/32` model, as it was used for this dataset. You can use any model, but it is necessary to build embeddings in the dataset and for new objects using the same model.
|
||||
|
||||
### Text embeddings
|
||||
|
||||
`encode_text.py`:
|
||||
```python
|
||||
#!/usr/bin/python3
|
||||
import clip
|
||||
import torch
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
if __name__ == '__main__':
|
||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
model, preprocess = clip.load("ViT-B/32", device=device)
|
||||
for text in sys.stdin:
|
||||
inputs = clip.tokenize(text)
|
||||
with torch.no_grad():
|
||||
text_features = model.encode_text(inputs)[0].tolist()
print(text_features)
sys.stdout.flush()
|
||||
```
|
||||
|
||||
`encode_text_function.xml`:
|
||||
```xml
|
||||
<functions>
|
||||
<function>
|
||||
<type>executable</type>
|
||||
<name>encode_text</name>
|
||||
<return_type>Array(Float32)</return_type>
|
||||
<argument>
|
||||
<type>String</type>
|
||||
<name>text</name>
|
||||
</argument>
|
||||
<format>TabSeparated</format>
|
||||
<command>encode_text.py</command>
|
||||
<command_read_timeout>1000000</command_read_timeout>
|
||||
</function>
|
||||
</functions>
|
||||
```
|
||||
|
||||
Now we can simply use:
|
||||
|
||||
```sql
|
||||
SELECT encode_text('cat');
|
||||
```
|
||||
|
||||
The first use will be slow because the model needs to be loaded. But repeated queries will be fast. Then we copy the results to ``SET param_target=...`` and can easily write queries, as sketched below.
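A hedged sketch of that workflow, assuming the `laion_dataset` table defined earlier and a 512-element vector returned by `encode_text` (the array is abbreviated, and the exact quoting accepted for `Array` parameters may depend on the client version):

```sql
-- Copy the output of SELECT encode_text('cat') into a query parameter.
-- The real vector has 512 elements; only the first few are shown here.
SET param_target = [0.017, -0.023, 0.041];

-- The parameter is then used exactly like in the earlier search queries.
SELECT url, caption
FROM laion_dataset
ORDER BY L2Distance(text_embedding, {target:Array(Float32)})
LIMIT 8;
```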
|
||||
|
||||
### Image embeddings
|
||||
|
||||
For pictures, the process is similar, but you send the path instead of the picture (if necessary, you can implement a download picture with processing, but it will take longer)
|
||||
|
||||
`encode_picture.py`
|
||||
```python
|
||||
#!/usr/bin/python3
|
||||
import clip
|
||||
import torch
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import sys
|
||||
|
||||
if __name__ == '__main__':
|
||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
model, preprocess = clip.load("ViT-B/32", device=device)
|
||||
for text in sys.stdin:
|
||||
image = preprocess(Image.open(text.strip())).unsqueeze(0).to(device)
|
||||
with torch.no_grad():
|
||||
image_features = model.encode_image(image)[0].tolist()
|
||||
print(image_features)
|
||||
sys.stdout.flush()
|
||||
```
|
||||
|
||||
`encode_picture_function.xml`
|
||||
```xml
|
||||
<functions>
|
||||
<function>
|
||||
<type>executable_pool</type>
|
||||
<name>encode_picture</name>
|
||||
<return_type>Array(Float32)</return_type>
|
||||
<argument>
|
||||
<type>String</type>
|
||||
<name>path</name>
|
||||
</argument>
|
||||
<format>TabSeparated</format>
|
||||
<command>encode_picture.py</command>
|
||||
<command_read_timeout>1000000</command_read_timeout>
|
||||
</function>
|
||||
</functions>
|
||||
```
|
||||
|
||||
The query:
|
||||
```sql
|
||||
SELECT encode_picture('some/path/to/your/picture');
|
||||
```
|
@@ -22,6 +22,7 @@ functions in ClickHouse. The sample datasets include:
- The [Cell Towers dataset](../getting-started/example-datasets/cell-towers.md) imports a CSV into ClickHouse
- The [NYPD Complaint Data](../getting-started/example-datasets/nypd_complaint_data.md) demonstrates how to use data inference to simplify creating tables
- The ["What's on the Menu?" dataset](../getting-started/example-datasets/menus.md) has an example of denormalizing data
- The [Laion dataset](../getting-started/example-datasets/laion.md) has an example of [Approximate nearest neighbor search indexes](../engines/table-engines/mergetree-family/annindexes.md) usage
- [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1) provides examples of defining a schema and loading a small Hacker News dataset
- [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3) has examples of loading data from s3
- [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) shows how to generate random data if none of the above fit your needs.
@ -83,6 +83,7 @@ The supported formats are:
|
||||
| [RawBLOB](#rawblob) | ✔ | ✔ |
|
||||
| [MsgPack](#msgpack) | ✔ | ✔ |
|
||||
| [MySQLDump](#mysqldump) | ✔ | ✗ |
|
||||
| [Markdown](#markdown) | ✗ | ✔ |
|
||||
|
||||
|
||||
You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](/docs/en/operations/settings/settings-formats.md) section.
|
||||
@ -1209,6 +1210,7 @@ SELECT * FROM json_each_row_nested
|
||||
- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`.
|
||||
- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`.
|
||||
- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`.
|
||||
- [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - Ignore unknown keys in json object for named tuples. Default value - `false`.
|
||||
- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
|
||||
- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
|
||||
- [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.
|
||||
@ -2347,3 +2349,26 @@ FROM file(dump.sql, MySQLDump)
|
||||
│ 3 │
|
||||
└───┘
|
||||
```
|
||||
|
||||
## Markdown {#markdown}
|
||||
|
||||
You can export results using [Markdown](https://en.wikipedia.org/wiki/Markdown) format to generate output ready to be pasted into your `.md` files:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
number,
|
||||
number * 2
|
||||
FROM numbers(5)
|
||||
FORMAT Markdown
|
||||
```
|
||||
```results
|
||||
| number | multiply(number, 2) |
|
||||
|-:|-:|
|
||||
| 0 | 0 |
|
||||
| 1 | 2 |
|
||||
| 2 | 4 |
|
||||
| 3 | 6 |
|
||||
| 4 | 8 |
|
||||
```
|
||||
|
||||
The Markdown table will be generated automatically and can be used on Markdown-enabled platforms, like GitHub. This format is used only for output.
|
||||
|
@@ -56,6 +56,19 @@ sudo apt-get clean
sudo apt-get autoclean
```

### You Can't Get Packages With Yum Because Of Wrong Signature

Possible issue: the cache is wrong, maybe it was broken after the GPG key was updated in 2022-09.

The solution is to clean out the cache and lib directory for yum:

```
sudo find /var/lib/yum/repos/ /var/cache/yum/ -name 'clickhouse-*' -type d -exec rm -rf {} +
sudo rm -f /etc/yum.repos.d/clickhouse.repo
```

After that, follow the [install guide](../getting-started/install.md#from-rpm-packages).

## Connecting to the Server {#troubleshooting-accepts-no-connections}

Possible issues:
@@ -79,7 +79,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- ASYNC: backup or restore asynchronously
- PARTITIONS: a list of partitions to restore
- SETTINGS:
    - [`compression_method`](en/sql-reference/statements/create/table/#column-compression-codecs) and compression_level
    - [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level
    - `password` for the file on disk
    - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
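To make the `SETTINGS` list above concrete, a hedged sketch of how these options combine (the destination disk name, table name and password are placeholders):

```sql
-- Full backup with compression and a password protecting the file on disk.
BACKUP TABLE test.table TO Disk('backups', '1.zip')
SETTINGS compression_method = 'lzma', compression_level = 3, password = 'qwerty';

-- Incremental backup that references the previous one via base_backup.
BACKUP TABLE test.table TO Disk('backups', 'incremental-1.zip')
SETTINGS base_backup = Disk('backups', '1.zip'), password = 'qwerty';
```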
@@ -239,7 +239,7 @@ Example of configuration:
<clickhouse>
    <named_collections>
        <remote1>
            <host>localhost</host>
            <host>remote_host</host>
            <port>9000</port>
            <database>system</database>
            <user>foo</user>
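For context, a named collection such as `remote1` is typically consumed by passing its name to a table function. A sketch under the assumption that the collection can be used with the `remote` table function and that key overrides are accepted (`one` is chosen because the collection points at the `system` database):

```sql
-- Query the remote server described by the remote1 named collection.
-- Host, port, database and credentials come from the XML configuration above.
SELECT *
FROM remote(remote1, table = 'one')
LIMIT 1;
```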
@ -6,14 +6,14 @@ sidebar_label: Query Result Cache [experimental]
|
||||
|
||||
# Query Result Cache [experimental]
|
||||
|
||||
The query result cache allows to compute SELECT queries just once and to serve further executions of the same query directly from the cache.
|
||||
Depending on the type of the queries, this can dramatically reduce latency and resource consumption of the ClickHouse server.
|
||||
The query result cache allows to compute `SELECT` queries just once and to serve further executions of the same query directly from the
|
||||
cache. Depending on the type of the queries, this can dramatically reduce latency and resource consumption of the ClickHouse server.
|
||||
|
||||
## Background, Design and Limitations
|
||||
|
||||
Query result caches can generally be viewed as transactionally consistent or inconsistent.
|
||||
|
||||
- In transactionally consistent caches, the database invalidates (discards) cached query results if the result of the SELECT query changes
|
||||
- In transactionally consistent caches, the database invalidates (discards) cached query results if the result of the `SELECT` query changes
|
||||
or potentially changes. In ClickHouse, operations which change the data include inserts/updates/deletes in/of/from tables or collapsing
|
||||
merges. Transactionally consistent caching is especially suitable for OLTP databases, for example
|
||||
[MySQL](https://dev.mysql.com/doc/refman/5.6/en/query-cache.html) (which removed query result cache after v8.0) and
|
||||
@ -22,7 +22,7 @@ Query result caches can generally be viewed as transactionally consistent or inc
|
||||
assigned a validity period after which they expire (e.g. 1 minute) and that the underlying data changes only little during this period.
|
||||
This approach is overall more suitable for OLAP databases. As an example where transactionally inconsistent caching is sufficient,
|
||||
consider an hourly sales report in a reporting tool which is simultaneously accessed by multiple users. Sales data changes typically
|
||||
slowly enough that the database only needs to compute the report once (represented by the first SELECT query). Further queries can be
|
||||
slowly enough that the database only needs to compute the report once (represented by the first `SELECT` query). Further queries can be
|
||||
served directly from the query result cache. In this example, a reasonable validity period could be 30 min.
|
||||
|
||||
Transactionally inconsistent caching is traditionally provided by client tools or proxy packages interacting with the database. As a result,
|
||||
@ -36,32 +36,45 @@ processing) where wrong results are returned.
|
||||
|
||||
## Configuration Settings and Usage
|
||||
|
||||
Parameter [enable_experimental_query_result_cache](settings/settings.md#enable-experimental-query-result-cache) controls whether query
|
||||
results are inserted into / retrieved from the cache for the current query or session. For example, the first execution of query
|
||||
As long as the result cache is experimental it must be activated using the following configuration setting:
|
||||
|
||||
``` sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS enable_experimental_query_result_cache = true;
|
||||
```sql
|
||||
SET allow_experimental_query_result_cache = true;
|
||||
```
|
||||
|
||||
stores the query result into the query result cache. Subsequent executions of the same query (also with parameter
|
||||
`enable_experimental_query_result_cache = true`) will read the computed result directly from the cache.
|
||||
Afterwards, setting [use_query_result_cache](settings/settings.md#use-query-result-cache) can be used to control whether a specific query or
|
||||
all queries of the current session should utilize the query result cache. For example, the first execution of query
|
||||
|
||||
Sometimes, it is desirable to use the query result cache only passively, i.e. to allow reading from it but not writing into it (if the cache
|
||||
result is not stored yet). Parameter [enable_experimental_query_result_cache_passive_usage](settings/settings.md#enable-experimental-query-result-cache-passive-usage)
|
||||
instead of 'enable_experimental_query_result_cache' can be used for that.
|
||||
```sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS use_query_result_cache = true;
|
||||
```
|
||||
|
||||
For maximum control, it is generally recommended to provide settings "enable_experimental_query_result_cache" or
|
||||
"enable_experimental_query_result_cache_passive_usage" only with specific queries. It is also possible to enable caching at user or profile
|
||||
level but one should keep in mind that all SELECT queries may return a cached results, including monitoring or debugging queries to system
|
||||
tables.
|
||||
will store the query result in the query result cache. Subsequent executions of the same query (also with parameter `use_query_result_cache
|
||||
= true`) will read the computed result from the cache and return it immediately.
|
||||
|
||||
The way the cache is utilized can be configured in more detail using settings [enable_writes_to_query_result_cache](settings/settings.md#enable-writes-to-query-result-cache)
|
||||
and [enable_reads_from_query_result_cache](settings/settings.md#enable-reads-from-query-result-cache) (both `true` by default). The first
|
||||
setting controls whether query results are stored in the cache, whereas the second setting determines if the database should try to
|
||||
retrieve query results from the cache. For example, the following query will use the cache only passively, i.e. attempt to read from it but
|
||||
not store its result in it:
|
||||
|
||||
```sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS use_query_result_cache = true, enable_writes_to_query_result_cache = false;
|
||||
```
|
||||
|
||||
For maximum control, it is generally recommended to provide settings "use_query_result_cache", "enable_writes_to_query_result_cache" and
|
||||
"enable_reads_from_query_result_cache" only with specific queries. It is also possible to enable caching at user or profile level (e.g. via
|
||||
`SET use_query_result_cache = true`) but one should keep in mind that all `SELECT` queries including monitoring or debugging queries to
|
||||
system tables may return cached results then.
|
||||
|
||||
The query result cache can be cleared using statement `SYSTEM DROP QUERY RESULT CACHE`. The content of the query result cache is displayed
|
||||
in system table `SYSTEM.QUERY_RESULT_CACHE`. The numbers of query result cache hits and misses are shown as events "QueryResultCacheHits" and
|
||||
"QueryResultCacheMisses" in system table `SYSTEM.EVENTS`. Both counters are only updated for SELECT queries which run with settings
|
||||
"enable_experimental_query_result_cache = true" or "enable_experimental_query_result_cache_passive_usage = true". Other queries do not
|
||||
affect the cache miss counter.
|
||||
"QueryResultCacheMisses" in system table `SYSTEM.EVENTS`. Both counters are only updated for `SELECT` queries which run with setting
|
||||
"use_query_result_cache = true". Other queries do not affect the cache miss counter.
|
||||
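
To check both counters, a minimal sketch (the event names are the ones documented above, queried from the standard `system.events` table):

```sql
SELECT event, value
FROM system.events
WHERE event IN ('QueryResultCacheHits', 'QueryResultCacheMisses');
```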
|
||||
The query result cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can
|
||||
be changed (see below) but doing so is not recommended for security reasons.
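
If sharing is nevertheless required, a per-query sketch using the setting referenced at the end of this page (not recommended, as noted above):

```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_result_cache = true, query_result_cache_share_between_users = true;
```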
|
||||
@ -81,7 +94,7 @@ To define how long a query must run at least such that its result can be cached,
|
||||
``` sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS enable_experimental_query_result_cache = true, query_result_cache_min_query_duration = 5000;
|
||||
SETTINGS use_query_result_cache = true, query_result_cache_min_query_duration = 5000;
|
||||
```
|
||||
|
||||
is only cached if the query runs longer than 5 seconds. It is also possible to specify how often a query needs to run until its result is
|
||||
@ -96,4 +109,4 @@ setting [query_result_cache_store_results_of_queries_with_nondeterministic_funct
|
||||
Finally, entries in the query cache are not shared between users for security reasons. For example, user A must not be able to bypass a
|
||||
row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
|
||||
be marked accessible by other users (i.e. shared) by supplying setting
|
||||
[query_result_cache_share_between_users]{settings/settings.md#query-result-cache-share-between-users}.
|
||||
[query_result_cache_share_between_users](settings/settings.md#query-result-cache-share-between-users).
|
||||
|
@ -106,21 +106,20 @@ Possible values:
|
||||
Default value: 1.
|
||||
|
||||
The delay (in milliseconds) for `INSERT` is calculated by the formula:
|
||||
|
||||
```code
|
||||
max_k = parts_to_throw_insert - parts_to_delay_insert
|
||||
k = 1 + parts_count_in_partition - parts_to_delay_insert
|
||||
delay_milliseconds = pow(max_delay_to_insert * 1000, k / max_k)
|
||||
```
|
||||
For example, if a partition has 299 active parts and parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` is delayed for `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` milliseconds.
|
||||
|
||||
Starting from version 23.1, the formula has been changed to:
|
||||
```code
|
||||
allowed_parts_over_threshold = parts_to_throw_insert - parts_to_delay_insert + 1
|
||||
allowed_parts_over_threshold = parts_to_throw_insert - parts_to_delay_insert
|
||||
parts_over_threshold = parts_count_in_partition - parts_to_delay_insert + 1
|
||||
delay_milliseconds = max(min_delay_to_insert_ms, (max_delay_to_insert * 1000) * parts_over_threshold / allowed_parts_over_threshold)
|
||||
```
|
||||
|
||||
For example if a partition has 299 active parts and parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` is delayed for `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` milliseconds.
|
||||
For example, if a partition has 224 active parts and parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, min_delay_to_insert_ms = 10, `INSERT` is delayed for `max( 10, 1 * 1000 * (224 - 150 + 1) / (300 - 150) ) = 500` milliseconds.
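
The same arithmetic can be reproduced directly in SQL as a quick sanity check (a sketch using the example values above; `greatest` plays the role of `max` in the formula):

```sql
SELECT greatest(10, 1 * 1000 * (224 - 150 + 1) / (300 - 150)) AS delay_milliseconds; -- 500, matching the worked example
```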
|
||||
|
||||
## max_parts_in_total {#max-parts-in-total}
|
||||
|
||||
|
@ -500,6 +500,12 @@ Parse named tuple columns as JSON objects.
|
||||
|
||||
Enabled by default.
|
||||
|
||||
## input_format_json_ignore_unknown_keys_in_named_tuple {#input_format_json_ignore_unknown_keys_in_named_tuple}
|
||||
|
||||
Ignore unknown keys in JSON objects for named tuples.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
## input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple}
|
||||
|
||||
Insert default values for missing elements in JSON object while parsing named tuple.
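
A minimal sketch of how these settings could be exercised together (the table, its data, and the explicit combination of settings are illustrative assumptions):

```sql
CREATE TABLE named_tuple_demo (obj Tuple(a UInt32, b String)) ENGINE = Memory;

-- "c" is an unknown key and is skipped; "b" is missing and falls back to its type default
INSERT INTO named_tuple_demo
SETTINGS input_format_json_named_tuples_as_objects = 1,
         input_format_json_ignore_unknown_keys_in_named_tuple = 1,
         input_format_json_defaults_for_missing_elements_in_named_tuple = 1
FORMAT JSONEachRow {"obj": {"a": 1, "c": 42}}

SELECT * FROM named_tuple_demo;
```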
|
||||
|
@ -1355,27 +1355,39 @@ Possible values:
|
||||
|
||||
Default value: `3`.
|
||||
|
||||
## enable_experimental_query_result_cache {#enable-experimental-query-result-cache}
|
||||
## use_query_result_cache {#use-query-result-cache}
|
||||
|
||||
If turned on, results of SELECT queries are stored in and (if available) retrieved from the [query result cache](../query-result-cache.md).
|
||||
If turned on, SELECT queries may utilize the [query result cache](../query-result-cache.md). Parameters [enable_reads_from_query_result_cache](#enable-reads-from-query-result-cache)
|
||||
and [enable_writes_to_query_result_cache](#enable-writes-to-query-result-cache) control in more detail how the cache is used.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 - Disabled
- 1 - Enabled
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## enable_reads_from_query_result_cache {#enable-reads-from-query-result-cache}
|
||||
|
||||
If turned on, results of SELECT queries are retrieved from the [query result cache](../query-result-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 - Disabled
|
||||
- 1 - Enabled
|
||||
|
||||
Default value: `0`.
|
||||
Default value: `1`.
|
||||
|
||||
## enable_experimental_query_result_cache_passive_usage {#enable-experimental-query-result-cache-passive-usage}
|
||||
## enable_writes_to_query_result_cache {#enable-writes-to-query-result-cache}
|
||||
|
||||
If turned on, results of SELECT queries are (if available) retrieved from the [query result cache](../query-result-cache.md).
|
||||
If turned on, results of SELECT queries are stored in the [query result cache](../query-result-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 - Disabled
|
||||
- 1 - Enabled
|
||||
|
||||
Default value: `0`.
|
||||
Default value: `1`.
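
For example, the inverse of the passive (read-only) mode shown in the query result cache documentation, i.e. populating the cache without reading from it, would look like this sketch:

```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_result_cache = true, enable_reads_from_query_result_cache = false;
```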
|
||||
|
||||
## query_result_cache_store_results_of_queries_with_nondeterministic_functions {#query-result-cache-store-results-of-queries-with-nondeterministic-functions}
|
||||
|
||||
@ -1686,6 +1698,49 @@ SELECT * FROM test_table
|
||||
└───┘
|
||||
```
|
||||
|
||||
## insert_keeper_max_retries
|
||||
|
||||
Sets the maximum number of retries for ClickHouse Keeper (or ZooKeeper) requests during an insert into a replicated MergeTree table. Only Keeper requests which failed due to a network error, Keeper session timeout, or request timeout are considered for retries.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — Retries are disabled
|
||||
|
||||
Default value: 0
|
||||
|
||||
Keeper request retries are done after some timeout. The timeout is controlled by the following settings: `insert_keeper_retry_initial_backoff_ms`, `insert_keeper_retry_max_backoff_ms`.
|
||||
The first retry is done after the `insert_keeper_retry_initial_backoff_ms` timeout. The subsequent timeouts are calculated as follows:
|
||||
```
|
||||
timeout = min(insert_keeper_retry_max_backoff_ms, latest_timeout * 2)
|
||||
```
|
||||
|
||||
For example, if `insert_keeper_retry_initial_backoff_ms=100`, `insert_keeper_retry_max_backoff_ms=10000` and `insert_keeper_max_retries=8` then timeouts will be `100, 200, 400, 800, 1600, 3200, 6400, 10000`.
|
||||
|
||||
Apart from fault tolerance, the retries aim to provide a better user experience: they help avoid returning an error during INSERT execution if Keeper is restarted, for example, due to an upgrade.
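
As an illustration, the retry behaviour can be requested per query (the table name is a placeholder; the values are the ones from the example above):

```sql
INSERT INTO replicated_table
SETTINGS insert_keeper_max_retries = 8,
         insert_keeper_retry_initial_backoff_ms = 100,
         insert_keeper_retry_max_backoff_ms = 10000
VALUES (1);
```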
|
||||
|
||||
## insert_keeper_retry_initial_backoff_ms {#insert_keeper_retry_initial_backoff_ms}
|
||||
|
||||
Initial timeout (in milliseconds) to retry a failed Keeper request during INSERT query execution.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No timeout
|
||||
|
||||
Default value: 100
|
||||
|
||||
## insert_keeper_retry_max_backoff_ms {#insert_keeper_retry_max_backoff_ms}
|
||||
|
||||
Maximum timeout (in milliseconds) to retry a failed Keeper request during INSERT query execution.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — Maximum timeout is not limited
|
||||
|
||||
Default value: 10000
|
||||
|
||||
## max_network_bytes {#settings-max-network-bytes}
|
||||
|
||||
Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query.
|
||||
@ -1969,6 +2024,21 @@ Possible values:
|
||||
|
||||
Default value: 0.
|
||||
|
||||
## optimize_skip_merged_partitions {#optimize-skip-merged-partitions}
|
||||
|
||||
Enables or disables optimization for [OPTIMIZE TABLE ... FINAL](../../sql-reference/statements/optimize.md) query if there is only one part with level > 0 and it doesn't have expired TTL.
|
||||
|
||||
- `OPTIMIZE TABLE ... FINAL SETTINGS optimize_skip_merged_partitions=1`
|
||||
|
||||
By default, the `OPTIMIZE TABLE ... FINAL` query rewrites the part even if there is only a single part.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 1 - Enable optimization.
|
||||
- 0 - Disable optimization.
|
||||
|
||||
Default value: 0.
|
||||
|
||||
## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns}
|
||||
|
||||
Enables or disables optimization by transforming some functions to reading subcolumns. This reduces the amount of data to read.
|
||||
|
@ -19,11 +19,11 @@ Example:
|
||||
|
||||
``` sql
|
||||
SELECT maxMap(a, b)
|
||||
FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1]))
|
||||
FROM values('a Array(Char), b Array(Int64)', (['x', 'y'], [2, 2]), (['y', 'z'], [3, 1]))
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─maxMap(a, b)──────┐
|
||||
│ ([1,2,3],[2,2,1]) │
|
||||
└───────────────────┘
|
||||
┌─maxMap(a, b)───────────┐
|
||||
│ [['x','y','z'],[2,3,1]]│
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
@ -6,3 +6,7 @@ sidebar_position: 4
|
||||
# sum
|
||||
|
||||
Calculates the sum. Only works for numbers.
|
||||
|
||||
```
|
||||
SELECT sum(salary) FROM employees;
|
||||
```
|
||||
|
@ -28,15 +28,16 @@ Returns an array of the values with maximum approximate sum of weights.
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT topKWeighted(10)(number, number) FROM numbers(1000)
|
||||
SELECT topKWeighted(2)(k, w) FROM
|
||||
VALUES('k Char, w UInt64', ('y', 1), ('y', 1), ('x', 5), ('y', 1), ('z', 10))
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─topKWeighted(10)(number, number)──────────┐
|
||||
│ [999,998,997,996,995,994,993,992,991,990] │
|
||||
└───────────────────────────────────────────┘
|
||||
┌─topKWeighted(2)(k, w)──┐
|
||||
│ ['z','x'] │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
@ -290,15 +290,11 @@ This storage method works the same way as hashed and allows using date/time (arb
|
||||
Example: The table contains discounts for each advertiser in the format:
|
||||
|
||||
``` text
|
||||
+---------|-------------|-------------|------+
|
||||
| advertiser id | discount start date | discount end date | amount |
|
||||
+===============+=====================+===================+========+
|
||||
| 123 | 2015-01-01 | 2015-01-15 | 0.15 |
|
||||
+---------|-------------|-------------|------+
|
||||
| 123 | 2015-01-16 | 2015-01-31 | 0.25 |
|
||||
+---------|-------------|-------------|------+
|
||||
| 456 | 2015-01-01 | 2015-01-15 | 0.05 |
|
||||
+---------|-------------|-------------|------+
|
||||
┌─advertiser_id─┬─discount_start_date─┬─discount_end_date─┬─amount─┐
|
||||
│ 123 │ 2015-01-16 │ 2015-01-31 │ 0.25 │
|
||||
│ 123 │ 2015-01-01 │ 2015-01-15 │ 0.15 │
|
||||
│ 456 │ 2015-01-01 │ 2015-01-15 │ 0.05 │
|
||||
└───────────────┴─────────────────────┴───────────────────┴────────┘
|
||||
```
|
||||
|
||||
To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain elements `name` and `type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others).
|
||||
@ -310,16 +306,22 @@ Values of `range_min` and `range_max` should fit in `Int64` type.
|
||||
Example:
|
||||
|
||||
``` xml
|
||||
<layout>
|
||||
<range_hashed>
|
||||
<!-- Strategy for overlapping ranges (min/max). Default: min (return a matching range with the min(range_min -> range_max) value) -->
|
||||
<range_lookup_strategy>min</range_lookup_strategy>
|
||||
</range_hashed>
|
||||
</layout>
|
||||
<structure>
|
||||
<id>
|
||||
<name>Id</name>
|
||||
<name>advertiser_id</name>
|
||||
</id>
|
||||
<range_min>
|
||||
<name>first</name>
|
||||
<name>discount_start_date</name>
|
||||
<type>Date</type>
|
||||
</range_min>
|
||||
<range_max>
|
||||
<name>last</name>
|
||||
<name>discount_end_date</name>
|
||||
<type>Date</type>
|
||||
</range_max>
|
||||
...
|
||||
@ -328,17 +330,17 @@ Example:
|
||||
or
|
||||
|
||||
``` sql
|
||||
CREATE DICTIONARY somedict (
|
||||
id UInt64,
|
||||
first Date,
|
||||
last Date,
|
||||
advertiser_id UInt64
|
||||
CREATE DICTIONARY discounts_dict (
|
||||
advertiser_id UInt64,
|
||||
discount_start_date Date,
|
||||
discount_end_date Date,
|
||||
amount Float64
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(TABLE 'date_table'))
|
||||
SOURCE(CLICKHOUSE(TABLE 'discounts'))
|
||||
LIFETIME(MIN 1 MAX 1000)
|
||||
LAYOUT(RANGE_HASHED())
|
||||
RANGE(MIN first MAX last)
|
||||
LAYOUT(RANGE_HASHED(range_lookup_strategy 'max'))
|
||||
RANGE(MIN discount_start_date MAX discount_end_date)
|
||||
```
|
||||
|
||||
To work with these dictionaries, you need to pass an additional argument to the `dictGet` function, for which a range is selected:
|
||||
@ -349,16 +351,17 @@ dictGet('dict_name', 'attr_name', id, date)
|
||||
Query example:
|
||||
|
||||
``` sql
|
||||
SELECT dictGet('somedict', 'advertiser_id', 1, '2022-10-20 23:20:10.000'::DateTime64::UInt64);
|
||||
SELECT dictGet('discounts_dict', 'amount', 1, '2022-10-20'::Date);
|
||||
```
|
||||
|
||||
This function returns the value for the specified `id`s and the date range that includes the passed date.
|
||||
|
||||
Details of the algorithm:
|
||||
|
||||
- If the `id` is not found or a range is not found for the `id`, it returns the default value for the dictionary.
|
||||
- If there are overlapping ranges, it returns value for any (random) range.
|
||||
- If the range delimiter is `NULL` or an invalid date (such as 1900-01-01), the range is open. The range can be open on both sides.
|
||||
- If the `id` is not found or a range is not found for the `id`, it returns the default value of the attribute's type.
|
||||
- If there are overlapping ranges and `range_lookup_strategy=min`, it returns a matching range with the minimal `range_min`; if several ranges are found, it returns the range with the minimal `range_max`; if again several ranges are found (i.e. several ranges have the same `range_min` and `range_max`), it returns a random one of them.
- If there are overlapping ranges and `range_lookup_strategy=max`, it returns a matching range with the maximal `range_min`; if several ranges are found, it returns the range with the maximal `range_max`; if again several ranges are found (i.e. several ranges have the same `range_min` and `range_max`), it returns a random one of them.
|
||||
- If the `range_max` is `NULL`, the range is open. `NULL` is treated as the maximal possible value. For `range_min`, `1970-01-01` or `0` (-MAX_INT) can be used as the open value.
|
||||
|
||||
Configuration example:
|
||||
|
||||
@ -407,6 +410,108 @@ PRIMARY KEY Abcdef
|
||||
RANGE(MIN StartTimeStamp MAX EndTimeStamp)
|
||||
```
|
||||
|
||||
Configuration example with overlapping ranges and open ranges:
|
||||
|
||||
```sql
|
||||
CREATE TABLE discounts
|
||||
(
|
||||
advertiser_id UInt64,
|
||||
discount_start_date Date,
|
||||
discount_end_date Nullable(Date),
|
||||
amount Float64
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
INSERT INTO discounts VALUES (1, '2015-01-01', Null, 0.1);
|
||||
INSERT INTO discounts VALUES (1, '2015-01-15', Null, 0.2);
|
||||
INSERT INTO discounts VALUES (2, '2015-01-01', '2015-01-15', 0.3);
|
||||
INSERT INTO discounts VALUES (2, '2015-01-04', '2015-01-10', 0.4);
|
||||
INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-15', 0.5);
|
||||
INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-10', 0.6);
|
||||
|
||||
SELECT * FROM discounts ORDER BY advertiser_id, discount_start_date;
|
||||
┌─advertiser_id─┬─discount_start_date─┬─discount_end_date─┬─amount─┐
|
||||
│ 1 │ 2015-01-01 │ ᴺᵁᴸᴸ │ 0.1 │
|
||||
│ 1 │ 2015-01-15 │ ᴺᵁᴸᴸ │ 0.2 │
|
||||
│ 2 │ 2015-01-01 │ 2015-01-15 │ 0.3 │
|
||||
│ 2 │ 2015-01-04 │ 2015-01-10 │ 0.4 │
|
||||
│ 3 │ 1970-01-01 │ 2015-01-15 │ 0.5 │
|
||||
│ 3 │ 1970-01-01 │ 2015-01-10 │ 0.6 │
|
||||
└───────────────┴─────────────────────┴───────────────────┴────────┘
|
||||
|
||||
-- RANGE_LOOKUP_STRATEGY 'max'
|
||||
|
||||
CREATE DICTIONARY discounts_dict
|
||||
(
|
||||
advertiser_id UInt64,
|
||||
discount_start_date Date,
|
||||
discount_end_date Nullable(Date),
|
||||
amount Float64
|
||||
)
|
||||
PRIMARY KEY advertiser_id
|
||||
SOURCE(CLICKHOUSE(TABLE discounts))
|
||||
LIFETIME(MIN 600 MAX 900)
|
||||
LAYOUT(RANGE_HASHED(RANGE_LOOKUP_STRATEGY 'max'))
|
||||
RANGE(MIN discount_start_date MAX discount_end_date);
|
||||
|
||||
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res;
|
||||
┌─res─┐
|
||||
│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null
|
||||
└─────┘
|
||||
|
||||
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
|
||||
┌─res─┐
|
||||
│ 0.2 │ -- two ranges are matching, range_min 2015-01-15 (0.2) is bigger than 2015-01-01 (0.1)
|
||||
└─────┘
|
||||
|
||||
select dictGet('discounts_dict', 'amount', 2, toDate('2015-01-06')) res;
|
||||
┌─res─┐
|
||||
│ 0.4 │ -- two ranges are matching, range_min 2015-01-04 (0.4) is bigger than 2015-01-01 (0.3)
|
||||
└─────┘
|
||||
|
||||
select dictGet('discounts_dict', 'amount', 3, toDate('2015-01-01')) res;
|
||||
┌─res─┐
|
||||
│ 0.5 │ -- two ranges are matching, range_min are equal, 2015-01-15 (0.5) is bigger than 2015-01-10 (0.6)
|
||||
└─────┘
|
||||
|
||||
DROP DICTIONARY discounts_dict;
|
||||
|
||||
-- RANGE_LOOKUP_STRATEGY 'min'
|
||||
|
||||
CREATE DICTIONARY discounts_dict
|
||||
(
|
||||
advertiser_id UInt64,
|
||||
discount_start_date Date,
|
||||
discount_end_date Nullable(Date),
|
||||
amount Float64
|
||||
)
|
||||
PRIMARY KEY advertiser_id
|
||||
SOURCE(CLICKHOUSE(TABLE discounts))
|
||||
LIFETIME(MIN 600 MAX 900)
|
||||
LAYOUT(RANGE_HASHED(RANGE_LOOKUP_STRATEGY 'min'))
|
||||
RANGE(MIN discount_start_date MAX discount_end_date);
|
||||
|
||||
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res;
|
||||
┌─res─┐
|
||||
│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null
|
||||
└─────┘
|
||||
|
||||
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
|
||||
┌─res─┐
|
||||
│ 0.1 │ -- two ranges are matching, range_min 2015-01-01 (0.1) is less than 2015-01-15 (0.2)
|
||||
└─────┘
|
||||
|
||||
select dictGet('discounts_dict', 'amount', 2, toDate('2015-01-06')) res;
|
||||
┌─res─┐
|
||||
│ 0.3 │ -- two ranges are matching, range_min 2015-01-01 (0.3) is less than 2015-01-04 (0.4)
|
||||
└─────┘
|
||||
|
||||
select dictGet('discounts_dict', 'amount', 3, toDate('2015-01-01')) res;
|
||||
┌─res─┐
|
||||
│ 0.6 │ -- two ranges are matching, range_min are equal, 2015-01-10 (0.6) is less than 2015-01-15 (0.5)
|
||||
└─────┘
|
||||
```
|
||||
|
||||
### complex_key_range_hashed
|
||||
|
||||
The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values (see [range_hashed](#range-hashed)). This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md).
|
||||
|
@ -209,10 +209,25 @@ Aliases: `DAYOFMONTH`, `DAY`.
|
||||
|
||||
## toDayOfWeek
|
||||
|
||||
Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7).
|
||||
Converts a date or date with time to a UInt8 number containing the number of the day of the week.
|
||||
|
||||
The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is omitted, the default mode is 0. The time zone of the date can be specified as the third argument.
|
||||
|
||||
| Mode | First day of week | Range |
|
||||
|------|-------------------|------------------------------------------------|
|
||||
| 0 | Monday | 1-7: Monday = 1, Tuesday = 2, ..., Sunday = 7 |
|
||||
| 1 | Monday | 0-6: Monday = 0, Tuesday = 1, ..., Sunday = 6 |
|
||||
| 2 | Sunday | 0-6: Sunday = 0, Monday = 1, ..., Saturday = 6 |
|
||||
| 3 | Sunday | 1-7: Sunday = 1, Monday = 2, ..., Saturday = 7 |
|
||||
|
||||
Alias: `DAYOFWEEK`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
toDayOfWeek(t[, mode[, timezone]])
|
||||
```
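
For instance, 2023-01-09 falls on a Monday, so according to the mode table above the following should return 1, 0 and 1 respectively:

```sql
SELECT
    toDayOfWeek(toDate('2023-01-09'))    AS mode_0,
    toDayOfWeek(toDate('2023-01-09'), 1) AS mode_1,
    toDayOfWeek(toDate('2023-01-09'), 2) AS mode_2;
```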
|
||||
|
||||
## toHour
|
||||
|
||||
Converts a date with time to a UInt8 number containing the number of the hour in 24-hour time (0-23).
|
||||
@ -316,11 +331,17 @@ If `toLastDayOfMonth` is called with an argument of type `Date` greater then 214
|
||||
Rounds down a date, or date with time, to the nearest Monday.
|
||||
Returns the date.
|
||||
|
||||
## toStartOfWeek(t\[,mode\])
|
||||
## toStartOfWeek
|
||||
|
||||
Rounds down a date, or date with time, to the nearest Sunday or Monday by mode.
|
||||
Rounds a date or date with time down to the nearest Sunday or Monday.
|
||||
Returns the date.
|
||||
The mode argument works exactly like the mode argument to toWeek(). For the single-argument syntax, a mode value of 0 is used.
|
||||
The mode argument works exactly like the mode argument in function `toWeek()`. If no mode is specified, mode 0 is assumed.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
toStartOfWeek(t[, mode[, timezone]])
|
||||
```
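
For instance, 2023-01-10 is a Tuesday, so mode 0 should round it down to Sunday 2023-01-08 and mode 1 to Monday 2023-01-09:

```sql
SELECT
    toStartOfWeek(toDate('2023-01-10'))    AS sunday_start,
    toStartOfWeek(toDate('2023-01-10'), 1) AS monday_start;
```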
|
||||
|
||||
## toStartOfDay
|
||||
|
||||
@ -455,10 +476,12 @@ Converts a date, or date with time, to a UInt16 number containing the ISO Year n
|
||||
|
||||
Converts a date, or date with time, to a UInt8 number containing the ISO Week number.
|
||||
|
||||
## toWeek(date\[,mode\])
|
||||
## toWeek
|
||||
|
||||
This function returns the week number for date or datetime. The two-argument form of `toWeek()` enables you to specify whether the week starts on Sunday or Monday and whether the return value should be in the range from 0 to 53 or from 1 to 53. If the mode argument is omitted, the default mode is 0.
|
||||
|
||||
`toISOWeek()` is a compatibility function that is equivalent to `toWeek(date,3)`.
|
||||
|
||||
This function returns the week number for date or datetime. The two-argument form of toWeek() enables you to specify whether the week starts on Sunday or Monday and whether the return value should be in the range from 0 to 53 or from 1 to 53. If the mode argument is omitted, the default mode is 0.
|
||||
`toISOWeek()`is a compatibility function that is equivalent to `toWeek(date,3)`.
|
||||
The following table describes how the mode argument works.
|
||||
|
||||
| Mode | First day of week | Range | Week 1 is the first week … |
|
||||
@ -482,13 +505,15 @@ For mode values with a meaning of “with 4 or more days this year,” weeks are
|
||||
|
||||
For mode values with a meaning of “contains January 1”, the week that contains January 1 is week 1. It does not matter how many days in the new year the week contained, even if it contained only one day.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
toWeek(date, [, mode][, Timezone])
|
||||
toWeek(t[, mode[, time_zone]])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `date` – Date or DateTime.
|
||||
- `t` – Date or DateTime.
|
||||
- `mode` – Optional parameter, Range of values is \[0,9\], default is 0.
|
||||
- `Timezone` – Optional parameter, it behaves like any other conversion function.
|
||||
|
||||
@ -504,13 +529,19 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we
|
||||
└────────────┴───────┴───────┴───────┘
|
||||
```
|
||||
|
||||
## toYearWeek(date\[,mode\])
|
||||
## toYearWeek
|
||||
|
||||
Returns year and week for a date. The year in the result may be different from the year in the date argument for the first and the last week of the year.
|
||||
|
||||
The mode argument works exactly like the mode argument to toWeek(). For the single-argument syntax, a mode value of 0 is used.
|
||||
The mode argument works exactly like the mode argument to `toWeek()`. For the single-argument syntax, a mode value of 0 is used.
|
||||
|
||||
`toISOYear()`is a compatibility function that is equivalent to `intDiv(toYearWeek(date,3),100)`.
|
||||
`toISOYear()` is a compatibility function that is equivalent to `intDiv(toYearWeek(date,3),100)`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
toYearWeek(t[, mode[, timezone]])
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
@ -1263,12 +1294,12 @@ Using replacement fields, you can define a pattern for the resulting string.
|
||||
|
||||
|
||||
| Placeholder | Description | Presentation | Examples |
|
||||
| ----------- | ----------- | ------------- | -------- |
|
||||
| ----------- | ---------------------------------------- | ------------- | ---------------------------------- |
|
||||
| G | era | text | AD |
|
||||
| C | century of era (>=0) | number | 20 |
|
||||
| Y | year of era (>=0) | year | 1996 |
|
||||
| x | weekyear(not supported yet) | year | 1996 |
|
||||
| w | week of weekyear(not supported yet) | number | 27 |
|
||||
| x | weekyear (not supported yet) | year | 1996 |
|
||||
| w | week of weekyear (not supported yet) | number | 27 |
|
||||
| e | day of week | number | 2 |
|
||||
| E | day of week | text | Tuesday; Tue |
|
||||
| y | year | year | 1996 |
|
||||
@ -1282,10 +1313,10 @@ Using replacement fields, you can define a pattern for the resulting string.
|
||||
| k | clockhour of day (1~24) | number | 24 |
|
||||
| m | minute of hour | number | 30 |
|
||||
| s | second of minute | number | 55 |
|
||||
| S | fraction of second(not supported yet) | number | 978 |
|
||||
| z | time zone(short name not supported yet) | text | Pacific Standard Time; PST |
|
||||
| Z | time zone offset/id(not supported yet) | zone | -0800; -08:00; America/Los_Angeles |
|
||||
| ' | escape for text | delimiter| |
|
||||
| S | fraction of second (not supported yet) | number | 978 |
|
||||
| z | time zone (short name not supported yet) | text | Pacific Standard Time; PST |
|
||||
| Z | time zone offset/id (not supported yet) | zone | -0800; -08:00; America/Los_Angeles |
|
||||
| ' | escape for text | delimiter | |
|
||||
| '' | single quote | literal | ' |
|
||||
|
||||
**Example**
|
||||
|
@ -45,37 +45,38 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')
|
||||
|
||||
Calculates the MD4 from a string and returns the resulting set of bytes as FixedString(16).
|
||||
|
||||
## MD5
|
||||
## MD5 {#hash_functions-md5}
|
||||
|
||||
Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16).
|
||||
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
|
||||
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
|
||||
|
||||
## sipHash64
|
||||
## sipHash64 {#hash_functions-siphash64}
|
||||
|
||||
Produces a 64-bit [SipHash](https://131002.net/siphash/) hash value.
|
||||
Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
|
||||
|
||||
```sql
|
||||
sipHash64(par1,...)
|
||||
```
|
||||
|
||||
This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) function.
|
||||
This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) hash function.
|
||||
|
||||
Function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm:
|
||||
The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:
|
||||
|
||||
1. After hashing all the input parameters, the function gets the array of hashes.
|
||||
2. Function takes the first and the second elements and calculates a hash for the array of them.
|
||||
3. Then the function takes the hash value, calculated at the previous step, and the third element of the initial hash array, and calculates a hash for the array of them.
|
||||
4. The previous step is repeated for all the remaining elements of the initial hash array.
|
||||
1. The first and the second hash values are concatenated to an array which is hashed.
|
||||
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
|
||||
3. This calculation is repeated for all remaining hash values of the original input.
|
||||
|
||||
**Arguments**
|
||||
|
||||
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
|
||||
The function takes a variable number of input parameters of any of the [supported data types](/docs/en/sql-reference/data-types/index.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value.
|
||||
|
||||
Note that the calculated hash values may be equal for the same input values of different argument types. This affects, for example, integer types of different sizes, named and unnamed `Tuple` with the same data, and `Map` and the corresponding `Array(Tuple(key, value))` type with the same data.
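
A quick illustration of that caveat: both arguments below reinterpret to the same byte string, so the comparison should return 1:

```sql
SELECT sipHash64(CAST(1 AS UInt8)) = sipHash64(CAST(1 AS UInt64)) AS same_hash;
```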
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
@ -84,13 +85,45 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00
|
||||
|
||||
```response
|
||||
┌──────────────SipHash─┬─type───┐
|
||||
│ 13726873534472839665 │ UInt64 │
|
||||
│ 11400366955626497465 │ UInt64 │
|
||||
└──────────────────────┴────────┘
|
||||
```
|
||||
|
||||
## sipHash64Keyed
|
||||
|
||||
Same as [sipHash64](#hash_functions-siphash64) but additionally takes an explicit key argument instead of using a fixed key.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
sipHash64Keyed((k0, k1), par1,...)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
Same as [sipHash64](#hash_functions-siphash64), but the first argument is a tuple of two UInt64 values representing the key.
|
||||
|
||||
**Returned value**
|
||||
|
||||
A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT sipHash64Keyed((506097522914230528, 1084818905618843912), array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS SipHash, toTypeName(SipHash) AS type;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─────────────SipHash─┬─type───┐
|
||||
│ 8017656310194184311 │ UInt64 │
|
||||
└─────────────────────┴────────┘
|
||||
```
|
||||
|
||||
## sipHash128
|
||||
|
||||
Produces a 128-bit [SipHash](https://131002.net/siphash/) hash value. Differs from [sipHash64](#hash_functions-siphash64) in that the final xor-folding state is done up to 128 bits.
|
||||
Like [sipHash64](#hash_functions-siphash64) but produces a 128-bit hash value, i.e. the final xor-folding state is done up to 128 bits.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -100,13 +133,11 @@ sipHash128(par1,...)
|
||||
|
||||
**Arguments**
|
||||
|
||||
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
|
||||
Same as for [sipHash64](#hash_functions-siphash64).
|
||||
|
||||
**Returned value**
|
||||
|
||||
A 128-bit `SipHash` hash value.
|
||||
|
||||
Type: [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md).
|
||||
A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -124,6 +155,40 @@ Result:
|
||||
└──────────────────────────────────┘
|
||||
```
|
||||
|
||||
## sipHash128Keyed
|
||||
|
||||
Same as [sipHash128](#hash_functions-siphash128) but additionally takes an explicit key argument instead of using a fixed key.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
sipHash128Keyed((k0, k1), par1,...)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
Same as [sipHash128](#hash_functions-siphash128), but the first argument is a tuple of two UInt64 values representing the key.
|
||||
|
||||
**Returned value**
|
||||
|
||||
A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT hex(sipHash128Keyed((506097522914230528, 1084818905618843912),'foo', '\x01', 3));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─hex(sipHash128Keyed((506097522914230528, 1084818905618843912), 'foo', '', 3))─┐
|
||||
│ B8467F65C8B4CFD9A5F8BD733917D9BF │
|
||||
└───────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## cityHash64
|
||||
|
||||
Produces a 64-bit [CityHash](https://github.com/google/cityhash) hash value.
|
||||
|
@ -95,6 +95,32 @@ Result:
|
||||
└───────────────────────────────┘
|
||||
```
|
||||
|
||||
If argument `needle` is empty, the following rules apply:
|
||||
- if no `start_pos` was specified: return `1`
|
||||
- if `start_pos = 0`: return `1`
|
||||
- if `start_pos >= 1` and `start_pos <= length(haystack) + 1`: return `start_pos`
|
||||
- otherwise: return `0`
|
||||
|
||||
The same rules also apply to functions `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8`.
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
position('abc', ''),
|
||||
position('abc', '', 0),
|
||||
position('abc', '', 1),
|
||||
position('abc', '', 2),
|
||||
position('abc', '', 3),
|
||||
position('abc', '', 4),
|
||||
position('abc', '', 5)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─position('abc', '')─┬─position('abc', '', 0)─┬─position('abc', '', 1)─┬─position('abc', '', 2)─┬─position('abc', '', 3)─┬─position('abc', '', 4)─┬─position('abc', '', 5)─┐
|
||||
│ 1 │ 1 │ 1 │ 2 │ 3 │ 4 │ 0 │
|
||||
└─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
**Examples for POSITION(needle IN haystack) syntax**
|
||||
|
||||
Query:
|
||||
|
@ -12,7 +12,7 @@ The following operations are available:
|
||||
|
||||
- `ALTER TABLE [db].table_name [ON CLUSTER cluster] ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - Adds index description to tables metadata.
|
||||
|
||||
- `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk.
|
||||
- `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations).
|
||||
|
||||
- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data.
|
||||
|
||||
|
@ -110,7 +110,7 @@ LIFETIME(MIN 0 MAX 1000)
|
||||
### Create a dictionary from a file available by HTTP(S)
|
||||
|
||||
```sql
|
||||
statement: CREATE DICTIONARY default.taxi_zone_dictionary
|
||||
CREATE DICTIONARY default.taxi_zone_dictionary
|
||||
(
|
||||
`LocationID` UInt16 DEFAULT 0,
|
||||
`Borough` String,
|
||||
|
@ -293,7 +293,7 @@ These codecs are designed to make compression more effective by using specific f
|
||||
|
||||
#### Gorilla
|
||||
|
||||
`Gorilla` — Calculates XOR between current and previous value and writes it in compact binary form. Efficient when storing a series of floating point values that change slowly, because the best compression rate is achieved when neighboring values are binary equal. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see Compressing Values in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
|
||||
`Gorilla` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series change, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078).
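
For illustration, a sketch of a column declaration that could benefit from this codec (table and column names are assumptions):

```sql
CREATE TABLE sensor_readings
(
    ts    DateTime,
    value Float64 CODEC(Gorilla)  -- slowly changing float series compress well with Gorilla
)
ENGINE = MergeTree
ORDER BY ts;
```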
|
||||
|
||||
#### FPC
|
||||
|
||||
|
@ -23,7 +23,7 @@ When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engin
|
||||
|
||||
- If `OPTIMIZE` does not perform a merge for any reason, it does not notify the client. To enable notifications, use the [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) setting.
|
||||
- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](alter/partition.md#how-to-set-partition-expression).
|
||||
- If you specify `FINAL`, optimization is performed even when all the data is already in one part. Also merge is forced even if concurrent merges are performed.
|
||||
- If you specify `FINAL`, optimization is performed even when all the data is already in one part. You can control this behaviour with [optimize_skip_merged_partitions](../../operations/settings/settings.md#optimize-skip-merged-partitions). Also, the merge is forced even if concurrent merges are performed.
|
||||
- If you specify `DEDUPLICATE`, then completely identical rows (unless by-clause is specified) will be deduplicated (all columns are compared); this makes sense only for the MergeTree engine.
|
||||
|
||||
You can specify how long (in seconds) to wait for inactive replicas to execute `OPTIMIZE` queries by the [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout) setting.
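
For example, to let `OPTIMIZE ... FINAL` skip partitions that already consist of a single merged part (as described above; the table name is a placeholder):

```sql
OPTIMIZE TABLE my_table FINAL SETTINGS optimize_skip_merged_partitions = 1;
```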
|
||||
|
@ -362,3 +362,15 @@ Allows to drop filesystem cache.
|
||||
```sql
|
||||
SYSTEM DROP FILESYSTEM CACHE
|
||||
```
|
||||
|
||||
### SYNC FILE CACHE
|
||||
|
||||
:::note
|
||||
This command is heavyweight and has potential for misuse.
|
||||
:::
|
||||
|
||||
It performs the `sync` syscall.
|
||||
|
||||
```sql
|
||||
SYSTEM SYNC FILE CACHE
|
||||
```
|
||||
|
@ -8,7 +8,7 @@ sidebar_label: generateRandom
|
||||
|
||||
Generates random data with given schema.
|
||||
Allows to populate test tables with data.
|
||||
Supports all data types that can be stored in table except `LowCardinality` and `AggregateFunction`.
|
||||
Not all types are supported.
|
||||
|
||||
``` sql
|
||||
generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]])
|
||||
@ -18,7 +18,7 @@ generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_stri
|
||||
|
||||
- `name` — Name of corresponding column.
|
||||
- `TypeName` — Type of corresponding column.
|
||||
- `max_array_length` — Maximum array length for all generated arrays. Defaults to `10`.
|
||||
- `max_array_length` — Maximum elements for all generated arrays or maps. Defaults to `10`.
|
||||
- `max_string_length` — Maximum string length for all generated strings. Defaults to `10`.
|
||||
- `random_seed` — Specify random seed manually to produce stable results. If NULL — seed is randomly generated.
|
||||
|
||||
|
@ -27,7 +27,7 @@ $ cat /etc/clickhouse-server/config.d/named_collections.xml
|
||||
|
||||
## Именованные соединения для доступа к S3
|
||||
|
||||
Описание параметров смотри [Табличная Функция S3](../sql-reference/table-functions/s3.md).
|
||||
Описание параметров смотрите [Табличная Функция S3](../sql-reference/table-functions/s3.md).
|
||||
|
||||
Пример конфигурации:
|
||||
```xml
|
||||
@ -75,7 +75,7 @@ SELECT * FROM s3_engine_table LIMIT 3;
|
||||
|
||||
## Пример использования именованных соединений с базой данных MySQL
|
||||
|
||||
Описание параметров смотри [mysql](../sql-reference/table-functions/mysql.md).
|
||||
Описание параметров смотрите [mysql](../sql-reference/table-functions/mysql.md).
|
||||
|
||||
Пример конфигурации:
|
||||
```xml
|
||||
@ -147,7 +147,7 @@ SELECT dictGet('dict', 'B', 2);
|
||||
|
||||
## Пример использования именованных соединений с базой данных PostgreSQL
|
||||
|
||||
Описание параметров смотри [postgresql](../sql-reference/table-functions/postgresql.md).
|
||||
Описание параметров смотрите [postgresql](../sql-reference/table-functions/postgresql.md).
|
||||
|
||||
Пример конфигурации:
|
||||
```xml
|
||||
@ -227,3 +227,58 @@ SELECT dictGet('dict', 'b', 2);
|
||||
│ two │
|
||||
└─────────────────────────┘
|
||||
```
|
||||
|
||||
## Пример использования именованных соединений с удалённой базой данных ClickHouse
|
||||
|
||||
Описание параметров смотрите [remote](../sql-reference/table-functions/remote.md).
|
||||
|
||||
Пример конфигурации:
|
||||
```xml
|
||||
<clickhouse>
|
||||
<named_collections>
|
||||
<remote1>
|
||||
<host>remote_host</host>
|
||||
<port>9000</port>
|
||||
<database>system</database>
|
||||
<user>foo</user>
|
||||
<password>secret</password>
|
||||
</remote1>
|
||||
</named_collections>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
### Пример использования именованных соединений с табличной функцией remote/remoteSecure
|
||||
|
||||
```sql
|
||||
SELECT * FROM remote(remote1, table = one);
|
||||
┌─dummy─┐
|
||||
│ 0 │
|
||||
└───────┘
|
||||
|
||||
SELECT * FROM remote(remote1, database = merge(system, '^one'));
|
||||
┌─dummy─┐
|
||||
│ 0 │
|
||||
└───────┘
|
||||
|
||||
INSERT INTO FUNCTION remote(remote1, database = default, table = test) VALUES (1,'a');
|
||||
|
||||
SELECT * FROM remote(remote1, database = default, table = test);
|
||||
┌─a─┬─b─┐
|
||||
│ 1 │ a │
|
||||
└───┴───┘
|
||||
```
|
||||
|
||||
### Пример использования именованных соединений с внешним словарем с источником удалённым сервером ClickHouse
|
||||
|
||||
```sql
|
||||
CREATE DICTIONARY dict(a Int64, b String)
|
||||
PRIMARY KEY a
|
||||
SOURCE(CLICKHOUSE(NAME remote1 TABLE test DB default))
|
||||
LIFETIME(MIN 1 MAX 2)
|
||||
LAYOUT(HASHED());
|
||||
|
||||
SELECT dictGet('dict', 'b', 1);
|
||||
┌─dictGet('dict', 'b', 1)─┐
|
||||
│ a │
|
||||
└─────────────────────────┘
|
||||
```
|
||||
|
@ -1997,6 +1997,21 @@ SELECT * FROM test_table
|
||||
|
||||
Значение по умолчанию: 0.
|
||||
|
||||
## optimize_skip_merged_partitions {#optimize-skip-merged-partitions}
|
||||
|
||||
Включает или отключает оптимизацию для запроса [OPTIMIZE TABLE ... FINAL](../../sql-reference/statements/optimize.md), когда есть только один парт с level > 0 и неистекший TTL.
|
||||
|
||||
- `OPTIMIZE TABLE ... FINAL SETTINGS optimize_skip_merged_partitions=1`
|
||||
|
||||
По умолчанию, `OPTIMIZE TABLE ... FINAL` перезапишет даже один парт.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- 1 - Включена
|
||||
- 0 - Выключена
|
||||
|
||||
Значение по умолчанию: 0.
|
||||
|
||||
## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns}
|
||||
|
||||
Включает или отключает оптимизацию путем преобразования некоторых функций к чтению подстолбцов, таким образом уменьшая объем данных для чтения.
|
||||
|
@ -24,7 +24,7 @@ OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION I
|
||||
- По умолчанию, если запросу `OPTIMIZE` не удалось выполнить слияние, то
|
||||
ClickHouse не оповещает клиента. Чтобы включить оповещения, используйте настройку [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop).
|
||||
- Если указать `PARTITION`, то оптимизация выполняется только для указанной партиции. [Как задавать имя партиции в запросах](alter/index.md#alter-how-to-specify-part-expr).
|
||||
- Если указать `FINAL`, то оптимизация выполняется даже в том случае, если все данные уже лежат в одном куске данных. Кроме того, слияние является принудительным, даже если выполняются параллельные слияния.
|
||||
- Если указать `FINAL`, то оптимизация выполняется даже в том случае, если все данные уже лежат в одном куске данных. Можно контролировать с помощью настройки [optimize_skip_merged_partitions](../../operations/settings/settings.md#optimize-skip-merged-partitions). Кроме того, слияние является принудительным, даже если выполняются параллельные слияния.
|
||||
- Если указать `DEDUPLICATE`, то произойдет схлопывание полностью одинаковых строк (сравниваются значения во всех столбцах), имеет смысл только для движка MergeTree.
|
||||
|
||||
Вы можете указать время ожидания (в секундах) выполнения запросов `OPTIMIZE` для неактивных реплик с помощью настройки [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout).
|
||||
@ -197,3 +197,4 @@ SELECT * FROM example;
|
||||
│ 1 │ 1 │ 2 │ 3 │
|
||||
└─────────────┴───────────────┴───────┴───────────────┘
|
||||
```
|
||||
|
||||
|
@ -277,7 +277,7 @@ private:
|
||||
}
|
||||
|
||||
if (queries.empty())
|
||||
throw Exception("Empty list of queries.", ErrorCodes::EMPTY_DATA_PASSED);
|
||||
throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Empty list of queries.");
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -127,6 +127,69 @@ void Client::showWarnings()
|
||||
}
|
||||
}
|
||||
|
||||
void Client::parseConnectionsCredentials()
|
||||
{
|
||||
/// It is not possible to correctly handle multiple --host --port options.
|
||||
if (hosts_and_ports.size() >= 2)
|
||||
return;
|
||||
|
||||
String host;
|
||||
std::optional<UInt16> port;
|
||||
if (hosts_and_ports.empty())
|
||||
{
|
||||
host = config().getString("host", "localhost");
|
||||
if (config().has("port"))
|
||||
port = config().getInt("port");
|
||||
}
|
||||
else
|
||||
{
|
||||
host = hosts_and_ports.front().host;
|
||||
port = hosts_and_ports.front().port;
|
||||
}
|
||||
|
||||
Strings keys;
|
||||
config().keys("connections_credentials", keys);
|
||||
for (const auto & connection : keys)
|
||||
{
|
||||
const String & prefix = "connections_credentials." + connection;
|
||||
|
||||
const String & connection_name = config().getString(prefix + ".name", "");
|
||||
if (connection_name != host)
|
||||
continue;
|
||||
|
||||
String connection_hostname;
|
||||
if (config().has(prefix + ".hostname"))
|
||||
connection_hostname = config().getString(prefix + ".hostname");
|
||||
else
|
||||
connection_hostname = connection_name;
|
||||
|
||||
/// Set "host" unconditionally (since it is used as a "name"), while
|
||||
/// other options only if they are not set yet (config.xml/cli
|
||||
/// options).
|
||||
config().setString("host", connection_hostname);
|
||||
if (!hosts_and_ports.empty())
|
||||
hosts_and_ports.front().host = connection_hostname;
|
||||
|
||||
if (config().has(prefix + ".port") && !port.has_value())
|
||||
config().setInt("port", config().getInt(prefix + ".port"));
|
||||
if (config().has(prefix + ".secure") && !config().has("secure"))
|
||||
config().setBool("secure", config().getBool(prefix + ".secure"));
|
||||
if (config().has(prefix + ".user") && !config().has("user"))
|
||||
config().setString("user", config().getString(prefix + ".user"));
|
||||
if (config().has(prefix + ".password") && !config().has("password"))
|
||||
config().setString("password", config().getString(prefix + ".password"));
|
||||
if (config().has(prefix + ".database") && !config().has("database"))
|
||||
config().setString("database", config().getString(prefix + ".database"));
|
||||
if (config().has(prefix + ".history_file") && !config().has("history_file"))
|
||||
{
|
||||
String history_file = config().getString(prefix + ".history_file");
|
||||
if (history_file.starts_with("~") && !home_path.empty())
|
||||
history_file = home_path + "/" + history_file.substr(1);
|
||||
config().setString("history_file", history_file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Make query to get all server warnings
|
||||
std::vector<String> Client::loadWarningMessages()
|
||||
{
|
||||
@ -216,6 +279,8 @@ void Client::initialize(Poco::Util::Application & self)
|
||||
if (env_password)
|
||||
config().setString("password", env_password);
|
||||
|
||||
parseConnectionsCredentials();
|
||||
|
||||
// global_context->setApplicationType(Context::ApplicationType::CLIENT);
|
||||
global_context->setQueryParameters(query_parameters);
|
||||
|
||||
@ -719,7 +784,7 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
// uniformity.
|
||||
// Surprisingly, this is a client exception, because we get the
|
||||
// server exception w/o throwing (see onReceiveException()).
|
||||
client_exception = std::make_unique<Exception>(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
|
||||
client_exception = std::make_unique<Exception>(getCurrentExceptionMessageAndPattern(print_stack_trace), getCurrentExceptionCode());
|
||||
have_error = true;
|
||||
}
|
||||
|
||||
@ -854,7 +919,7 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
client_exception = std::make_unique<Exception>(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
|
||||
client_exception = std::make_unique<Exception>(getCurrentExceptionMessageAndPattern(print_stack_trace), getCurrentExceptionCode());
|
||||
have_error = true;
|
||||
}
|
||||
|
||||
|
@ -47,6 +47,7 @@ protected:
|
||||
private:
|
||||
void printChangedSettings() const;
|
||||
void showWarnings();
|
||||
void parseConnectionsCredentials();
|
||||
std::vector<String> loadWarningMessages();
|
||||
};
|
||||
}
|
||||
|
@ -57,4 +57,28 @@
|
||||
|
||||
The same can be done on user-level configuration, just create & adjust: ~/.clickhouse-client/config.xml
|
||||
-->
|
||||
|
||||
|
||||
<!-- Analog of .netrc -->
|
||||
<![CDATA[
|
||||
<connections_credentials>
|
||||
<connection>
|
||||
<!-- Name of the connection, host option for the client.
|
||||
"host" is not the same as "hostname" since you may want to have different settings for one host,
|
||||
and in this case you can add "prod" and "prod_readonly".
|
||||
|
||||
Default: "hostname" will be used. -->
|
||||
<name>default</name>
|
||||
<!-- Host that will be used for connection. -->
|
||||
<hostname>127.0.0.1</hostname>
|
||||
<port>9000</port>
|
||||
<secure>1</secure>
|
||||
<user>default</user>
|
||||
<password></password>
|
||||
<database></database>
|
||||
<!-- '~' is expanded to HOME, like in any shell -->
|
||||
<history_file></history_file>
|
||||
</connection>
|
||||
</connections_credentials>
|
||||
]]>
|
||||
</config>
|
||||
|
@ -165,9 +165,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
||||
/// should throw exception early and make exception message more readable.
|
||||
if (const auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)
|
||||
{
|
||||
throw Exception(
|
||||
"Can't format ASTInsertQuery with data, since data will be lost",
|
||||
DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA);
|
||||
throw Exception(DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA,
|
||||
"Can't format ASTInsertQuery with data, since data will be lost");
|
||||
}
|
||||
if (!quiet)
|
||||
{
|
||||
|
@ -196,7 +196,7 @@ void Keeper::createServer(const std::string & listen_host, const char * port_nam
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception{message, ErrorCodes::NETWORK_ERROR};
|
||||
throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -375,7 +375,7 @@ try
|
||||
if (effective_user_id == 0)
|
||||
{
|
||||
message += " Run under 'sudo -u " + data_owner + "'.";
|
||||
throw Exception(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA);
|
||||
throw Exception::createDeprecated(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -243,7 +243,6 @@ ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl(
|
||||
const ColumnRawPtrs & columns,
|
||||
bool cat_features_are_strings) const
|
||||
{
|
||||
std::string error_msg = "Error occurred while applying CatBoost model: ";
|
||||
size_t column_size = columns.front()->size();
|
||||
|
||||
auto result = ColumnFloat64::create(column_size * tree_count);
|
||||
@ -265,7 +264,8 @@ ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl(
|
||||
result_buf, column_size * tree_count))
|
||||
{
|
||||
|
||||
throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
|
||||
throw Exception(ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL,
|
||||
"Error occurred while applying CatBoost model: {}", api.GetErrorString());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -288,7 +288,8 @@ ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl(
|
||||
cat_features_buf, cat_features_count,
|
||||
result_buf, column_size * tree_count))
|
||||
{
|
||||
throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
|
||||
throw Exception(ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL,
|
||||
"Error occurred while applying CatBoost model: {}", api.GetErrorString());
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -304,7 +305,8 @@ ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl(
|
||||
cat_features_buf, cat_features_count,
|
||||
result_buf, column_size * tree_count))
|
||||
{
|
||||
throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
|
||||
throw Exception(ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL,
|
||||
"Error occurred while applying CatBoost model: {}", api.GetErrorString());
|
||||
}
|
||||
}
@ -61,13 +61,18 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ
|
||||
return;
|
||||
}
|
||||
|
||||
bool use_connection_pooling = params.getParsed<bool>("use_connection_pooling", true);
|
||||
|
||||
try
|
||||
{
|
||||
std::string connection_string = params.get("connection_string");
|
||||
|
||||
auto connection = ODBCPooledConnectionFactory::instance().get(
|
||||
validateODBCConnectionString(connection_string),
|
||||
getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
|
||||
nanodbc::ConnectionHolderPtr connection;
|
||||
if (use_connection_pooling)
|
||||
connection = ODBCPooledConnectionFactory::instance().get(
|
||||
validateODBCConnectionString(connection_string), getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
|
||||
else
|
||||
connection = std::make_shared<nanodbc::ConnectionHolder>(validateODBCConnectionString(connection_string));
|
||||
|
||||
auto identifier = getIdentifierQuote(std::move(connection));
|
||||
|
||||
|
@ -102,7 +102,9 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
|
||||
|
||||
std::string format = params.get("format", "RowBinary");
|
||||
std::string connection_string = params.get("connection_string");
|
||||
bool use_connection_pooling = params.getParsed<bool>("use_connection_pooling", true);
|
||||
LOG_TRACE(log, "Connection string: '{}'", connection_string);
|
||||
LOG_TRACE(log, "Use pooling: {}", use_connection_pooling);
|
||||
|
||||
UInt64 max_block_size = DEFAULT_BLOCK_SIZE;
|
||||
if (params.has("max_block_size"))
|
||||
@ -134,7 +136,7 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
|
||||
try
|
||||
{
|
||||
nanodbc::ConnectionHolderPtr connection_handler;
|
||||
if (getContext()->getSettingsRef().odbc_bridge_use_connection_pooling)
|
||||
if (use_connection_pooling)
|
||||
connection_handler = ODBCPooledConnectionFactory::instance().get(
|
||||
validateODBCConnectionString(connection_string), getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
|
||||
else
|
||||
|
@ -70,13 +70,19 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer
|
||||
return;
|
||||
}
|
||||
|
||||
bool use_connection_pooling = params.getParsed<bool>("use_connection_pooling", true);
|
||||
|
||||
try
|
||||
{
|
||||
std::string connection_string = params.get("connection_string");
|
||||
|
||||
auto connection = ODBCPooledConnectionFactory::instance().get(
|
||||
validateODBCConnectionString(connection_string),
|
||||
getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
|
||||
nanodbc::ConnectionHolderPtr connection;
|
||||
|
||||
if (use_connection_pooling)
|
||||
connection = ODBCPooledConnectionFactory::instance().get(
|
||||
validateODBCConnectionString(connection_string), getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
|
||||
else
|
||||
connection = std::make_shared<nanodbc::ConnectionHolder>(validateODBCConnectionString(connection_string));
|
||||
|
||||
bool result = isSchemaAllowed(std::move(connection));
|
||||
|
||||
|
@ -416,7 +416,7 @@ void Server::createServer(
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception{message, ErrorCodes::NETWORK_ERROR};
|
||||
throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -946,7 +946,7 @@ try
|
||||
if (effective_user_id == 0)
|
||||
{
|
||||
message += " Run under 'sudo -u " + data_owner + "'.";
|
||||
throw Exception(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA);
|
||||
throw Exception::createDeprecated(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -334,6 +334,19 @@
<max_thread_pool_size>10000</max_thread_pool_size>

<!-- Configure other thread pools: -->
<!--
<background_buffer_flush_schedule_pool_size>16</background_buffer_flush_schedule_pool_size>
<background_pool_size>16</background_pool_size>
<background_merges_mutations_concurrency_ratio>2</background_merges_mutations_concurrency_ratio>
<background_move_pool_size>8</background_move_pool_size>
<background_fetches_pool_size>8</background_fetches_pool_size>
<background_common_pool_size>8</background_common_pool_size>
<background_schedule_pool_size>128</background_schedule_pool_size>
<background_message_broker_schedule_pool_size>16</background_message_broker_schedule_pool_size>
<background_distributed_schedule_pool_size>16</background_distributed_schedule_pool_size>
-->

<!-- Number of workers to recycle connections in background (see also drain_timeout).
If the pool is full, connection will be drained synchronously. -->
<!-- <max_threads_for_connection_collector>10</max_threads_for_connection_collector> -->

@ -171,6 +171,7 @@ enum class AccessType
|
||||
M(SYSTEM_WAIT_LOADING_PARTS, "WAIT LOADING PARTS", TABLE, SYSTEM) \
|
||||
M(SYSTEM_SYNC_DATABASE_REPLICA, "SYNC DATABASE REPLICA", DATABASE, SYSTEM) \
|
||||
M(SYSTEM_SYNC_TRANSACTION_LOG, "SYNC TRANSACTION LOG", GLOBAL, SYSTEM) \
|
||||
M(SYSTEM_SYNC_FILE_CACHE, "SYNC FILE CACHE", GLOBAL, SYSTEM) \
|
||||
M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \
|
||||
M(SYSTEM_FLUSH_LOGS, "FLUSH LOGS", GLOBAL, SYSTEM_FLUSH) \
|
||||
M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \
|
||||
|
@ -79,9 +79,7 @@ AuthenticationData::Digest AuthenticationData::Util::encodeSHA256(std::string_vi
|
||||
::DB::encodeSHA256(text, hash.data());
|
||||
return hash;
|
||||
#else
|
||||
throw DB::Exception(
|
||||
"SHA256 passwords support is disabled, because ClickHouse was built without SSL library",
|
||||
DB::ErrorCodes::SUPPORT_IS_DISABLED);
|
||||
throw DB::Exception(DB::ErrorCodes::SUPPORT_IS_DISABLED, "SHA256 passwords support is disabled, because ClickHouse was built without SSL library");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -484,13 +484,15 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg
return true;
};

auto access_denied = [&](const String & error_msg, int error_code [[maybe_unused]])
auto access_denied = [&]<typename... FmtArgs>(int error_code [[maybe_unused]],
FormatStringHelper<String, FmtArgs...> fmt_string [[maybe_unused]],
FmtArgs && ...fmt_args [[maybe_unused]])
{
if (trace_log)
LOG_TRACE(trace_log, "Access denied: {}{}", (AccessRightsElement{flags, args...}.toStringWithoutOptions()),
(grant_option ? " WITH GRANT OPTION" : ""));
if constexpr (throw_if_denied)
throw Exception(getUserName() + ": " + error_msg, error_code);
throw Exception(error_code, std::move(fmt_string), getUserName(), std::forward<FmtArgs>(fmt_args)...);
return false;
};
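
For readers who have not seen the new formatted-exception style: the reworked access_denied helper above is a generic lambda that takes the error code first, then a compile-time-checked format string and the values to splice into it. A minimal standalone sketch of the same shape, using only the standard library (std::format, C++20/23 toolchain assumed) rather than ClickHouse's Exception and FormatStringHelper, looks like this; the user name and error code are illustrative placeholders.

#include <cstdio>
#include <format>
#include <stdexcept>
#include <string>
#include <utility>

int main()
{
    std::string user_name = "default"; // stand-in for getUserName()

    // Error code first, then format string, then the values to splice in,
    // mirroring the parameter order of the rewritten access_denied lambda.
    auto access_denied = [&]<typename... Args>(int error_code, std::format_string<Args...> fmt, Args &&... args)
    {
        std::string message = user_name + ": " + std::format(fmt, std::forward<Args>(args)...);
        throw std::runtime_error("code " + std::to_string(error_code) + ": " + message);
    };

    try
    {
        access_denied(497, "Not enough privileges. To execute this query it's necessary to have grant {}",
                      std::string("SELECT ON db.table"));
    }
    catch (const std::exception & e)
    {
        std::puts(e.what()); // code 497: default: Not enough privileges. ...
    }
}
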
@ -519,18 +521,16 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg
|
||||
{
|
||||
if (grant_option && acs->isGranted(flags, args...))
|
||||
{
|
||||
return access_denied(
|
||||
"Not enough privileges. "
|
||||
return access_denied(ErrorCodes::ACCESS_DENIED,
|
||||
"{}: Not enough privileges. "
|
||||
"The required privileges have been granted, but without grant option. "
|
||||
"To execute this query it's necessary to have grant "
|
||||
+ AccessRightsElement{flags, args...}.toStringWithoutOptions() + " WITH GRANT OPTION",
|
||||
ErrorCodes::ACCESS_DENIED);
|
||||
"To execute this query it's necessary to have grant {} WITH GRANT OPTION",
|
||||
AccessRightsElement{flags, args...}.toStringWithoutOptions());
|
||||
}
|
||||
|
||||
return access_denied(
|
||||
"Not enough privileges. To execute this query it's necessary to have grant "
|
||||
+ AccessRightsElement{flags, args...}.toStringWithoutOptions() + (grant_option ? " WITH GRANT OPTION" : ""),
|
||||
ErrorCodes::ACCESS_DENIED);
|
||||
return access_denied(ErrorCodes::ACCESS_DENIED,
|
||||
"{}: Not enough privileges. To execute this query it's necessary to have grant {}",
|
||||
AccessRightsElement{flags, args...}.toStringWithoutOptions() + (grant_option ? " WITH GRANT OPTION" : ""));
|
||||
}
|
||||
|
||||
struct PrecalculatedFlags
|
||||
@ -557,32 +557,34 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg
|
||||
if (params.readonly)
|
||||
{
|
||||
if constexpr (grant_option)
|
||||
return access_denied("Cannot change grants in readonly mode.", ErrorCodes::READONLY);
|
||||
return access_denied(ErrorCodes::READONLY, "{}: Cannot change grants in readonly mode.");
|
||||
if ((flags & precalc.not_readonly_flags) ||
|
||||
((params.readonly == 1) && (flags & precalc.not_readonly_1_flags)))
|
||||
{
|
||||
if (params.interface == ClientInfo::Interface::HTTP && params.http_method == ClientInfo::HTTPMethod::GET)
|
||||
{
|
||||
return access_denied(
|
||||
"Cannot execute query in readonly mode. "
|
||||
"For queries over HTTP, method GET implies readonly. You should use method POST for modifying queries",
|
||||
ErrorCodes::READONLY);
|
||||
return access_denied(ErrorCodes::READONLY,
|
||||
"{}: Cannot execute query in readonly mode. "
|
||||
"For queries over HTTP, method GET implies readonly. "
|
||||
"You should use method POST for modifying queries");
|
||||
}
|
||||
else
|
||||
return access_denied("Cannot execute query in readonly mode", ErrorCodes::READONLY);
|
||||
return access_denied(ErrorCodes::READONLY, "{}: Cannot execute query in readonly mode");
|
||||
}
|
||||
}
|
||||
|
||||
if (!params.allow_ddl && !grant_option)
|
||||
{
|
||||
if (flags & precalc.ddl_flags)
|
||||
return access_denied("Cannot execute query. DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED);
|
||||
return access_denied(ErrorCodes::QUERY_IS_PROHIBITED,
|
||||
"Cannot execute query. DDL queries are prohibited for the user {}");
|
||||
}
|
||||
|
||||
if (!params.allow_introspection && !grant_option)
|
||||
{
|
||||
if (flags & precalc.introspection_flags)
|
||||
return access_denied("Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED);
|
||||
return access_denied(ErrorCodes::FUNCTION_NOT_ALLOWED, "{}: Introspection functions are disabled, "
|
||||
"because setting 'allow_introspection_functions' is set to 0");
|
||||
}
|
||||
|
||||
return access_granted();
|
||||
@ -679,11 +681,13 @@ void ContextAccess::checkGrantOption(const AccessRightsElements & elements) cons
|
||||
template <bool throw_if_denied, typename Container, typename GetNameFunction>
|
||||
bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const GetNameFunction & get_name_function) const
|
||||
{
|
||||
auto show_error = [this](const String & msg, int error_code [[maybe_unused]])
|
||||
auto show_error = []<typename... FmtArgs>(int error_code [[maybe_unused]],
|
||||
FormatStringHelper<FmtArgs...> fmt_string [[maybe_unused]],
|
||||
FmtArgs && ...fmt_args [[maybe_unused]])
|
||||
{
|
||||
UNUSED(this);
|
||||
if constexpr (throw_if_denied)
|
||||
throw Exception(getUserName() + ": " + msg, error_code);
|
||||
throw Exception(error_code, std::move(fmt_string), std::forward<FmtArgs>(fmt_args)...);
|
||||
return false;
|
||||
};
|
||||
|
||||
if (is_full_access)
|
||||
@ -691,7 +695,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const
|
||||
|
||||
if (user_was_dropped)
|
||||
{
|
||||
show_error("User has been dropped", ErrorCodes::UNKNOWN_USER);
|
||||
show_error(ErrorCodes::UNKNOWN_USER, "User has been dropped");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -716,14 +720,15 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const
|
||||
role_name = "ID {" + toString(role_id) + "}";
|
||||
|
||||
if (info->enabled_roles.count(role_id))
|
||||
show_error("Not enough privileges. "
|
||||
"Role " + backQuote(*role_name) + " is granted, but without ADMIN option. "
|
||||
"To execute this query it's necessary to have the role " + backQuoteIfNeed(*role_name) + " granted with ADMIN option.",
|
||||
ErrorCodes::ACCESS_DENIED);
|
||||
show_error(ErrorCodes::ACCESS_DENIED,
|
||||
"Not enough privileges. "
|
||||
"Role {} is granted, but without ADMIN option. "
|
||||
"To execute this query it's necessary to have the role {} granted with ADMIN option.",
|
||||
backQuote(*role_name), backQuoteIfNeed(*role_name));
|
||||
else
|
||||
show_error("Not enough privileges. "
|
||||
"To execute this query it's necessary to have the role " + backQuoteIfNeed(*role_name) + " granted with ADMIN option.",
|
||||
ErrorCodes::ACCESS_DENIED);
|
||||
show_error(ErrorCodes::ACCESS_DENIED, "Not enough privileges. "
|
||||
"To execute this query it's necessary to have the role {} granted with ADMIN option.",
|
||||
backQuoteIfNeed(*role_name));
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -81,7 +81,7 @@ void KerberosInit::init(const String & keytab_file, const String & principal, co
|
||||
{
|
||||
ret = krb5_cc_resolve(k5.ctx, cache_name.c_str(), &k5.out_cc);
|
||||
if (ret)
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error in resolving cache{}", fmtError(ret));
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error in resolving cache: {}", fmtError(ret));
|
||||
LOG_TRACE(log,"Resolved cache");
|
||||
}
|
||||
else
|
||||
@ -89,7 +89,7 @@ void KerberosInit::init(const String & keytab_file, const String & principal, co
|
||||
// Resolve the default cache and get its type and default principal (if it is initialized).
|
||||
ret = krb5_cc_default(k5.ctx, &defcache);
|
||||
if (ret)
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error while getting default cache{}", fmtError(ret));
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error while getting default cache: {}", fmtError(ret));
|
||||
LOG_TRACE(log,"Resolved default cache");
|
||||
deftype = krb5_cc_get_type(k5.ctx, defcache);
|
||||
if (krb5_cc_get_principal(k5.ctx, defcache, &defcache_princ) != 0)
|
||||
@ -99,7 +99,7 @@ void KerberosInit::init(const String & keytab_file, const String & principal, co
|
||||
// Use the specified principal name.
|
||||
ret = krb5_parse_name_flags(k5.ctx, principal.c_str(), 0, &k5.me);
|
||||
if (ret)
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error when parsing principal name {}", principal + fmtError(ret));
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error when parsing principal name ({}): {}", principal, fmtError(ret));
|
||||
|
||||
// Cache related commands
|
||||
if (k5.out_cc == nullptr && krb5_cc_support_switch(k5.ctx, deftype))
|
||||
@ -107,7 +107,7 @@ void KerberosInit::init(const String & keytab_file, const String & principal, co
|
||||
// Use an existing cache for the client principal if we can.
|
||||
ret = krb5_cc_cache_match(k5.ctx, k5.me, &k5.out_cc);
|
||||
if (ret && ret != KRB5_CC_NOTFOUND)
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error while searching for cache for {}", principal + fmtError(ret));
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error while searching for cache for ({}): {}", principal, fmtError(ret));
|
||||
if (0 == ret)
|
||||
{
|
||||
LOG_TRACE(log,"Using default cache: {}", krb5_cc_get_name(k5.ctx, k5.out_cc));
|
||||
@ -118,7 +118,7 @@ void KerberosInit::init(const String & keytab_file, const String & principal, co
|
||||
// Create a new cache to avoid overwriting the initialized default cache.
|
||||
ret = krb5_cc_new_unique(k5.ctx, deftype, nullptr, &k5.out_cc);
|
||||
if (ret)
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error while generating new cache{}", fmtError(ret));
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error while generating new cache: {}", fmtError(ret));
|
||||
LOG_TRACE(log,"Using default cache: {}", krb5_cc_get_name(k5.ctx, k5.out_cc));
|
||||
k5.switch_to_cache = 1;
|
||||
}
|
||||
@ -134,24 +134,24 @@ void KerberosInit::init(const String & keytab_file, const String & principal, co
|
||||
|
||||
ret = krb5_unparse_name(k5.ctx, k5.me, &k5.name);
|
||||
if (ret)
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error when unparsing name{}", fmtError(ret));
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error when unparsing name: {}", fmtError(ret));
|
||||
LOG_TRACE(log,"Using principal: {}", k5.name);
|
||||
|
||||
// Allocate a new initial credential options structure.
|
||||
ret = krb5_get_init_creds_opt_alloc(k5.ctx, &options);
|
||||
if (ret)
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error in options allocation{}", fmtError(ret));
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error in options allocation: {}", fmtError(ret));
|
||||
|
||||
// Resolve keytab
|
||||
ret = krb5_kt_resolve(k5.ctx, keytab_file.c_str(), &keytab);
|
||||
if (ret)
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error in resolving keytab {}{}", keytab_file, fmtError(ret));
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error in resolving keytab ({}): {}", keytab_file, fmtError(ret));
|
||||
LOG_TRACE(log,"Using keytab: {}", keytab_file);
|
||||
|
||||
// Set an output credential cache in initial credential options.
|
||||
ret = krb5_get_init_creds_opt_set_out_ccache(k5.ctx, options, k5.out_cc);
|
||||
if (ret)
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error in setting output credential cache{}", fmtError(ret));
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error in setting output credential cache: {}", fmtError(ret));
|
||||
|
||||
// Action: init or renew
|
||||
LOG_TRACE(log,"Trying to renew credentials");
|
||||
@ -165,7 +165,7 @@ void KerberosInit::init(const String & keytab_file, const String & principal, co
|
||||
// Request KDC for an initial credentials using keytab.
|
||||
ret = krb5_get_init_creds_keytab(k5.ctx, &my_creds, k5.me, keytab, 0, nullptr, options);
|
||||
if (ret)
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error in getting initial credentials{}", fmtError(ret));
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error in getting initial credentials: {}", fmtError(ret));
|
||||
else
|
||||
LOG_TRACE(log,"Got initial credentials");
|
||||
}
|
||||
@ -175,7 +175,7 @@ void KerberosInit::init(const String & keytab_file, const String & principal, co
|
||||
// Initialize a credential cache. Destroy any existing contents of cache and initialize it for the default principal.
|
||||
ret = krb5_cc_initialize(k5.ctx, k5.out_cc, k5.me);
|
||||
if (ret)
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error when initializing cache{}", fmtError(ret));
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error when initializing cache: {}", fmtError(ret));
|
||||
LOG_TRACE(log,"Initialized cache");
|
||||
// Store credentials in a credential cache.
|
||||
ret = krb5_cc_store_cred(k5.ctx, k5.out_cc, &my_creds);
|
||||
@ -189,7 +189,7 @@ void KerberosInit::init(const String & keytab_file, const String & principal, co
|
||||
// Make a credential cache the primary cache for its collection.
|
||||
ret = krb5_cc_switch(k5.ctx, k5.out_cc);
|
||||
if (ret)
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error while switching to new cache{}", fmtError(ret));
|
||||
throw Exception(ErrorCodes::KERBEROS_ERROR, "Error while switching to new cache: {}", fmtError(ret));
|
||||
}
|
||||
|
||||
LOG_TRACE(log,"Authenticated to Kerberos v5");
|
|
||||
}
|
||||
}
|
||||
|
||||
throw Exception(text, ErrorCodes::LDAP_ERROR);
|
||||
throw Exception::createDeprecated(text, ErrorCodes::LDAP_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
@ -569,7 +569,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
|
||||
message += matched_msg;
|
||||
}
|
||||
|
||||
throw Exception(message, ErrorCodes::LDAP_ERROR);
|
||||
throw Exception::createDeprecated(message, ErrorCodes::LDAP_ERROR);
|
||||
}
|
||||
|
||||
break;
|
||||
|
@ -266,7 +266,7 @@ bool SettingsConstraints::Checker::check(SettingChange & change, const Field & n
|
||||
if (!explain.empty())
|
||||
{
|
||||
if (reaction == THROW_ON_VIOLATION)
|
||||
throw Exception(explain, code);
|
||||
throw Exception::createDeprecated(explain, code);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
@ -106,7 +106,7 @@ public:
|
||||
default:
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Map key type " + key_type->getName() + " is not is not supported by combinator " + getName());
|
||||
"Map key type {} is not is not supported by combinator {}", key_type->getName(), getName());
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -66,13 +66,13 @@ public:
|
||||
, kind(kind_)
|
||||
{
|
||||
if (!isNativeNumber(arguments[0]))
|
||||
throw Exception{getName() + ": first argument must be represented by integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{}: first argument must be represented by integer", getName());
|
||||
|
||||
if (!isNativeNumber(arguments[1]))
|
||||
throw Exception{getName() + ": second argument must be represented by integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{}: second argument must be represented by integer", getName());
|
||||
|
||||
if (!arguments[0]->equals(*arguments[1]))
|
||||
throw Exception{getName() + ": arguments must have the same type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{}: arguments must have the same type", getName());
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
|
@ -88,9 +88,9 @@ createAggregateFunctionSequenceNode(const std::string & name, const DataTypes &
|
||||
name, toString(min_required_args + 1));
|
||||
|
||||
if (argument_types.size() > max_events_size + min_required_args)
|
||||
throw Exception(fmt::format(
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Aggregate function '{}' requires at most {} (timestamp, value_column, ...{} events) arguments.",
|
||||
name, max_events_size + min_required_args, max_events_size), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
name, max_events_size + min_required_args, max_events_size);
|
||||
|
||||
if (const auto * cond_arg = argument_types[2].get(); cond_arg && !isUInt8(cond_arg))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of aggregate function {}, "
|
||||
@ -100,9 +100,8 @@ createAggregateFunctionSequenceNode(const std::string & name, const DataTypes &
|
||||
{
|
||||
const auto * cond_arg = argument_types[i].get();
|
||||
if (!isUInt8(cond_arg))
|
||||
throw Exception(fmt::format(
|
||||
"Illegal type '{}' of {} argument of aggregate function '{}', must be UInt8", cond_arg->getName(), i + 1, name),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type '{}' of {} argument of aggregate function '{}', must be UInt8", cond_arg->getName(), i + 1, name);
|
||||
}
|
||||
|
||||
if (WhichDataType(argument_types[1].get()).idx != TypeIndex::String)
|
||||
|
@ -235,7 +235,7 @@ private:
|
||||
if (skip_degree_ == skip_degree)
|
||||
return;
|
||||
if (skip_degree_ > detail::MAX_SKIP_DEGREE)
|
||||
throw DB::Exception{"skip_degree exceeds maximum value", DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED};
|
||||
throw DB::Exception(DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED, "skip_degree exceeds maximum value");
|
||||
skip_degree = skip_degree_;
|
||||
if (skip_degree == detail::MAX_SKIP_DEGREE)
|
||||
skip_mask = static_cast<UInt32>(-1);
|
||||
|
@ -1,14 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
#include <Common/SettingsChanges.h>
|
||||
#include <base/scope_guard.h>
|
||||
|
||||
#include <Common/Exception.h>
|
||||
#include <Core/Settings.h>
|
||||
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/QueryNode.h>
|
||||
#include <Analyzer/UnionNode.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -89,4 +90,134 @@ private:
template <typename Derived>
using ConstInDepthQueryTreeVisitor = InDepthQueryTreeVisitor<Derived, true /*const_visitor*/>;

/** Same as InDepthQueryTreeVisitor and additionally keeps track of current scope context.
* This can be useful if your visitor has special logic that depends on current scope context.
*/
template <typename Derived, bool const_visitor = false>
class InDepthQueryTreeVisitorWithContext
{
public:
using VisitQueryTreeNodeType = std::conditional_t<const_visitor, const QueryTreeNodePtr, QueryTreeNodePtr>;

explicit InDepthQueryTreeVisitorWithContext(ContextPtr context)
: current_context(std::move(context))
{}

/// Return true if visitor should traverse tree top to bottom, false otherwise
bool shouldTraverseTopToBottom() const
{
return true;
}

/// Return true if visitor should visit child, false otherwise
bool needChildVisit(VisitQueryTreeNodeType & parent [[maybe_unused]], VisitQueryTreeNodeType & child [[maybe_unused]])
{
return true;
}

const ContextPtr & getContext() const
{
return current_context;
}

const Settings & getSettings() const
{
return current_context->getSettingsRef();
}

void visit(VisitQueryTreeNodeType & query_tree_node)
{
auto current_scope_context_ptr = current_context;
SCOPE_EXIT(
current_context = std::move(current_scope_context_ptr);
);

if (auto * query_node = query_tree_node->template as<QueryNode>())
current_context = query_node->getContext();
else if (auto * union_node = query_tree_node->template as<UnionNode>())
current_context = union_node->getContext();

bool traverse_top_to_bottom = getDerived().shouldTraverseTopToBottom();
if (!traverse_top_to_bottom)
visitChildren(query_tree_node);

getDerived().visitImpl(query_tree_node);

if (traverse_top_to_bottom)
visitChildren(query_tree_node);
}
private:
Derived & getDerived()
{
return *static_cast<Derived *>(this);
}

const Derived & getDerived() const
{
return *static_cast<Derived *>(this);
}

void visitChildren(VisitQueryTreeNodeType & expression)
{
for (auto & child : expression->getChildren())
{
if (!child)
continue;

bool need_visit_child = getDerived().needChildVisit(expression, child);

if (need_visit_child)
visit(child);
}
}

ContextPtr current_context;
};

template <typename Derived>
using ConstInDepthQueryTreeVisitorWithContext = InDepthQueryTreeVisitorWithContext<Derived, true /*const_visitor*/>;
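
To make the intended usage of this new base class concrete, here is a small hypothetical pass built on top of it. It is a sketch only: the visitor name and what it collects are invented for illustration, while the headers, QueryTreeNodePtr, FunctionNode and the optimize_syntax_fuse_functions setting are the ones that appear elsewhere in this diff.

#include <vector>

#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>

namespace DB
{

/// Hypothetical visitor: records the names of all function nodes, but only
/// while a boolean setting taken from the current scope's context is enabled.
class CollectFunctionNamesVisitor : public InDepthQueryTreeVisitorWithContext<CollectFunctionNamesVisitor>
{
public:
    using Base = InDepthQueryTreeVisitorWithContext<CollectFunctionNamesVisitor>;
    using Base::Base; /// inherits the ContextPtr constructor

    void visitImpl(QueryTreeNodePtr & node)
    {
        /// getSettings() reflects the context of the nearest enclosing
        /// QueryNode/UnionNode, because visit() above swaps current_context.
        if (!getSettings().optimize_syntax_fuse_functions)
            return;

        if (auto * function_node = node->as<FunctionNode>())
            function_names.push_back(function_node->getFunctionName());
    }

    std::vector<String> function_names;
};

}

A pass's run() method would then simply construct CollectFunctionNamesVisitor visitor(std::move(context)); and call visitor.visit(query_tree_node); — the same pattern the existing passes are converted to later in this diff.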

/** Visitor that uses another visitor to visit a node only if the condition for visiting the node is true.
* For example, your visitor may need to visit only query tree nodes or union nodes.
*
* Condition interface:
* struct Condition
* {
* bool operator()(VisitQueryTreeNodeType & node)
* {
* return shouldNestedVisitorVisitNode(node);
* }
* }
*/
template <typename Visitor, typename Condition, bool const_visitor = false>
class InDepthQueryTreeConditionalVisitor : public InDepthQueryTreeVisitor<InDepthQueryTreeConditionalVisitor<Visitor, Condition, const_visitor>, const_visitor>
{
public:
using Base = InDepthQueryTreeVisitor<InDepthQueryTreeConditionalVisitor<Visitor, Condition, const_visitor>, const_visitor>;
using VisitQueryTreeNodeType = typename Base::VisitQueryTreeNodeType;

explicit InDepthQueryTreeConditionalVisitor(Visitor & visitor_, Condition & condition_)
: visitor(visitor_)
, condition(condition_)
{
}

bool shouldTraverseTopToBottom() const
{
return visitor.shouldTraverseTopToBottom();
}

void visitImpl(VisitQueryTreeNodeType & query_tree_node)
{
if (condition(query_tree_node))
visitor.visit(query_tree_node);
}

Visitor & visitor;
Condition & condition;
};

template <typename Visitor, typename Condition>
using ConstInDepthQueryTreeConditionalVisitor = InDepthQueryTreeConditionalVisitor<Visitor, Condition, true /*const_visitor*/>;
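
The Condition described in the comment above is just a callable. The following sketch shows one way the conditional visitor might be wired up to restrict a nested visitor to query and union nodes; the condition, nested visitor and free function are illustrative names, and only the two visitor templates come from this header.

#include <cstddef>

#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/UnionNode.h>

namespace DB
{

/// Condition: let the nested visitor see only query and union nodes.
struct QueryOrUnionCondition
{
    bool operator()(QueryTreeNodePtr & node)
    {
        return node->as<QueryNode>() || node->as<UnionNode>();
    }
};

/// Hypothetical nested visitor with the interface the conditional visitor expects:
/// shouldTraverseTopToBottom() and visit() for the nodes that passed the condition.
struct CountQueriesVisitor
{
    bool shouldTraverseTopToBottom() const { return true; }
    void visit(QueryTreeNodePtr & /*node*/) { ++seen; }
    size_t seen = 0;
};

void countQueriesAndUnions(QueryTreeNodePtr & root)
{
    CountQueriesVisitor nested;
    QueryOrUnionCondition condition;
    InDepthQueryTreeConditionalVisitor<CountQueriesVisitor, QueryOrUnionCondition> visitor(nested, condition);
    visitor.visit(root);
    /// nested.seen now holds the number of query/union nodes in the tree.
}

}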

}

@ -45,12 +45,11 @@ Field zeroField(const Field & value)
|
||||
* TODO: Support `groupBitAnd`, `groupBitOr`, `groupBitXor` functions.
|
||||
* TODO: Support rewrite `f((2 * n) * n)` into '2 * f(n * n)'.
|
||||
*/
|
||||
class AggregateFunctionsArithmericOperationsVisitor : public InDepthQueryTreeVisitor<AggregateFunctionsArithmericOperationsVisitor>
|
||||
class AggregateFunctionsArithmericOperationsVisitor : public InDepthQueryTreeVisitorWithContext<AggregateFunctionsArithmericOperationsVisitor>
|
||||
{
|
||||
public:
|
||||
explicit AggregateFunctionsArithmericOperationsVisitor(ContextPtr context_)
|
||||
: context(std::move(context_))
|
||||
{}
|
||||
using Base = InDepthQueryTreeVisitorWithContext<AggregateFunctionsArithmericOperationsVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
/// Traverse tree bottom to top
|
||||
static bool shouldTraverseTopToBottom()
|
||||
@ -60,6 +59,9 @@ public:
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (!getSettings().optimize_arithmetic_operations_in_aggregate_functions)
|
||||
return;
|
||||
|
||||
auto * aggregate_function_node = node->as<FunctionNode>();
|
||||
if (!aggregate_function_node || !aggregate_function_node->isAggregateFunction())
|
||||
return;
|
||||
@ -175,7 +177,7 @@ private:
|
||||
|
||||
inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
|
||||
{
|
||||
auto function = FunctionFactory::instance().get(function_name, context);
|
||||
auto function = FunctionFactory::instance().get(function_name, getContext());
|
||||
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
|
||||
}
|
||||
|
||||
@ -191,8 +193,6 @@ private:
|
||||
|
||||
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
|
||||
}
|
||||
|
||||
ContextPtr context;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,17 +1,23 @@
|
||||
#include <Analyzer/Passes/ConvertOrLikeChainPass.h>
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <Analyzer/Passes/ConvertOrLikeChainPass.h>
|
||||
|
||||
#include <Core/Field.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/likePatternToRegexp.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <Analyzer/ConstantNode.h>
|
||||
#include <Analyzer/UnionNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/HashUtils.h>
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Core/Field.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/likePatternToRegexp.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -19,37 +25,29 @@ namespace DB
|
||||
namespace
|
||||
{
|
||||
|
||||
class ConvertOrLikeChainVisitor : public InDepthQueryTreeVisitor<ConvertOrLikeChainVisitor>
|
||||
class ConvertOrLikeChainVisitor : public InDepthQueryTreeVisitorWithContext<ConvertOrLikeChainVisitor>
|
||||
{
|
||||
using FunctionNodes = std::vector<std::shared_ptr<FunctionNode>>;
|
||||
|
||||
const FunctionOverloadResolverPtr match_function_ref;
|
||||
const FunctionOverloadResolverPtr or_function_resolver;
|
||||
public:
|
||||
using Base = InDepthQueryTreeVisitorWithContext<ConvertOrLikeChainVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
explicit ConvertOrLikeChainVisitor(ContextPtr context)
|
||||
: InDepthQueryTreeVisitor<ConvertOrLikeChainVisitor>()
|
||||
, match_function_ref(FunctionFactory::instance().get("multiMatchAny", context))
|
||||
, or_function_resolver(FunctionFactory::instance().get("or", context))
|
||||
explicit ConvertOrLikeChainVisitor(FunctionOverloadResolverPtr or_function_resolver_,
|
||||
FunctionOverloadResolverPtr match_function_resolver_,
|
||||
ContextPtr context)
|
||||
: Base(std::move(context))
|
||||
, or_function_resolver(std::move(or_function_resolver_))
|
||||
, match_function_resolver(std::move(match_function_resolver_))
|
||||
{}
|
||||
|
||||
static bool needChildVisit(VisitQueryTreeNodeType & parent, VisitQueryTreeNodeType &)
|
||||
bool needChildVisit(VisitQueryTreeNodeType &, VisitQueryTreeNodeType &)
|
||||
{
|
||||
ContextPtr context;
|
||||
if (auto * query = parent->as<QueryNode>())
|
||||
context = query->getContext();
|
||||
else if (auto * union_node = parent->as<UnionNode>())
|
||||
context = union_node->getContext();
|
||||
if (context)
|
||||
{
|
||||
const auto & settings = context->getSettingsRef();
|
||||
const auto & settings = getSettings();
|
||||
|
||||
return settings.optimize_or_like_chain
|
||||
&& settings.allow_hyperscan
|
||||
&& settings.max_hyperscan_regexp_length == 0
|
||||
&& settings.max_hyperscan_regexp_total_length == 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
@ -61,27 +59,28 @@ public:
|
||||
|
||||
QueryTreeNodePtrWithHashMap<Array> node_to_patterns;
|
||||
FunctionNodes match_functions;
|
||||
for (auto & arg : function_node->getArguments())
|
||||
{
|
||||
unique_elems.push_back(arg);
|
||||
|
||||
auto * arg_func = arg->as<FunctionNode>();
|
||||
if (!arg_func)
|
||||
for (auto & argument : function_node->getArguments())
|
||||
{
|
||||
unique_elems.push_back(argument);
|
||||
|
||||
auto * argument_function = argument->as<FunctionNode>();
|
||||
if (!argument_function)
|
||||
continue;
|
||||
|
||||
const bool is_like = arg_func->getFunctionName() == "like";
|
||||
const bool is_ilike = arg_func->getFunctionName() == "ilike";
|
||||
const bool is_like = argument_function->getFunctionName() == "like";
|
||||
const bool is_ilike = argument_function->getFunctionName() == "ilike";
|
||||
|
||||
/// Not {i}like -> bail out.
|
||||
if (!is_like && !is_ilike)
|
||||
continue;
|
||||
|
||||
const auto & like_arguments = arg_func->getArguments().getNodes();
|
||||
const auto & like_arguments = argument_function->getArguments().getNodes();
|
||||
if (like_arguments.size() != 2)
|
||||
continue;
|
||||
|
||||
auto identifier = like_arguments[0];
|
||||
auto * pattern = like_arguments[1]->as<ConstantNode>();
|
||||
const auto & like_first_argument = like_arguments[0];
|
||||
const auto * pattern = like_arguments[1]->as<ConstantNode>();
|
||||
if (!pattern || !isString(pattern->getResultType()))
|
||||
continue;
|
||||
|
||||
@ -91,17 +90,20 @@ public:
|
||||
regexp = "(?i)" + regexp;
|
||||
|
||||
unique_elems.pop_back();
|
||||
auto it = node_to_patterns.find(identifier);
|
||||
|
||||
auto it = node_to_patterns.find(like_first_argument);
|
||||
if (it == node_to_patterns.end())
|
||||
{
|
||||
it = node_to_patterns.insert({identifier, Array{}}).first;
|
||||
it = node_to_patterns.insert({like_first_argument, Array{}}).first;
|
||||
|
||||
/// The second argument will be added when all patterns are known.
|
||||
auto match_function = std::make_shared<FunctionNode>("multiMatchAny");
|
||||
match_function->getArguments().getNodes().push_back(identifier);
|
||||
|
||||
match_function->getArguments().getNodes().push_back(like_first_argument);
|
||||
match_functions.push_back(match_function);
|
||||
|
||||
unique_elems.push_back(std::move(match_function));
|
||||
}
|
||||
|
||||
it->second.push_back(regexp);
|
||||
}
|
||||
|
||||
@ -111,23 +113,29 @@ public:
|
||||
auto & arguments = match_function->getArguments().getNodes();
|
||||
auto & patterns = node_to_patterns.at(arguments[0]);
|
||||
arguments.push_back(std::make_shared<ConstantNode>(Field{std::move(patterns)}));
|
||||
match_function->resolveAsFunction(match_function_ref);
|
||||
match_function->resolveAsFunction(match_function_resolver);
|
||||
}
|
||||
|
||||
/// OR must have at least two arguments.
|
||||
if (unique_elems.size() == 1)
|
||||
unique_elems.push_back(std::make_shared<ConstantNode>(false));
|
||||
unique_elems.push_back(std::make_shared<ConstantNode>(static_cast<UInt8>(0)));
|
||||
|
||||
function_node->getArguments().getNodes() = std::move(unique_elems);
|
||||
function_node->resolveAsFunction(or_function_resolver);
|
||||
}
|
||||
private:
|
||||
using FunctionNodes = std::vector<std::shared_ptr<FunctionNode>>;
|
||||
const FunctionOverloadResolverPtr or_function_resolver;
|
||||
const FunctionOverloadResolverPtr match_function_resolver;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void ConvertOrLikeChainPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
{
|
||||
ConvertOrLikeChainVisitor visitor(context);
|
||||
auto or_function_resolver = FunctionFactory::instance().get("or", context);
|
||||
auto match_function_resolver = FunctionFactory::instance().get("multiMatchAny", context);
|
||||
ConvertOrLikeChainVisitor visitor(std::move(or_function_resolver), std::move(match_function_resolver), std::move(context));
|
||||
visitor.visit(query_tree_node);
|
||||
}
|
||||
|
||||
|
@ -16,11 +16,17 @@ namespace DB
|
||||
namespace
|
||||
{
|
||||
|
||||
class CountDistinctVisitor : public InDepthQueryTreeVisitor<CountDistinctVisitor>
|
||||
class CountDistinctVisitor : public InDepthQueryTreeVisitorWithContext<CountDistinctVisitor>
|
||||
{
|
||||
public:
|
||||
static void visitImpl(QueryTreeNodePtr & node)
|
||||
using Base = InDepthQueryTreeVisitorWithContext<CountDistinctVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (!getSettings().count_distinct_optimization)
|
||||
return;
|
||||
|
||||
auto * query_node = node->as<QueryNode>();
|
||||
|
||||
/// Check that query has only SELECT clause
|
||||
@ -78,9 +84,9 @@ public:
|
||||
|
||||
}
|
||||
|
||||
void CountDistinctPass::run(QueryTreeNodePtr query_tree_node, ContextPtr)
|
||||
void CountDistinctPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
{
|
||||
CountDistinctVisitor visitor;
|
||||
CountDistinctVisitor visitor(std::move(context));
|
||||
visitor.visit(query_tree_node);
|
||||
}
|
||||
|
||||
|
@ -16,12 +16,11 @@ namespace DB
|
||||
namespace
|
||||
{
|
||||
|
||||
class CustomizeFunctionsVisitor : public InDepthQueryTreeVisitor<CustomizeFunctionsVisitor>
|
||||
class CustomizeFunctionsVisitor : public InDepthQueryTreeVisitorWithContext<CustomizeFunctionsVisitor>
|
||||
{
|
||||
public:
|
||||
explicit CustomizeFunctionsVisitor(ContextPtr & context_)
|
||||
: context(context_)
|
||||
{}
|
||||
using Base = InDepthQueryTreeVisitorWithContext<CustomizeFunctionsVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node) const
|
||||
{
|
||||
@ -29,7 +28,7 @@ public:
|
||||
if (!function_node)
|
||||
return;
|
||||
|
||||
const auto & settings = context->getSettingsRef();
|
||||
const auto & settings = getSettings();
|
||||
|
||||
/// After successful function replacement function name and function name lowercase must be recalculated
|
||||
auto function_name = function_node->getFunctionName();
|
||||
@ -154,19 +153,16 @@ public:
|
||||
|
||||
inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
|
||||
{
|
||||
auto function = FunctionFactory::instance().get(function_name, context);
|
||||
auto function = FunctionFactory::instance().get(function_name, getContext());
|
||||
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
|
||||
}
|
||||
|
||||
private:
|
||||
ContextPtr & context;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void CustomizeFunctionsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
{
|
||||
CustomizeFunctionsVisitor visitor(context);
|
||||
CustomizeFunctionsVisitor visitor(std::move(context));
|
||||
visitor.visit(query_tree_node);
|
||||
}
|
||||
|
||||
|
@ -22,15 +22,17 @@ namespace DB
|
||||
namespace
|
||||
{
|
||||
|
||||
class FunctionToSubcolumnsVisitor : public InDepthQueryTreeVisitor<FunctionToSubcolumnsVisitor>
|
||||
class FunctionToSubcolumnsVisitor : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>
|
||||
{
|
||||
public:
|
||||
explicit FunctionToSubcolumnsVisitor(ContextPtr & context_)
|
||||
: context(context_)
|
||||
{}
|
||||
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node) const
|
||||
{
|
||||
if (!getSettings().optimize_functions_to_subcolumns)
|
||||
return;
|
||||
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
if (!function_node)
|
||||
return;
|
||||
@ -192,11 +194,9 @@ public:
|
||||
private:
|
||||
inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
|
||||
{
|
||||
auto function = FunctionFactory::instance().get(function_name, context);
|
||||
auto function = FunctionFactory::instance().get(function_name, getContext());
|
||||
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
|
||||
}
|
||||
|
||||
ContextPtr & context;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -5,7 +5,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Transform functions to subcolumns.
|
||||
/** Transform functions to subcolumns. Enabled using setting optimize_functions_to_subcolumns.
|
||||
* It can help to reduce amount of read data.
|
||||
*
|
||||
* Example: SELECT tupleElement(column, subcolumn) FROM test_table;
|
||||
|
@ -26,16 +26,22 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
class FuseFunctionsVisitor : public InDepthQueryTreeVisitor<FuseFunctionsVisitor>
|
||||
class FuseFunctionsVisitor : public InDepthQueryTreeVisitorWithContext<FuseFunctionsVisitor>
|
||||
{
|
||||
public:
|
||||
using Base = InDepthQueryTreeVisitorWithContext<FuseFunctionsVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
explicit FuseFunctionsVisitor(const std::unordered_set<String> names_to_collect_)
|
||||
: names_to_collect(names_to_collect_)
|
||||
explicit FuseFunctionsVisitor(const std::unordered_set<String> names_to_collect_, ContextPtr context)
|
||||
: Base(std::move(context))
|
||||
, names_to_collect(names_to_collect_)
|
||||
{}
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (!getSettings().optimize_syntax_fuse_functions)
|
||||
return;
|
||||
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
if (!function_node || !function_node->isAggregateFunction() || !names_to_collect.contains(function_node->getFunctionName()))
|
||||
return;
|
||||
@ -201,7 +207,7 @@ FunctionNodePtr createFusedQuantilesNode(std::vector<QueryTreeNodePtr *> & nodes
|
||||
|
||||
void tryFuseSumCountAvg(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
{
|
||||
FuseFunctionsVisitor visitor({"sum", "count", "avg"});
|
||||
FuseFunctionsVisitor visitor({"sum", "count", "avg"}, context);
|
||||
visitor.visit(query_tree_node);
|
||||
|
||||
for (auto & [argument, nodes] : visitor.argument_to_functions_mapping)
|
||||
@ -220,7 +226,7 @@ void tryFuseSumCountAvg(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
|
||||
void tryFuseQuantiles(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
{
|
||||
FuseFunctionsVisitor visitor_quantile({"quantile"});
|
||||
FuseFunctionsVisitor visitor_quantile({"quantile"}, context);
|
||||
visitor_quantile.visit(query_tree_node);
|
||||
|
||||
for (auto & [argument, nodes_set] : visitor_quantile.argument_to_functions_mapping)
|
||||
|
@ -12,15 +12,22 @@ namespace DB
|
||||
namespace
|
||||
{
|
||||
|
||||
class IfChainToMultiIfPassVisitor : public InDepthQueryTreeVisitor<IfChainToMultiIfPassVisitor>
|
||||
class IfChainToMultiIfPassVisitor : public InDepthQueryTreeVisitorWithContext<IfChainToMultiIfPassVisitor>
|
||||
{
|
||||
public:
|
||||
explicit IfChainToMultiIfPassVisitor(FunctionOverloadResolverPtr multi_if_function_ptr_)
|
||||
: multi_if_function_ptr(std::move(multi_if_function_ptr_))
|
||||
using Base = InDepthQueryTreeVisitorWithContext<IfChainToMultiIfPassVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
explicit IfChainToMultiIfPassVisitor(FunctionOverloadResolverPtr multi_if_function_ptr_, ContextPtr context)
|
||||
: Base(std::move(context))
|
||||
, multi_if_function_ptr(std::move(multi_if_function_ptr_))
|
||||
{}
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (!getSettings().optimize_if_chain_to_multiif)
|
||||
return;
|
||||
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
if (!function_node || function_node->getFunctionName() != "if" || function_node->getArguments().getNodes().size() != 3)
|
||||
return;
|
||||
@ -68,7 +75,8 @@ private:
|
||||
|
||||
void IfChainToMultiIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
{
|
||||
IfChainToMultiIfPassVisitor visitor(FunctionFactory::instance().get("multiIf", context));
|
||||
auto multi_if_function_ptr = FunctionFactory::instance().get("multiIf", context);
|
||||
IfChainToMultiIfPassVisitor visitor(std::move(multi_if_function_ptr), std::move(context));
|
||||
visitor.visit(query_tree_node);
|
||||
}
|
||||
|
||||
|
@ -107,21 +107,24 @@ void wrapIntoToString(FunctionNode & function_node, QueryTreeNodePtr arg, Contex
|
||||
assert(isString(function_node.getResultType()));
|
||||
}
|
||||
|
||||
class ConvertStringsToEnumVisitor : public InDepthQueryTreeVisitor<ConvertStringsToEnumVisitor>
|
||||
class ConvertStringsToEnumVisitor : public InDepthQueryTreeVisitorWithContext<ConvertStringsToEnumVisitor>
|
||||
{
|
||||
public:
|
||||
explicit ConvertStringsToEnumVisitor(ContextPtr context_)
|
||||
: context(std::move(context_))
|
||||
{
|
||||
}
|
||||
using Base = InDepthQueryTreeVisitorWithContext<ConvertStringsToEnumVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (!getSettings().optimize_if_transform_strings_to_enum)
|
||||
return;
|
||||
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
|
||||
if (!function_node)
|
||||
return;
|
||||
|
||||
const auto & context = getContext();
|
||||
|
||||
/// to preserve return type (String) of the current function_node, we wrap the newly
|
||||
/// generated function nodes into toString
|
||||
|
||||
@ -198,16 +201,13 @@ public:
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
ContextPtr context;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void IfTransformStringsToEnumPass::run(QueryTreeNodePtr query, ContextPtr context)
|
||||
{
|
||||
ConvertStringsToEnumVisitor visitor(context);
|
||||
ConvertStringsToEnumVisitor visitor(std::move(context));
|
||||
visitor.visit(query);
|
||||
}
|
||||
|
||||
|
@ -10,15 +10,22 @@ namespace DB
|
||||
namespace
|
||||
{
|
||||
|
||||
class MultiIfToIfVisitor : public InDepthQueryTreeVisitor<MultiIfToIfVisitor>
|
||||
class MultiIfToIfVisitor : public InDepthQueryTreeVisitorWithContext<MultiIfToIfVisitor>
|
||||
{
|
||||
public:
|
||||
explicit MultiIfToIfVisitor(FunctionOverloadResolverPtr if_function_ptr_)
|
||||
: if_function_ptr(if_function_ptr_)
|
||||
using Base = InDepthQueryTreeVisitorWithContext<MultiIfToIfVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
explicit MultiIfToIfVisitor(FunctionOverloadResolverPtr if_function_ptr_, ContextPtr context)
|
||||
: Base(std::move(context))
|
||||
, if_function_ptr(std::move(if_function_ptr_))
|
||||
{}
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (!getSettings().optimize_multiif_to_if)
|
||||
return;
|
||||
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
if (!function_node || function_node->getFunctionName() != "multiIf")
|
||||
return;
|
||||
@ -38,7 +45,8 @@ private:
|
||||
|
||||
void MultiIfToIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
{
|
||||
MultiIfToIfVisitor visitor(FunctionFactory::instance().get("if", context));
|
||||
auto if_function_ptr = FunctionFactory::instance().get("if", context);
|
||||
MultiIfToIfVisitor visitor(std::move(if_function_ptr), std::move(context));
|
||||
visitor.visit(query_tree_node);
|
||||
}
|
||||
|
||||
|
@ -14,12 +14,17 @@ namespace DB
|
||||
namespace
|
||||
{
|
||||
|
||||
class NormalizeCountVariantsVisitor : public InDepthQueryTreeVisitor<NormalizeCountVariantsVisitor>
|
||||
class NormalizeCountVariantsVisitor : public InDepthQueryTreeVisitorWithContext<NormalizeCountVariantsVisitor>
|
||||
{
|
||||
public:
|
||||
explicit NormalizeCountVariantsVisitor(ContextPtr context_) : context(std::move(context_)) {}
|
||||
using Base = InDepthQueryTreeVisitorWithContext<NormalizeCountVariantsVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (!getSettings().optimize_normalize_count_variants)
|
||||
return;
|
||||
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
if (!function_node || !function_node->isAggregateFunction() || (function_node->getFunctionName() != "count" && function_node->getFunctionName() != "sum"))
|
||||
return;
|
||||
@ -42,15 +47,13 @@ public:
|
||||
else if (function_node->getFunctionName() == "sum" &&
|
||||
first_argument_constant_literal.getType() == Field::Types::UInt64 &&
|
||||
first_argument_constant_literal.get<UInt64>() == 1 &&
|
||||
!context->getSettingsRef().aggregate_functions_null_for_empty)
|
||||
!getSettings().aggregate_functions_null_for_empty)
|
||||
{
|
||||
resolveAsCountAggregateFunction(*function_node);
|
||||
function_node->getArguments().getNodes().clear();
|
||||
}
|
||||
}
|
||||
private:
|
||||
ContextPtr context;
|
||||
|
||||
static inline void resolveAsCountAggregateFunction(FunctionNode & function_node)
|
||||
{
|
||||
AggregateFunctionProperties properties;
|
||||
|
@ -1,26 +1,33 @@
|
||||
#include <Analyzer/Passes/OptimizeGroupByFunctionKeysPass.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <queue>
|
||||
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/HashUtils.h>
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/QueryNode.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <queue>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class OptimizeGroupByFunctionKeysVisitor : public InDepthQueryTreeVisitor<OptimizeGroupByFunctionKeysVisitor>
|
||||
class OptimizeGroupByFunctionKeysVisitor : public InDepthQueryTreeVisitorWithContext<OptimizeGroupByFunctionKeysVisitor>
|
||||
{
|
||||
public:
|
||||
using Base = InDepthQueryTreeVisitorWithContext<OptimizeGroupByFunctionKeysVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
static bool needChildVisit(QueryTreeNodePtr & /*parent*/, QueryTreeNodePtr & child)
|
||||
{
|
||||
return !child->as<FunctionNode>();
|
||||
}
|
||||
|
||||
static void visitImpl(QueryTreeNodePtr & node)
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (!getSettings().optimize_group_by_function_keys)
|
||||
return;
|
||||
|
||||
auto * query = node->as<QueryNode>();
|
||||
if (!query)
|
||||
return;
|
||||
@ -41,6 +48,11 @@ public:
|
||||
optimizeGroupingSet(group_by);
|
||||
}
|
||||
private:
|
||||
struct NodeWithInfo
|
||||
{
|
||||
QueryTreeNodePtr node;
|
||||
bool parents_are_only_deterministic = false;
|
||||
};
|
||||
|
||||
static bool canBeEliminated(QueryTreeNodePtr & node, const QueryTreeNodePtrWithHashSet & group_by_keys)
|
||||
{
|
||||
@ -48,16 +60,17 @@ private:
|
||||
if (!function || function->getArguments().getNodes().empty())
|
||||
return false;
|
||||
|
||||
QueryTreeNodes candidates;
|
||||
std::vector<NodeWithInfo> candidates;
|
||||
auto & function_arguments = function->getArguments().getNodes();
|
||||
bool is_deterministic = function->getFunction()->isDeterministicInScopeOfQuery();
|
||||
for (auto it = function_arguments.rbegin(); it != function_arguments.rend(); ++it)
|
||||
candidates.push_back(*it);
|
||||
candidates.push_back({ *it, is_deterministic });
|
||||
|
||||
// Using DFS we traverse function tree and try to find if it uses other keys as function arguments.
|
||||
// TODO: Also process CONSTANT here. We can simplify GROUP BY x, x + 1 to GROUP BY x.
|
||||
while (!candidates.empty())
|
||||
{
|
||||
auto candidate = candidates.back();
|
||||
auto [candidate, parents_are_only_deterministic] = candidates.back();
|
||||
candidates.pop_back();
|
||||
|
||||
bool found = group_by_keys.contains(candidate);
|
||||
@ -73,8 +86,9 @@ private:
|
||||
|
||||
if (!found)
|
||||
{
|
||||
bool is_deterministic_function = parents_are_only_deterministic && function->getFunction()->isDeterministicInScopeOfQuery();
|
||||
for (auto it = arguments.rbegin(); it != arguments.rend(); ++it)
|
||||
candidates.push_back(*it);
|
||||
candidates.push_back({ *it, is_deterministic_function });
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -82,6 +96,10 @@ private:
|
||||
if (!found)
|
||||
return false;
|
||||
break;
|
||||
case QueryTreeNodeType::CONSTANT:
|
||||
if (!parents_are_only_deterministic)
|
||||
return false;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@ -105,9 +123,10 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
void OptimizeGroupByFunctionKeysPass::run(QueryTreeNodePtr query_tree_node, ContextPtr /*context*/)
|
||||
void OptimizeGroupByFunctionKeysPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
{
|
||||
OptimizeGroupByFunctionKeysVisitor().visit(query_tree_node);
|
||||
OptimizeGroupByFunctionKeysVisitor visitor(std::move(context));
|
||||
visitor.visit(query_tree_node);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,11 +1,13 @@
|
||||
#include <Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h>
|
||||
|
||||
#include <Functions/IFunction.h>
|
||||
|
||||
#include <Analyzer/ColumnNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/HashUtils.h>
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/QueryNode.h>
|
||||
#include <Analyzer/SortNode.h>
|
||||
#include <Functions/IFunction.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -13,9 +15,12 @@ namespace DB
|
||||
namespace
|
||||
{
|
||||
|
||||
class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitor<OptimizeRedundantFunctionsInOrderByVisitor>
|
||||
class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitorWithContext<OptimizeRedundantFunctionsInOrderByVisitor>
|
||||
{
|
||||
public:
|
||||
using Base = InDepthQueryTreeVisitorWithContext<OptimizeRedundantFunctionsInOrderByVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
static bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*parent*/)
|
||||
{
|
||||
if (node->as<FunctionNode>())
|
||||
@ -25,6 +30,9 @@ public:
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (!getSettings().optimize_redundant_functions_in_order_by)
|
||||
return;
|
||||
|
||||
auto * query = node->as<QueryNode>();
|
||||
if (!query)
|
||||
return;
|
||||
@ -116,9 +124,10 @@ private:
|
||||
|
||||
}
|
||||
|
||||
void OptimizeRedundantFunctionsInOrderByPass::run(QueryTreeNodePtr query_tree_node, ContextPtr /*context*/)
|
||||
void OptimizeRedundantFunctionsInOrderByPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
{
|
||||
OptimizeRedundantFunctionsInOrderByVisitor().visit(query_tree_node);
|
||||
OptimizeRedundantFunctionsInOrderByVisitor visitor(std::move(context));
|
||||
visitor.visit(query_tree_node);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1943,7 +1943,7 @@ void QueryAnalyzer::validateTableExpressionModifiers(const QueryTreeNodePtr & ta

if (!table_node && !table_function_node && !query_node && !union_node)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Unexpected table expression. Expected table, table function, query or union node. Actual {}",
"Unexpected table expression. Expected table, table function, query or union node. Table node: {}, scope node: {}",
table_expression_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());

@ -4366,12 +4366,9 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
{
if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name))
{
std::string error_message = fmt::format("Aggregate function with name '{}' does not exists. In scope {}",
function_name,
scope.scope_node->formatASTForErrorMessage());

AggregateFunctionFactory::instance().appendHintsMessage(error_message, function_name);
throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, error_message);
throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Aggregate function with name '{}' does not exists. In scope {}{}",
function_name, scope.scope_node->formatASTForErrorMessage(),
getHintsErrorMessageSuffix(AggregateFunctionFactory::instance().getHints(function_name)));
}

if (!function_lambda_arguments_indexes.empty())
@ -5726,7 +5723,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
case QueryTreeNodeType::IDENTIFIER:
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Identifiers in FROM section must be already resolved. In scope {}",
"Identifiers in FROM section must be already resolved. Node {}, scope {}",
join_tree_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
}
@ -20,15 +20,17 @@ namespace DB
namespace
{

class SumIfToCountIfVisitor : public InDepthQueryTreeVisitor<SumIfToCountIfVisitor>
class SumIfToCountIfVisitor : public InDepthQueryTreeVisitorWithContext<SumIfToCountIfVisitor>
{
public:
explicit SumIfToCountIfVisitor(ContextPtr & context_)
: context(context_)
{}
using Base = InDepthQueryTreeVisitorWithContext<SumIfToCountIfVisitor>;
using Base::Base;

void visitImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_rewrite_sum_if_to_count_if)
return;

auto * function_node = node->as<FunctionNode>();
if (!function_node || !function_node->isAggregateFunction())
return;
@ -56,7 +58,7 @@ public:
if (!isInt64OrUInt64FieldType(constant_value_literal.getType()))
return;

if (constant_value_literal.get<UInt64>() != 1 || context->getSettingsRef().aggregate_functions_null_for_empty)
if (constant_value_literal.get<UInt64>() != 1 || getSettings().aggregate_functions_null_for_empty)
return;

function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]);
@ -122,7 +124,7 @@ public:
auto & not_function_arguments = not_function->getArguments().getNodes();
not_function_arguments.push_back(nested_if_function_arguments_nodes[0]);

not_function->resolveAsFunction(FunctionFactory::instance().get("not", context)->build(not_function->getArgumentColumns()));
not_function->resolveAsFunction(FunctionFactory::instance().get("not", getContext())->build(not_function->getArgumentColumns()));

function_node_arguments_nodes[0] = std::move(not_function);
function_node_arguments_nodes.resize(1);
@ -143,8 +145,6 @@ private:

function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}

ContextPtr & context;
};

}
@ -25,11 +25,17 @@ bool isUniqFunction(const String & function_name)
function_name == "uniqTheta";
}

class UniqInjectiveFunctionsEliminationVisitor : public InDepthQueryTreeVisitor<UniqInjectiveFunctionsEliminationVisitor>
class UniqInjectiveFunctionsEliminationVisitor : public InDepthQueryTreeVisitorWithContext<UniqInjectiveFunctionsEliminationVisitor>
{
public:
static void visitImpl(QueryTreeNodePtr & node)
using Base = InDepthQueryTreeVisitorWithContext<UniqInjectiveFunctionsEliminationVisitor>;
using Base::Base;

void visitImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_injective_functions_inside_uniq)
return;

auto * function_node = node->as<FunctionNode>();
if (!function_node || !function_node->isAggregateFunction() || !isUniqFunction(function_node->getFunctionName()))
return;
@ -81,9 +87,9 @@ public:

}

void UniqInjectiveFunctionsEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr)
void UniqInjectiveFunctionsEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
UniqInjectiveFunctionsEliminationVisitor visitor;
UniqInjectiveFunctionsEliminationVisitor visitor(std::move(context));
visitor.visit(query_tree_node);
}

@ -1,5 +1,7 @@
#include <Analyzer/QueryNode.h>

#include <fmt/core.h>

#include <Common/SipHash.h>
#include <Common/FieldVisitorToString.h>

@ -17,7 +19,6 @@
#include <Parsers/ASTSetQuery.h>

#include <Analyzer/Utils.h>
#include <fmt/core.h>

namespace DB
{
@ -36,7 +37,7 @@ QueryNode::QueryNode(ContextMutablePtr context_, SettingsChanges settings_change
}

QueryNode::QueryNode(ContextMutablePtr context_)
: QueryNode(context_, {} /*settings_changes*/)
: QueryNode(std::move(context_), {} /*settings_changes*/)
{}

void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
@ -185,11 +186,8 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
{
buffer << '\n' << std::string(indent + 2, ' ') << "SETTINGS";
for (const auto & change : settings_changes)
{
buffer << fmt::format(" {}={}", change.name, toString(change.value));
}
buffer << '\n';
}
}

bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const
@ -1,6 +1,7 @@
#include <memory>
#include <Analyzer/QueryTreePassManager.h>

#include <memory>

#include <Common/Exception.h>

#include <IO/WriteHelpers.h>
@ -133,7 +134,6 @@ private:
* TODO: Support setting optimize_aggregators_of_group_by_keys.
* TODO: Support setting optimize_duplicate_order_by_and_distinct.
* TODO: Support setting optimize_monotonous_functions_in_order_by.
* TODO: Support settings.optimize_or_like_chain.
* TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column).
*/

@ -210,52 +210,30 @@ void QueryTreePassManager::dump(WriteBuffer & buffer, size_t up_to_pass_index)

void addQueryTreePasses(QueryTreePassManager & manager)
{
auto context = manager.getContext();
const auto & settings = context->getSettingsRef();

manager.addPass(std::make_unique<QueryAnalysisPass>());

if (settings.optimize_functions_to_subcolumns)
manager.addPass(std::make_unique<FunctionToSubcolumnsPass>());

if (settings.count_distinct_optimization)
manager.addPass(std::make_unique<CountDistinctPass>());

if (settings.optimize_rewrite_sum_if_to_count_if)
manager.addPass(std::make_unique<SumIfToCountIfPass>());

if (settings.optimize_normalize_count_variants)
manager.addPass(std::make_unique<NormalizeCountVariantsPass>());

manager.addPass(std::make_unique<CustomizeFunctionsPass>());

if (settings.optimize_arithmetic_operations_in_aggregate_functions)
manager.addPass(std::make_unique<AggregateFunctionsArithmericOperationsPass>());

if (settings.optimize_injective_functions_inside_uniq)
manager.addPass(std::make_unique<UniqInjectiveFunctionsEliminationPass>());

if (settings.optimize_group_by_function_keys)
manager.addPass(std::make_unique<OptimizeGroupByFunctionKeysPass>());

if (settings.optimize_multiif_to_if)
manager.addPass(std::make_unique<MultiIfToIfPass>());

manager.addPass(std::make_unique<IfConstantConditionPass>());

if (settings.optimize_if_chain_to_multiif)
manager.addPass(std::make_unique<IfChainToMultiIfPass>());

if (settings.optimize_redundant_functions_in_order_by)
manager.addPass(std::make_unique<OptimizeRedundantFunctionsInOrderByPass>());

manager.addPass(std::make_unique<OrderByTupleEliminationPass>());
manager.addPass(std::make_unique<OrderByLimitByDuplicateEliminationPass>());

if (settings.optimize_syntax_fuse_functions)
manager.addPass(std::make_unique<FuseFunctionsPass>());

if (settings.optimize_if_transform_strings_to_enum)
manager.addPass(std::make_unique<IfTransformStringsToEnumPass>());

manager.addPass(std::make_unique<ConvertOrLikeChainPass>());
@ -79,7 +79,7 @@ namespace
request.SetMaxKeys(1);
auto outcome = client.ListObjects(request);
if (!outcome.IsSuccess())
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
return outcome.GetResult().GetContents();
}

@ -233,7 +233,7 @@ void BackupWriterS3::removeFile(const String & file_name)
request.SetKey(fs::path(s3_uri.key) / file_name);
auto outcome = client->DeleteObject(request);
if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType()))
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
}

void BackupWriterS3::removeFiles(const Strings & file_names)
@ -291,7 +291,7 @@ void BackupWriterS3::removeFilesBatch(const Strings & file_names)

auto outcome = client->DeleteObjects(request);
if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType()))
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
}
}

@ -271,6 +271,18 @@ size_t BackupImpl::getNumFiles() const
return num_files;
}

size_t BackupImpl::getNumProcessedFiles() const
{
std::lock_guard lock{mutex};
return num_processed_files;
}

UInt64 BackupImpl::getProcessedFilesSize() const
{
std::lock_guard lock{mutex};
return processed_files_size;
}

UInt64 BackupImpl::getUncompressedSize() const
{
std::lock_guard lock{mutex};
@ -355,6 +367,7 @@ void BackupImpl::writeBackupMetadata()
out->finalize();

increaseUncompressedSize(str.size());
increaseProcessedSize(str.size());
}

@ -380,6 +393,7 @@ void BackupImpl::readBackupMetadata()
String str;
readStringUntilEOF(str, *in);
increaseUncompressedSize(str.size());
increaseProcessedSize(str.size());
Poco::XML::DOMParser dom_parser;
Poco::AutoPtr<Poco::XML::Document> config = dom_parser.parseMemory(str.data(), str.size());
const Poco::XML::Node * config_root = getRootNode(config);
@ -598,6 +612,8 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
if (open_mode != OpenMode::READ)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is not opened for reading");

increaseProcessedSize(size_and_checksum.first);

if (!size_and_checksum.first)
{
/// Entry's data is empty.
@ -762,6 +778,11 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
.base_checksum = 0,
};

{
std::lock_guard lock{mutex};
increaseProcessedSize(info);
}

/// Empty file, nothing to backup
if (info.size == 0 && deduplicate_files)
{
@ -972,6 +993,17 @@ void BackupImpl::increaseUncompressedSize(const FileInfo & info)
increaseUncompressedSize(info.size - info.base_size);
}

void BackupImpl::increaseProcessedSize(UInt64 file_size) const
{
processed_files_size += file_size;
++num_processed_files;
}

void BackupImpl::increaseProcessedSize(const FileInfo & info)
{
increaseProcessedSize(info.size);
}

void BackupImpl::setCompressedSize()
{
if (use_archives)
@ -1011,7 +1043,7 @@ std::shared_ptr<IArchiveWriter> BackupImpl::getArchiveWriter(const String & suff
String archive_name_with_suffix = getArchiveNameWithSuffix(suffix);
auto new_archive_writer = createArchiveWriter(archive_params.archive_name, writer->writeFile(archive_name_with_suffix));
new_archive_writer->setPassword(archive_params.password);

new_archive_writer->setCompression(archive_params.compression_method, archive_params.compression_level);
size_t pos = suffix.empty() ? 0 : 1;
archive_writers[pos] = {suffix, new_archive_writer};

@ -59,6 +59,8 @@ public:
time_t getTimestamp() const override { return timestamp; }
UUID getUUID() const override { return *uuid; }
size_t getNumFiles() const override;
size_t getNumProcessedFiles() const override;
UInt64 getProcessedFilesSize() const override;
UInt64 getUncompressedSize() const override;
UInt64 getCompressedSize() const override;
Strings listFiles(const String & directory, bool recursive) const override;
@ -101,10 +103,16 @@ private:
std::shared_ptr<IArchiveReader> getArchiveReader(const String & suffix) const;
std::shared_ptr<IArchiveWriter> getArchiveWriter(const String & suffix);

/// Increases `uncompressed_size` by a specific value and `num_files` by 1.
/// Increases `uncompressed_size` by a specific value,
/// also increases `num_files` by 1.
void increaseUncompressedSize(UInt64 file_size);
void increaseUncompressedSize(const FileInfo & info);

/// Increases `processed_files_size` by a specific value,
/// also increases `num_processed_files` by 1.
void increaseProcessedSize(UInt64 file_size) const;
void increaseProcessedSize(const FileInfo & info);

/// Calculates and sets `compressed_size`.
void setCompressedSize();

@ -121,6 +129,8 @@ private:
std::optional<UUID> uuid;
time_t timestamp = 0;
size_t num_files = 0;
mutable size_t num_processed_files = 0;
mutable UInt64 processed_files_size = 0;
UInt64 uncompressed_size = 0;
UInt64 compressed_size = 0;
int version;
@ -93,7 +93,7 @@ namespace
catch (...)
{
if (coordination)
coordination->setError(current_host, Exception{getCurrentExceptionCode(), getCurrentExceptionMessage(true, true)});
coordination->setError(current_host, Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
}
}

@ -338,16 +338,20 @@ void BackupsWorker::doBackup(
}

size_t num_files = 0;
size_t num_processed_files = 0;
UInt64 uncompressed_size = 0;
UInt64 compressed_size = 0;
UInt64 processed_files_size = 0;

/// Finalize backup (write its metadata).
if (!backup_settings.internal)
{
backup->finalizeWriting();
num_files = backup->getNumFiles();
num_processed_files = backup->getNumProcessedFiles();
uncompressed_size = backup->getUncompressedSize();
compressed_size = backup->getCompressedSize();
processed_files_size = backup->getProcessedFilesSize();
}

/// Close the backup.
@ -355,7 +359,7 @@ void BackupsWorker::doBackup(

LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_name_for_logging);
setStatus(backup_id, BackupStatus::BACKUP_CREATED);
setNumFilesAndSize(backup_id, num_files, uncompressed_size, compressed_size);
setNumFilesAndSize(backup_id, num_files, num_processed_files, processed_files_size, uncompressed_size, compressed_size);
}
catch (...)
{
@ -496,8 +500,6 @@ void BackupsWorker::doRestore(
backup_open_params.password = restore_settings.password;
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);

setNumFilesAndSize(restore_id, backup->getNumFiles(), backup->getUncompressedSize(), backup->getCompressedSize());

String current_database = context->getCurrentDatabase();

/// Checks access rights if this is ON CLUSTER query.
@ -578,6 +580,13 @@ void BackupsWorker::doRestore(

LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging);
setStatus(restore_id, BackupStatus::RESTORED);
setNumFilesAndSize(
restore_id,
backup->getNumFiles(),
backup->getNumProcessedFiles(),
backup->getProcessedFilesSize(),
backup->getUncompressedSize(),
backup->getCompressedSize());
}
catch (...)
{
@ -658,7 +667,7 @@ void BackupsWorker::setStatus(const String & id, BackupStatus status, bool throw
}

void BackupsWorker::setNumFilesAndSize(const String & id, size_t num_files, UInt64 uncompressed_size, UInt64 compressed_size)
void BackupsWorker::setNumFilesAndSize(const String & id, size_t num_files, size_t num_processed_files, UInt64 processed_files_size, UInt64 uncompressed_size, UInt64 compressed_size)
{
std::lock_guard lock{infos_mutex};
auto it = infos.find(id);
@ -667,6 +676,8 @@ void BackupsWorker::setNumFilesAndSize(const String & id, size_t num_files, UInt

auto & info = it->second;
info.num_files = num_files;
info.num_processed_files = num_processed_files;
info.processed_files_size = processed_files_size;
info.uncompressed_size = uncompressed_size;
info.compressed_size = compressed_size;
}
@ -56,6 +56,14 @@ public:
/// Number of files in the backup (including backup's metadata; only unique files are counted).
size_t num_files = 0;

/// Number of processed files during backup or restore process.
/// For restore it includes files from base backups.
size_t num_processed_files = 0;

/// Size of processed files during backup or restore.
/// For restore it includes sizes from base backups.
UInt64 processed_files_size = 0;

/// Size of all files in the backup (including backup's metadata; only unique files are counted).
UInt64 uncompressed_size = 0;

@ -102,7 +110,7 @@ private:
void addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status);
void setStatus(const OperationID & id, BackupStatus status, bool throw_if_error = true);
void setStatusSafe(const String & id, BackupStatus status) { setStatus(id, status, false); }
void setNumFilesAndSize(const OperationID & id, size_t num_files, UInt64 uncompressed_size, UInt64 compressed_size);
void setNumFilesAndSize(const OperationID & id, size_t num_files, size_t num_processed_files, UInt64 processed_files_size, UInt64 uncompressed_size, UInt64 compressed_size);
std::vector<Info> getAllActiveBackupInfos() const;
std::vector<Info> getAllActiveRestoreInfos() const;
bool hasConcurrentBackups(const BackupSettings & backup_settings) const;
@ -40,6 +40,12 @@ public:
/// Returns the number of unique files in the backup.
virtual size_t getNumFiles() const = 0;

/// Returns the number of files that were processed for backup or restore.
virtual size_t getNumProcessedFiles() const = 0;

/// Returns the total size of processed files for backup or restore.
virtual UInt64 getProcessedFilesSize() const = 0;

/// Returns the total size of unique files in the backup.
virtual UInt64 getUncompressedSize() const = 0;

@ -63,10 +63,12 @@ public:
XDBCBridgeHelper(
ContextPtr context_,
Poco::Timespan http_timeout_,
const std::string & connection_string_)
const std::string & connection_string_,
bool use_connection_pooling_)
: IXDBCBridgeHelper(context_->getGlobalContext())
, log(&Poco::Logger::get(BridgeHelperMixin::getName() + "BridgeHelper"))
, connection_string(connection_string_)
, use_connection_pooling(use_connection_pooling_)
, http_timeout(http_timeout_)
, config(context_->getGlobalContext()->getConfigRef())
{
@ -132,6 +134,7 @@ protected:
uri.setHost(bridge_host);
uri.setPort(bridge_port);
uri.setScheme("http");
uri.addQueryParameter("use_connection_pooling", toString(use_connection_pooling));
return uri;
}

@ -146,6 +149,7 @@ private:

Poco::Logger * log;
std::string connection_string;
bool use_connection_pooling;
Poco::Timespan http_timeout;
std::string bridge_host;
size_t bridge_port;
@ -189,6 +193,7 @@ protected:
uri.setPath(SCHEMA_ALLOWED_HANDLER);
uri.addQueryParameter("version", std::to_string(XDBC_BRIDGE_PROTOCOL_VERSION));
uri.addQueryParameter("connection_string", getConnectionString());
uri.addQueryParameter("use_connection_pooling", toString(use_connection_pooling));

ReadWriteBufferFromHTTP buf(uri, Poco::Net::HTTPRequest::HTTP_POST, {}, ConnectionTimeouts::getHTTPTimeouts(getContext()), credentials);

@ -210,6 +215,7 @@ protected:
uri.setPath(IDENTIFIER_QUOTE_HANDLER);
uri.addQueryParameter("version", std::to_string(XDBC_BRIDGE_PROTOCOL_VERSION));
uri.addQueryParameter("connection_string", getConnectionString());
uri.addQueryParameter("use_connection_pooling", toString(use_connection_pooling));

ReadWriteBufferFromHTTP buf(uri, Poco::Net::HTTPRequest::HTTP_POST, {}, ConnectionTimeouts::getHTTPTimeouts(getContext()), credentials);

@ -451,7 +451,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
catch (const Exception &)
{
/// Catch client errors like NO_ROW_DELIMITER
throw LocalFormatError(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
throw LocalFormatError(getCurrentExceptionMessageAndPattern(print_stack_trace), getCurrentExceptionCode());
}

/// Received data block is immediately displayed to the user.
@ -629,7 +629,7 @@ try
}
catch (...)
{
throw LocalFormatError(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
throw LocalFormatError(getCurrentExceptionMessageAndPattern(print_stack_trace), getCurrentExceptionCode());
}

@ -1897,7 +1897,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
{
// Surprisingly, this is a client error. A server error would
// have been reported without throwing (see onReceiveSeverException()).
client_exception = std::make_unique<Exception>(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
client_exception = std::make_unique<Exception>(getCurrentExceptionMessageAndPattern(print_stack_trace), getCurrentExceptionCode());
have_error = true;
}

@ -187,7 +187,7 @@ void LocalConnection::sendQuery(
catch (...)
{
state->io.onException();
state->exception = std::make_unique<Exception>("Unknown exception", ErrorCodes::UNKNOWN_EXCEPTION);
state->exception = std::make_unique<Exception>(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception");
}
}

@ -291,7 +291,7 @@ bool LocalConnection::poll(size_t)
catch (...)
{
state->io.onException();
state->exception = std::make_unique<Exception>("Unknown exception", ErrorCodes::UNKNOWN_EXCEPTION);
state->exception = std::make_unique<Exception>(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception");
}
}

@ -83,7 +83,7 @@ template <is_decimal T>
UInt64 ColumnDecimal<T>::get64([[maybe_unused]] size_t n) const
{
if constexpr (sizeof(T) > sizeof(UInt64))
throw Exception(String("Method get64 is not supported for ") + getFamilyName(), ErrorCodes::NOT_IMPLEMENTED);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method get64 is not supported for {}", getFamilyName());
else
return static_cast<NativeT>(data[n]);
}
@ -35,7 +35,7 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn
nested_column = getNestedColumn().convertToFullColumnIfConst();

if (!getNestedColumn().canBeInsideNullable())
throw Exception{getNestedColumn().getName() + " cannot be inside Nullable column", ErrorCodes::ILLEGAL_COLUMN};
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} cannot be inside Nullable column", getNestedColumn().getName());

if (isColumnConst(*null_map))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ColumnNullable cannot have constant null map");

@ -331,46 +331,14 @@ size_t ColumnUnique<ColumnType>::getNullValueIndex() const
template <typename ColumnType>
size_t ColumnUnique<ColumnType>::uniqueInsert(const Field & x)
{
class FieldVisitorGetData : public StaticVisitor<>
{
public:
StringRef res;

[[noreturn]] static void throwUnsupported()
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unsupported field type");
}

[[noreturn]] void operator() (const Null &) { throwUnsupported(); }
[[noreturn]] void operator() (const Array &) { throwUnsupported(); }
[[noreturn]] void operator() (const Tuple &) { throwUnsupported(); }
[[noreturn]] void operator() (const Map &) { throwUnsupported(); }
[[noreturn]] void operator() (const Object &) { throwUnsupported(); }
[[noreturn]] void operator() (const AggregateFunctionStateData &) { throwUnsupported(); }
void operator() (const String & x) { res = {x.data(), x.size()}; }
void operator() (const UInt64 & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const UInt128 & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const UInt256 & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const Int64 & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const Int128 & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const Int256 & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const UUID & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const IPv4 & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const IPv6 & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const Float64 & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const DecimalField<Decimal32> & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const DecimalField<Decimal64> & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const DecimalField<Decimal128> & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const DecimalField<Decimal256> & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
void operator() (const bool & x) { res = {reinterpret_cast<const char *>(&x), sizeof(x)}; }
};

if (x.isNull())
return getNullValueIndex();

FieldVisitorGetData visitor;
applyVisitor(visitor, x);
return uniqueInsertData(visitor.res.data, visitor.res.size);
auto single_value_column = column_holder->cloneEmpty();
single_value_column->insert(x);
auto single_value_data = single_value_column->getDataAt(0);

return uniqueInsertData(single_value_data.data, single_value_data.size);
}

template <typename ColumnType>
50
src/Columns/tests/gtest_low_cardinality.cpp
Normal file
@ -0,0 +1,50 @@
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnsNumber.h>

#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeLowCardinality.h>

#include <gtest/gtest.h>

using namespace DB;

template <typename T>
void testLowCardinalityNumberInsert(const DataTypePtr & data_type)
{
auto low_cardinality_type = std::make_shared<DataTypeLowCardinality>(data_type);
auto column = low_cardinality_type->createColumn();

column->insert(static_cast<T>(15));
column->insert(static_cast<T>(20));
column->insert(static_cast<T>(25));

Field value;
column->get(0, value);
ASSERT_EQ(value.get<T>(), 15);

column->get(1, value);
ASSERT_EQ(value.get<T>(), 20);

column->get(2, value);
ASSERT_EQ(value.get<T>(), 25);
}

TEST(ColumnLowCardinality, Insert)
{
testLowCardinalityNumberInsert<UInt8>(std::make_shared<DataTypeUInt8>());
testLowCardinalityNumberInsert<UInt16>(std::make_shared<DataTypeUInt16>());
testLowCardinalityNumberInsert<UInt32>(std::make_shared<DataTypeUInt32>());
testLowCardinalityNumberInsert<UInt64>(std::make_shared<DataTypeUInt64>());
testLowCardinalityNumberInsert<UInt128>(std::make_shared<DataTypeUInt128>());
testLowCardinalityNumberInsert<UInt256>(std::make_shared<DataTypeUInt256>());

testLowCardinalityNumberInsert<Int8>(std::make_shared<DataTypeInt8>());
testLowCardinalityNumberInsert<Int16>(std::make_shared<DataTypeInt16>());
testLowCardinalityNumberInsert<Int32>(std::make_shared<DataTypeInt32>());
testLowCardinalityNumberInsert<Int64>(std::make_shared<DataTypeInt64>());
testLowCardinalityNumberInsert<Int128>(std::make_shared<DataTypeInt128>());
testLowCardinalityNumberInsert<Int256>(std::make_shared<DataTypeInt256>());

testLowCardinalityNumberInsert<Float32>(std::make_shared<DataTypeFloat32>());
testLowCardinalityNumberInsert<Float64>(std::make_shared<DataTypeFloat64>());
}
@ -207,8 +207,9 @@ private:
if (size >= MMAP_THRESHOLD)
{
if (alignment > mmap_min_alignment)
throw DB::Exception(fmt::format("Too large alignment {}: more than page size when allocating {}.",
ReadableSize(alignment), ReadableSize(size)), DB::ErrorCodes::BAD_ARGUMENTS);
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
"Too large alignment {}: more than page size when allocating {}.",
ReadableSize(alignment), ReadableSize(size));

buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE,
mmap_flags, -1, 0);

@ -140,7 +140,7 @@ void CancelToken::raise()
{
std::unique_lock lock(signal_mutex);
if (exception_code != 0)
throw DB::Exception(
throw DB::Exception::createRuntime(
std::exchange(exception_code, 0),
std::exchange(exception_message, {}));
else
@ -88,7 +88,7 @@ public:
{
/// A more understandable error message.
if (e.code() == DB::ErrorCodes::CANNOT_READ_ALL_DATA || e.code() == DB::ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF)
throw DB::ParsingException("File " + path + " is empty. You must fill it manually with appropriate value.", e.code());
throw DB::ParsingException(e.code(), "File {} is empty. You must fill it manually with appropriate value.", path);
else
throw;
}

Some files were not shown because too many files have changed in this diff.