Merge branch 'master' into archive-improvements-2

This commit is contained in:
Antonio Andelic 2023-08-30 07:43:25 +00:00
commit ddb58217d4
202 changed files with 2019 additions and 736 deletions

View File

@ -21,8 +21,7 @@ assignees: ''
**Enable crash reporting**
> If possible, change "enabled" to true in "send_crash_reports" section in `config.xml`:
> Change "enabled" to true in "send_crash_reports" section in `config.xml`:
```
<send_crash_reports>
<!-- Changing <enabled> to true allows sending crash reports to -->

13
.gitmodules vendored
View File

@ -3,7 +3,7 @@
url = https://github.com/facebook/zstd
[submodule "contrib/lz4"]
path = contrib/lz4
url = https://github.com/lz4/lz4
url = https://github.com/ClickHouse/lz4
[submodule "contrib/librdkafka"]
path = contrib/librdkafka
url = https://github.com/ClickHouse/librdkafka
@ -13,7 +13,6 @@
[submodule "contrib/zlib-ng"]
path = contrib/zlib-ng
url = https://github.com/ClickHouse/zlib-ng
branch = clickhouse-2.0.x
[submodule "contrib/googletest"]
path = contrib/googletest
url = https://github.com/google/googletest
@ -47,7 +46,6 @@
[submodule "contrib/arrow"]
path = contrib/arrow
url = https://github.com/ClickHouse/arrow
branch = blessed/release-6.0.1
[submodule "contrib/thrift"]
path = contrib/thrift
url = https://github.com/apache/thrift
@ -93,7 +91,6 @@
[submodule "contrib/grpc"]
path = contrib/grpc
url = https://github.com/ClickHouse/grpc
branch = v1.33.2
[submodule "contrib/aws"]
path = contrib/aws
url = https://github.com/ClickHouse/aws-sdk-cpp
@ -140,11 +137,9 @@
[submodule "contrib/cassandra"]
path = contrib/cassandra
url = https://github.com/ClickHouse/cpp-driver
branch = clickhouse
[submodule "contrib/libuv"]
path = contrib/libuv
url = https://github.com/ClickHouse/libuv
branch = clickhouse
[submodule "contrib/fmtlib"]
path = contrib/fmtlib
url = https://github.com/fmtlib/fmt
@ -157,11 +152,9 @@
[submodule "contrib/cyrus-sasl"]
path = contrib/cyrus-sasl
url = https://github.com/ClickHouse/cyrus-sasl
branch = cyrus-sasl-2.1
[submodule "contrib/croaring"]
path = contrib/croaring
url = https://github.com/RoaringBitmap/CRoaring
branch = v0.2.66
[submodule "contrib/miniselect"]
path = contrib/miniselect
url = https://github.com/danlark1/miniselect
@ -174,7 +167,6 @@
[submodule "contrib/abseil-cpp"]
path = contrib/abseil-cpp
url = https://github.com/abseil/abseil-cpp
branch = lts_2021_11_02
[submodule "contrib/dragonbox"]
path = contrib/dragonbox
url = https://github.com/ClickHouse/dragonbox
@ -187,7 +179,6 @@
[submodule "contrib/boringssl"]
path = contrib/boringssl
url = https://github.com/ClickHouse/boringssl
branch = unknown_branch_from_artur
[submodule "contrib/NuRaft"]
path = contrib/NuRaft
url = https://github.com/ClickHouse/NuRaft
@ -248,7 +239,6 @@
[submodule "contrib/annoy"]
path = contrib/annoy
url = https://github.com/ClickHouse/annoy
branch = ClickHouse-master
[submodule "contrib/qpl"]
path = contrib/qpl
url = https://github.com/intel/qpl
@ -282,7 +272,6 @@
[submodule "contrib/openssl"]
path = contrib/openssl
url = https://github.com/openssl/openssl
branch = openssl-3.0
[submodule "contrib/google-benchmark"]
path = contrib/google-benchmark
url = https://github.com/google/benchmark

View File

@ -35,10 +35,6 @@ find_package(Threads REQUIRED)
include (cmake/unwind.cmake)
include (cmake/cxx.cmake)
# Delay the call to link the global interface after the libc++ libraries are included to avoid circular dependencies
# which are ok with static libraries but not with dynamic ones
link_libraries(global-group)
if (NOT OS_ANDROID)
if (NOT USE_MUSL)
# Our compatibility layer doesn't build under Android, many errors in musl.
@ -47,6 +43,8 @@ if (NOT OS_ANDROID)
add_subdirectory(base/harmful)
endif ()
link_libraries(global-group)
target_link_libraries(global-group INTERFACE
-Wl,--start-group
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>

View File

@ -136,9 +136,7 @@ add_contrib (aws-cmake
)
add_contrib (base64-cmake base64)
if (NOT ARCH_S390X)
add_contrib (simdjson-cmake simdjson)
endif()
add_contrib (rapidjson-cmake rapidjson)
add_contrib (fastops-cmake fastops)
add_contrib (libuv-cmake libuv)

View File

@ -334,20 +334,36 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/api_vector.cc"
"${LIBRARY_DIR}/compute/cast.cc"
"${LIBRARY_DIR}/compute/exec.cc"
"${LIBRARY_DIR}/compute/exec/accumulation_queue.cc"
"${LIBRARY_DIR}/compute/exec/accumulation_queue.h"
"${LIBRARY_DIR}/compute/exec/aggregate.cc"
"${LIBRARY_DIR}/compute/exec/aggregate_node.cc"
"${LIBRARY_DIR}/compute/exec/asof_join_node.cc"
"${LIBRARY_DIR}/compute/exec/bloom_filter.cc"
"${LIBRARY_DIR}/compute/exec/exec_plan.cc"
"${LIBRARY_DIR}/compute/exec/expression.cc"
"${LIBRARY_DIR}/compute/exec/filter_node.cc"
"${LIBRARY_DIR}/compute/exec/project_node.cc"
"${LIBRARY_DIR}/compute/exec/source_node.cc"
"${LIBRARY_DIR}/compute/exec/sink_node.cc"
"${LIBRARY_DIR}/compute/exec/hash_join.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_dict.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_node.cc"
"${LIBRARY_DIR}/compute/exec/key_hash.cc"
"${LIBRARY_DIR}/compute/exec/key_map.cc"
"${LIBRARY_DIR}/compute/exec/map_node.cc"
"${LIBRARY_DIR}/compute/exec/options.cc"
"${LIBRARY_DIR}/compute/exec/order_by_impl.cc"
"${LIBRARY_DIR}/compute/exec/partition_util.cc"
"${LIBRARY_DIR}/compute/exec/project_node.cc"
"${LIBRARY_DIR}/compute/exec/query_context.cc"
"${LIBRARY_DIR}/compute/exec/sink_node.cc"
"${LIBRARY_DIR}/compute/exec/source_node.cc"
"${LIBRARY_DIR}/compute/exec/swiss_join.cc"
"${LIBRARY_DIR}/compute/exec/task_util.cc"
"${LIBRARY_DIR}/compute/exec/tpch_node.cc"
"${LIBRARY_DIR}/compute/exec/union_node.cc"
"${LIBRARY_DIR}/compute/exec/util.cc"
"${LIBRARY_DIR}/compute/function.cc"
"${LIBRARY_DIR}/compute/function_internal.cc"
"${LIBRARY_DIR}/compute/kernel.cc"
"${LIBRARY_DIR}/compute/light_array.cc"
"${LIBRARY_DIR}/compute/registry.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_quantile.cc"
@ -355,49 +371,43 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/aggregate_var_std.cc"
"${LIBRARY_DIR}/compute/kernels/codegen_internal.cc"
"${LIBRARY_DIR}/compute/kernels/hash_aggregate.cc"
"${LIBRARY_DIR}/compute/kernels/row_encoder.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_dictionary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_extension.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_compare.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_nested.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_random.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_round.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc"
"${LIBRARY_DIR}/compute/kernels/util_internal.cc"
"${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc"
"${LIBRARY_DIR}/compute/kernels/vector_cumulative_ops.cc"
"${LIBRARY_DIR}/compute/kernels/vector_hash.cc"
"${LIBRARY_DIR}/compute/kernels/vector_rank.cc"
"${LIBRARY_DIR}/compute/kernels/vector_select_k.cc"
"${LIBRARY_DIR}/compute/kernels/vector_nested.cc"
"${LIBRARY_DIR}/compute/kernels/vector_rank.cc"
"${LIBRARY_DIR}/compute/kernels/vector_replace.cc"
"${LIBRARY_DIR}/compute/kernels/vector_select_k.cc"
"${LIBRARY_DIR}/compute/kernels/vector_selection.cc"
"${LIBRARY_DIR}/compute/kernels/vector_sort.cc"
"${LIBRARY_DIR}/compute/kernels/row_encoder.cc"
"${LIBRARY_DIR}/compute/exec/union_node.cc"
"${LIBRARY_DIR}/compute/exec/key_hash.cc"
"${LIBRARY_DIR}/compute/exec/key_map.cc"
"${LIBRARY_DIR}/compute/exec/util.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_dict.cc"
"${LIBRARY_DIR}/compute/exec/hash_join.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_node.cc"
"${LIBRARY_DIR}/compute/exec/task_util.cc"
"${LIBRARY_DIR}/compute/light_array.cc"
"${LIBRARY_DIR}/compute/registry.cc"
"${LIBRARY_DIR}/compute/row/compare_internal.cc"
"${LIBRARY_DIR}/compute/row/encode_internal.cc"
"${LIBRARY_DIR}/compute/row/grouper.cc"
"${LIBRARY_DIR}/compute/row/compare_internal.cc"
"${LIBRARY_DIR}/compute/row/row_internal.cc"
"${LIBRARY_DIR}/ipc/dictionary.cc"

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit a01ddc144c130777d7c6727a3fc5d5cdbae016d6
Subproject commit ae94606a70f1e298ce2a5718db858079185c4d9c

View File

@ -25,18 +25,16 @@ if (OS_LINUX)
)
endif ()
# headers-only
# headers-only
add_library (_boost_headers_only INTERFACE)
add_library (boost::headers_only ALIAS _boost_headers_only)
target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRARY_DIR})
# asio
target_compile_definitions (_boost_headers_only INTERFACE
BOOST_ASIO_STANDALONE=1
# Avoid using of deprecated in c++ > 17 std::result_of
BOOST_ASIO_HAS_STD_INVOKE_RESULT=1
BOOST_ASIO_HAS_STD_INVOKE_RESULT=1 # Avoid using of deprecated in c++ > 17 std::result_of
BOOST_TIMER_ENABLE_DEPRECATED=1 # wordnet-blast (enabled via USE_NLP) uses Boost legacy timer classes
)
# iostreams

2
contrib/libpqxx vendored

@ -1 +1 @@
Subproject commit bdd6540fb95ff56c813691ceb5da5a3266cf235d
Subproject commit 791d68fd89902835133c50435e380ec7a73271b7

2
contrib/lz4 vendored

@ -1 +1 @@
Subproject commit e82198428c8061372d5adef1f9bfff4203f6081e
Subproject commit 92ebf1870b9acbefc0e7970409a181954a10ff40

View File

@ -13,6 +13,11 @@ add_library (ch_contrib::lz4 ALIAS _lz4)
target_compile_definitions (_lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1)
target_compile_definitions (_lz4 PUBLIC LZ4_FAST_DEC_LOOP=1)
if(ARCH_S390X)
target_compile_definitions(_lz4 PRIVATE LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT)
endif()
if (SANITIZE STREQUAL "undefined")
target_compile_options (_lz4 PRIVATE -fno-sanitize=undefined)
endif ()

2
contrib/openssl vendored

@ -1 +1 @@
Subproject commit 19cc035b6c6f2283573d29c7ea7f7d675cf750ce
Subproject commit 245cb0291e0db99d9ccf3692fa76f440b2b054c2

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/cmp.h.in
* Generated by Makefile from include/openssl/cmp.h.in
*
* Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved.
* Copyright Nokia 2007-2019
* Copyright Siemens AG 2015-2019
*
@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO;
* -- CertReqMsg
* }
*/
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_request -3
# define OSSL_CMP_PKISTATUS_trans -2
# define OSSL_CMP_PKISTATUS_unspecified -1
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_revocationNotification 5
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
typedef ASN1_INTEGER OSSL_CMP_PKISTATUS;
DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS)
@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted,
int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey);
int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx,
const unsigned char *ref, int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec,
const int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx,
const unsigned char *sec, int len);
/* CMP message header and extra certificates: */
int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name);
int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav);
int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx);
int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx,
STACK_OF(X509) *extraCertsOut);
/* certificate template: */
@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr);
OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid);
OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx,
const char *propq);

View File

@ -1,6 +1,6 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/opensslv.h.in
* Generated by Makefile from include/openssl/opensslv.h.in
*
* Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved.
*
@ -29,7 +29,7 @@ extern "C" {
*/
# define OPENSSL_VERSION_MAJOR 3
# define OPENSSL_VERSION_MINOR 0
# define OPENSSL_VERSION_PATCH 7
# define OPENSSL_VERSION_PATCH 10
/*
* Additional version information
@ -74,21 +74,21 @@ extern "C" {
* longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and
* OPENSSL_VERSION_BUILD_METADATA_STR appended.
*/
# define OPENSSL_VERSION_STR "3.0.7"
# define OPENSSL_FULL_VERSION_STR "3.0.7"
# define OPENSSL_VERSION_STR "3.0.10"
# define OPENSSL_FULL_VERSION_STR "3.0.10"
/*
* SECTION 3: ADDITIONAL METADATA
*
* These strings are defined separately to allow them to be parsable.
*/
# define OPENSSL_RELEASE_DATE "1 Nov 2022"
# define OPENSSL_RELEASE_DATE "1 Aug 2023"
/*
* SECTION 4: BACKWARD COMPATIBILITY
*/
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022"
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023"
/* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */
# ifdef OPENSSL_VERSION_PRE_RELEASE

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/x509v3.h.in
* Generated by Makefile from include/openssl/x509v3.h.in
*
* Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st {
OTHERNAME *otherName; /* otherName */
ASN1_IA5STRING *rfc822Name;
ASN1_IA5STRING *dNSName;
ASN1_TYPE *x400Address;
ASN1_STRING *x400Address;
X509_NAME *directoryName;
EDIPARTYNAME *ediPartyName;
ASN1_IA5STRING *uniformResourceIdentifier;

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/cmp.h.in
* Generated by Makefile from include/openssl/cmp.h.in
*
* Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved.
* Copyright Nokia 2007-2019
* Copyright Siemens AG 2015-2019
*
@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO;
* -- CertReqMsg
* }
*/
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_request -3
# define OSSL_CMP_PKISTATUS_trans -2
# define OSSL_CMP_PKISTATUS_unspecified -1
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_revocationNotification 5
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
typedef ASN1_INTEGER OSSL_CMP_PKISTATUS;
DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS)
@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted,
int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey);
int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx,
const unsigned char *ref, int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec,
const int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx,
const unsigned char *sec, int len);
/* CMP message header and extra certificates: */
int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name);
int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav);
int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx);
int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx,
STACK_OF(X509) *extraCertsOut);
/* certificate template: */
@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr);
OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid);
OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx,
const char *propq);

View File

@ -1,6 +1,6 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/opensslv.h.in
* Generated by Makefile from include/openssl/opensslv.h.in
*
* Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved.
*
@ -29,7 +29,7 @@ extern "C" {
*/
# define OPENSSL_VERSION_MAJOR 3
# define OPENSSL_VERSION_MINOR 0
# define OPENSSL_VERSION_PATCH 7
# define OPENSSL_VERSION_PATCH 10
/*
* Additional version information
@ -74,21 +74,21 @@ extern "C" {
* longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and
* OPENSSL_VERSION_BUILD_METADATA_STR appended.
*/
# define OPENSSL_VERSION_STR "3.0.7"
# define OPENSSL_FULL_VERSION_STR "3.0.7"
# define OPENSSL_VERSION_STR "3.0.10"
# define OPENSSL_FULL_VERSION_STR "3.0.10"
/*
* SECTION 3: ADDITIONAL METADATA
*
* These strings are defined separately to allow them to be parsable.
*/
# define OPENSSL_RELEASE_DATE "1 Nov 2022"
# define OPENSSL_RELEASE_DATE "1 Aug 2023"
/*
* SECTION 4: BACKWARD COMPATIBILITY
*/
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022"
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023"
/* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */
# ifdef OPENSSL_VERSION_PRE_RELEASE

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/x509v3.h.in
* Generated by Makefile from include/openssl/x509v3.h.in
*
* Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st {
OTHERNAME *otherName; /* otherName */
ASN1_IA5STRING *rfc822Name;
ASN1_IA5STRING *dNSName;
ASN1_TYPE *x400Address;
ASN1_STRING *x400Address;
X509_NAME *directoryName;
EDIPARTYNAME *ediPartyName;
ASN1_IA5STRING *uniformResourceIdentifier;

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/cmp.h.in
* Generated by Makefile from include/openssl/cmp.h.in
*
* Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved.
* Copyright Nokia 2007-2019
* Copyright Siemens AG 2015-2019
*
@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO;
* -- CertReqMsg
* }
*/
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_request -3
# define OSSL_CMP_PKISTATUS_trans -2
# define OSSL_CMP_PKISTATUS_unspecified -1
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_revocationNotification 5
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
typedef ASN1_INTEGER OSSL_CMP_PKISTATUS;
DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS)
@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted,
int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey);
int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx,
const unsigned char *ref, int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec,
const int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx,
const unsigned char *sec, int len);
/* CMP message header and extra certificates: */
int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name);
int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav);
int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx);
int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx,
STACK_OF(X509) *extraCertsOut);
/* certificate template: */
@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr);
OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid);
OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx,
const char *propq);

View File

@ -1,6 +1,6 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/opensslv.h.in
* Generated by Makefile from include/openssl/opensslv.h.in
*
* Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved.
*
@ -29,7 +29,7 @@ extern "C" {
*/
# define OPENSSL_VERSION_MAJOR 3
# define OPENSSL_VERSION_MINOR 0
# define OPENSSL_VERSION_PATCH 7
# define OPENSSL_VERSION_PATCH 10
/*
* Additional version information
@ -74,21 +74,21 @@ extern "C" {
* longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and
* OPENSSL_VERSION_BUILD_METADATA_STR appended.
*/
# define OPENSSL_VERSION_STR "3.0.7"
# define OPENSSL_FULL_VERSION_STR "3.0.7"
# define OPENSSL_VERSION_STR "3.0.10"
# define OPENSSL_FULL_VERSION_STR "3.0.10"
/*
* SECTION 3: ADDITIONAL METADATA
*
* These strings are defined separately to allow them to be parsable.
*/
# define OPENSSL_RELEASE_DATE "1 Nov 2022"
# define OPENSSL_RELEASE_DATE "1 Aug 2023"
/*
* SECTION 4: BACKWARD COMPATIBILITY
*/
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022"
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023"
/* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */
# ifdef OPENSSL_VERSION_PRE_RELEASE

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/x509v3.h.in
* Generated by Makefile from include/openssl/x509v3.h.in
*
* Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st {
OTHERNAME *otherName; /* otherName */
ASN1_IA5STRING *rfc822Name;
ASN1_IA5STRING *dNSName;
ASN1_TYPE *x400Address;
ASN1_STRING *x400Address;
X509_NAME *directoryName;
EDIPARTYNAME *ediPartyName;
ASN1_IA5STRING *uniformResourceIdentifier;

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/cmp.h.in
* Generated by Makefile from include/openssl/cmp.h.in
*
* Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved.
* Copyright Nokia 2007-2019
* Copyright Siemens AG 2015-2019
*
@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO;
* -- CertReqMsg
* }
*/
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_request -3
# define OSSL_CMP_PKISTATUS_trans -2
# define OSSL_CMP_PKISTATUS_unspecified -1
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_revocationNotification 5
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
typedef ASN1_INTEGER OSSL_CMP_PKISTATUS;
DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS)
@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted,
int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey);
int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx,
const unsigned char *ref, int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec,
const int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx,
const unsigned char *sec, int len);
/* CMP message header and extra certificates: */
int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name);
int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav);
int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx);
int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx,
STACK_OF(X509) *extraCertsOut);
/* certificate template: */
@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr);
OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid);
OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx,
const char *propq);

View File

@ -1,6 +1,6 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/opensslv.h.in
* Generated by Makefile from include/openssl/opensslv.h.in
*
* Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved.
*
@ -29,7 +29,7 @@ extern "C" {
*/
# define OPENSSL_VERSION_MAJOR 3
# define OPENSSL_VERSION_MINOR 0
# define OPENSSL_VERSION_PATCH 7
# define OPENSSL_VERSION_PATCH 10
/*
* Additional version information
@ -74,21 +74,21 @@ extern "C" {
* longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and
* OPENSSL_VERSION_BUILD_METADATA_STR appended.
*/
# define OPENSSL_VERSION_STR "3.0.7"
# define OPENSSL_FULL_VERSION_STR "3.0.7"
# define OPENSSL_VERSION_STR "3.0.10"
# define OPENSSL_FULL_VERSION_STR "3.0.10"
/*
* SECTION 3: ADDITIONAL METADATA
*
* These strings are defined separately to allow them to be parsable.
*/
# define OPENSSL_RELEASE_DATE "1 Nov 2022"
# define OPENSSL_RELEASE_DATE "1 Aug 2023"
/*
* SECTION 4: BACKWARD COMPATIBILITY
*/
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022"
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023"
/* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */
# ifdef OPENSSL_VERSION_PRE_RELEASE

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/x509v3.h.in
* Generated by Makefile from include/openssl/x509v3.h.in
*
* Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st {
OTHERNAME *otherName; /* otherName */
ASN1_IA5STRING *rfc822Name;
ASN1_IA5STRING *dNSName;
ASN1_TYPE *x400Address;
ASN1_STRING *x400Address;
X509_NAME *directoryName;
EDIPARTYNAME *ediPartyName;
ASN1_IA5STRING *uniformResourceIdentifier;

View File

@ -32,7 +32,6 @@ set(RE2_SOURCES
${SRC_DIR}/re2/tostring.cc
${SRC_DIR}/re2/unicode_casefold.cc
${SRC_DIR}/re2/unicode_groups.cc
${SRC_DIR}/util/pcre.cc
${SRC_DIR}/util/rune.cc
${SRC_DIR}/util/strutil.cc
)

2
contrib/usearch vendored

@ -1 +1 @@
Subproject commit 387b78b28b17b8954024ffc81e97cbcfa10d1f30
Subproject commit f942b6f334b31716f9bdb02eb6a25fa6b222f5ba

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
esac
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
ARG VERSION="23.7.4.5"
ARG VERSION="23.7.5.30"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.7.4.5"
ARG VERSION="23.7.5.30"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.7.4.5"
ARG VERSION="23.7.5.30"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -41,7 +41,7 @@ do
echo "Creating destination table ${table}_${hash}" >&2
echo "$statement" | clickhouse-client --distributed_ddl_task_timeout=10 $CONNECTION_PARAMETERS || continue
echo "$statement" | clickhouse-client --distributed_ddl_task_timeout=10 --receive_timeout=10 --send_timeout=10 $CONNECTION_PARAMETERS || continue
echo "Creating table system.${table}_sender" >&2

View File

@ -0,0 +1,36 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v22.8.21.38-lts (70872e9859e) FIXME as compared to v22.8.20.11-lts (c9ca79e24e8)
#### Build/Testing/Packaging Improvement
* Backported in [#53017](https://github.com/ClickHouse/ClickHouse/issues/53017): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#53459](https://github.com/ClickHouse/ClickHouse/issues/53459): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)).
* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)).
* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,17 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.3.11.5-lts (5762a23a76d) FIXME as compared to v23.3.10.5-lts (d8737007f9e)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,62 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.5.5.92-stable (557edaddace) FIXME as compared to v23.5.4.25-stable (190f962abcf)
#### Performance Improvement
* Backported in [#52749](https://github.com/ClickHouse/ClickHouse/issues/52749): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)).
#### Build/Testing/Packaging Improvement
* Backported in [#51886](https://github.com/ClickHouse/ClickHouse/issues/51886): Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#52909](https://github.com/ClickHouse/ClickHouse/issues/52909): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#53021](https://github.com/ClickHouse/ClickHouse/issues/53021): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#53289](https://github.com/ClickHouse/ClickHouse/issues/53289): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#53463](https://github.com/ClickHouse/ClickHouse/issues/53463): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix backward compatibility for IP types hashing in aggregate functions [#50551](https://github.com/ClickHouse/ClickHouse/pull/50551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)).
* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)).
* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)).
* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)).
* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)).
* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)).
* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)).
* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)).
* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix MergeTreeMarksLoader segfaulting if marks file is longer than expected [#51636](https://github.com/ClickHouse/ClickHouse/pull/51636) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)).
* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,58 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.6.3.87-stable (36911c17d0f) FIXME as compared to v23.6.2.18-stable (89f39a7ccfe)
#### Performance Improvement
* Backported in [#52751](https://github.com/ClickHouse/ClickHouse/issues/52751): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)).
#### Build/Testing/Packaging Improvement
* Backported in [#52911](https://github.com/ClickHouse/ClickHouse/issues/52911): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#53023](https://github.com/ClickHouse/ClickHouse/issues/53023): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#53290](https://github.com/ClickHouse/ClickHouse/issues/53290): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#53465](https://github.com/ClickHouse/ClickHouse/issues/53465): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)).
* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)).
* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)).
* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)).
* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)).
* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)).
* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix: logical error in grace hash join [#51737](https://github.com/ClickHouse/ClickHouse/pull/51737) ([Igor Nikonov](https://github.com/devcrafter)).
* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)).
* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Increase min protocol version for sparse serialization [#52835](https://github.com/ClickHouse/ClickHouse/pull/52835) ([Anton Popov](https://github.com/CurtizJ)).
* Docker improvements [#52869](https://github.com/ClickHouse/ClickHouse/pull/52869) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,31 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.7.5.30-stable (e86c21fb922) FIXME as compared to v23.7.4.5-stable (bd2fcd44553)
#### Build/Testing/Packaging Improvement
* Backported in [#53291](https://github.com/ClickHouse/ClickHouse/issues/53291): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#53467](https://github.com/ClickHouse/ClickHouse/issues/53467): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)).
* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -1,23 +1,21 @@
# Laion-400M dataset
The dataset contains 400 million images with English text. For more information follow this [link](https://laion.ai/blog/laion-400-open-dataset/). Laion provides even larger datasets (e.g. [5 billion](https://laion.ai/blog/laion-5b/)). Working with them will be similar.
The [Laion-400M dataset](https://laion.ai/blog/laion-400-open-dataset/) contains 400 million images with English image captions. Laion nowadays provides [an even larger dataset](https://laion.ai/blog/laion-5b/) but working with it will be similar.
The dataset has prepared embeddings for texts and images. This will be used to demonstrate [Approximate nearest neighbor search indexes](../../engines/table-engines/mergetree-family/annindexes.md).
The dataset contains the image URL, embeddings for both the image and the image caption, a similarity score between the image and the image caption, as well as metadata, e.g. the image width/height, the licence and a NSFW flag. We can use the dataset to demonstrate [approximate nearest neighbor search](../../engines/table-engines/mergetree-family/annindexes.md) in ClickHouse.
## Prepare data
## Data preparation
Embeddings are stored in `.npy` files, so we have to read them with python and merge with other data.
Download data and process it with simple `download.sh` script:
The embeddings and the metadata are stored in separate files in the raw data. A data preparation step downloads the data, merges the files,
converts them to CSV and imports them into ClickHouse. You can use the following `download.sh` script for that:
```bash
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy
python3 process.py ${1}
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy # download image embedding
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy # download text embedding
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet # download metadata
python3 process.py ${1} # merge files and convert to CSV
```
Where `process.py`:
Script `process.py` is defined as follows:
```python
import pandas as pd
@ -35,11 +33,11 @@ im_emb = np.load(npy_file)
text_emb = np.load(text_npy)
data = pd.read_parquet(metadata_file)
# combine them
# combine files
data = pd.concat([data, pd.DataFrame({"image_embedding" : [*im_emb]}), pd.DataFrame({"text_embedding" : [*text_emb]})], axis=1, copy=False)
# you can save more columns
data = data[['url', 'caption', 'similarity', "image_embedding", "text_embedding"]]
# columns to be imported into ClickHouse
data = data[['url', 'caption', 'NSFW', 'similarity', "image_embedding", "text_embedding"]]
# transform np.arrays to lists
data['image_embedding'] = data['image_embedding'].apply(lambda x: list(x))
@ -48,30 +46,32 @@ data['text_embedding'] = data['text_embedding'].apply(lambda x: list(x))
# this small hack is needed becase caption sometimes contains all kind of quotes
data['caption'] = data['caption'].apply(lambda x: x.replace("'", " ").replace('"', " "))
# save data to file
# export data as CSV file
data.to_csv(str_i + '.csv', header=False)
# previous files can be removed
# removed raw data files
os.system(f"rm {npy_file} {metadata_file} {text_npy}")
```
You can download data with
To start the data preparation pipeline, run:
```bash
seq 0 409 | xargs -P100 -I{} bash -c './download.sh {}'
```
The dataset is divided into 409 files. If you want to work only with a certain part of the dataset, just change the limits.
The dataset is split into 410 files, each file contains ca. 1 million rows. If you like to work with a smaller subset of the data, simply adjust the limits, e.g. `seq 0 9 | ...`.
## Create table for laion
## Create table
Without indexes table can be created by
To create a table without indexes, run:
```sql
CREATE TABLE laion_dataset
CREATE TABLE laion
(
`id` Int64,
`url` String,
`caption` String,
`NSFW` String,
`similarity` Float32,
`image_embedding` Array(Float32),
`text_embedding` Array(Float32)
@ -81,23 +81,23 @@ ORDER BY id
SETTINGS index_granularity = 8192
```
Fill table with data:
To import the CSV files into ClickHouse:
```sql
INSERT INTO laion_dataset FROM INFILE '{path_to_csv_files}/*.csv'
INSERT INTO laion FROM INFILE '{path_to_csv_files}/*.csv'
```
## Check data in table without indexes
## Run a brute-force ANN search (without ANN index)
Let's check the work of the following query on the part of the dataset (8 million records):
To run a brute-force approximate nearest neighbor search, run:
```sql
select url, caption from test_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 30
SELECT url, caption FROM laion WHERE similarity > 0.2 ORDER BY L2Distance(image_embedding, {target:Array(Float32)}) LIMIT 30
```
Since the embeddings for images and texts may not match, let's also require a certain threshold of matching accuracy to get images that are more likely to satisfy our queries. The client parameter `target`, which is an array of 512 elements. See later in this article for a convenient way of obtaining such vectors. I used a random picture of a cat from the Internet as a target vector.
The filter on `similarity` makes sure that the images correspond to the image captions in the query results. `target` is an array of 512 elements and a client parameter. A convenient way to obtain such arrays will be presented at the end of the article. For now, we can run the embedding of a random cat picture as `target`.
**The result**
**Result**
```
┌─url───────────────────────────────────────────────────────────────────────────────────────────────────────────┬─caption────────────────────────────────────────────────────────────────┐
@ -114,32 +114,32 @@ Since the embeddings for images and texts may not match, let's also require a ce
8 rows in set. Elapsed: 6.432 sec. Processed 19.65 million rows, 43.96 GB (3.06 million rows/s., 6.84 GB/s.)
```
## Add indexes
## Run a ANN with an ANN index
Create a new table or follow instructions from [alter documentation](../../sql-reference/statements/alter/skipping-index.md).
Either create a new table or use [ALTER TABLE ADD INDEX](../../sql-reference/statements/alter/skipping-index.md) to add an ANN index:
```sql
CREATE TABLE laion_dataset
CREATE TABLE laion
(
`id` Int64,
`url` String,
`caption` String,
`NSFW` String,
`similarity` Float32,
`image_embedding` Array(Float32),
`text_embedding` Array(Float32),
INDEX annoy_image image_embedding TYPE annoy(1000) GRANULARITY 1000,
INDEX annoy_text text_embedding TYPE annoy(1000) GRANULARITY 1000
INDEX annoy_image image_embedding TYPE annoy(1000),
INDEX annoy_text text_embedding TYPE annoy(1000)
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity = 8192
```
When created, the index will be built by L2Distance. You can read more about the parameters in the [annoy documentation](../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy). It makes sense to build indexes for a large number of granules. If you need good speed, then GRANULARITY should be several times larger than the expected number of results in the search.
Now let's check again with the same query:
By default, Annoy indexes use the L2 distance as metric. Further tuning knobs for index creation and search are described in the Annoy index [documentation](../../engines/table-engines/mergetree-family/annindexes.md). Let's check now again with the same query:
```sql
select url, caption from test_indexes_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 8
SELECT url, caption FROM test_indexes_laion WHERE similarity > 0.2 ORDER BY l2Distance(image_embedding, {target:Array(Float32)}) LIMIT 8
```
**Result**
@ -159,15 +159,18 @@ select url, caption from test_indexes_laion where similarity > 0.2 order by L2Di
8 rows in set. Elapsed: 0.641 sec. Processed 22.06 thousand rows, 49.36 MB (91.53 thousand rows/s., 204.81 MB/s.)
```
The speed has increased significantly. But now, the results sometimes differ from what you are looking for. This is due to the approximation of the search and the quality of the constructed embedding. Note that the example was given for picture embeddings, but there are also text embeddings in the dataset, which can also be used for searching.
The speed increased significantly at the cost of less accurate results. This is because the ANN index only provide approximate search results. Note the example searched for similar image embeddings, yet it is also possible to search for positive image caption embeddings.
## Scripts for embeddings
## Creating embeddings with UDFs
Usually, we do not want to get embeddings from existing data, but to get them for new data and look for similar ones in old data. We can use [UDF](../../sql-reference/functions/index.md#sql-user-defined-functions) for this purpose. They will allow you to set the `target` vector without leaving the client. All of the following scripts will be written for the `ViT-B/32` model, as it was used for this dataset. You can use any model, but it is necessary to build embeddings in the dataset and for new objects using the same model.
One usually wants to create embeddings for new images or new image captions and search for similar image / image caption pairs in the data. We can use [UDF](../../sql-reference/functions/index.md#sql-user-defined-functions) to create the `target` vector without leaving the client. It is important to use the same model to create the data and new embeddings for searches. The following scripts utilize the `ViT-B/32` model which also underlies the dataset.
### Text embeddings
First, store the following Python script in the `user_scripts/` directory of your ClickHouse data path and make it executable (`chmod +x encode_text.py`).
`encode_text.py`:
```python
#!/usr/bin/python3
import clip
@ -182,10 +185,12 @@ if __name__ == '__main__':
inputs = clip.tokenize(text)
with torch.no_grad():
text_features = model.encode_text(inputs)[0].tolist()
print(text_features)
sys.stdout.flush()
```
`encode_text_function.xml`:
Then create `encode_text_function.xml` in a location referenced by `<user_defined_executable_functions_config>/path/to/*_function.xml</user_defined_executable_functions_config>` in your ClickHouse server configuration file.
```xml
<functions>
<function>
@ -203,19 +208,19 @@ if __name__ == '__main__':
</functions>
```
Now we can simply use:
You can now simply use:
```sql
SELECT encode_text('cat');
```
The first use will be slow because the model needs to be loaded. But repeated queries will be fast. Then we copy the results to ``set param_target=...`` and can easily write queries
The first run will be slow because it loads the model, but repeated runs will be fast. We can then copy the output to `SET param_target=...` and can easily write queries.
### Image embeddings
For pictures, the process is similar, but you send the path instead of the picture (if necessary, you can implement a download picture with processing, but it will take longer)
Image embeddings can be created similarly but we will provide the Python script the path to a local image instead of the image caption text.
`encode_image.py`
`encode_picture.py`
```python
#!/usr/bin/python3
import clip
@ -231,29 +236,31 @@ if __name__ == '__main__':
image = preprocess(Image.open(text.strip())).unsqueeze(0).to(device)
with torch.no_grad():
image_features = model.encode_image(image)[0].tolist()
print(image_features)
print(image_features)
sys.stdout.flush()
```
`encode_picture_function.xml`
`encode_image_function.xml`
```xml
<functions>
<function>
<type>executable_pool</type>
<name>encode_picture</name>
<name>encode_image</name>
<return_type>Array(Float32)</return_type>
<argument>
<type>String</type>
<name>path</name>
</argument>
<format>TabSeparated</format>
<command>encode_picture.py</command>
<command>encode_image.py</command>
<command_read_timeout>1000000</command_read_timeout>
</function>
</functions>
```
The query:
Then run this query:
```sql
SELECT encode_picture('some/path/to/your/picture');
SELECT encode_image('/path/to/your/image');
```

View File

@ -40,6 +40,32 @@ There are multiple ways of user identification:
- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'`
- `IDENTIFIED BY 'qwerty'`
Password complexity requirements can be edited in [config.xml](/docs/en/operations/configuration-files). Below is an example configuration that requires passwords to be at least 12 characters long and contain 1 number. Each password complexity rule requires a regex to match against passwords and a description of the rule.
```xml
<clickhouse>
<password_complexity>
<rule>
<pattern>.{12}</pattern>
<message>be at least 12 characters long</message>
</rule>
<rule>
<pattern>\p{N}</pattern>
<message>contain at least 1 numeric character</message>
</rule>
</password_complexity>
</clickhouse>
```
:::note
In ClickHouse Cloud, by default, passwords must meet the following complexity requirements:
- Be at least 12 characters long
- Contain at least 1 numeric character
- Contain at least 1 uppercase character
- Contain at least 1 lowercase character
- Contain at least 1 special character
:::
## Examples
1. The following username is `name1` and does not require a password - which obviously doesn't provide much security:

View File

@ -60,9 +60,9 @@ Specifics of each optional clause are covered in separate sections, which are li
If you want to include all columns in the result, use the asterisk (`*`) symbol. For example, `SELECT * FROM ...`.
### COLUMNS expression
### Dynamic column selection
To match some columns in the result with a [re2](https://en.wikipedia.org/wiki/RE2_(software)) regular expression, you can use the `COLUMNS` expression.
Dynamic column selection (also known as a COLUMNS expression) allows you to match some columns in a result with a [re2](https://en.wikipedia.org/wiki/RE2_(software)) regular expression.
``` sql
COLUMNS('regexp')

View File

@ -0,0 +1,103 @@
---
slug: /zh/engines/table-engines/mergetree-family/sharedmergetree
---
# SharedMergeTree {#sharedmergetree}
仅在ClickHouse Cloud以及第一方合作伙伴云服务中提供
SharedMergeTree表引擎系列是ReplicatedMergeTree引擎的云原生替代方案经过优化适用于共享对象存储例如Amazon S3、Google Cloud Storage、MinIO、Azure Blob Storage。每个特定的MergeTree引擎类型都有对应的SharedMergeTree引擎例如ReplacingSharedMergeTree替代ReplacingReplicatedMergeTree。
SharedMergeTree表引擎为ClickHouse Cloud的性能带来了显著提升。对于终端用户来说无需做任何改变即可开始使用SharedMergeTree引擎系列而不是基于ReplicatedMergeTree的引擎。它提供的好处包括
- 更高的插入吞吐量
- 后台合并的吞吐量提高
- Mutation操作的吞吐量提高
- 更快的扩容和缩容操作
- 用于选择查询的更轻量级强一致性
SharedMergeTree带来的一个重要改进是与ReplicatedMergeTree相比它提供了更彻底的计算和存储分离。下图展示了ReplicatedMergeTree如何分离计算和存储
![ReplicatedMergeTree Diagram](../../../images/shared-merge-tree-1.png)
正如您所见尽管存储在ReplicatedMergeTree中的数据位于对象存储中但元数据仍存储在每个clickhouse-server上。这意味着对于每个复制操作元数据也需要在所有副本上进行复制。
![ReplicatedMergeTree Diagram](../../../images/shared-merge-tree-2.png)
与ReplicatedMergeTree不同SharedMergeTree不需要副本之间进行通信。相反所有通信都通过共享存储和clickhouse-keeper进行。SharedMergeTree实现了异步无领导复制并使用clickhouse-keeper进行协调和元数据存储。这意味着随着服务的扩展不需要复制元数据。这可以加快复制、变更、合并和扩展操作。SharedMergeTree允许每个表有数百个副本使得无需分片即可进行动态扩展。这也意味着在ClickHouse Cloud中使用分布式查询执行方法可以利用更多的计算资源来执行查询。
## 系统监控
用于系统监控的ReplicatedMergeTree的大部分系统表system table在SharedMergeTree中也存在唯独没有`system.replication_queue`和`system.replicated_fetches`因为没有数据和元数据的复制。然而SharedMergeTree对这两个表有相应的替代表。
`system.virtual_parts`
这个表作为SharedMergeTree对 `system.replication_queue` 的替代存储关于最新的一组data parts以及未来正在进行的合并、变更和删除parts。
`system.shared_merge_tree_fetches`
这个表是SharedMergeTree对`system.replicated_fetches`的替代。它包含关于正在加载入内存的主键和校验码信息。
## 使用SharedMergeTree
SharedMergeTree已经是所有开发实例development tier中的默认表引擎并且可以通过提交支持工单在生产环境实例product tier中启用https://clickhouse.cloud/support。
对于支持SharedMergeTree表引擎的实例您不需要做任何额外变更。您可以像以前一样创建表它会自动使用基于SharedMergeTree的表引擎该引擎与您在CREATE TABLE查询中指定的引擎相对应。
通过使用 SharedMergeTree 表引擎可以创建 my_table 表。
```sql
CREATE TABLE my_table(
key UInt64,
value String
)
ENGINE = MergeTree
ORDER BY key
```
在ClickHouse Cloud中由于 `default_table_engine=MergeTree`,用户不必再特别设置`ENGINE=MergeTree`。下面的查询语句和上面的完全一样。
```sql
CREATE TABLE my_table(
key UInt64,
value String
)
ORDER BY key
```
如果您使用Replacing、Collapsing、Aggregating、Summing、VersionedCollapsing、Graphite MergeTree表它们将自动转换为相应的基于SharedMergeTree的表引擎。
```sql
CREATE TABLE myFirstReplacingMT
(
`key` Int64,
`someCol` String,
`eventTime` DateTime
)
ENGINE = ReplacingMergeTree
ORDER BY key;
```
您可以使用SHOW CREATE TABLE查看用于创建表的语句。
``` sql
SHOW CREATE TABLE myFirstReplacingMT;
```
```sql
CREATE TABLE default.myFirstReplacingMT
( `key` Int64, `someCol` String, `eventTime` DateTime )
ENGINE = SharedReplacingMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')
ORDER BY key
SETTINGS index_granularity = 8192
```
## 配置
一些配置的行为发生了显著的改变:
- `insert_quorum` -- 所有对SharedMergeTree的insert都是quorum insert写入共享对象存储因此在使用SharedMergeTree表引擎时不需要此设置。
- `insert_quorum_parallel` -- 所有对SharedMergeTree的insert都是quorum insert写入共享对象存储
- `select_sequential_consistency` -- 不需要quorum inserts会引起在SELECT查询中向clickhouse-keeper增加附加的请求。

Binary file not shown.

After

Width:  |  Height:  |  Size: 695 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 700 KiB

View File

@ -125,7 +125,7 @@ SELECT max2(-1, 2);
**语法**
```sql
max2(value1, value2)
min2(value1, value2)
```
**参数**

View File

@ -2,6 +2,7 @@
#include "Commands.h"
#include <Client/ReplxxLineReader.h>
#include <Client/ClientBase.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/EventNotifier.h>
#include <Common/filesystemHelpers.h>
#include <Common/ZooKeeper/ZooKeeper.h>
@ -155,6 +156,11 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
.argument("<seconds>")
.binding("operation-timeout"));
options.addOption(
Poco::Util::Option("config-file", "c", "if set, will try to get a connection string from clickhouse config. default `config.xml`")
.argument("<file>")
.binding("config-file"));
options.addOption(
Poco::Util::Option("history-file", "", "set path of history file. default `~/.keeper-client-history`")
.argument("<file>")
@ -211,7 +217,14 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
}
}
Poco::Logger::root().setLevel(config().getString("log-level", "error"));
String default_log_level;
if (config().has("query"))
/// We don't want to see any information log in query mode, unless it was set explicitly
default_log_level = "error";
else
default_log_level = "information";
Poco::Logger::root().setLevel(config().getString("log-level", default_log_level));
EventNotifier::init();
}
@ -311,9 +324,39 @@ int KeeperClient::main(const std::vector<String> & /* args */)
return 0;
}
auto host = config().getString("host", "localhost");
auto port = config().getString("port", "9181");
zk_args.hosts = {host + ":" + port};
DB::ConfigProcessor config_processor(config().getString("config-file", "config.xml"));
/// This will handle a situation when clickhouse is running on the embedded config, but config.d folder is also present.
config_processor.registerEmbeddedConfig("config.xml", "<clickhouse/>");
auto clickhouse_config = config_processor.loadConfig();
Poco::Util::AbstractConfiguration::Keys keys;
clickhouse_config.configuration->keys("zookeeper", keys);
if (!config().has("host") && !config().has("port") && !keys.empty())
{
LOG_INFO(&Poco::Logger::get("KeeperClient"), "Found keeper node in the config.xml, will use it for connection");
for (const auto & key : keys)
{
String prefix = "zookeeper." + key;
String host = clickhouse_config.configuration->getString(prefix + ".host");
String port = clickhouse_config.configuration->getString(prefix + ".port");
if (clickhouse_config.configuration->has(prefix + ".secure"))
host = "secure://" + host;
zk_args.hosts.push_back(host + ":" + port);
}
}
else
{
String host = config().getString("host", "localhost");
String port = config().getString("port", "9181");
zk_args.hosts.push_back(host + ":" + port);
}
zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000;
zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000;
zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000;

View File

@ -450,11 +450,11 @@ void checkForUsersNotInMainConfig(
/// Unused in other builds
#if defined(OS_LINUX)
static String readString(const String & path)
static String readLine(const String & path)
{
ReadBufferFromFile in(path);
String contents;
readStringUntilEOF(contents, in);
readStringUntilNewlineInto(contents, in);
return contents;
}
@ -479,9 +479,16 @@ static void sanityChecks(Server & server)
#if defined(OS_LINUX)
try
{
const std::unordered_set<std::string> fastClockSources = {
// ARM clock
"arch_sys_counter",
// KVM guest clock
"kvm-clock",
// X86 clock
"tsc",
};
const char * filename = "/sys/devices/system/clocksource/clocksource0/current_clocksource";
String clocksource = readString(filename);
if (clocksource.find("tsc") == std::string::npos && clocksource.find("kvm-clock") == std::string::npos)
if (!fastClockSources.contains(readLine(filename)))
server.context()->addWarningMessage("Linux is not using a fast clock source. Performance can be degraded. Check " + String(filename));
}
catch (...)
@ -501,7 +508,7 @@ static void sanityChecks(Server & server)
try
{
const char * filename = "/sys/kernel/mm/transparent_hugepage/enabled";
if (readString(filename).find("[always]") != std::string::npos)
if (readLine(filename).find("[always]") != std::string::npos)
server.context()->addWarningMessage("Linux transparent hugepages are set to \"always\". Check " + String(filename));
}
catch (...)

View File

@ -100,8 +100,8 @@ private:
struct Constraint
{
SettingConstraintWritability writability = SettingConstraintWritability::WRITABLE;
Field min_value;
Field max_value;
Field min_value{};
Field max_value{};
bool operator ==(const Constraint & other) const;
bool operator !=(const Constraint & other) const { return !(*this == other); }

View File

@ -169,6 +169,10 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "merge() with thread pool parameter isn't implemented for {} ", getName());
}
/// Merges states (on which src places points to) with other states (on which dst places points to) of current aggregation function
/// then destroy states (on which src places points to).
virtual void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * src_places, size_t size, size_t offset, Arena * arena) const = 0;
/// Serializes state (to transmit it over the network, for example).
virtual void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version = std::nullopt) const = 0; /// NOLINT
@ -506,6 +510,15 @@ public:
static_cast<const Derived *>(this)->merge(places[i] + place_offset, rhs[i], arena);
}
void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * rhs_places, size_t size, size_t offset, Arena * arena) const override
{
for (size_t i = 0; i < size; ++i)
{
static_cast<const Derived *>(this)->merge(dst_places[i] + offset, rhs_places[i] + offset, arena);
static_cast<const Derived *>(this)->destroy(rhs_places[i] + offset);
}
}
void addBatchSinglePlace( /// NOLINT
size_t row_begin,
size_t row_end,

View File

@ -10,6 +10,8 @@
#include <Parsers/ASTWithAlias.h>
#include <boost/functional/hash.hpp>
namespace DB
{

View File

@ -1471,8 +1471,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
sendDataFromPipe(
std::move(pipe),
parsed_query,
have_data_in_stdin
);
have_data_in_stdin);
}
catch (Exception & e)
{

View File

@ -886,7 +886,7 @@ void Connection::sendExternalTablesData(ExternalTablesData & data)
return sink;
});
executor = pipeline.execute();
executor->execute(/*num_threads = */ 1);
executor->execute(/*num_threads = */ 1, false);
auto read_rows = sink->getNumReadRows();
rows += read_rows;

View File

@ -559,6 +559,7 @@ FieldInfo ColumnObject::Subcolumn::getFieldInfo() const
.have_nulls = base_type->isNullable(),
.need_convert = false,
.num_dimensions = least_common_type.getNumberOfDimensions(),
.need_fold_dimension = false,
};
}

View File

@ -182,6 +182,7 @@ AsyncLoader::AsyncLoader(std::vector<PoolInitializer> pool_initializers, bool lo
init.max_threads,
/* max_free_threads = */ 0,
init.max_threads),
.ready_queue = {},
.max_threads = init.max_threads
});
}

View File

@ -69,11 +69,11 @@ struct FunctionDocumentation
using Related = std::string;
Description description; /// E.g. "Returns the position (in bytes, starting at 1) of a substring needle in a string haystack."
Syntax syntax; /// E.g. "position(haystack, needle)"
Arguments arguments; /// E.g. ["haystack — String in which the search is performed. String.", "needle — Substring to be searched. String."]
ReturnedValue returned_value; /// E.g. "Starting position in bytes and counting from 1, if the substring was found."
Examples examples; ///
Categories categories; /// E.g. {"String Search"}
Syntax syntax = {}; /// E.g. "position(haystack, needle)"
Arguments arguments {}; /// E.g. ["haystack — String in which the search is performed. String.", "needle — Substring to be searched. String."]
ReturnedValue returned_value {};/// E.g. "Starting position in bytes and counting from 1, if the substring was found."
Examples examples {}; ///
Categories categories {}; /// E.g. {"String Search"}
std::string argumentsAsString() const;
std::string examplesAsString() const;

View File

@ -402,7 +402,7 @@ struct UInt128HashCRC32 : public UInt128Hash {};
struct UInt128TrivialHash
{
size_t operator()(UInt128 x) const { return x.items[0]; }
size_t operator()(UInt128 x) const { return x.items[UInt128::_impl::little(0)]; }
};
struct UUIDTrivialHash

View File

@ -34,8 +34,8 @@ public:
StorageID table_id = StorageID::createEmpty();
bool ignore_unknown = false;
bool expand_special_macros_only = false;
std::optional<String> shard;
std::optional<String> replica;
std::optional<String> shard = {};
std::optional<String> replica = {};
/// Information about macro expansion
size_t level = 0;

View File

@ -258,6 +258,8 @@ The server successfully detected this situation and will download merged part fr
M(RWLockReadersWaitMilliseconds, "Total time spent waiting for a read lock to be acquired (in a heavy RWLock).") \
M(RWLockWritersWaitMilliseconds, "Total time spent waiting for a write lock to be acquired (in a heavy RWLock).") \
M(DNSError, "Total count of errors in DNS resolution") \
M(PartsLockHoldMicroseconds, "Total time spent holding data parts lock in MergeTree tables") \
M(PartsLockWaitMicroseconds, "Total time spent waiting for data parts lock in MergeTree tables") \
\
M(RealTimeMicroseconds, "Total (wall clock) time spent in processing (queries and other tasks) threads (note that this is a sum).") \
M(UserTimeMicroseconds, "Total time spent in processing (queries and other tasks) threads executing CPU instructions in user mode. This include time CPU pipeline was stalled due to main memory access, cache misses, branch mispredictions, hyper-threading, etc.") \

View File

@ -863,7 +863,9 @@ void TestKeeper::reconfig(
.callback = [callback](const Response & response)
{
callback(dynamic_cast<const ReconfigResponse &>(response));
}
},
.watch = nullptr,
.time = {}
});
}

View File

@ -49,7 +49,7 @@ struct AsyncLoaderTest
}
explicit AsyncLoaderTest(size_t max_threads = 1)
: AsyncLoaderTest({{.max_threads = max_threads}})
: AsyncLoaderTest({{.max_threads = max_threads, .priority = {}}})
{}
std::vector<AsyncLoader::PoolInitializer> getPoolInitializers(std::vector<Initializer> initializers)

View File

@ -149,6 +149,9 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const
write_bool(coordination_settings->compress_snapshots_with_zstd_format);
writeText("configuration_change_tries_count=", buf);
write_int(coordination_settings->configuration_change_tries_count);
writeText("raft_limits_reconnect_limit=", buf);
write_int(static_cast<uint64_t>(coordination_settings->raft_limits_reconnect_limit));
}
KeeperConfigurationAndSettingsPtr

View File

@ -48,7 +48,8 @@ struct Settings;
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \
M(UInt64, max_log_file_size, 50 * 1024 * 1024, "Max size of the Raft log file. If possible, each created log file will preallocate this amount of bytes on disk. Set to 0 to disable the limit", 0) \
M(UInt64, log_file_overallocate_size, 50 * 1024 * 1024, "If max_log_file_size is not set to 0, this value will be added to it for preallocating bytes on disk. If a log record is larger than this value, it could lead to uncaught out-of-space issues so a larger value is preferred", 0) \
M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0)
M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \
M(UInt64, raft_limits_reconnect_limit, 50, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0)
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)

View File

@ -480,6 +480,7 @@ void KeeperDispatcher::shutdown()
.session_id = session,
.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(),
.request = std::move(request),
.digest = std::nullopt
};
close_requests.push_back(std::move(request_info));
@ -576,6 +577,7 @@ void KeeperDispatcher::sessionCleanerTask()
.session_id = dead_session,
.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(),
.request = std::move(request),
.digest = std::nullopt
};
if (!requests_queue->push(std::move(request_info)))
LOG_INFO(log, "Cannot push close request to queue while cleaning outdated sessions");

View File

@ -372,6 +372,10 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
state_manager->getLogStore()->setRaftServer(raft_instance);
nuraft::raft_server::limits raft_limits;
raft_limits.reconnect_limit_ = getValueOrMaxInt32AndLogWarning(coordination_settings->raft_limits_reconnect_limit, "raft_limits_reconnect_limit", log);
raft_instance->set_raft_limits(raft_limits);
raft_instance->start_server(init_options.skip_initial_election_timeout_);
nuraft::ptr<nuraft::raft_server> casted_raft_server = raft_instance;

View File

@ -2127,7 +2127,7 @@ void KeeperStorage::preprocessRequest(
}
std::vector<Delta> new_deltas;
TransactionInfo transaction{.zxid = new_last_zxid};
TransactionInfo transaction{.zxid = new_last_zxid, .nodes_digest = {}};
uint64_t new_digest = getNodesDigest(false).value;
SCOPE_EXIT({
if (keeper_context->digestEnabled())

View File

@ -5,7 +5,6 @@
#include <Storages/ConstraintsDescription.h>
#include <Interpreters/Context.h>
#include <Interpreters/DatabaseCatalog.h>
#include <IO/ReadBufferFromIStream.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/LimitReadBuffer.h>
@ -18,7 +17,6 @@
#include <Core/ExternalTable.h>
#include <Poco/Net/MessageHeader.h>
#include <base/find_symbols.h>
#include <base/scope_guard.h>

View File

@ -16,7 +16,6 @@
#include <base/IPv4andIPv6.h>
#include <base/DayNum.h>
namespace DB
{
@ -285,6 +284,11 @@ decltype(auto) castToNearestFieldType(T && x)
return U(x);
}
template <typename T>
concept not_field_or_bool_or_stringlike
= (!std::is_same_v<std::decay_t<T>, Field> && !std::is_same_v<std::decay_t<T>, bool>
&& !std::is_same_v<NearestFieldType<std::decay_t<T>>, String>);
/** 32 is enough. Round number is used for alignment and for better arithmetic inside std::vector.
* NOTE: Actually, sizeof(std::string) is 32 when using libc++, so Field is 40 bytes.
*/
@ -347,13 +351,6 @@ public:
|| which == Types::Decimal256;
}
/// Templates to avoid ambiguity.
template <typename T, typename Z = void *>
using enable_if_not_field_or_bool_or_stringlike_t = std::enable_if_t<
!std::is_same_v<std::decay_t<T>, Field> &&
!std::is_same_v<std::decay_t<T>, bool> &&
!std::is_same_v<NearestFieldType<std::decay_t<T>>, String>, Z>;
Field() : Field(Null{}) {}
/** Despite the presence of a template constructor, this constructor is still needed,
@ -370,7 +367,8 @@ public:
}
template <typename T>
Field(T && rhs, enable_if_not_field_or_bool_or_stringlike_t<T> = nullptr); /// NOLINT
requires not_field_or_bool_or_stringlike<T>
Field(T && rhs); /// NOLINT
Field(bool rhs) : Field(castToNearestFieldType(rhs)) /// NOLINT
{
@ -425,7 +423,8 @@ public:
/// 1. float <--> int needs explicit cast
/// 2. customized types needs explicit cast
template <typename T>
enable_if_not_field_or_bool_or_stringlike_t<T, Field> & /// NOLINT
requires not_field_or_bool_or_stringlike<T>
Field & /// NOLINT
operator=(T && rhs);
Field & operator= (bool rhs)
@ -896,14 +895,16 @@ auto & Field::safeGet()
template <typename T>
Field::Field(T && rhs, enable_if_not_field_or_bool_or_stringlike_t<T>)
requires not_field_or_bool_or_stringlike<T>
Field::Field(T && rhs)
{
auto && val = castToNearestFieldType(std::forward<T>(rhs));
createConcrete(std::forward<decltype(val)>(val));
}
template <typename T>
Field::enable_if_not_field_or_bool_or_stringlike_t<T, Field> & /// NOLINT
requires not_field_or_bool_or_stringlike<T>
Field & /// NOLINT
Field::operator=(T && rhs)
{
auto && val = castToNearestFieldType(std::forward<T>(rhs));
@ -1005,7 +1006,6 @@ void writeFieldText(const Field & x, WriteBuffer & buf);
String toString(const Field & x);
std::string_view fieldTypeToString(Field::Types::Which type);
}
template <>

View File

@ -47,6 +47,8 @@ public:
void tryUpdateConnection();
bool isConnected() const { return connection != nullptr && connection->is_open(); }
const ConnectionInfo & getConnectionInfo() { return connection_info; }
String getInfoForLog() const { return connection_info.host_port; }

View File

@ -28,10 +28,25 @@ public:
ConnectionHolder(const ConnectionHolder & other) = delete;
void setBroken() { is_broken = true; }
~ConnectionHolder()
{
if (auto_close)
{
connection.reset();
}
else if (is_broken)
{
try
{
connection->getRef().reset();
}
catch (...)
{
connection.reset();
}
}
pool->returnObject(std::move(connection));
}
@ -49,6 +64,7 @@ private:
PoolPtr pool;
ConnectionPtr connection;
bool auto_close;
bool is_broken = false;
};
using ConnectionHolderPtr = std::unique_ptr<ConnectionHolder>;

View File

@ -47,6 +47,7 @@ class IColumn;
M(MaxThreads, max_final_threads, 0, "The maximum number of threads to read from table with FINAL.", 0) \
M(UInt64, max_threads_for_indexes, 0, "The maximum number of threads process indices.", 0) \
M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \
M(Bool, use_concurrency_control, true, "Respect the server's concurrency control (see the `concurrent_threads_soft_limit_num` and `concurrent_threads_soft_limit_ratio_to_cores` global server settings). If disabled, it allows using a larger number of threads even if the server is overloaded (not recommended for normal usage, and needed mostly for tests).", 0) \
M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \
M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread.", 0) \
M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \

View File

@ -7,6 +7,7 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <ranges>
namespace DB
{
@ -29,21 +30,13 @@ template <typename T>
void SerializationDecimalBase<T>::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
{
const typename ColumnType::Container & x = typeid_cast<const ColumnType &>(column).getData();
size_t size = x.size();
if (limit == 0 || offset + limit > size)
if (const size_t size = x.size(); limit == 0 || offset + limit > size)
limit = size - offset;
if constexpr (std::endian::native == std::endian::big && sizeof(T) >= 2)
if constexpr (std::endian::native == std::endian::big)
{
for (size_t i = 0; i < limit; i++)
{
auto tmp(x[offset+i]);
char *start = reinterpret_cast<char*>(&tmp);
char *end = start + sizeof(FieldType);
std::reverse(start, end);
ostr.write(reinterpret_cast<const char *>(&tmp), sizeof(FieldType));
}
std::ranges::for_each(
x | std::views::drop(offset) | std::views::take(limit), [&ostr](const auto & d) { writeBinaryLittleEndian(d, ostr); });
}
else
ostr.write(reinterpret_cast<const char *>(&x[offset]), sizeof(FieldType) * limit);
@ -69,20 +62,14 @@ template <typename T>
void SerializationDecimalBase<T>::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double) const
{
typename ColumnType::Container & x = typeid_cast<ColumnType &>(column).getData();
size_t initial_size = x.size();
const size_t initial_size = x.size();
x.resize(initial_size + limit);
size_t size = istr.readBig(reinterpret_cast<char*>(&x[initial_size]), sizeof(FieldType) * limit);
if constexpr (std::endian::native == std::endian::big && sizeof(T) >= 2)
{
for (size_t i = 0; i < limit; i++)
{
char *start = reinterpret_cast<char*>(&x[initial_size + i]);
char *end = start + sizeof(FieldType);
std::reverse(start, end);
}
}
const size_t size = istr.readBig(reinterpret_cast<char *>(&x[initial_size]), sizeof(FieldType) * limit);
x.resize(initial_size + size / sizeof(FieldType));
if constexpr (std::endian::native == std::endian::big)
std::ranges::for_each(
x | std::views::drop(initial_size), [](auto & d) { transformEndianness<std::endian::big, std::endian::little>(d); });
}
template class SerializationDecimalBase<Decimal32>;

View File

@ -145,15 +145,8 @@ void SerializationNumber<T>::serializeBinaryBulk(const IColumn & column, WriteBu
if constexpr (std::endian::native == std::endian::big && sizeof(T) >= 2)
{
static constexpr auto to_little_endian = [](auto i)
{
transformEndianness<std::endian::little>(i);
return i;
};
std::ranges::for_each(
x | std::views::drop(offset) | std::views::take(limit) | std::views::transform(to_little_endian),
[&ostr](const auto & i) { ostr.write(reinterpret_cast<const char *>(&i), sizeof(typename ColumnVector<T>::ValueType)); });
x | std::views::drop(offset) | std::views::take(limit), [&ostr](const auto & i) { writeBinaryLittleEndian(i, ostr); });
}
else
ostr.write(reinterpret_cast<const char *>(&x[offset]), sizeof(typename ColumnVector<T>::ValueType) * limit);

View File

@ -7,6 +7,7 @@
#include <IO/WriteHelpers.h>
#include <Common/assert_cast.h>
#include <ranges>
namespace DB
{
@ -136,23 +137,37 @@ void SerializationUUID::deserializeBinary(IColumn & column, ReadBuffer & istr, c
void SerializationUUID::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
{
const typename ColumnVector<UUID>::Container & x = typeid_cast<const ColumnVector<UUID> &>(column).getData();
size_t size = x.size();
if (limit == 0 || offset + limit > size)
if (const size_t size = x.size(); limit == 0 || offset + limit > size)
limit = size - offset;
if (limit)
if (limit == 0)
return;
if constexpr (std::endian::native == std::endian::big)
{
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunreachable-code"
std::ranges::for_each(
x | std::views::drop(offset) | std::views::take(limit), [&ostr](const auto & uuid) { writeBinaryLittleEndian(uuid, ostr); });
#pragma clang diagnostic pop
}
else
ostr.write(reinterpret_cast<const char *>(&x[offset]), sizeof(UUID) * limit);
}
void SerializationUUID::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const
{
typename ColumnVector<UUID>::Container & x = typeid_cast<ColumnVector<UUID> &>(column).getData();
size_t initial_size = x.size();
const size_t initial_size = x.size();
x.resize(initial_size + limit);
size_t size = istr.readBig(reinterpret_cast<char*>(&x[initial_size]), sizeof(UUID) * limit);
const size_t size = istr.readBig(reinterpret_cast<char *>(&x[initial_size]), sizeof(UUID) * limit);
x.resize(initial_size + size / sizeof(UUID));
}
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunreachable-code"
if constexpr (std::endian::native == std::endian::big)
std::ranges::for_each(
x | std::views::drop(initial_size), [](auto & uuid) { transformEndianness<std::endian::big, std::endian::little>(uuid); });
#pragma clang diagnostic pop
}
}

View File

@ -35,6 +35,7 @@ namespace ErrorCodes
extern const int UNKNOWN_TABLE;
extern const int BAD_ARGUMENTS;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_GET_CREATE_TABLE_QUERY;
}
DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL(
@ -221,10 +222,25 @@ ASTPtr DatabaseMaterializedPostgreSQL::getCreateTableQueryImpl(const String & ta
std::lock_guard lock(handler_mutex);
/// FIXME TSA
auto storage = std::make_shared<StorageMaterializedPostgreSQL>(StorageID(TSA_SUPPRESS_WARNING_FOR_READ(database_name), table_name), getContext(), remote_database_name, table_name);
auto ast_storage = replication_handler->getCreateNestedTableQuery(storage.get(), table_name);
assert_cast<ASTCreateQuery *>(ast_storage.get())->uuid = UUIDHelpers::generateV4();
ASTPtr ast_storage;
try
{
auto storage = std::make_shared<StorageMaterializedPostgreSQL>(StorageID(TSA_SUPPRESS_WARNING_FOR_READ(database_name), table_name), getContext(), remote_database_name, table_name);
ast_storage = replication_handler->getCreateNestedTableQuery(storage.get(), table_name);
assert_cast<ASTCreateQuery *>(ast_storage.get())->uuid = UUIDHelpers::generateV4();
}
catch (...)
{
if (throw_on_error)
{
throw Exception(ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY,
"Received error while fetching table structure for table {} from PostgreSQL: {}",
backQuote(table_name), getCurrentExceptionMessage(true));
}
tryLogCurrentException(__PRETTY_FUNCTION__);
}
return ast_storage;
}

View File

@ -288,7 +288,8 @@ public:
: ISource(pipeline_.getHeader())
, pipeline(std::move(pipeline_))
, executor(pipeline)
{}
{
}
std::string getName() const override
{

View File

@ -228,7 +228,7 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
else
{
Field field = parseStringToField(value, attr.type);
node->attributes[name_] = RegexTreeNode::AttributeValue{.field = std::move(field), .original_value = value};
node->attributes[name_] = RegexTreeNode::AttributeValue{.field = std::move(field), .pieces = {}, .original_value = value};
}
}
}

View File

@ -165,6 +165,7 @@ void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_s
.file_segment_range = { file_segment_range.left, file_segment_right_bound },
.requested_range = {},
.cache_type = FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE,
.file_segment_key = {},
.file_segment_size = file_segment_range.size(),
.read_from_cache_attempted = false,
.read_buffer_id = {},

View File

@ -62,8 +62,16 @@ IOUringReader::IOUringReader(uint32_t entries_)
struct io_uring_params params =
{
.sq_entries = 0, // filled by the kernel, initializing to silence warning
.cq_entries = 0, // filled by the kernel, initializing to silence warning
.flags = 0,
.sq_thread_cpu = 0, // Unused (IORING_SETUP_SQ_AFF isn't set). Silences warning
.sq_thread_idle = 0, // Unused (IORING_SETUP_SQPOL isn't set). Silences warning
.features = 0, // filled by the kernel, initializing to silence warning
.wq_fd = 0, // Unused (IORING_SETUP_ATTACH_WQ isn't set). Silences warning.
.resv = {0, 0, 0}, // "The resv array must be initialized to zero."
.sq_off = {}, // filled by the kernel, initializing to silence warning
.cq_off = {}, // filled by the kernel, initializing to silence warning
};
int ret = io_uring_queue_init_params(entries_, &ring, &params);

View File

@ -25,11 +25,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context)
{
const Settings & settings = context->getSettingsRef();
@ -50,6 +45,8 @@ std::unique_ptr<S3::Client> getClient(
{
String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
S3::URI uri(endpoint);
if (!uri.key.ends_with('/'))
uri.key.push_back('/');
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
config.getString(config_prefix + ".region", ""),
@ -61,9 +58,6 @@ std::unique_ptr<S3::Client> getClient(
settings.request_settings.put_request_throttler,
uri.uri.getScheme());
if (uri.key.back() != '/')
throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key);
client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 1000);
client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 3000);
client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100);

View File

@ -104,12 +104,8 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
{
String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
S3::URI uri(endpoint);
if (uri.key.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "No key in S3 uri: {}", uri.uri.toString());
if (uri.key.back() != '/')
throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key);
if (!uri.key.ends_with('/'))
uri.key.push_back('/');
S3Capabilities s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
std::shared_ptr<S3ObjectStorage> s3_storage;

View File

@ -20,8 +20,8 @@ void IndexOfBlockForNativeFormat::read(ReadBuffer & istr)
auto & column = columns.emplace_back();
readBinary(column.name, istr);
readBinary(column.type, istr);
readBinary(column.location.offset_in_compressed_file, istr);
readBinary(column.location.offset_in_decompressed_block, istr);
readBinaryLittleEndian(column.location.offset_in_compressed_file, istr);
readBinaryLittleEndian(column.location.offset_in_decompressed_block, istr);
}
}
@ -34,8 +34,8 @@ void IndexOfBlockForNativeFormat::write(WriteBuffer & ostr) const
const auto & column = columns[i];
writeBinary(column.name, ostr);
writeBinary(column.type, ostr);
writeBinary(column.location.offset_in_compressed_file, ostr);
writeBinary(column.location.offset_in_decompressed_block, ostr);
writeBinaryLittleEndian(column.location.offset_in_compressed_file, ostr);
writeBinaryLittleEndian(column.location.offset_in_decompressed_block, ostr);
}
}

View File

@ -44,6 +44,7 @@ inline std::string_view getURLHostRFC(const char * data, size_t size)
case '.':
case '-':
case '+':
case '[':
break;
case ' ': /// restricted symbols
case '\t':
@ -56,7 +57,6 @@ inline std::string_view getURLHostRFC(const char * data, size_t size)
case '\\':
case '^':
case '~':
case '[':
case ']':
case ';':
case '=':
@ -73,6 +73,13 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
pos = data;
}
bool has_open_bracket = false;
bool has_end_bracket = false;
if (*pos == '[') /// IPv6 [2001:db8::1]:80
{
has_open_bracket = true;
++pos;
}
Pos dot_pos = nullptr;
Pos colon_pos = nullptr;
bool has_sub_delims = false;
@ -84,10 +91,14 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
switch (*pos)
{
case '.':
if (has_open_bracket)
return std::string_view{};
if (has_at_symbol || colon_pos == nullptr)
dot_pos = pos;
break;
case ':':
if (has_open_bracket)
continue;
if (has_at_symbol || colon_pos) goto done;
colon_pos = pos;
break;
@ -116,6 +127,13 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
/// registered).
has_sub_delims = true;
continue;
case ']':
if (has_open_bracket)
{
has_end_bracket = true;
goto done;
}
[[fallthrough]];
case ' ': /// restricted symbols in whole URL
case '\t':
case '<':
@ -126,7 +144,6 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
case '\\':
case '^':
case '[':
case ']':
if (colon_pos == nullptr)
return std::string_view{};
else
@ -138,7 +155,11 @@ done:
if (has_sub_delims)
return std::string_view{};
if (!has_at_symbol)
{
if (has_open_bracket && has_end_bracket)
return std::string_view(start_of_host, pos - start_of_host);
pos = colon_pos ? colon_pos : pos;
}
return checkAndReturnHost(pos, dot_pos, start_of_host);
}

View File

@ -3,12 +3,10 @@
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnSet.h>
#include <Columns/ColumnLowCardinality.h>
#include <Interpreters/Set.h>
@ -70,12 +68,6 @@ public:
return 2;
}
/// Do not use default implementation for LowCardinality.
/// For now, Set may be const or non const column, depending on how it was created.
/// But we will return UInt8 for any case.
/// TODO: we could use special implementation later.
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
{
return std::make_shared<DataTypeUInt8>();
@ -143,8 +135,6 @@ public:
else
columns_of_key_columns.emplace_back(left_arg);
/// Replace single LowCardinality column to it's dictionary if possible.
ColumnPtr lc_indexes = nullptr;
bool is_const = false;
if (columns_of_key_columns.size() == 1)
{
@ -155,20 +145,10 @@ public:
col = &const_col->getDataColumn();
is_const = true;
}
if (const auto * lc = typeid_cast<const ColumnLowCardinality *>(col))
{
lc_indexes = lc->getIndexesPtr();
arg.column = lc->getDictionary().getNestedColumn();
arg.type = removeLowCardinality(arg.type);
}
}
auto res = set->execute(columns_of_key_columns, negative);
if (lc_indexes)
res = res->index(*lc_indexes, 0);
if (is_const)
res = ColumnUInt8::create(input_rows_count, res->getUInt(0));

View File

@ -163,7 +163,16 @@ namespace
ColumnPtr default_non_const;
if (!cache.default_column && arguments.size() == 4)
{
default_non_const = castColumn(arguments[3], result_type);
if (in->size() > default_non_const->size())
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Fourth argument of function {} must be a constant or a column at least as big as the second and third arguments",
getName());
}
}
ColumnPtr in_casted = arguments[0].column;
if (arguments.size() == 3)
@ -490,7 +499,7 @@ namespace
else if (cache.default_column)
column_result.insertFrom(*cache.default_column, 0);
else if (default_non_const)
column_result.insertFrom(*default_non_const, 0);
column_result.insertFrom(*default_non_const, i);
else
column_result.insertFrom(in_casted, i);
}

View File

@ -64,7 +64,7 @@ public:
/// Optional. Useful when implementation needs to do ignore().
size_t offset = 0;
std::unique_ptr<Stopwatch> execution_watch;
std::unique_ptr<Stopwatch> execution_watch = {};
operator std::tuple<size_t &, size_t &>() { return {size, offset}; }
};

View File

@ -16,7 +16,7 @@ struct ObjectInfo
size_t size = 0;
time_t last_modification_time = 0;
std::map<String, String> metadata; /// Set only if getObjectInfo() is called with `with_metadata = true`.
std::map<String, String> metadata = {}; /// Set only if getObjectInfo() is called with `with_metadata = true`.
};
ObjectInfo getObjectInfo(

View File

@ -2191,8 +2191,8 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
/// Replace predicate result to constant 1.
Node node;
node.type = ActionType::COLUMN;
node.result_name = std::move(predicate->result_name);
node.result_type = std::move(predicate->result_type);
node.result_name = predicate->result_name;
node.result_type = predicate->result_type;
node.column = node.result_type->createColumnConst(0, 1);
if (predicate->type != ActionType::INPUT)

View File

@ -2479,48 +2479,21 @@ void NO_INLINE Aggregator::mergeDataNullKey(
}
}
template <typename Method, bool use_compiled_functions, bool prefetch, typename Table>
void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena) const
{
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
mergeDataNullKey<Method, Table>(table_dst, table_src, arena);
PaddedPODArray<AggregateDataPtr> dst_places;
PaddedPODArray<AggregateDataPtr> src_places;
auto merge = [&](AggregateDataPtr & __restrict dst, AggregateDataPtr & __restrict src, bool inserted)
{
if (!inserted)
{
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
{
const auto & compiled_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions;
compiled_functions.merge_aggregate_states_function(dst, src);
if (compiled_aggregate_functions_holder->compiled_aggregate_functions.functions_count != params.aggregates_size)
{
for (size_t i = 0; i < params.aggregates_size; ++i)
{
if (!is_aggregate_function_compiled[i])
aggregate_functions[i]->merge(
dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena);
}
for (size_t i = 0; i < params.aggregates_size; ++i)
{
if (!is_aggregate_function_compiled[i])
aggregate_functions[i]->destroy(src + offsets_of_aggregate_states[i]);
}
}
}
else
#endif
{
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->merge(dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena);
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->destroy(src + offsets_of_aggregate_states[i]);
}
dst_places.push_back(dst);
src_places.push_back(src);
}
else
{
@ -2531,8 +2504,30 @@ void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, A
};
table_src.template mergeToViaEmplace<decltype(merge), prefetch>(table_dst, std::move(merge));
table_src.clearAndShrink();
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
{
const auto & compiled_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions;
compiled_functions.merge_aggregate_states_function(dst_places.data(), src_places.data(), dst_places.size());
for (size_t i = 0; i < params.aggregates_size; ++i)
{
if (!is_aggregate_function_compiled[i])
aggregate_functions[i]->mergeAndDestroyBatch(
dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], arena);
}
return;
}
#endif
for (size_t i = 0; i < params.aggregates_size; ++i)
{
aggregate_functions[i]->mergeAndDestroyBatch(
dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], arena);
}
}

View File

@ -251,6 +251,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
return PushResult
{
.status = PushResult::TOO_MUCH_DATA,
.future = {},
.insert_data_buffer = std::make_unique<ConcatReadBuffer>(std::move(buffers)),
};
}
@ -318,6 +319,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
{
.status = PushResult::OK,
.future = std::move(insert_future),
.insert_data_buffer = nullptr,
};
}

View File

@ -895,6 +895,12 @@ FileSegments LockedKey::sync()
FileSegments broken;
for (auto it = key_metadata->begin(); it != key_metadata->end();)
{
if (it->second->evicting() || !it->second->releasable())
{
++it;
continue;
}
auto file_segment = it->second->file_segment;
if (file_segment->isDetached())
{

View File

@ -86,4 +86,10 @@ std::shared_ptr<ReadBuffer> WriteBufferToFileSegment::getReadBufferImpl()
return std::make_shared<ReadBufferFromFile>(file_segment->getPathInLocalCache());
}
WriteBufferToFileSegment::~WriteBufferToFileSegment()
{
/// To be sure that file exists before destructor of segment_holder is called
WriteBufferFromFileDecorator::finalize();
}
}

View File

@ -16,6 +16,7 @@ public:
explicit WriteBufferToFileSegment(FileSegmentsHolderPtr segment_holder);
void nextImpl() override;
~WriteBufferToFileSegment() override;
private:

View File

@ -318,20 +318,11 @@ void executeQueryWithParallelReplicas(
}
auto coordinator = std::make_shared<ParallelReplicasReadingCoordinator>(all_replicas_count);
/// This is a little bit weird, but we construct an "empty" coordinator without
/// any specified reading/coordination method (like Default, InOrder, InReverseOrder)
/// Because we will understand it later during QueryPlan optimization
/// So we place a reference to the coordinator to some common plane like QueryInfo
/// to then tell it about the reading method we chose.
query_info.coordinator = coordinator;
auto external_tables = new_context->getExternalTables();
auto read_from_remote = std::make_unique<ReadFromParallelRemoteReplicasStep>(
query_ast,
new_cluster,
coordinator,
std::move(coordinator),
stream_factory.header,
stream_factory.processed_stage,
main_table,

View File

@ -44,10 +44,7 @@ public:
if (database)
{
for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next())
{
const auto & storage_id = table_it->table()->getStorageID();
result.emplace_back(storage_id.getTableName());
}
result.emplace_back(table_it->name());
}
return result;
}

View File

@ -184,7 +184,9 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr
PullingAsyncPipelineExecutor executor(io.pipeline);
io.pipeline.setProgressCallback(data.getContext()->getProgressCallback());
while (block.rows() == 0 && executor.pull(block));
while (block.rows() == 0 && executor.pull(block))
{
}
if (block.rows() == 0)
{
@ -216,7 +218,8 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr
Block tmp_block;
while (tmp_block.rows() == 0 && executor.pull(tmp_block))
;
{
}
if (tmp_block.rows() != 0)
throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row");

View File

@ -30,12 +30,12 @@ struct FilesystemCacheLogElement
std::pair<size_t, size_t> file_segment_range{};
std::pair<size_t, size_t> requested_range{};
CacheType cache_type{};
std::string file_segment_key;
size_t file_segment_offset;
size_t file_segment_size;
std::string file_segment_key{};
size_t file_segment_offset = 0;
size_t file_segment_size = 0;
bool read_from_cache_attempted;
String read_buffer_id;
std::shared_ptr<ProfileEvents::Counters::Snapshot> profile_counters;
String read_buffer_id{};
std::shared_ptr<ProfileEvents::Counters::Snapshot> profile_counters = nullptr;
static std::string name() { return "FilesystemCacheLog"; }

View File

@ -23,11 +23,9 @@ public:
{
const char * assert_no_aggregates = nullptr;
const char * assert_no_windows = nullptr;
// Explicit empty initializers are needed to make designated initializers
// work on GCC 10.
std::unordered_set<String> uniq_names {};
ASTs aggregates;
ASTs window_functions;
ASTs aggregates{};
ASTs window_functions{};
};
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child)

View File

@ -208,7 +208,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue
if (table->isStaticStorage())
throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only");
table->checkTableCanBeDropped();
table->checkTableCanBeDropped(context_);
TableExclusiveLockHolder table_excl_lock;
/// We don't need any lock for ReplicatedMergeTree and for simple MergeTree
@ -228,10 +228,10 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue
{
/// If DROP DICTIONARY query is not used, check if Dictionary can be dropped with DROP TABLE query
if (!query.is_dictionary)
table->checkTableCanBeDropped();
table->checkTableCanBeDropped(context_);
}
else
table->checkTableCanBeDropped();
table->checkTableCanBeDropped(context_);
/// Check dependencies before shutting table down
bool check_ref_deps = getContext()->getSettingsRef().check_referential_table_dependencies;

View File

@ -616,6 +616,7 @@ BlockIO InterpreterInsertQuery::execute()
presink_chains.at(0).appendChain(std::move(sink_chains.at(0)));
res.pipeline = QueryPipeline(std::move(presink_chains[0]));
res.pipeline.setNumThreads(std::min<size_t>(res.pipeline.getNumThreads(), settings.max_threads));
res.pipeline.setConcurrencyControl(settings.use_concurrency_control);
if (query.hasInlinedData() && !async_insert)
{

View File

@ -68,7 +68,6 @@
#include <Processors/Sources/NullSource.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Transforms/AggregatingTransform.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/FilterTransform.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
@ -84,12 +83,9 @@
#include <Core/ProtocolDefines.h>
#include <Functions/IFunction.h>
#include <Interpreters/Aggregator.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/IJoin.h>
#include <QueryPipeline/SizeLimits.h>
#include <base/map.h>
#include <base/sort.h>
#include <base/types.h>
#include <Common/FieldVisitorToString.h>
#include <Common/FieldVisitorsAccurateComparison.h>
#include <Common/checkStackSize.h>
@ -97,7 +93,6 @@
#include <Common/typeid_cast.h>
#include <Common/ProfileEvents.h>
#include "config_version.h"
namespace ProfileEvents
{
@ -2527,6 +2522,8 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
if (!query_plan.getMaxThreads() || is_remote)
query_plan.setMaxThreads(max_threads_execute_query);
query_plan.setConcurrencyControl(settings.use_concurrency_control);
/// Aliases in table declaration.
if (processing_stage == QueryProcessingStage::FetchColumns && alias_actions)
{

View File

@ -357,27 +357,60 @@ static void compileMergeAggregatesStates(llvm::Module & module, const std::vecto
llvm::IRBuilder<> b(module.getContext());
auto * aggregate_data_place_type = b.getInt8Ty()->getPointerTo();
auto * merge_aggregates_states_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { aggregate_data_place_type, aggregate_data_place_type }, false);
auto * merge_aggregates_states_func = llvm::Function::Create(merge_aggregates_states_func_declaration, llvm::Function::ExternalLinkage, name, module);
auto * aggregate_data_places_type = aggregate_data_place_type->getPointerTo();
auto * size_type = b.getInt64Ty();
auto * merge_aggregates_states_func_declaration
= llvm::FunctionType::get(b.getVoidTy(), {aggregate_data_places_type, aggregate_data_places_type, size_type}, false);
auto * merge_aggregates_states_func
= llvm::Function::Create(merge_aggregates_states_func_declaration, llvm::Function::ExternalLinkage, name, module);
auto * arguments = merge_aggregates_states_func->args().begin();
llvm::Value * aggregate_data_place_dst_arg = arguments++;
llvm::Value * aggregate_data_place_src_arg = arguments++;
llvm::Value * aggregate_data_places_dst_arg = arguments++;
llvm::Value * aggregate_data_places_src_arg = arguments++;
llvm::Value * aggregate_places_size_arg = arguments++;
auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", merge_aggregates_states_func);
b.SetInsertPoint(entry);
/// Initialize loop
auto * end = llvm::BasicBlock::Create(b.getContext(), "end", merge_aggregates_states_func);
auto * loop = llvm::BasicBlock::Create(b.getContext(), "loop", merge_aggregates_states_func);
b.CreateCondBr(b.CreateICmpEQ(aggregate_places_size_arg, llvm::ConstantInt::get(size_type, 0)), end, loop);
b.SetInsertPoint(loop);
/// Loop
auto * counter_phi = b.CreatePHI(size_type, 2);
counter_phi->addIncoming(llvm::ConstantInt::get(size_type, 0), entry);
for (const auto & function_to_compile : functions)
{
auto * aggregate_data_place_dst = b.CreateLoad(aggregate_data_place_type,
b.CreateInBoundsGEP(aggregate_data_place_type->getPointerTo(), aggregate_data_places_dst_arg, counter_phi));
auto * aggregate_data_place_src = b.CreateLoad(aggregate_data_place_type,
b.CreateInBoundsGEP(aggregate_data_place_type->getPointerTo(), aggregate_data_places_src_arg, counter_phi));
size_t aggregate_function_offset = function_to_compile.aggregate_data_offset;
auto * aggregate_data_place_merge_dst_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_dst_arg, aggregate_function_offset);
auto * aggregate_data_place_merge_src_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_src_arg, aggregate_function_offset);
auto * aggregate_data_place_merge_dst_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_dst, aggregate_function_offset);
auto * aggregate_data_place_merge_src_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_src, aggregate_function_offset);
const auto * aggregate_function_ptr = function_to_compile.function;
aggregate_function_ptr->compileMerge(b, aggregate_data_place_merge_dst_with_offset, aggregate_data_place_merge_src_with_offset);
}
/// End of loop
auto * current_block = b.GetInsertBlock();
auto * incremeted_counter = b.CreateAdd(counter_phi, llvm::ConstantInt::get(size_type, 1));
counter_phi->addIncoming(incremeted_counter, current_block);
b.CreateCondBr(b.CreateICmpEQ(incremeted_counter, aggregate_places_size_arg), end, loop);
b.SetInsertPoint(end);
b.CreateRetVoid();
}

View File

@ -56,7 +56,7 @@ struct AggregateFunctionWithOffset
using JITCreateAggregateStatesFunction = void (*)(AggregateDataPtr);
using JITAddIntoAggregateStatesFunction = void (*)(ColumnDataRowsOffset, ColumnDataRowsOffset, ColumnData *, AggregateDataPtr *);
using JITAddIntoAggregateStatesFunctionSinglePlace = void (*)(ColumnDataRowsOffset, ColumnDataRowsOffset, ColumnData *, AggregateDataPtr);
using JITMergeAggregateStatesFunction = void (*)(AggregateDataPtr, AggregateDataPtr);
using JITMergeAggregateStatesFunction = void (*)(AggregateDataPtr *, AggregateDataPtr *, size_t);
using JITInsertAggregateStatesIntoColumnsFunction = void (*)(ColumnDataRowsOffset, ColumnDataRowsOffset, ColumnData *, AggregateDataPtr *);
struct CompiledAggregateFunctions

View File

@ -20,9 +20,9 @@ struct QueryStatusInfo;
struct QueryResultDetails
{
String query_id;
std::optional<String> content_type;
std::optional<String> format;
std::optional<String> timezone;
std::optional<String> content_type = {};
std::optional<String> format = {};
std::optional<String> timezone = {};
};
using SetResultDetailsFunc = std::function<void(const QueryResultDetails &)>;

View File

@ -0,0 +1,26 @@
#pragma once
#include <Parsers/IAST.h>
namespace DB
{
/*
* Currently ignore the foreign key node, flesh it out when needed
*/
class ASTForeignKeyDeclaration : public IAST
{
public:
String name;
String getID(char) const override { return "Foreign Key"; }
ASTPtr clone() const override
{
auto res = std::make_shared<ASTForeignKeyDeclaration>();
res->name = name;
return res;
}
};
}

Some files were not shown because too many files have changed in this diff Show More