Merge branch 'master' into keeper-some-improvement2

This commit is contained in:
Antonio Andelic 2024-09-18 13:19:24 +02:00
commit 4f73c677ac
182 changed files with 4418 additions and 1846 deletions

6
.gitmodules vendored
View File

@ -170,9 +170,6 @@
[submodule "contrib/fast_float"]
path = contrib/fast_float
url = https://github.com/fastfloat/fast_float
[submodule "contrib/libpq"]
path = contrib/libpq
url = https://github.com/ClickHouse/libpq
[submodule "contrib/NuRaft"]
path = contrib/NuRaft
url = https://github.com/ClickHouse/NuRaft
@ -369,3 +366,6 @@
[submodule "contrib/numactl"]
path = contrib/numactl
url = https://github.com/ClickHouse/numactl.git
[submodule "contrib/postgres"]
path = contrib/postgres
url = https://github.com/ClickHouse/postgres.git

View File

@ -40,17 +40,8 @@ Every month we get together with the community (users, contributors, customers,
Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey Milovidov:
Upcoming meetups
* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9
* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
Other upcoming meetups
* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
* [Bangalore Meetup](https://www.meetup.com/clickhouse-bangalore-user-group/events/303208274/) - September 18
* [Tel Aviv Meetup](https://www.meetup.com/clickhouse-meetup-israel/events/303095121) - September 22
* [Jakarta Meetup](https://www.meetup.com/clickhouse-indonesia-user-group/events/303191359/) - October 1
@ -62,13 +53,20 @@ Other upcoming meetups
* [Dubai Meetup](https://www.meetup.com/clickhouse-dubai-meetup-group/events/303096989/) - November 21
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26
Recently completed events
Recently completed meetups
* [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25
* [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27
* [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27
* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5
* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5
* [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5
* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9
* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"

View File

@ -188,8 +188,9 @@ namespace Crypto
pFile = fopen(keyFile.c_str(), "r");
if (pFile)
{
pem_password_cb * pCB = pass.empty() ? (pem_password_cb *)0 : &passCB;
void * pPassword = pass.empty() ? (void *)0 : (void *)pass.c_str();
pem_password_cb * pCB = &passCB;
static constexpr char * no_password = "";
void * pPassword = pass.empty() ? (void *)no_password : (void *)pass.c_str();
if (readFunc(pFile, &pKey, pCB, pPassword))
{
fclose(pFile);
@ -225,6 +226,13 @@ namespace Crypto
error:
if (pFile)
fclose(pFile);
if (*ppKey)
{
if constexpr (std::is_same_v<K, EVP_PKEY>)
EVP_PKEY_free(*ppKey);
else
EC_KEY_free(*ppKey);
}
throw OpenSSLException("EVPKey::loadKey(string)");
}
@ -286,6 +294,13 @@ namespace Crypto
error:
if (pBIO)
BIO_free(pBIO);
if (*ppKey)
{
if constexpr (std::is_same_v<K, EVP_PKEY>)
EVP_PKEY_free(*ppKey);
else
EC_KEY_free(*ppKey);
}
throw OpenSSLException("EVPKey::loadKey(stream)");
}

View File

@ -248,6 +248,9 @@ namespace Net
SSL_CTX * sslContext() const;
/// Returns the underlying OpenSSL SSL Context object.
SSL_CTX * takeSslContext();
/// Takes ownership of the underlying OpenSSL SSL Context object.
Usage usage() const;
/// Returns whether the context is for use by a client or by a server
/// and whether TLSv1 is required.
@ -401,6 +404,13 @@ namespace Net
return _pSSLContext;
}
inline SSL_CTX * Context::takeSslContext()
{
auto * result = _pSSLContext;
_pSSLContext = nullptr;
return result;
}
inline bool Context::extendedCertificateVerificationEnabled() const
{

View File

@ -106,6 +106,11 @@ Context::Context(
Context::~Context()
{
if (_pSSLContext == nullptr)
{
return;
}
try
{
SSL_CTX_free(_pSSLContext);

View File

@ -145,8 +145,13 @@ add_contrib (isa-l-cmake isa-l)
add_contrib (libhdfs3-cmake libhdfs3) # requires: google-protobuf, krb5, isa-l
add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift, avro, arrow, libhdfs3
add_contrib (cppkafka-cmake cppkafka)
add_contrib (libpqxx-cmake libpqxx)
add_contrib (libpq-cmake libpq)
option(ENABLE_LIBPQXX "Enable PostgreSQL" ${ENABLE_LIBRARIES})
if (ENABLE_LIBPQXX)
add_contrib (postgres-cmake postgres)
add_contrib (libpqxx-cmake libpqxx)
endif()
add_contrib (rocksdb-cmake rocksdb) # requires: jemalloc, snappy, zlib, lz4, zstd, liburing
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)

1
contrib/libpq vendored

@ -1 +0,0 @@
Subproject commit 2446f2c85650b56df9d4ebc4c2ea7f4b01beee57

View File

@ -1,78 +0,0 @@
if (NOT ENABLE_LIBPQXX)
return()
endif()
set(LIBPQ_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libpq")
set(SRCS
"${LIBPQ_SOURCE_DIR}/fe-auth.c"
"${LIBPQ_SOURCE_DIR}/fe-auth-scram.c"
"${LIBPQ_SOURCE_DIR}/fe-connect.c"
"${LIBPQ_SOURCE_DIR}/fe-exec.c"
"${LIBPQ_SOURCE_DIR}/fe-lobj.c"
"${LIBPQ_SOURCE_DIR}/fe-misc.c"
"${LIBPQ_SOURCE_DIR}/fe-print.c"
"${LIBPQ_SOURCE_DIR}/fe-trace.c"
"${LIBPQ_SOURCE_DIR}/fe-protocol3.c"
"${LIBPQ_SOURCE_DIR}/fe-secure.c"
"${LIBPQ_SOURCE_DIR}/fe-secure-common.c"
"${LIBPQ_SOURCE_DIR}/fe-secure-openssl.c"
"${LIBPQ_SOURCE_DIR}/legacy-pqsignal.c"
"${LIBPQ_SOURCE_DIR}/libpq-events.c"
"${LIBPQ_SOURCE_DIR}/pqexpbuffer.c"
"${LIBPQ_SOURCE_DIR}/common/scram-common.c"
"${LIBPQ_SOURCE_DIR}/common/sha2.c"
"${LIBPQ_SOURCE_DIR}/common/sha1.c"
"${LIBPQ_SOURCE_DIR}/common/md5.c"
"${LIBPQ_SOURCE_DIR}/common/md5_common.c"
"${LIBPQ_SOURCE_DIR}/common/hmac_openssl.c"
"${LIBPQ_SOURCE_DIR}/common/cryptohash.c"
"${LIBPQ_SOURCE_DIR}/common/saslprep.c"
"${LIBPQ_SOURCE_DIR}/common/unicode_norm.c"
"${LIBPQ_SOURCE_DIR}/common/ip.c"
"${LIBPQ_SOURCE_DIR}/common/jsonapi.c"
"${LIBPQ_SOURCE_DIR}/common/wchar.c"
"${LIBPQ_SOURCE_DIR}/common/base64.c"
"${LIBPQ_SOURCE_DIR}/common/link-canary.c"
"${LIBPQ_SOURCE_DIR}/common/fe_memutils.c"
"${LIBPQ_SOURCE_DIR}/common/string.c"
"${LIBPQ_SOURCE_DIR}/common/pg_get_line.c"
"${LIBPQ_SOURCE_DIR}/common/stringinfo.c"
"${LIBPQ_SOURCE_DIR}/common/psprintf.c"
"${LIBPQ_SOURCE_DIR}/common/encnames.c"
"${LIBPQ_SOURCE_DIR}/common/logging.c"
"${LIBPQ_SOURCE_DIR}/port/snprintf.c"
"${LIBPQ_SOURCE_DIR}/port/strlcpy.c"
"${LIBPQ_SOURCE_DIR}/port/strerror.c"
"${LIBPQ_SOURCE_DIR}/port/inet_net_ntop.c"
"${LIBPQ_SOURCE_DIR}/port/getpeereid.c"
"${LIBPQ_SOURCE_DIR}/port/chklocale.c"
"${LIBPQ_SOURCE_DIR}/port/noblock.c"
"${LIBPQ_SOURCE_DIR}/port/pg_strong_random.c"
"${LIBPQ_SOURCE_DIR}/port/pgstrcasecmp.c"
"${LIBPQ_SOURCE_DIR}/port/thread.c"
"${LIBPQ_SOURCE_DIR}/port/path.c"
)
add_library(_libpq ${SRCS})
add_definitions(-DHAVE_BIO_METH_NEW)
add_definitions(-DHAVE_HMAC_CTX_NEW)
add_definitions(-DHAVE_HMAC_CTX_FREE)
target_include_directories (_libpq SYSTEM PUBLIC ${LIBPQ_SOURCE_DIR})
target_include_directories (_libpq SYSTEM PUBLIC "${LIBPQ_SOURCE_DIR}/include")
target_include_directories (_libpq SYSTEM PRIVATE "${LIBPQ_SOURCE_DIR}/configs")
# NOTE: this is a dirty hack to avoid and instead pg_config.h should be shipped
# for different OS'es like for jemalloc, not one generic for all OS'es like
# now.
if (OS_DARWIN OR OS_FREEBSD OR USE_MUSL)
target_compile_definitions(_libpq PRIVATE -DSTRERROR_R_INT=1)
endif()
target_link_libraries (_libpq PRIVATE OpenSSL::SSL)
add_library(ch_contrib::libpq ALIAS _libpq)

View File

@ -1,10 +1,3 @@
option(ENABLE_LIBPQXX "Enalbe libpqxx" ${ENABLE_LIBRARIES})
if (NOT ENABLE_LIBPQXX)
message(STATUS "Not using libpqxx")
return()
endif()
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libpqxx")
set (SRCS

1
contrib/postgres vendored Submodule

@ -0,0 +1 @@
Subproject commit cfd77000af28469fcb650485bad65a35e7649e41

View File

@ -0,0 +1,78 @@
# Build description for libpq which is part of the PostgreSQL sources
set(POSTGRES_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/postgres")
set(LIBPQ_SOURCE_DIR "${POSTGRES_SOURCE_DIR}/src/interfaces/libpq")
set(LIBPQ_CMAKE_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/postgres-cmake")
set(SRCS
"${LIBPQ_SOURCE_DIR}/fe-auth.c"
"${LIBPQ_SOURCE_DIR}/fe-auth-scram.c"
"${LIBPQ_SOURCE_DIR}/fe-connect.c"
"${LIBPQ_SOURCE_DIR}/fe-exec.c"
"${LIBPQ_SOURCE_DIR}/fe-lobj.c"
"${LIBPQ_SOURCE_DIR}/fe-misc.c"
"${LIBPQ_SOURCE_DIR}/fe-print.c"
"${LIBPQ_SOURCE_DIR}/fe-trace.c"
"${LIBPQ_SOURCE_DIR}/fe-protocol3.c"
"${LIBPQ_SOURCE_DIR}/fe-secure.c"
"${LIBPQ_SOURCE_DIR}/fe-secure-common.c"
"${LIBPQ_SOURCE_DIR}/fe-secure-openssl.c"
"${LIBPQ_SOURCE_DIR}/legacy-pqsignal.c"
"${LIBPQ_SOURCE_DIR}/libpq-events.c"
"${LIBPQ_SOURCE_DIR}/pqexpbuffer.c"
"${POSTGRES_SOURCE_DIR}/src/common/scram-common.c"
"${POSTGRES_SOURCE_DIR}/src/common/sha2.c"
"${POSTGRES_SOURCE_DIR}/src/common/sha1.c"
"${POSTGRES_SOURCE_DIR}/src/common/md5.c"
"${POSTGRES_SOURCE_DIR}/src/common/md5_common.c"
"${POSTGRES_SOURCE_DIR}/src/common/hmac_openssl.c"
"${POSTGRES_SOURCE_DIR}/src/common/cryptohash.c"
"${POSTGRES_SOURCE_DIR}/src/common/saslprep.c"
"${POSTGRES_SOURCE_DIR}/src/common/unicode_norm.c"
"${POSTGRES_SOURCE_DIR}/src/common/ip.c"
"${POSTGRES_SOURCE_DIR}/src/common/jsonapi.c"
"${POSTGRES_SOURCE_DIR}/src/common/wchar.c"
"${POSTGRES_SOURCE_DIR}/src/common/base64.c"
"${POSTGRES_SOURCE_DIR}/src/common/link-canary.c"
"${POSTGRES_SOURCE_DIR}/src/common/fe_memutils.c"
"${POSTGRES_SOURCE_DIR}/src/common/string.c"
"${POSTGRES_SOURCE_DIR}/src/common/pg_get_line.c"
"${POSTGRES_SOURCE_DIR}/src/common/stringinfo.c"
"${POSTGRES_SOURCE_DIR}/src/common/psprintf.c"
"${POSTGRES_SOURCE_DIR}/src/common/encnames.c"
"${POSTGRES_SOURCE_DIR}/src/common/logging.c"
"${POSTGRES_SOURCE_DIR}/src/port/snprintf.c"
"${POSTGRES_SOURCE_DIR}/src/port/strlcpy.c"
"${POSTGRES_SOURCE_DIR}/src/port/strerror.c"
"${POSTGRES_SOURCE_DIR}/src/port/inet_net_ntop.c"
"${POSTGRES_SOURCE_DIR}/src/port/getpeereid.c"
"${POSTGRES_SOURCE_DIR}/src/port/chklocale.c"
"${POSTGRES_SOURCE_DIR}/src/port/noblock.c"
"${POSTGRES_SOURCE_DIR}/src/port/pg_strong_random.c"
"${POSTGRES_SOURCE_DIR}/src/port/pgstrcasecmp.c"
"${POSTGRES_SOURCE_DIR}/src/port/thread.c"
"${POSTGRES_SOURCE_DIR}/src/port/path.c"
)
add_library(_libpq ${SRCS})
add_definitions(-DHAVE_BIO_METH_NEW)
add_definitions(-DHAVE_HMAC_CTX_NEW)
add_definitions(-DHAVE_HMAC_CTX_FREE)
target_include_directories (_libpq SYSTEM PUBLIC ${LIBPQ_SOURCE_DIR})
target_include_directories (_libpq SYSTEM PUBLIC "${POSTGRES_SOURCE_DIR}/src/include")
target_include_directories (_libpq SYSTEM PUBLIC "${LIBPQ_CMAKE_SOURCE_DIR}") # pre-generated headers
# NOTE: this is a dirty hack to avoid and instead pg_config.h should be shipped
# for different OS'es like for jemalloc, not one generic for all OS'es like
# now.
if (OS_DARWIN OR OS_FREEBSD OR USE_MUSL)
target_compile_definitions(_libpq PRIVATE -DSTRERROR_R_INT=1)
endif()
target_link_libraries (_libpq PRIVATE OpenSSL::SSL)
add_library(ch_contrib::libpq ALIAS _libpq)

View File

@ -0,0 +1,943 @@
/* src/include/pg_config.h. Generated from pg_config.h.in by configure. */
/* src/include/pg_config.h.in. Generated from configure.in by autoheader. */
/* Define if building universal (internal helper macro) */
/* #undef AC_APPLE_UNIVERSAL_BUILD */
/* The normal alignment of `double', in bytes. */
#define ALIGNOF_DOUBLE 4
/* The normal alignment of `int', in bytes. */
#define ALIGNOF_INT 4
/* The normal alignment of `long', in bytes. */
#define ALIGNOF_LONG 4
/* The normal alignment of `long long int', in bytes. */
#define ALIGNOF_LONG_LONG_INT 4
/* The normal alignment of `short', in bytes. */
#define ALIGNOF_SHORT 2
/* Size of a disk block --- this also limits the size of a tuple. You can set
it bigger if you need bigger tuples (although TOAST should reduce the need
to have large tuples, since fields can be spread across multiple tuples).
BLCKSZ must be a power of 2. The maximum possible value of BLCKSZ is
currently 2^15 (32768). This is determined by the 15-bit widths of the
lp_off and lp_len fields in ItemIdData (see include/storage/itemid.h).
Changing BLCKSZ requires an initdb. */
#define BLCKSZ 8192
/* Define to the default TCP port number on which the server listens and to
which clients will try to connect. This can be overridden at run-time, but
it's convenient if your clients have the right default compiled in.
(--with-pgport=PORTNUM) */
#define DEF_PGPORT 5432
/* Define to the default TCP port number as a string constant. */
#define DEF_PGPORT_STR "5432"
/* Define to the file name extension of dynamically-loadable modules. */
#define DLSUFFIX ".so"
/* Define to build with GSSAPI support. (--with-gssapi) */
//#define ENABLE_GSS 0
/* Define to 1 if you want National Language Support. (--enable-nls) */
/* #undef ENABLE_NLS */
/* Define to 1 to build client libraries as thread-safe code.
(--enable-thread-safety) */
#define ENABLE_THREAD_SAFETY 1
/* Define to nothing if C supports flexible array members, and to 1 if it does
not. That way, with a declaration like `struct s { int n; double
d[FLEXIBLE_ARRAY_MEMBER]; };', the struct hack can be used with pre-C99
compilers. When computing the size of such an object, don't use 'sizeof
(struct s)' as it overestimates the size. Use 'offsetof (struct s, d)'
instead. Don't use 'offsetof (struct s, d[0])', as this doesn't work with
MSVC and with C++ compilers. */
#define FLEXIBLE_ARRAY_MEMBER /**/
/* float4 values are passed by value if 'true', by reference if 'false' */
#define FLOAT4PASSBYVAL true
/* float8, int8, and related values are passed by value if 'true', by
reference if 'false' */
#define FLOAT8PASSBYVAL false
/* Define to 1 if gettimeofday() takes only 1 argument. */
/* #undef GETTIMEOFDAY_1ARG */
#ifdef GETTIMEOFDAY_1ARG
# define gettimeofday(a,b) gettimeofday(a)
#endif
/* Define to 1 if you have the `append_history' function. */
/* #undef HAVE_APPEND_HISTORY */
/* Define to 1 if you want to use atomics if available. */
#define HAVE_ATOMICS 1
/* Define to 1 if you have the <atomic.h> header file. */
/* #undef HAVE_ATOMIC_H */
/* Define to 1 if you have the `cbrt' function. */
#define HAVE_CBRT 1
/* Define to 1 if you have the `class' function. */
/* #undef HAVE_CLASS */
/* Define to 1 if you have the <crtdefs.h> header file. */
/* #undef HAVE_CRTDEFS_H */
/* Define to 1 if you have the `crypt' function. */
#define HAVE_CRYPT 1
/* Define to 1 if you have the <crypt.h> header file. */
#define HAVE_CRYPT_H 1
/* Define to 1 if you have the declaration of `fdatasync', and to 0 if you
don't. */
#define HAVE_DECL_FDATASYNC 1
/* Define to 1 if you have the declaration of `F_FULLFSYNC', and to 0 if you
don't. */
#define HAVE_DECL_F_FULLFSYNC 0
/* Define to 1 if you have the declaration of `posix_fadvise', and to 0 if you
don't. */
#define HAVE_DECL_POSIX_FADVISE 1
/* Define to 1 if you have the declaration of `snprintf', and to 0 if you
don't. */
#define HAVE_DECL_SNPRINTF 1
/* Define to 1 if you have the declaration of `sigwait', and to 0 if you don't. */
#define HAVE_DECL_SIGWAIT 1
/* Define to 1 if you have the declaration of `strlcat', and to 0 if you
don't. */
#if OS_DARWIN
#define HAVE_DECL_STRLCAT 1
#endif
/* Define to 1 if you have the declaration of `strlcpy', and to 0 if you
don't. */
#if OS_DARWIN
#define HAVE_DECL_STRLCPY 1
#endif
/* Define to 1 if you have the declaration of `sys_siglist', and to 0 if you
don't. */
#define HAVE_DECL_SYS_SIGLIST 1
/* Define to 1 if you have the declaration of `vsnprintf', and to 0 if you
don't. */
#define HAVE_DECL_VSNPRINTF 1
/* Define to 1 if you have the <dld.h> header file. */
/* #undef HAVE_DLD_H */
/* Define to 1 if you have the `dlopen' function. */
#define HAVE_DLOPEN 1
/* Define to 1 if you have the <editline/history.h> header file. */
/* #undef HAVE_EDITLINE_HISTORY_H */
/* Define to 1 if you have the <editline/readline.h> header file. */
#define HAVE_EDITLINE_READLINE_H 1
/* Define to 1 if you have the `fdatasync' function. */
#define HAVE_FDATASYNC 1
/* Define to 1 if you have the `fls' function. */
/* #undef HAVE_FLS */
/* Define to 1 if you have the `fpclass' function. */
/* #undef HAVE_FPCLASS */
/* Define to 1 if you have the `fp_class' function. */
/* #undef HAVE_FP_CLASS */
/* Define to 1 if you have the `fp_class_d' function. */
/* #undef HAVE_FP_CLASS_D */
/* Define to 1 if you have the <fp_class.h> header file. */
/* #undef HAVE_FP_CLASS_H */
/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */
#define HAVE_FSEEKO 1
/* Define to 1 if your compiler understands __func__. */
#define HAVE_FUNCNAME__FUNC 1
/* Define to 1 if your compiler understands __FUNCTION__. */
/* #undef HAVE_FUNCNAME__FUNCTION */
/* Define to 1 if you have __atomic_compare_exchange_n(int *, int *, int). */
/* #undef HAVE_GCC__ATOMIC_INT32_CAS */
/* Define to 1 if you have __atomic_compare_exchange_n(int64 *, int *, int64).
*/
/* #undef HAVE_GCC__ATOMIC_INT64_CAS */
/* Define to 1 if you have __sync_lock_test_and_set(char *) and friends. */
#define HAVE_GCC__SYNC_CHAR_TAS 1
/* Define to 1 if you have __sync_compare_and_swap(int *, int, int). */
/* #undef HAVE_GCC__SYNC_INT32_CAS */
/* Define to 1 if you have __sync_lock_test_and_set(int *) and friends. */
#define HAVE_GCC__SYNC_INT32_TAS 1
/* Define to 1 if you have __sync_compare_and_swap(int64 *, int64, int64). */
/* #undef HAVE_GCC__SYNC_INT64_CAS */
/* Define to 1 if you have the `getaddrinfo' function. */
#define HAVE_GETADDRINFO 1
/* Define to 1 if you have the `gethostbyname_r' function. */
#define HAVE_GETHOSTBYNAME_R 1
/* Define to 1 if you have the `getifaddrs' function. */
#define HAVE_GETIFADDRS 1
/* Define to 1 if you have the `getopt' function. */
#define HAVE_GETOPT 1
/* Define to 1 if you have the <getopt.h> header file. */
#define HAVE_GETOPT_H 1
/* Define to 1 if you have the `getopt_long' function. */
#define HAVE_GETOPT_LONG 1
/* Define to 1 if you have the `getpeereid' function. */
/* #undef HAVE_GETPEEREID */
/* Define to 1 if you have the `getpeerucred' function. */
/* #undef HAVE_GETPEERUCRED */
/* Define to 1 if you have the `getpwuid_r' function. */
#define HAVE_GETPWUID_R 1
/* Define to 1 if you have the `getrlimit' function. */
#define HAVE_GETRLIMIT 1
/* Define to 1 if you have the `getrusage' function. */
#define HAVE_GETRUSAGE 1
/* Define to 1 if you have the `gettimeofday' function. */
/* #undef HAVE_GETTIMEOFDAY */
/* Define to 1 if you have the <gssapi/gssapi.h> header file. */
//#define HAVE_GSSAPI_GSSAPI_H 0
/* Define to 1 if you have the <gssapi.h> header file. */
/* #undef HAVE_GSSAPI_H */
/* Define to 1 if you have the <history.h> header file. */
/* #undef HAVE_HISTORY_H */
/* Define to 1 if you have the `history_truncate_file' function. */
#define HAVE_HISTORY_TRUNCATE_FILE 1
/* Define to 1 if you have the <ieeefp.h> header file. */
/* #undef HAVE_IEEEFP_H */
/* Define to 1 if you have the <ifaddrs.h> header file. */
#define HAVE_IFADDRS_H 1
/* Define to 1 if you have the `inet_aton' function. */
#define HAVE_INET_ATON 1
/* Define to 1 if you have the `inet_pton' function. */
#define HAVE_INET_PTON 1
/* Define to 1 if the system has the type `int64'. */
/* #undef HAVE_INT64 */
/* Define to 1 if the system has the type `int8'. */
/* #undef HAVE_INT8 */
/* Define to 1 if the system has the type `intptr_t'. */
#define HAVE_INTPTR_T 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the global variable 'int opterr'. */
#define HAVE_INT_OPTERR 1
/* Define to 1 if you have the global variable 'int optreset'. */
/* #undef HAVE_INT_OPTRESET */
/* Define to 1 if you have the global variable 'int timezone'. */
#define HAVE_INT_TIMEZONE 1
/* Define to 1 if you have support for IPv6. */
#define HAVE_IPV6 1
/* Define to 1 if you have isinf(). */
#define HAVE_ISINF 1
/* Define to 1 if you have the <langinfo.h> header file. */
#define HAVE_LANGINFO_H 1
/* Define to 1 if you have the <ldap.h> header file. */
//#define HAVE_LDAP_H 0
/* Define to 1 if you have the `crypto' library (-lcrypto). */
#define HAVE_LIBCRYPTO 1
/* Define to 1 if you have the `ldap' library (-lldap). */
//#define HAVE_LIBLDAP 0
/* Define to 1 if you have the `m' library (-lm). */
#define HAVE_LIBM 1
/* Define to 1 if you have the `pam' library (-lpam). */
#define HAVE_LIBPAM 1
/* Define if you have a function readline library */
#define HAVE_LIBREADLINE 1
/* Define to 1 if you have the `selinux' library (-lselinux). */
/* #undef HAVE_LIBSELINUX */
/* Define to 1 if you have the `ssl' library (-lssl). */
#define HAVE_LIBSSL 0
/* Define to 1 if you have the `wldap32' library (-lwldap32). */
/* #undef HAVE_LIBWLDAP32 */
/* Define to 1 if you have the `xml2' library (-lxml2). */
#define HAVE_LIBXML2 1
/* Define to 1 if you have the `xslt' library (-lxslt). */
#define HAVE_LIBXSLT 1
/* Define to 1 if you have the `z' library (-lz). */
#define HAVE_LIBZ 1
/* Define to 1 if you have the `zstd' library (-lzstd). */
/* #undef HAVE_LIBZSTD */
/* Define to 1 if constants of type 'long long int' should have the suffix LL.
*/
#define HAVE_LL_CONSTANTS 1
/* Define to 1 if the system has the type `locale_t'. */
#define HAVE_LOCALE_T 1
/* Define to 1 if `long int' works and is 64 bits. */
/* #undef HAVE_LONG_INT_64 */
/* Define to 1 if the system has the type `long long int'. */
#define HAVE_LONG_LONG_INT 1
/* Define to 1 if `long long int' works and is 64 bits. */
#define HAVE_LONG_LONG_INT_64 1
/* Define to 1 if you have the <mbarrier.h> header file. */
/* #undef HAVE_MBARRIER_H */
/* Define to 1 if you have the `mbstowcs_l' function. */
/* #undef HAVE_MBSTOWCS_L */
/* Define to 1 if you have the `memmove' function. */
#define HAVE_MEMMOVE 1
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if the system has the type `MINIDUMP_TYPE'. */
/* #undef HAVE_MINIDUMP_TYPE */
/* Define to 1 if you have the `mkdtemp' function. */
#define HAVE_MKDTEMP 1
/* Define to 1 if you have the <netinet/in.h> header file. */
#define HAVE_NETINET_IN_H 1
/* Define to 1 if you have the <netinet/tcp.h> header file. */
#define HAVE_NETINET_TCP_H 1
/* Define to 1 if you have the <net/if.h> header file. */
#define HAVE_NET_IF_H 1
/* Define to 1 if you have the <ossp/uuid.h> header file. */
/* #undef HAVE_OSSP_UUID_H */
/* Define to 1 if you have the <pam/pam_appl.h> header file. */
/* #undef HAVE_PAM_PAM_APPL_H */
/* Define to 1 if you have the `poll' function. */
#define HAVE_POLL 1
/* Define to 1 if you have the <poll.h> header file. */
#define HAVE_POLL_H 1
/* Define to 1 if you have a POSIX-conforming sigwait declaration. */
/* #undef HAVE_POSIX_DECL_SIGWAIT */
/* Define to 1 if you have the `posix_fadvise' function. */
#define HAVE_POSIX_FADVISE 1
/* Define to 1 if you have the declaration of `preadv', and to 0 if you don't. */
/* #undef HAVE_DECL_PREADV */
/* Define to 1 if you have the declaration of `pwritev', and to 0 if you don't. */
/* #define HAVE_DECL_PWRITEV */
/* Define to 1 if you have the `X509_get_signature_info' function. */
/* #undef HAVE_X509_GET_SIGNATURE_INFO */
/* Define to 1 if you have the POSIX signal interface. */
#define HAVE_POSIX_SIGNALS 1
/* Define to 1 if the assembler supports PPC's LWARX mutex hint bit. */
/* #undef HAVE_PPC_LWARX_MUTEX_HINT */
/* Define to 1 if you have the `pstat' function. */
/* #undef HAVE_PSTAT */
/* Define to 1 if the PS_STRINGS thing exists. */
/* #undef HAVE_PS_STRINGS */
/* Define to 1 if you have the `pthread_is_threaded_np' function. */
/* #undef HAVE_PTHREAD_IS_THREADED_NP */
/* Define to 1 if you have the <pwd.h> header file. */
#define HAVE_PWD_H 1
/* Define to 1 if you have the <readline.h> header file. */
/* #undef HAVE_READLINE_H */
/* Define to 1 if you have the <readline/history.h> header file. */
#define HAVE_READLINE_HISTORY_H 1
/* Define to 1 if you have the <readline/readline.h> header file. */
/* #undef HAVE_READLINE_READLINE_H */
/* Define to 1 if you have the `readlink' function. */
#define HAVE_READLINK 1
/* Define to 1 if you have the `rint' function. */
#define HAVE_RINT 1
/* Define to 1 if you have the `rl_completion_matches' function. */
#define HAVE_RL_COMPLETION_MATCHES 1
/* Define to 1 if you have the `rl_filename_completion_function' function. */
#define HAVE_RL_FILENAME_COMPLETION_FUNCTION 1
/* Define to 1 if you have the `rl_reset_screen_size' function. */
/* #undef HAVE_RL_RESET_SCREEN_SIZE */
/* Define to 1 if you have the `rl_variable_bind' function. */
#define HAVE_RL_VARIABLE_BIND 1
/* Define to 1 if you have the <security/pam_appl.h> header file. */
#define HAVE_SECURITY_PAM_APPL_H 1
/* Define to 1 if you have the `setproctitle' function. */
/* #undef HAVE_SETPROCTITLE */
/* Define to 1 if you have the `setsid' function. */
#define HAVE_SETSID 1
/* Define to 1 if you have the `shm_open' function. */
#define HAVE_SHM_OPEN 1
/* Define to 1 if the system has the type `socklen_t'. */
#define HAVE_SOCKLEN_T 1
/* Define to 1 if you have the `sigprocmask' function. */
#define HAVE_SIGPROCMASK 1
/* Define to 1 if you have sigsetjmp(). */
#define HAVE_SIGSETJMP 1
/* Define to 1 if the system has the type `sig_atomic_t'. */
#define HAVE_SIG_ATOMIC_T 1
/* Define to 1 if you have the `snprintf' function. */
#define HAVE_SNPRINTF 1
/* Define to 1 if you have spinlocks. */
#define HAVE_SPINLOCKS 1
/* Define to 1 if you have the `SSL_CTX_set_num_tickets' function. */
/* #define HAVE_SSL_CTX_SET_NUM_TICKETS */
/* Define to 1 if you have the `SSL_get_current_compression' function. */
#define HAVE_SSL_GET_CURRENT_COMPRESSION 0
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the `strerror' function. */
#define HAVE_STRERROR 1
/* Define to 1 if you have the `strerror_r' function. */
#define HAVE_STRERROR_R 1
/* Define to 1 if you have the <strings.h> header file. */
//#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the `strlcat' function. */
/* #undef HAVE_STRLCAT */
/* Define to 1 if you have the `strlcpy' function. */
/* #undef HAVE_STRLCPY */
/* Define to 1 if you have the `strtoll' function. */
#define HAVE_STRTOLL 1
#if (!OS_DARWIN)
#define HAVE_STRCHRNUL 1
#endif
/* Define to 1 if you have the `strtoq' function. */
/* #undef HAVE_STRTOQ */
/* Define to 1 if you have the `strtoull' function. */
#define HAVE_STRTOULL 1
/* Define to 1 if you have the `strtouq' function. */
/* #undef HAVE_STRTOUQ */
/* Define to 1 if the system has the type `struct addrinfo'. */
#define HAVE_STRUCT_ADDRINFO 1
/* Define to 1 if the system has the type `struct cmsgcred'. */
/* #undef HAVE_STRUCT_CMSGCRED */
/* Define to 1 if the system has the type `struct option'. */
#define HAVE_STRUCT_OPTION 1
/* Define to 1 if `sa_len' is a member of `struct sockaddr'. */
/* #undef HAVE_STRUCT_SOCKADDR_SA_LEN */
/* Define to 1 if the system has the type `struct sockaddr_storage'. */
#define HAVE_STRUCT_SOCKADDR_STORAGE 1
/* Define to 1 if `ss_family' is a member of `struct sockaddr_storage'. */
#define HAVE_STRUCT_SOCKADDR_STORAGE_SS_FAMILY 1
/* Define to 1 if `ss_len' is a member of `struct sockaddr_storage'. */
/* #undef HAVE_STRUCT_SOCKADDR_STORAGE_SS_LEN */
/* Define to 1 if `__ss_family' is a member of `struct sockaddr_storage'. */
/* #undef HAVE_STRUCT_SOCKADDR_STORAGE___SS_FAMILY */
/* Define to 1 if `__ss_len' is a member of `struct sockaddr_storage'. */
/* #undef HAVE_STRUCT_SOCKADDR_STORAGE___SS_LEN */
/* Define to 1 if `tm_zone' is a member of `struct tm'. */
#define HAVE_STRUCT_TM_TM_ZONE 1
/* Define to 1 if you have the `symlink' function. */
#define HAVE_SYMLINK 1
/* Define to 1 if you have the `sync_file_range' function. */
/* #undef HAVE_SYNC_FILE_RANGE */
/* Define to 1 if you have the syslog interface. */
#define HAVE_SYSLOG 1
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#define HAVE_SYS_IOCTL_H 1
/* Define to 1 if you have the <sys/ipc.h> header file. */
#define HAVE_SYS_IPC_H 1
/* Define to 1 if you have the <sys/personality.h> header file. */
/* #undef HAVE_SYS_PERSONALITY_H */
/* Define to 1 if you have the <sys/poll.h> header file. */
#define HAVE_SYS_POLL_H 1
/* Define to 1 if you have the <sys/pstat.h> header file. */
/* #undef HAVE_SYS_PSTAT_H */
/* Define to 1 if you have the <sys/resource.h> header file. */
#define HAVE_SYS_RESOURCE_H 1
/* Define to 1 if you have the <sys/select.h> header file. */
#define HAVE_SYS_SELECT_H 1
/* Define to 1 if you have the <sys/sem.h> header file. */
#define HAVE_SYS_SEM_H 1
/* Define to 1 if you have the <sys/shm.h> header file. */
#define HAVE_SYS_SHM_H 1
/* Define to 1 if you have the <sys/signalfd.h> header file. */
/* #undef HAVE_SYS_SIGNALFD_H */
/* Define to 1 if you have the <sys/socket.h> header file. */
#define HAVE_SYS_SOCKET_H 1
/* Define to 1 if you have the <sys/sockio.h> header file. */
/* #undef HAVE_SYS_SOCKIO_H */
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/tas.h> header file. */
/* #undef HAVE_SYS_TAS_H */
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <sys/ucred.h> header file. */
#if (OS_DARWIN || OS_FREEBSD)
#define HAVE_SYS_UCRED_H 1
#endif
/* Define to 1 if you have the <sys/un.h> header file. */
#define HAVE_SYS_UN_H 1
#define _GNU_SOURCE 1 /* Needed for glibc struct ucred */
/* Define to 1 if you have the <termios.h> header file. */
#define HAVE_TERMIOS_H 1
/* Define to 1 if your `struct tm' has `tm_zone'. Deprecated, use
`HAVE_STRUCT_TM_TM_ZONE' instead. */
#define HAVE_TM_ZONE 1
/* Define to 1 if you have the `towlower' function. */
#define HAVE_TOWLOWER 1
/* Define to 1 if you have the external array `tzname'. */
#define HAVE_TZNAME 1
/* Define to 1 if you have the <ucred.h> header file. */
/* #undef HAVE_UCRED_H */
/* Define to 1 if the system has the type `uint64'. */
/* #undef HAVE_UINT64 */
/* Define to 1 if the system has the type `uint8'. */
/* #undef HAVE_UINT8 */
/* Define to 1 if the system has the type `uintptr_t'. */
#define HAVE_UINTPTR_T 1
/* Define to 1 if the system has the type `union semun'. */
/* #undef HAVE_UNION_SEMUN */
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to 1 if you have unix sockets. */
#define HAVE_UNIX_SOCKETS 1
/* Define to 1 if you have the `unsetenv' function. */
#define HAVE_UNSETENV 1
/* Define to 1 if the system has the type `unsigned long long int'. */
#define HAVE_UNSIGNED_LONG_LONG_INT 1
/* Define to 1 if you have the `utime' function. */
#define HAVE_UTIME 1
/* Define to 1 if you have the `utimes' function. */
#define HAVE_UTIMES 1
/* Define to 1 if you have the <utime.h> header file. */
#define HAVE_UTIME_H 1
/* Define to 1 if you have BSD UUID support. */
/* #undef HAVE_UUID_BSD */
/* Define to 1 if you have E2FS UUID support. */
/* #undef HAVE_UUID_E2FS */
/* Define to 1 if you have the <uuid.h> header file. */
#define HAVE_UUID_H 1
/* Define to 1 if you have OSSP UUID support. */
#define HAVE_UUID_OSSP 1
/* Define to 1 if you have the <uuid/uuid.h> header file. */
/* #undef HAVE_UUID_UUID_H */
/* Define to 1 if you have the `vsnprintf' function. */
#define HAVE_VSNPRINTF 1
/* Define to 1 if you have the <wchar.h> header file. */
#define HAVE_WCHAR_H 1
/* Define to 1 if you have the `wcstombs' function. */
#define HAVE_WCSTOMBS 1
/* Define to 1 if you have the `wcstombs_l' function. */
/* #undef HAVE_WCSTOMBS_L */
/* Define to 1 if you have the <wctype.h> header file. */
#define HAVE_WCTYPE_H 1
/* Define to 1 if you have the <winldap.h> header file. */
/* #undef HAVE_WINLDAP_H */
/* Define to 1 if your compiler understands __builtin_bswap32. */
/* #undef HAVE__BUILTIN_BSWAP32 */
/* Define to 1 if your compiler understands __builtin_constant_p. */
#define HAVE__BUILTIN_CONSTANT_P 1
/* Define to 1 if your compiler understands __builtin_frame_address. */
/* #undef HAVE__BUILTIN_FRAME_ADDRESS */
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */
#define HAVE__BUILTIN_TYPES_COMPATIBLE_P 1
/* Define to 1 if your compiler understands __builtin_unreachable. */
/* #undef HAVE__BUILTIN_UNREACHABLE */
/* Define to 1 if you have __cpuid. */
/* #undef HAVE__CPUID */
/* Define to 1 if you have __get_cpuid. */
/* #undef HAVE__GET_CPUID */
/* Define to 1 if your compiler understands _Static_assert. */
/* #undef HAVE__STATIC_ASSERT */
/* Define to 1 if your compiler understands __VA_ARGS__ in macros. */
#define HAVE__VA_ARGS 1
/* Define to the appropriate snprintf length modifier for 64-bit ints. */
#define INT64_MODIFIER "ll"
/* Define to 1 if `locale_t' requires <xlocale.h>. */
/* #undef LOCALE_T_IN_XLOCALE */
/* Define as the maximum alignment requirement of any C data type. */
#define MAXIMUM_ALIGNOF 4
/* Define bytes to use libc memset(). */
#define MEMSET_LOOP_LIMIT 1024
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "pgsql-bugs@postgresql.org"
/* Define to the full name of this package. */
#define PACKAGE_NAME "PostgreSQL"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "PostgreSQL 9.5.4"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "postgresql"
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "9.5.4"
/* Define to the name of a signed 128-bit integer type. */
/* #undef PG_INT128_TYPE */
/* Define to the name of a signed 64-bit integer type. */
#define PG_INT64_TYPE long long int
/* Define to the name of the default PostgreSQL service principal in Kerberos
(GSSAPI). (--with-krb-srvnam=NAME) */
#define PG_KRB_SRVNAM "postgres"
/* PostgreSQL major version as a string */
#define PG_MAJORVERSION "9.5"
/* Define to gnu_printf if compiler supports it, else printf. */
#define PG_PRINTF_ATTRIBUTE printf
/* Define to 1 if "static inline" works without unwanted warnings from
compilations where static inline functions are defined but not called. */
#define PG_USE_INLINE 1
/* PostgreSQL version as a string */
#define PG_VERSION "9.5.4"
/* PostgreSQL version as a number */
#define PG_VERSION_NUM 90504
/* A string containing the version number, platform, and C compiler */
#define PG_VERSION_STR "PostgreSQL 9.5.4 on i686-pc-linux-gnu, compiled by gcc (GCC) 4.1.2 20080704 (Red Hat 4.1.2-55), 32-bit"
/* Define to 1 to allow profiling output to be saved separately for each
process. */
/* #undef PROFILE_PID_DIR */
/* RELSEG_SIZE is the maximum number of blocks allowed in one disk file. Thus,
the maximum size of a single file is RELSEG_SIZE * BLCKSZ; relations bigger
than that are divided into multiple files. RELSEG_SIZE * BLCKSZ must be
less than your OS' limit on file size. This is often 2 GB or 4GB in a
32-bit operating system, unless you have large file support enabled. By
default, we make the limit 1 GB to avoid any possible integer-overflow
problems within the OS. A limit smaller than necessary only means we divide
a large relation into more chunks than necessary, so it seems best to err
in the direction of a small limit. A power-of-2 value is recommended to
save a few cycles in md.c, but is not absolutely required. Changing
RELSEG_SIZE requires an initdb. */
#define RELSEG_SIZE 131072
/* The size of `long', as computed by sizeof. */
#define SIZEOF_LONG 4
/* The size of `off_t', as computed by sizeof. */
#define SIZEOF_OFF_T 8
/* The size of `size_t', as computed by sizeof. */
#define SIZEOF_SIZE_T 4
/* The size of `void *', as computed by sizeof. */
#define SIZEOF_VOID_P 4
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define to 1 if strerror_r() returns a int. */
/* #undef STRERROR_R_INT */
/* Define to 1 if your <sys/time.h> declares `struct tm'. */
/* #undef TM_IN_SYS_TIME */
/* Define to 1 to build with assertion checks. (--enable-cassert) */
/* #undef USE_ASSERT_CHECKING */
/* Define to 1 to build with Bonjour support. (--with-bonjour) */
/* #undef USE_BONJOUR */
/* Define to 1 if you want float4 values to be passed by value.
(--enable-float4-byval) */
#define USE_FLOAT4_BYVAL 1
/* Define to 1 if you want float8, int8, etc values to be passed by value.
(--enable-float8-byval) */
/* #undef USE_FLOAT8_BYVAL */
/* Define to 1 if you want 64-bit integer timestamp and interval support.
(--enable-integer-datetimes) */
#define USE_INTEGER_DATETIMES 1
/* Define to 1 to build with LDAP support. (--with-ldap) */
//#define USE_LDAP 0
/* Define to 1 to build with XML support. (--with-libxml) */
#define USE_LIBXML 1
/* Define to 1 to use XSLT support when building contrib/xml2.
(--with-libxslt) */
#define USE_LIBXSLT 1
/* Define to select named POSIX semaphores. */
/* #undef USE_NAMED_POSIX_SEMAPHORES */
/* Define to build with OpenSSL support. (--with-openssl) */
#define USE_OPENSSL 0
#define USE_OPENSSL_RANDOM 0
#define FRONTEND 1
/* Define to 1 to build with PAM support. (--with-pam) */
#define USE_PAM 1
/* Use replacement snprintf() functions. */
/* #undef USE_REPL_SNPRINTF */
/* Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check. */
#define USE_SLICING_BY_8_CRC32C 1
/* Define to 1 use Intel SSE 4.2 CRC instructions. */
/* #undef USE_SSE42_CRC32C */
/* Define to 1 to use Intel SSSE 4.2 CRC instructions with a runtime check. */
/* #undef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK */
/* Define to select SysV-style semaphores. */
#define USE_SYSV_SEMAPHORES 1
/* Define to select SysV-style shared memory. */
#define USE_SYSV_SHARED_MEMORY 1
/* Define to select unnamed POSIX semaphores. */
/* #undef USE_UNNAMED_POSIX_SEMAPHORES */
/* Define to select Win32-style semaphores. */
/* #undef USE_WIN32_SEMAPHORES */
/* Define to select Win32-style shared memory. */
/* #undef USE_WIN32_SHARED_MEMORY */
/* Define to 1 to build with ZSTD support. (--with-zstd) */
/* #undef USE_ZSTD */
/* Define to 1 if `wcstombs_l' requires <xlocale.h>. */
/* #undef WCSTOMBS_L_IN_XLOCALE */
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
significant byte first (like Motorola and SPARC, unlike Intel). */
#if defined AC_APPLE_UNIVERSAL_BUILD
# if defined __BIG_ENDIAN__
# define WORDS_BIGENDIAN 1
# endif
#else
# ifndef WORDS_BIGENDIAN
/* # undef WORDS_BIGENDIAN */
# endif
#endif
/* Size of a WAL file block. This need have no particular relation to BLCKSZ.
XLOG_BLCKSZ must be a power of 2, and if your system supports O_DIRECT I/O,
XLOG_BLCKSZ must be a multiple of the alignment requirement for direct-I/O
buffers, else direct I/O may fail. Changing XLOG_BLCKSZ requires an initdb.
*/
#define XLOG_BLCKSZ 8192
/* XLOG_SEG_SIZE is the size of a single WAL file. This must be a power of 2
and larger than XLOG_BLCKSZ (preferably, a great deal larger than
XLOG_BLCKSZ). Changing XLOG_SEG_SIZE requires an initdb. */
#define XLOG_SEG_SIZE (16 * 1024 * 1024)
/* Number of bits in a file offset, on hosts where this is settable. */
#define _FILE_OFFSET_BITS 64
/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */
/* #undef _LARGEFILE_SOURCE */
/* Define for large files, on AIX-style hosts. */
/* #undef _LARGE_FILES */
/* Define to `__inline__' or `__inline' if that's what the C compiler
calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
/* #undef inline */
#endif
/* Define to the type of a signed integer type wide enough to hold a pointer,
if such a type exists, and if the system does not define it. */
/* #undef intptr_t */
/* Define to empty if the C compiler does not understand signed types. */
/* #undef signed */
/* Define to the type of an unsigned integer type wide enough to hold a
pointer, if such a type exists, and if the system does not define it. */
/* #undef uintptr_t */

View File

@ -0,0 +1,7 @@
/*
* * src/include/pg_config_ext.h.in. This is generated manually, not by
* * autoheader, since we want to limit which symbols get defined here.
* */
/* Define to the name of a signed 64-bit integer type. */
#define PG_INT64_TYPE long long int

View File

@ -0,0 +1,34 @@
#if defined(OS_DARWIN)
/* src/include/port/darwin.h */
#define __darwin__ 1
#if HAVE_DECL_F_FULLFSYNC /* not present before macOS 10.3 */
#define HAVE_FSYNC_WRITETHROUGH
#endif
#else
/* src/include/port/linux.h */
/*
* As of July 2007, all known versions of the Linux kernel will sometimes
* return EIDRM for a shmctl() operation when EINVAL is correct (it happens
* when the low-order 15 bits of the supplied shm ID match the slot number
* assigned to a newer shmem segment). We deal with this by assuming that
* EIDRM means EINVAL in PGSharedMemoryIsInUse(). This is reasonably safe
* since in fact Linux has no excuse for ever returning EIDRM; it doesn't
* track removed segments in a way that would allow distinguishing them from
* private ones. But someday that code might get upgraded, and we'd have
* to have a kernel version test here.
*/
#define HAVE_LINUX_EIDRM_BUG
/*
* Set the default wal_sync_method to fdatasync. With recent Linux versions,
* xlogdefs.h's normal rules will prefer open_datasync, which (a) doesn't
* perform better and (b) causes outright failures on ext4 data=journal
* filesystems, because those don't support O_DIRECT.
*/
#define PLATFORM_DEFAULT_SYNC_METHOD SYNC_METHOD_FDATASYNC
#endif

View File

@ -0,0 +1,12 @@
#define PGBINDIR "/bin"
#define PGSHAREDIR "/share"
#define SYSCONFDIR "/etc"
#define INCLUDEDIR "/include"
#define PKGINCLUDEDIR "/include"
#define INCLUDEDIRSERVER "/include/server"
#define LIBDIR "/lib"
#define PKGLIBDIR "/lib"
#define LOCALEDIR "/share/locale"
#define DOCDIR "/doc"
#define HTMLDIR "/doc"
#define MANDIR "/man"

View File

View File

@ -155,6 +155,12 @@ Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.htm
Sets a comma-separated list of PostgreSQL database tables, which will be replicated via [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) database engine.
Each table can have subset of replicated columns in brackets. If subset of columns is omitted, then all columns for table will be replicated.
``` sql
materialized_postgresql_tables_list = 'table1(co1, col2),table2,table3(co3, col5, col7)
```
Default value: empty list — means whole PostgreSQL database will be replicated.
### `materialized_postgresql_schema` {#materialized-postgresql-schema}

View File

@ -112,7 +112,7 @@ Example:
```
The NATS server configuration can be added using the ClickHouse config file.
More specifically you can add Redis password for NATS engine:
More specifically you can add Redis password for NATS engine:
``` xml
<nats>
@ -167,7 +167,7 @@ If you want to change the target table by using `ALTER`, we recommend disabling
- `_subject` - NATS message subject. Data type: `String`.
Additional virtual columns when `kafka_handle_error_mode='stream'`:
Additional virtual columns when `nats_handle_error_mode='stream'`:
- `_raw_message` - Raw message that couldn't be parsed successfully. Data type: `Nullable(String)`.
- `_error` - Exception message happened during failed parsing. Data type: `Nullable(String)`.

View File

@ -29,6 +29,7 @@ namespace DB
namespace ErrorCodes
{
extern const int CANNOT_RESTORE_TABLE;
extern const int ACCESS_ENTITY_ALREADY_EXISTS;
extern const int LOGICAL_ERROR;
}
@ -175,9 +176,46 @@ namespace
return res;
}
std::unordered_map<UUID, UUID> resolveDependencies(const std::unordered_map<UUID, std::pair<String, AccessEntityType>> & dependencies, const AccessControl & access_control, bool allow_unresolved_dependencies)
/// Checks if new entities (which we're going to restore) already exist,
/// and either skips them or throws an exception depending on the restore settings.
void checkExistingEntities(std::vector<std::pair<UUID, AccessEntityPtr>> & entities,
std::unordered_map<UUID, UUID> & old_to_new_id,
const AccessControl & access_control,
RestoreAccessCreationMode creation_mode)
{
if (creation_mode == RestoreAccessCreationMode::kReplace)
return;
auto should_skip = [&](const std::pair<UUID, AccessEntityPtr> & id_and_entity)
{
const auto & id = id_and_entity.first;
const auto & entity = *id_and_entity.second;
auto existing_id = access_control.find(entity.getType(), entity.getName());
if (!existing_id)
{
return false;
}
else if (creation_mode == RestoreAccessCreationMode::kCreateIfNotExists)
{
old_to_new_id[id] = *existing_id;
return true;
}
else
{
throw Exception(ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS, "Cannot restore {} because it already exists", entity.formatTypeWithName());
}
};
std::erase_if(entities, should_skip);
}
/// If new entities (which we're going to restore) depend on other entities which are not going to be restored or not present in the backup
/// then we should try to replace those dependencies with already existing entities.
void resolveDependencies(const std::unordered_map<UUID, std::pair<String, AccessEntityType>> & dependencies,
std::unordered_map<UUID, UUID> & old_to_new_ids,
const AccessControl & access_control,
bool allow_unresolved_dependencies)
{
std::unordered_map<UUID, UUID> old_to_new_ids;
for (const auto & [id, name_and_type] : dependencies)
{
std::optional<UUID> new_id;
@ -188,9 +226,9 @@ namespace
if (new_id)
old_to_new_ids.emplace(id, *new_id);
}
return old_to_new_ids;
}
/// Generates random IDs for the new entities.
void generateRandomIDs(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, std::unordered_map<UUID, UUID> & old_to_new_ids)
{
Poco::UUIDGenerator generator;
@ -203,27 +241,12 @@ namespace
}
}
void replaceDependencies(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const std::unordered_map<UUID, UUID> & old_to_new_ids)
/// Updates dependencies of the new entities using a specified map.
void replaceDependencies(std::vector<std::pair<UUID, AccessEntityPtr>> & entities,
const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
for (auto & entity : entities | boost::adaptors::map_values)
{
bool need_replace = false;
for (const auto & dependency : entity->findDependencies())
{
if (old_to_new_ids.contains(dependency))
{
need_replace = true;
break;
}
}
if (!need_replace)
continue;
auto new_entity = entity->clone();
new_entity->replaceDependencies(old_to_new_ids);
entity = new_entity;
}
IAccessEntity::replaceDependencies(entity, old_to_new_ids);
}
AccessRightsElements getRequiredAccessToRestore(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities)
@ -314,7 +337,9 @@ std::pair<String, BackupEntryPtr> makeBackupEntryForAccess(
AccessRestorerFromBackup::AccessRestorerFromBackup(
const BackupPtr & backup_, const RestoreSettings & restore_settings_)
: backup(backup_), allow_unresolved_access_dependencies(restore_settings_.allow_unresolved_access_dependencies)
: backup(backup_)
, creation_mode(restore_settings_.create_access)
, allow_unresolved_dependencies(restore_settings_.allow_unresolved_access_dependencies)
{
}
@ -362,7 +387,9 @@ std::vector<std::pair<UUID, AccessEntityPtr>> AccessRestorerFromBackup::getAcces
{
auto new_entities = entities;
auto old_to_new_ids = resolveDependencies(dependencies, access_control, allow_unresolved_access_dependencies);
std::unordered_map<UUID, UUID> old_to_new_ids;
checkExistingEntities(new_entities, old_to_new_ids, access_control, creation_mode);
resolveDependencies(dependencies, old_to_new_ids, access_control, allow_unresolved_dependencies);
generateRandomIDs(new_entities, old_to_new_ids);
replaceDependencies(new_entities, old_to_new_ids);

View File

@ -17,6 +17,7 @@ using BackupPtr = std::shared_ptr<const IBackup>;
class IBackupEntry;
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
struct RestoreSettings;
enum class RestoreAccessCreationMode : uint8_t;
/// Makes a backup of access entities of a specified type.
@ -45,7 +46,8 @@ public:
private:
BackupPtr backup;
bool allow_unresolved_access_dependencies = false;
RestoreAccessCreationMode creation_mode;
bool allow_unresolved_dependencies = false;
std::vector<std::pair<UUID, AccessEntityPtr>> entities;
std::unordered_map<UUID, std::pair<String, AccessEntityType>> dependencies;
std::unordered_set<String> data_paths;

View File

@ -544,9 +544,9 @@ scope_guard AccessControl::subscribeForChanges(const std::vector<UUID> & ids, co
return changes_notifier->subscribeForChanges(ids, handler);
}
bool AccessControl::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
bool AccessControl::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
if (MultipleAccessStorage::insertImpl(id, entity, replace_if_exists, throw_if_exists))
if (MultipleAccessStorage::insertImpl(id, entity, replace_if_exists, throw_if_exists, conflicting_id))
{
changes_notifier->sendNotifications();
return true;

View File

@ -243,7 +243,7 @@ private:
class CustomSettingsPrefixes;
class PasswordComplexityRules;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;

View File

@ -1,8 +1,6 @@
#include <Access/DiskAccessStorage.h>
#include <Access/AccessEntityIO.h>
#include <Access/AccessChangesNotifier.h>
#include <Backups/RestorerFromBackup.h>
#include <Backups/RestoreSettings.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromFile.h>
@ -418,7 +416,7 @@ void DiskAccessStorage::setAllInMemory(const std::vector<std::pair<UUID, AccessE
/// Insert or update entities.
for (const auto & [id, entity] : entities_without_conflicts)
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* write_on_disk= */ false);
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* conflicting_id = */ nullptr, /* write_on_disk= */ false);
}
void DiskAccessStorage::removeAllExceptInMemory(const boost::container::flat_set<UUID> & ids_to_keep)
@ -507,14 +505,14 @@ std::optional<std::pair<String, AccessEntityType>> DiskAccessStorage::readNameWi
}
bool DiskAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
bool DiskAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
std::lock_guard lock{mutex};
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, /* write_on_disk = */ true);
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, conflicting_id, /* write_on_disk = */ true);
}
bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk)
bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id, bool write_on_disk)
{
const String & name = new_entity->getName();
AccessEntityType type = new_entity->getType();
@ -533,9 +531,15 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
if (name_collision && !replace_if_exists)
{
if (throw_if_exists)
{
throwNameCollisionCannotInsert(type, name);
}
else
{
if (conflicting_id)
*conflicting_id = id_by_name;
return false;
}
}
auto it_by_id = entries_by_id.find(id);
@ -548,7 +552,11 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
throwIDCollisionCannotInsert(id, type, name, existing_entry.type, existing_entry.name);
}
else
{
if (conflicting_id)
*conflicting_id = id;
return false;
}
}
if (write_on_disk)
@ -727,25 +735,4 @@ void DiskAccessStorage::deleteAccessEntityOnDisk(const UUID & id) const
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Couldn't delete {}", file_path);
}
void DiskAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
auto entities = restorer.getAccessEntitiesToRestore();
if (entities.empty())
return;
auto create_access = restorer.getRestoreSettings().create_access;
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
{
for (const auto & [id, entity] : my_entities)
insert(id, entity, replace_if_exists, throw_if_exists);
});
}
}

View File

@ -34,14 +34,13 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
void restoreFromBackup(RestorerFromBackup & restorer) override;
private:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
@ -55,7 +54,7 @@ private:
void listsWritingThreadFunc() TSA_NO_THREAD_SAFETY_ANALYSIS;
void stopListsWritingThread();
bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk) TSA_REQUIRES(mutex);
bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id, bool write_on_disk) TSA_REQUIRES(mutex);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex);
bool removeNoLock(const UUID & id, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex);

View File

@ -9,4 +9,28 @@ bool IAccessEntity::equal(const IAccessEntity & other) const
return (name == other.name) && (getType() == other.getType());
}
void IAccessEntity::replaceDependencies(std::shared_ptr<const IAccessEntity> & entity, const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
if (old_to_new_ids.empty())
return;
bool need_replace_dependencies = false;
auto dependencies = entity->findDependencies();
for (const auto & dependency : dependencies)
{
if (old_to_new_ids.contains(dependency))
{
need_replace_dependencies = true;
break;
}
}
if (!need_replace_dependencies)
return;
auto new_entity = entity->clone();
new_entity->replaceDependencies(old_to_new_ids);
entity = new_entity;
}
}

View File

@ -50,7 +50,8 @@ struct IAccessEntity
virtual std::vector<UUID> findDependencies() const { return {}; }
/// Replaces dependencies according to a specified map.
virtual void replaceDependencies(const std::unordered_map<UUID, UUID> & /* old_to_new_ids */) {}
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) { doReplaceDependencies(old_to_new_ids); }
static void replaceDependencies(std::shared_ptr<const IAccessEntity> & entity, const std::unordered_map<UUID, UUID> & old_to_new_ids);
/// Whether this access entity should be written to a backup.
virtual bool isBackupAllowed() const { return false; }
@ -66,6 +67,8 @@ protected:
{
return std::make_shared<EntityClassT>(typeid_cast<const EntityClassT &>(*this));
}
virtual void doReplaceDependencies(const std::unordered_map<UUID, UUID> & /* old_to_new_ids */) {}
};
using AccessEntityPtr = std::shared_ptr<const IAccessEntity>;

View File

@ -4,6 +4,8 @@
#include <Access/User.h>
#include <Access/AccessBackup.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/RestorerFromBackup.h>
#include <Backups/RestoreSettings.h>
#include <Common/Exception.h>
#include <Common/quoteString.h>
#include <Common/callOnce.h>
@ -14,10 +16,11 @@
#include <base/FnTraits.h>
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/adaptor/reversed.hpp>
#include <boost/range/algorithm/copy.hpp>
#include <boost/range/algorithm_ext/erase.hpp>
namespace DB
{
namespace ErrorCodes
@ -178,20 +181,20 @@ UUID IAccessStorage::insert(const AccessEntityPtr & entity)
return *insert(entity, /* replace_if_exists = */ false, /* throw_if_exists = */ true);
}
std::optional<UUID> IAccessStorage::insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
std::optional<UUID> IAccessStorage::insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
auto id = generateRandomID();
if (insert(id, entity, replace_if_exists, throw_if_exists))
if (insert(id, entity, replace_if_exists, throw_if_exists, conflicting_id))
return id;
return std::nullopt;
}
bool IAccessStorage::insert(const DB::UUID & id, const DB::AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
bool IAccessStorage::insert(const DB::UUID & id, const DB::AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
return insertImpl(id, entity, replace_if_exists, throw_if_exists);
return insertImpl(id, entity, replace_if_exists, throw_if_exists, conflicting_id);
}
@ -285,7 +288,7 @@ std::vector<UUID> IAccessStorage::insertOrReplace(const std::vector<AccessEntity
}
bool IAccessStorage::insertImpl(const UUID &, const AccessEntityPtr & entity, bool, bool)
bool IAccessStorage::insertImpl(const UUID &, const AccessEntityPtr & entity, bool, bool, UUID *)
{
if (isReadOnly())
throwReadonlyCannotInsert(entity->getType(), entity->getName());
@ -611,12 +614,51 @@ void IAccessStorage::backup(BackupEntriesCollector & backup_entries_collector, c
}
void IAccessStorage::restoreFromBackup(RestorerFromBackup &)
void IAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "restoreFromBackup() is not implemented in {}", getStorageType());
if (isReplicated() && !acquireReplicatedRestore(restorer))
return;
auto entities = restorer.getAccessEntitiesToRestore();
if (entities.empty())
return;
auto create_access = restorer.getRestoreSettings().create_access;
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, entities_to_restore = std::move(entities), replace_if_exists, throw_if_exists] mutable
{
std::unordered_map<UUID, UUID> new_to_existing_ids;
for (auto & [id, entity] : entities_to_restore)
{
UUID existing_entity_id;
if (!insert(id, entity, replace_if_exists, throw_if_exists, &existing_entity_id))
{
/// Couldn't insert `entity` because there is an existing entity with the same name.
new_to_existing_ids[id] = existing_entity_id;
}
}
if (!new_to_existing_ids.empty())
{
/// If new entities restored from backup have dependencies on other entities from backup which were not restored because they existed,
/// then we should correct those dependencies.
auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr
{
auto res = entity;
IAccessEntity::replaceDependencies(res, new_to_existing_ids);
return res;
};
std::vector<UUID> ids;
ids.reserve(entities_to_restore.size());
boost::copy(entities_to_restore | boost::adaptors::map_keys, std::back_inserter(ids));
tryUpdate(ids, update_func);
}
});
}

View File

@ -64,6 +64,9 @@ public:
/// Returns true if this entity is readonly.
virtual bool isReadOnly(const UUID &) const { return isReadOnly(); }
/// Returns true if this storage is replicated.
virtual bool isReplicated() const { return false; }
/// Starts periodic reloading and updating of entities in this storage.
virtual void startPeriodicReloading() {}
@ -153,8 +156,8 @@ public:
/// Inserts an entity to the storage. Returns ID of a new entry in the storage.
/// Throws an exception if the specified name already exists.
UUID insert(const AccessEntityPtr & entity);
std::optional<UUID> insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
bool insert(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
std::optional<UUID> insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id = nullptr);
bool insert(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id = nullptr);
std::vector<UUID> insert(const std::vector<AccessEntityPtr> & multiple_entities, bool replace_if_exists = false, bool throw_if_exists = true);
std::vector<UUID> insert(const std::vector<AccessEntityPtr> & multiple_entities, const std::vector<UUID> & ids, bool replace_if_exists = false, bool throw_if_exists = true);
@ -218,7 +221,7 @@ protected:
virtual std::vector<UUID> findAllImpl(AccessEntityType type) const = 0;
virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const = 0;
virtual std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const;
virtual bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
virtual bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id);
virtual bool removeImpl(const UUID & id, bool throw_if_not_exists);
virtual bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
virtual std::optional<AuthResult> authenticateImpl(
@ -240,6 +243,7 @@ protected:
LoggerPtr getLogger() const;
static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); }
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, LoggerPtr log_);
virtual bool acquireReplicatedRestore(RestorerFromBackup &) const { return false; }
[[noreturn]] void throwNotFound(const UUID & id) const;
[[noreturn]] void throwNotFound(AccessEntityType type, const String & name) const;
[[noreturn]] static void throwBadCast(const UUID & id, AccessEntityType type, const String & name, AccessEntityType required_type);

View File

@ -1,7 +1,5 @@
#include <Access/MemoryAccessStorage.h>
#include <Access/AccessChangesNotifier.h>
#include <Backups/RestorerFromBackup.h>
#include <Backups/RestoreSettings.h>
#include <base/scope_guard.h>
#include <boost/container/flat_set.hpp>
#include <boost/range/adaptor/map.hpp>
@ -63,14 +61,14 @@ AccessEntityPtr MemoryAccessStorage::readImpl(const UUID & id, bool throw_if_not
}
bool MemoryAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
bool MemoryAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
std::lock_guard lock{mutex};
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists);
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, conflicting_id);
}
bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
const String & name = new_entity->getName();
AccessEntityType type = new_entity->getType();
@ -86,9 +84,15 @@ bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr &
if (name_collision && !replace_if_exists)
{
if (throw_if_exists)
{
throwNameCollisionCannotInsert(type, name);
}
else
{
if (conflicting_id)
*conflicting_id = id_by_name;
return false;
}
}
auto it_by_id = entries_by_id.find(id);
@ -97,9 +101,15 @@ bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr &
{
const auto & existing_entry = it_by_id->second;
if (throw_if_exists)
{
throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getName());
}
else
{
if (conflicting_id)
*conflicting_id = id;
return false;
}
}
/// Remove collisions if necessary.
@ -270,28 +280,7 @@ void MemoryAccessStorage::setAll(const std::vector<std::pair<UUID, AccessEntityP
/// Insert or update entities.
for (const auto & [id, entity] : entities_without_conflicts)
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false);
}
void MemoryAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
auto entities = restorer.getAccessEntitiesToRestore();
if (entities.empty())
return;
auto create_access = restorer.getRestoreSettings().create_access;
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
{
for (const auto & [id, entity] : my_entities)
insert(id, entity, replace_if_exists, throw_if_exists);
});
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* conflicting_id = */ nullptr);
}
}

View File

@ -34,17 +34,16 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
void restoreFromBackup(RestorerFromBackup & restorer) override;
private:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id);
bool removeNoLock(const UUID & id, bool throw_if_not_exists);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);

View File

@ -353,7 +353,7 @@ void MultipleAccessStorage::reload(ReloadMode reload_mode)
}
bool MultipleAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
bool MultipleAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
std::shared_ptr<IAccessStorage> storage_for_insertion;
@ -376,7 +376,7 @@ bool MultipleAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr &
getStorageName());
}
if (storage_for_insertion->insert(id, entity, replace_if_exists, throw_if_exists))
if (storage_for_insertion->insert(id, entity, replace_if_exists, throw_if_exists, conflicting_id))
{
std::lock_guard lock{mutex};
ids_cache.set(id, storage_for_insertion);

View File

@ -67,7 +67,7 @@ protected:
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
std::optional<AuthResult> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;

View File

@ -24,7 +24,7 @@ std::vector<UUID> Quota::findDependencies() const
return to_roles.findDependencies();
}
void Quota::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
void Quota::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
to_roles.replaceDependencies(old_to_new_ids);
}

View File

@ -47,7 +47,7 @@ struct Quota : public IAccessEntity
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return true; }
};

View File

@ -5,10 +5,9 @@
#include <Access/AccessChangesNotifier.h>
#include <Access/AccessBackup.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/RestorerFromBackup.h>
#include <Backups/RestoreSettings.h>
#include <Backups/IBackupCoordination.h>
#include <Backups/IRestoreCoordination.h>
#include <Backups/RestorerFromBackup.h>
#include <IO/ReadHelpers.h>
#include <Interpreters/Context.h>
#include <Common/ZooKeeper/KeeperException.h>
@ -120,7 +119,7 @@ static void retryOnZooKeeperUserError(size_t attempts, Func && function)
}
}
bool ReplicatedAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
bool ReplicatedAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{
const AccessEntityTypeInfo type_info = AccessEntityTypeInfo::get(new_entity->getType());
const String & name = new_entity->getName();
@ -128,7 +127,7 @@ bool ReplicatedAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr
auto zookeeper = getZooKeeper();
bool ok = false;
retryOnZooKeeperUserError(10, [&]{ ok = insertZooKeeper(zookeeper, id, new_entity, replace_if_exists, throw_if_exists); });
retryOnZooKeeperUserError(10, [&]{ ok = insertZooKeeper(zookeeper, id, new_entity, replace_if_exists, throw_if_exists, conflicting_id); });
if (!ok)
return false;
@ -143,7 +142,8 @@ bool ReplicatedAccessStorage::insertZooKeeper(
const UUID & id,
const AccessEntityPtr & new_entity,
bool replace_if_exists,
bool throw_if_exists)
bool throw_if_exists,
UUID * conflicting_id)
{
const String & name = new_entity->getName();
const AccessEntityType type = new_entity->getType();
@ -167,27 +167,52 @@ bool ReplicatedAccessStorage::insertZooKeeper(
if (res == Coordination::Error::ZNODEEXISTS)
{
if (!throw_if_exists && !replace_if_exists)
return false; /// Couldn't insert a new entity.
if (throw_if_exists)
if (!replace_if_exists)
{
if (responses[0]->error == Coordination::Error::ZNODEEXISTS)
{
/// To fail with a nice error message, we need info about what already exists.
/// This itself could fail if the conflicting uuid disappears in the meantime.
/// If that happens, then we'll just retry from the start.
String existing_entity_definition = zookeeper->get(entity_path);
/// Couldn't insert the new entity because there is an existing entity with such UUID.
if (throw_if_exists)
{
/// To fail with a nice error message, we need info about what already exists.
/// This itself can fail if the conflicting uuid disappears in the meantime.
/// If that happens, then retryOnZooKeeperUserError() will just retry the operation from the start.
String existing_entity_definition = zookeeper->get(entity_path);
AccessEntityPtr existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path);
AccessEntityType existing_type = existing_entity->getType();
String existing_name = existing_entity->getName();
throwIDCollisionCannotInsert(id, type, name, existing_type, existing_name);
AccessEntityPtr existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path);
AccessEntityType existing_type = existing_entity->getType();
String existing_name = existing_entity->getName();
throwIDCollisionCannotInsert(id, type, name, existing_type, existing_name);
}
else
{
if (conflicting_id)
*conflicting_id = id;
return false;
}
}
else if (responses[1]->error == Coordination::Error::ZNODEEXISTS)
{
/// Couldn't insert the new entity because there is an existing entity with the same name.
if (throw_if_exists)
{
throwNameCollisionCannotInsert(type, name);
}
else
{
if (conflicting_id)
{
/// Get UUID of the existing entry with the same name.
/// This itself can fail if the conflicting name disappears in the meantime.
/// If that happens, then retryOnZooKeeperUserError() will just retry the operation from the start.
*conflicting_id = parseUUID(zookeeper->get(name_path));
}
return false;
}
}
else
{
/// Couldn't insert the new entity because there is an existing entity with such name.
throwNameCollisionCannotInsert(type, name);
zkutil::KeeperMultiException::check(res, ops, responses);
}
}
@ -693,28 +718,10 @@ void ReplicatedAccessStorage::backup(BackupEntriesCollector & backup_entries_col
}
void ReplicatedAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
bool ReplicatedAccessStorage::acquireReplicatedRestore(RestorerFromBackup & restorer) const
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
auto restore_coordination = restorer.getRestoreCoordination();
if (!restore_coordination->acquireReplicatedAccessStorage(zookeeper_path))
return;
auto entities = restorer.getAccessEntitiesToRestore();
if (entities.empty())
return;
auto create_access = restorer.getRestoreSettings().create_access;
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
{
for (const auto & [id, entity] : my_entities)
insert(id, entity, replace_if_exists, throw_if_exists);
});
return restore_coordination->acquireReplicatedAccessStorage(zookeeper_path);
}
}

View File

@ -26,6 +26,7 @@ public:
void shutdown() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
bool isReplicated() const override { return true; }
void startPeriodicReloading() override { startWatchingThread(); }
void stopPeriodicReloading() override { stopWatchingThread(); }
@ -35,7 +36,6 @@ public:
bool isBackupAllowed() const override { return backup_allowed; }
void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const override;
void restoreFromBackup(RestorerFromBackup & restorer) override;
private:
String zookeeper_path;
@ -48,11 +48,11 @@ private:
std::unique_ptr<ThreadFromGlobalPool> watching_thread;
std::shared_ptr<ConcurrentBoundedQueue<UUID>> watched_queue;
bool insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) override;
bool insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
bool insertZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
bool insertZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id);
bool removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, bool throw_if_not_exists);
bool updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
@ -80,6 +80,7 @@ private:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
bool acquireReplicatedRestore(RestorerFromBackup & restorer) const override;
mutable std::mutex mutex;
MemoryAccessStorage memory_storage TSA_GUARDED_BY(mutex);

View File

@ -21,7 +21,7 @@ std::vector<UUID> Role::findDependencies() const
return res;
}
void Role::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
void Role::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
granted_roles.replaceDependencies(old_to_new_ids);
settings.replaceDependencies(old_to_new_ids);

View File

@ -21,7 +21,7 @@ struct Role : public IAccessEntity
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return settings.isBackupAllowed(); }
};

View File

@ -63,7 +63,7 @@ std::vector<UUID> RowPolicy::findDependencies() const
return to_roles.findDependencies();
}
void RowPolicy::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
void RowPolicy::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
to_roles.replaceDependencies(old_to_new_ids);
}

View File

@ -50,7 +50,7 @@ struct RowPolicy : public IAccessEntity
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return true; }
/// Which roles or users should use this row policy.

View File

@ -21,7 +21,7 @@ std::vector<UUID> SettingsProfile::findDependencies() const
return res;
}
void SettingsProfile::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
void SettingsProfile::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
elements.replaceDependencies(old_to_new_ids);
to_roles.replaceDependencies(old_to_new_ids);

View File

@ -22,7 +22,7 @@ struct SettingsProfile : public IAccessEntity
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return elements.isBackupAllowed(); }
};

View File

@ -49,7 +49,7 @@ std::vector<UUID> User::findDependencies() const
return res;
}
void User::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
void User::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
default_roles.replaceDependencies(old_to_new_ids);
granted_roles.replaceDependencies(old_to_new_ids);

View File

@ -32,7 +32,7 @@ struct User : public IAccessEntity
void setName(const String & name_) override;
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return settings.isBackupAllowed(); }
};

View File

@ -3,370 +3,89 @@
#include <Parsers/FunctionSecretArgumentsFinder.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/IdentifierNode.h>
#include <Analyzer/ListNode.h>
#include <Common/KnownObjectNames.h>
#include <Core/QualifiedTableName.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB
{
class FunctionTreeNode : public AbstractFunction
{
public:
class ArgumentTreeNode : public Argument
{
public:
explicit ArgumentTreeNode(const IQueryTreeNode * argument_) : argument(argument_) {}
std::unique_ptr<AbstractFunction> getFunction() const override
{
if (const auto * f = argument->as<FunctionNode>())
return std::make_unique<FunctionTreeNode>(*f);
return nullptr;
}
bool isIdentifier() const override { return argument->as<IdentifierNode>(); }
bool tryGetString(String * res, bool allow_identifier) const override
{
if (const auto * literal = argument->as<ConstantNode>())
{
if (literal->getValue().getType() != Field::Types::String)
return false;
if (res)
*res = literal->getValue().safeGet<String>();
return true;
}
if (allow_identifier)
{
if (const auto * id = argument->as<IdentifierNode>())
{
if (res)
*res = id->getIdentifier().getFullName();
return true;
}
}
return false;
}
private:
const IQueryTreeNode * argument = nullptr;
};
class ArgumentsTreeNode : public Arguments
{
public:
explicit ArgumentsTreeNode(const QueryTreeNodes * arguments_) : arguments(arguments_) {}
size_t size() const override { return arguments ? arguments->size() : 0; }
std::unique_ptr<Argument> at(size_t n) const override { return std::make_unique<ArgumentTreeNode>(arguments->at(n).get()); }
private:
const QueryTreeNodes * arguments = nullptr;
};
explicit FunctionTreeNode(const FunctionNode & function_) : function(&function_)
{
if (const auto & nodes = function->getArguments().getNodes(); !nodes.empty())
arguments = std::make_unique<ArgumentsTreeNode>(&nodes);
}
String name() const override { return function->getFunctionName(); }
private:
const FunctionNode * function = nullptr;
};
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderTreeNode
class FunctionSecretArgumentsFinderTreeNode : public FunctionSecretArgumentsFinder
{
public:
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_) : function(function_), arguments(function.getArguments())
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_)
: FunctionSecretArgumentsFinder(std::make_unique<FunctionTreeNode>(function_))
{
if (arguments.getNodes().empty())
if (!function->hasArguments())
return;
findFunctionSecretArguments();
findOrdinaryFunctionSecretArguments();
}
struct Result
{
/// Result constructed by default means no arguments will be hidden.
size_t start = static_cast<size_t>(-1);
size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`).
/// In all known cases secret arguments are consecutive
bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments.
/// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))`
std::vector<std::string> nested_maps;
bool hasSecrets() const
{
return count != 0 || !nested_maps.empty();
}
};
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
private:
const FunctionNode & function;
const ListNode & arguments;
FunctionSecretArgumentsFinder::Result result;
void markSecretArgument(size_t index, bool argument_is_named = false)
{
if (index >= arguments.getNodes().size())
return;
if (!result.count)
{
result.start = index;
result.are_named = argument_is_named;
}
chassert(index >= result.start); /// We always check arguments consecutively
result.count = index + 1 - result.start;
if (!argument_is_named)
result.are_named = false;
}
void findFunctionSecretArguments()
{
const auto & name = function.getFunctionName();
if ((name == "mysql") || (name == "postgresql") || (name == "mongodb"))
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((name == "s3") || (name == "cosn") || (name == "oss") ||
(name == "deltaLake") || (name == "hudi") || (name == "iceberg"))
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (name == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((name == "remote") || (name == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
findRemoteFunctionSecretArguments();
}
else if ((name == "encrypt") || (name == "decrypt") ||
(name == "aes_encrypt_mysql") || (name == "aes_decrypt_mysql") ||
(name == "tryDecrypt"))
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
findEncryptionFunctionSecretArguments();
}
else if (name == "url")
{
findURLSecretArguments();
}
}
void findMySQLFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// mysql(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
markSecretArgument(4);
}
}
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
/// always be at the end). Marks "headers" as secret, if found.
size_t excludeS3OrURLNestedMaps()
{
const auto & nodes = arguments.getNodes();
size_t count = nodes.size();
while (count > 0)
{
const FunctionNode * f = nodes.at(count - 1)->as<FunctionNode>();
if (!f)
break;
if (f->getFunctionName() == "headers")
result.nested_maps.push_back(f->getFunctionName());
else if (f->getFunctionName() != "extra_credentials")
break;
count -= 1;
}
return count;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
excludeS3OrURLNestedMaps();
}
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
if (arg_idx >= arguments.getNodes().size())
return false;
return tryGetStringFromArgument(arguments.getNodes()[arg_idx], res, allow_identifier);
}
static bool tryGetStringFromArgument(const QueryTreeNodePtr argument, String * res, bool allow_identifier = true)
{
if (const auto * literal = argument->as<ConstantNode>())
{
if (literal->getValue().getType() != Field::Types::String)
return false;
if (res)
*res = literal->getValue().safeGet<String>();
return true;
}
if (allow_identifier)
{
if (const auto * id = argument->as<IdentifierNode>())
{
if (res)
*res = id->getIdentifier().getFullName();
return true;
}
}
return false;
}
void findRemoteFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// remote(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
return;
}
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in case of
/// remote('addresses_expr', db.table)
if (arguments.getNodes().size() < 3)
return;
size_t arg_num = 1;
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
const auto * table_function = arguments.getNodes()[arg_num]->as<FunctionNode>();
if (table_function && KnownTableFunctionNames::instance().exists(table_function->getFunctionName()))
{
++arg_num;
}
else
{
std::optional<String> database;
std::optional<QualifiedTableName> qualified_table_name;
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
{
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `user`.
markSecretArgument(arg_num + 2);
}
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `sharding_key`.
markSecretArgument(arg_num + 3);
}
return;
}
/// Skip the current argument (which is either a database name or a qualified table name).
++arg_num;
if (database)
{
/// Skip the 'table' argument if the previous argument was a database name.
++arg_num;
}
}
/// Skip username.
++arg_num;
/// Do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
if (can_be_password)
markSecretArgument(arg_num);
}
/// Tries to get either a database name or a qualified table name from an argument.
/// Empty string is also allowed (it means the default database).
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
bool tryGetDatabaseNameOrQualifiedTableName(
size_t arg_idx,
std::optional<String> & res_database,
std::optional<QualifiedTableName> & res_qualified_table_name) const
{
res_database.reset();
res_qualified_table_name.reset();
String str;
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
return false;
if (str.empty())
{
res_database = "";
return true;
}
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
if (!qualified_table_name)
return false;
if (qualified_table_name->database.empty())
res_database = std::move(qualified_table_name->table);
else
res_qualified_table_name = std::move(qualified_table_name);
return true;
}
void findEncryptionFunctionSecretArguments()
{
if (arguments.getNodes().empty())
return;
/// We replace all arguments after 'mode' with '[HIDDEN]':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
result.start = 1;
result.count = arguments.getNodes().size() - 1;
}
/// Whether a specified argument can be the name of a named collection?
bool isNamedCollectionName(size_t arg_idx) const
{
if (arguments.getNodes().size() <= arg_idx)
return false;
const auto * identifier = arguments.getNodes()[arg_idx]->as<IdentifierNode>();
return identifier != nullptr;
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < arguments.getNodes().size(); ++i)
{
const auto & argument = arguments.getNodes()[i];
const auto * equals_func = argument->as<FunctionNode>();
if (!equals_func || (equals_func->getFunctionName() != "equals"))
continue;
const auto * expr_list = equals_func->getArguments().as<ListNode>();
if (!expr_list)
continue;
const auto & equal_args = expr_list->getNodes();
if (equal_args.size() != 2)
continue;
String found_key;
if (!tryGetStringFromArgument(equal_args[0], &found_key))
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
}
}
};
}

View File

@ -455,6 +455,9 @@ void Connection::sendAddendum()
writeStringBinary(proto_recv_chunked, *out);
}
if (server_revision >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
writeVarUInt(DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION, *out);
out->next();
}
@ -525,6 +528,8 @@ void Connection::receiveHello(const Poco::Timespan & handshake_timeout)
readVarUInt(server_version_major, *in);
readVarUInt(server_version_minor, *in);
readVarUInt(server_revision, *in);
if (server_revision >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
readVarUInt(server_parallel_replicas_protocol_version, *in);
if (server_revision >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE)
readStringBinary(server_timezone, *in);
if (server_revision >= DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME)
@ -959,7 +964,7 @@ void Connection::sendReadTaskResponse(const String & response)
void Connection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & response)
{
writeVarUInt(Protocol::Client::MergeTreeReadTaskResponse, *out);
response.serialize(*out);
response.serialize(*out, server_parallel_replicas_protocol_version);
out->finishChunk();
out->next();
}
@ -1413,7 +1418,7 @@ ParallelReadRequest Connection::receiveParallelReadRequest() const
InitialAllRangesAnnouncement Connection::receiveInitialParallelReadAnnouncement() const
{
return InitialAllRangesAnnouncement::deserialize(*in);
return InitialAllRangesAnnouncement::deserialize(*in, server_parallel_replicas_protocol_version);
}

View File

@ -210,6 +210,7 @@ private:
UInt64 server_version_minor = 0;
UInt64 server_version_patch = 0;
UInt64 server_revision = 0;
UInt64 server_parallel_replicas_protocol_version = 0;
String server_timezone;
String server_display_name;

View File

@ -64,6 +64,7 @@ static struct InitFiu
REGULAR(lazy_pipe_fds_fail_close) \
PAUSEABLE(infinite_sleep) \
PAUSEABLE(stop_moving_part_before_swap_with_active) \
REGULAR(slowdown_index_analysis) \
namespace FailPoints

View File

@ -376,6 +376,7 @@ The server successfully detected this situation and will download merged part fr
M(ParallelReplicasReadAssignedMarks, "Sum across all replicas of how many of scheduled marks were assigned by consistent hash") \
M(ParallelReplicasReadUnassignedMarks, "Sum across all replicas of how many unassigned marks were scheduled") \
M(ParallelReplicasReadAssignedForStealingMarks, "Sum across all replicas of how many of scheduled marks were assigned for stealing by consistent hash") \
M(ParallelReplicasReadMarks, "How many marks were read by the given replica") \
\
M(ParallelReplicasStealingByHashMicroseconds, "Time spent collecting segments meant for stealing by hash") \
M(ParallelReplicasProcessingPartsMicroseconds, "Time spent processing data parts") \
@ -529,6 +530,7 @@ The server successfully detected this situation and will download merged part fr
M(CachedReadBufferReadFromCacheMicroseconds, "Time reading from filesystem cache") \
M(CachedReadBufferReadFromSourceBytes, "Bytes read from filesystem cache source (from remote fs, etc)") \
M(CachedReadBufferReadFromCacheBytes, "Bytes read from filesystem cache") \
M(CachedReadBufferPredownloadedBytes, "Bytes read from filesystem cache source. Cache segments are read from left to right as a whole, it might be that we need to predownload some part of the segment irrelevant for the current task just to get to the needed data") \
M(CachedReadBufferCacheWriteBytes, "Bytes written from source (remote fs, etc) to filesystem cache") \
M(CachedReadBufferCacheWriteMicroseconds, "Time spent writing data into filesystem cache") \
M(CachedReadBufferCreateBufferMicroseconds, "Prepare buffer time") \

View File

@ -1,4 +1,4 @@
clickhouse_add_executable(integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp)
clickhouse_add_executable(integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp orc_string_dictionary.cpp)
target_link_libraries (integer_hash_tables_and_hashes PRIVATE
ch_contrib::gbenchmark_all
dbms
@ -7,3 +7,8 @@ target_link_libraries (integer_hash_tables_and_hashes PRIVATE
ch_contrib::wyhash
ch_contrib::farmhash
ch_contrib::xxHash)
clickhouse_add_executable(orc_string_dictionary orc_string_dictionary.cpp)
target_link_libraries (orc_string_dictionary PRIVATE
ch_contrib::gbenchmark_all
dbms)

View File

@ -0,0 +1,311 @@
#include <cstdlib>
#include <base/defines.h>
#include <benchmark/benchmark.h>
class OldSortedStringDictionary
{
public:
struct DictEntry
{
DictEntry(const char * str, size_t len) : data(str), length(len) { }
const char * data;
size_t length;
};
OldSortedStringDictionary() : totalLength(0) { }
// insert a new string into dictionary, return its insertion order
size_t insert(const char * str, size_t len);
// reorder input index buffer from insertion order to dictionary order
void reorder(std::vector<int64_t> & idxBuffer) const;
// get dict entries in insertion order
void getEntriesInInsertionOrder(std::vector<const DictEntry *> &) const;
size_t size() const;
// return total length of strings in the dictionary
uint64_t length() const;
void clear();
// store indexes of insertion order in the dictionary for not-null rows
std::vector<int64_t> idxInDictBuffer;
private:
struct LessThan
{
bool operator()(const DictEntry & left, const DictEntry & right) const
{
int ret = memcmp(left.data, right.data, std::min(left.length, right.length));
if (ret != 0)
{
return ret < 0;
}
return left.length < right.length;
}
};
std::map<DictEntry, size_t, LessThan> dict;
std::vector<std::vector<char>> data;
uint64_t totalLength;
};
// insert a new string into dictionary, return its insertion order
size_t OldSortedStringDictionary::insert(const char * str, size_t len)
{
auto ret = dict.insert({DictEntry(str, len), dict.size()});
if (ret.second)
{
// make a copy to internal storage
data.push_back(std::vector<char>(len));
memcpy(data.back().data(), str, len);
// update dictionary entry to link pointer to internal storage
DictEntry * entry = const_cast<DictEntry *>(&(ret.first->first));
entry->data = data.back().data();
totalLength += len;
}
return ret.first->second;
}
/**
* Reorder input index buffer from insertion order to dictionary order
*
* We require this function because string values are buffered by indexes
* in their insertion order. Until the entire dictionary is complete can
* we get their sorted indexes in the dictionary in that ORC specification
* demands dictionary should be ordered. Therefore this function transforms
* the indexes from insertion order to dictionary value order for final
* output.
*/
void OldSortedStringDictionary::reorder(std::vector<int64_t> & idxBuffer) const
{
// iterate the dictionary to get mapping from insertion order to value order
std::vector<size_t> mapping(dict.size());
size_t dictIdx = 0;
for (auto it = dict.cbegin(); it != dict.cend(); ++it)
{
mapping[it->second] = dictIdx++;
}
// do the transformation
for (size_t i = 0; i != idxBuffer.size(); ++i)
{
idxBuffer[i] = static_cast<int64_t>(mapping[static_cast<size_t>(idxBuffer[i])]);
}
}
// get dict entries in insertion order
void OldSortedStringDictionary::getEntriesInInsertionOrder(std::vector<const DictEntry *> & entries) const
{
entries.resize(dict.size());
for (auto it = dict.cbegin(); it != dict.cend(); ++it)
{
entries[it->second] = &(it->first);
}
}
// return count of entries
size_t OldSortedStringDictionary::size() const
{
return dict.size();
}
// return total length of strings in the dictionary
uint64_t OldSortedStringDictionary::length() const
{
return totalLength;
}
void OldSortedStringDictionary::clear()
{
totalLength = 0;
data.clear();
dict.clear();
}
/**
* Implementation of increasing sorted string dictionary
*/
class NewSortedStringDictionary
{
public:
struct DictEntry
{
DictEntry(const char * str, size_t len) : data(str), length(len) { }
const char * data;
size_t length;
};
struct DictEntryWithIndex
{
DictEntryWithIndex(const char * str, size_t len, size_t index_) : entry(str, len), index(index_) { }
DictEntry entry;
size_t index;
};
NewSortedStringDictionary() : totalLength_(0) { }
// insert a new string into dictionary, return its insertion order
size_t insert(const char * str, size_t len);
// reorder input index buffer from insertion order to dictionary order
void reorder(std::vector<int64_t> & idxBuffer) const;
// get dict entries in insertion order
void getEntriesInInsertionOrder(std::vector<const DictEntry *> &) const;
// return count of entries
size_t size() const;
// return total length of strings in the dictionary
uint64_t length() const;
void clear();
// store indexes of insertion order in the dictionary for not-null rows
std::vector<int64_t> idxInDictBuffer;
private:
struct LessThan
{
bool operator()(const DictEntryWithIndex & l, const DictEntryWithIndex & r)
{
const auto & left = l.entry;
const auto & right = r.entry;
int ret = memcmp(left.data, right.data, std::min(left.length, right.length));
if (ret != 0)
{
return ret < 0;
}
return left.length < right.length;
}
};
mutable std::vector<DictEntryWithIndex> flatDict_;
std::unordered_map<std::string, size_t> keyToIndex;
uint64_t totalLength_;
};
// insert a new string into dictionary, return its insertion order
size_t NewSortedStringDictionary::insert(const char * str, size_t len)
{
size_t index = flatDict_.size();
auto ret = keyToIndex.emplace(std::string(str, len), index);
if (ret.second)
{
flatDict_.emplace_back(ret.first->first.data(), ret.first->first.size(), index);
totalLength_ += len;
}
return ret.first->second;
}
/**
* Reorder input index buffer from insertion order to dictionary order
*
* We require this function because string values are buffered by indexes
* in their insertion order. Until the entire dictionary is complete can
* we get their sorted indexes in the dictionary in that ORC specification
* demands dictionary should be ordered. Therefore this function transforms
* the indexes from insertion order to dictionary value order for final
* output.
*/
void NewSortedStringDictionary::reorder(std::vector<int64_t> & idxBuffer) const
{
// iterate the dictionary to get mapping from insertion order to value order
std::vector<size_t> mapping(flatDict_.size());
for (size_t i = 0; i < flatDict_.size(); ++i)
{
mapping[flatDict_[i].index] = i;
}
// do the transformation
for (size_t i = 0; i != idxBuffer.size(); ++i)
{
idxBuffer[i] = static_cast<int64_t>(mapping[static_cast<size_t>(idxBuffer[i])]);
}
}
// get dict entries in insertion order
void NewSortedStringDictionary::getEntriesInInsertionOrder(std::vector<const DictEntry *> & entries) const
{
std::sort(
flatDict_.begin(),
flatDict_.end(),
[](const DictEntryWithIndex & left, const DictEntryWithIndex & right) { return left.index < right.index; });
entries.resize(flatDict_.size());
for (size_t i = 0; i < flatDict_.size(); ++i)
{
entries[i] = &(flatDict_[i].entry);
}
}
// return count of entries
size_t NewSortedStringDictionary::size() const
{
return flatDict_.size();
}
// return total length of strings in the dictionary
uint64_t NewSortedStringDictionary::length() const
{
return totalLength_;
}
void NewSortedStringDictionary::clear()
{
totalLength_ = 0;
keyToIndex.clear();
flatDict_.clear();
}
template <size_t cardinality>
static std::vector<std::string> mockStrings()
{
std::vector<std::string> res(1000000);
for (auto & s : res)
{
s = "test string dictionary " + std::to_string(rand() % cardinality);
}
return res;
}
template <typename DictionaryImpl>
static NO_INLINE std::unique_ptr<DictionaryImpl> createAndWriteStringDictionary(const std::vector<std::string> & strs)
{
auto dict = std::make_unique<DictionaryImpl>();
for (const auto & str : strs)
{
auto index = dict->insert(str.data(), str.size());
dict->idxInDictBuffer.push_back(index);
}
dict->reorder(dict->idxInDictBuffer);
return dict;
}
template <typename DictionaryImpl, size_t cardinality>
static void BM_writeStringDictionary(benchmark::State & state)
{
auto strs = mockStrings<cardinality>();
for (auto _ : state)
{
auto dict = createAndWriteStringDictionary<DictionaryImpl>(strs);
benchmark::DoNotOptimize(dict);
}
}
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 10);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 10);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 100);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 100);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 1000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 1000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 10000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 10000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 100000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 100000);

View File

@ -28,6 +28,16 @@
#include <Common/getMultipleKeysFromConfig.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#if USE_SSL
# include <Server/CertificateReloader.h>
# include <openssl/ssl.h>
# include <Poco/Crypto/EVPPKey.h>
# include <Poco/Net/Context.h>
# include <Poco/Net/SSLManager.h>
# include <Poco/Net/Utility.h>
# include <Poco/StringTokenizer.h>
#endif
#include <chrono>
#include <mutex>
#include <string>
@ -48,6 +58,7 @@ namespace ErrorCodes
extern const int SUPPORT_IS_DISABLED;
extern const int LOGICAL_ERROR;
extern const int INVALID_CONFIG_PARAMETER;
extern const int BAD_ARGUMENTS;
}
using namespace std::chrono_literals;
@ -56,6 +67,16 @@ namespace
{
#if USE_SSL
int callSetCertificate(SSL * ssl, void * arg)
{
if (!arg)
return -1;
const CertificateReloader::Data * data = reinterpret_cast<CertificateReloader::Data *>(arg);
return setCertificateCallback(ssl, data, getLogger("SSLContext"));
}
void setSSLParams(nuraft::asio_service::options & asio_opts)
{
const Poco::Util::LayeredConfiguration & config = Poco::Util::Application::instance().config();
@ -69,18 +90,55 @@ void setSSLParams(nuraft::asio_service::options & asio_opts)
if (!config.has(private_key_file_property))
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Server private key file is not set.");
asio_opts.enable_ssl_ = true;
asio_opts.server_cert_file_ = config.getString(certificate_file_property);
asio_opts.server_key_file_ = config.getString(private_key_file_property);
Poco::Net::Context::Params params;
params.certificateFile = config.getString(certificate_file_property);
if (params.certificateFile.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Server certificate file in config '{}' is empty", certificate_file_property);
params.privateKeyFile = config.getString(private_key_file_property);
if (params.privateKeyFile.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Server key file in config '{}' is empty", private_key_file_property);
auto pass_phrase = config.getString("openSSL.server.privateKeyPassphraseHandler.options.password", "");
auto certificate_data = std::make_shared<CertificateReloader::Data>(params.certificateFile, params.privateKeyFile, pass_phrase);
if (config.has(root_ca_file_property))
asio_opts.root_cert_file_ = config.getString(root_ca_file_property);
params.caLocation = config.getString(root_ca_file_property);
if (config.getBool("openSSL.server.loadDefaultCAFile", false))
asio_opts.load_default_ca_file_ = true;
params.loadDefaultCAs = config.getBool("openSSL.server.loadDefaultCAFile", false);
params.verificationMode = Poco::Net::Utility::convertVerificationMode(config.getString("openSSL.server.verificationMode", "none"));
if (config.getString("openSSL.server.verificationMode", "none") == "none")
asio_opts.skip_verification_ = true;
std::string disabled_protocols_list = config.getString("openSSL.server.disableProtocols", "");
Poco::StringTokenizer dp_tok(disabled_protocols_list, ";,", Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY);
int disabled_protocols = 0;
for (const auto & token : dp_tok)
{
if (token == "sslv2")
disabled_protocols |= Poco::Net::Context::PROTO_SSLV2;
else if (token == "sslv3")
disabled_protocols |= Poco::Net::Context::PROTO_SSLV3;
else if (token == "tlsv1")
disabled_protocols |= Poco::Net::Context::PROTO_TLSV1;
else if (token == "tlsv1_1")
disabled_protocols |= Poco::Net::Context::PROTO_TLSV1_1;
else if (token == "tlsv1_2")
disabled_protocols |= Poco::Net::Context::PROTO_TLSV1_2;
}
asio_opts.ssl_context_provider_server_ = [params, certificate_data, disabled_protocols]
{
Poco::Net::Context context(Poco::Net::Context::Usage::TLSV1_2_SERVER_USE, params);
context.disableProtocols(disabled_protocols);
SSL_CTX * ssl_ctx = context.takeSslContext();
SSL_CTX_set_cert_cb(ssl_ctx, callSetCertificate, reinterpret_cast<void *>(certificate_data.get()));
return ssl_ctx;
};
asio_opts.ssl_context_provider_client_ = [ctx_params = std::move(params)]
{
Poco::Net::Context context(Poco::Net::Context::Usage::TLSV1_2_CLIENT_USE, ctx_params);
return context.takeSslContext();
};
}
#endif

View File

@ -1750,7 +1750,8 @@ class ToDeleteTreeCollector
uint32_t nodes_observed = 1; /// root node
std::list<KeeperStorageBase::Delta> deltas;
using UncommittedChildren = std::unordered_set<std::string_view, StringHashForHeterogeneousLookup, StringHashForHeterogeneousLookup::transparent_key_equal>;
using UncommittedChildren
= std::unordered_set<std::string_view, StringHashForHeterogeneousLookup, StringHashForHeterogeneousLookup::transparent_key_equal>;
public:
enum class CollectStatus

View File

@ -573,13 +573,15 @@ public:
struct PathCmp
{
using is_transparent = std::true_type;
auto operator()(const std::string_view a,
const std::string_view b) const
{
return a.size() < b.size() || (a.size() == b.size() && a < b);
size_t level_a = std::count(a.begin(), a.end(), '/');
size_t level_b = std::count(b.begin(), b.end(), '/');
return level_a < level_b || (level_a == level_b && a < b);
}
using is_transparent = void; // required to make find() work with different type than key_type
};
Ephemerals ephemerals;

View File

@ -3739,6 +3739,72 @@ TYPED_TEST(CoordinationTest, TestRemoveRecursiveInMultiRequest)
ASSERT_FALSE(exists("/A/B"));
ASSERT_FALSE(exists("/A/B/D"));
}
{
SCOPED_TRACE("Recursive Remove For Subtree With Updated Node");
int create_zxid = ++zxid;
auto ops = prepare_create_tree();
/// First create nodes
const auto create_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
storage.preprocessRequest(create_request, 1, 0, create_zxid);
auto create_responses = storage.processRequest(create_request, 1, create_zxid);
ASSERT_EQ(create_responses.size(), 1);
ASSERT_TRUE(is_multi_ok(create_responses[0].response));
/// Small limit
int remove_zxid = ++zxid;
ops = {
zkutil::makeSetRequest("/A/B", "", -1),
zkutil::makeRemoveRecursiveRequest("/A", 3),
};
auto remove_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
storage.preprocessRequest(remove_request, 1, 0, remove_zxid);
auto remove_responses = storage.processRequest(remove_request, 1, remove_zxid);
ASSERT_EQ(remove_responses.size(), 1);
ASSERT_FALSE(is_multi_ok(remove_responses[0].response));
/// Big limit
remove_zxid = ++zxid;
ops[1] = zkutil::makeRemoveRecursiveRequest("/A", 4);
remove_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
storage.preprocessRequest(remove_request, 1, 0, remove_zxid);
remove_responses = storage.processRequest(remove_request, 1, remove_zxid);
ASSERT_EQ(remove_responses.size(), 1);
ASSERT_TRUE(is_multi_ok(remove_responses[0].response));
ASSERT_FALSE(exists("/A"));
ASSERT_FALSE(exists("/A/C"));
ASSERT_FALSE(exists("/A/B"));
ASSERT_FALSE(exists("/A/B/D"));
}
{
SCOPED_TRACE("[BUG] Recursive Remove Level Sorting");
int new_zxid = ++zxid;
Coordination::Requests ops = {
zkutil::makeCreateRequest("/a", "", zkutil::CreateMode::Persistent),
zkutil::makeCreateRequest("/a/bbbbbb", "", zkutil::CreateMode::Persistent),
zkutil::makeCreateRequest("/A", "", zkutil::CreateMode::Persistent),
zkutil::makeCreateRequest("/A/B", "", zkutil::CreateMode::Persistent),
zkutil::makeCreateRequest("/A/CCCCCCCCCCCC", "", zkutil::CreateMode::Persistent),
zkutil::makeRemoveRecursiveRequest("/A", 3),
};
auto remove_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
storage.preprocessRequest(remove_request, 1, 0, new_zxid);
auto remove_responses = storage.processRequest(remove_request, 1, new_zxid);
ASSERT_EQ(remove_responses.size(), 1);
ASSERT_TRUE(is_multi_ok(remove_responses[0].response));
ASSERT_TRUE(exists("/a"));
ASSERT_TRUE(exists("/a/bbbbbb"));
ASSERT_FALSE(exists("/A"));
ASSERT_FALSE(exists("/A/B"));
ASSERT_FALSE(exists("/A/CCCCCCCCCCCC"));
}
}
TYPED_TEST(CoordinationTest, TestRemoveRecursiveWatches)

View File

@ -33,7 +33,8 @@ static constexpr auto DBMS_MIN_REVISION_WITH_AGGREGATE_FUNCTIONS_VERSIONING = 54
static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION = 1;
static constexpr auto DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION = 3;
static constexpr auto DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION = 3;
static constexpr auto DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION = 4;
static constexpr auto DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS = 54453;
static constexpr auto DBMS_MERGE_TREE_PART_INFO_VERSION = 1;
@ -86,6 +87,8 @@ static constexpr auto DBMS_MIN_REVISION_WITH_ROWS_BEFORE_AGGREGATION = 54469;
/// Packets size header
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS = 54470;
static constexpr auto DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL = 54471;
/// Version of ClickHouse TCP protocol.
///
/// Should be incremented manually on protocol changes.
@ -93,6 +96,6 @@ static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS = 54470;
/// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION,
/// later is just a number for server version (one number instead of commit SHA)
/// for simplicity (sometimes it may be more convenient in some use cases).
static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54470;
static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54471;
}

View File

@ -946,7 +946,7 @@ class IColumn;
M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \
M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \
M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \
M(UInt64, parallel_replicas_mark_segment_size, 0, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing. Value should be in range [128; 16384]", 0) \
M(Bool, allow_archive_path_syntax, true, "File/S3 engines/table function will parse paths with '::' as '<archive> :: <file>' if archive has correct extension", 0) \
M(Bool, parallel_replicas_local_plan, false, "Build local plan for local replica", 0) \
\
@ -1271,6 +1271,7 @@ class IColumn;
M(Bool, output_format_orc_string_as_string, true, "Use ORC String type instead of Binary for String columns", 0) \
M(ORCCompression, output_format_orc_compression_method, "zstd", "Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \
M(UInt64, output_format_orc_row_index_stride, 10'000, "Target row index stride in ORC output format", 0) \
M(Double, output_format_orc_dictionary_key_size_threshold, 0.0, "For a string column in ORC output format, if the number of distinct values is greater than this fraction of the total number of non-null rows, turn off dictionary encoding. Otherwise dictionary encoding is enabled", 0) \
\
M(CapnProtoEnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::CapnProtoEnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \
\

View File

@ -71,6 +71,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
},
{"24.9",
{
{"output_format_orc_dictionary_key_size_threshold", 0.0, 0.0, "For a string column in ORC output format, if the number of distinct values is greater than this fraction of the total number of non-null rows, turn off dictionary encoding. Otherwise dictionary encoding is enabled"},
{"input_format_json_empty_as_default", false, false, "Added new setting to allow to treat empty fields in JSON input as default values."},
{"input_format_try_infer_variants", false, false, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"},
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
@ -78,6 +79,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
{"output_format_always_quote_identifiers", false, false, "New setting."},
{"output_format_identifier_quoting_style", "Backticks", "Backticks", "New setting."},
{"parallel_replicas_mark_segment_size", 128, 0, "Value for this setting now determined automatically"},
{"database_replicated_allow_replicated_engine_arguments", 1, 0, "Don't allow explicit arguments by default"},
{"database_replicated_allow_explicit_uuid", 0, 0, "Added a new setting to disallow explicitly specifying table UUID"},
{"parallel_replicas_local_plan", false, false, "Use local plan for local replica in a query with parallel replicas"},

View File

@ -59,35 +59,27 @@ void cckMetadataPathForOrdinary(const ASTCreateQuery & create, const String & me
}
/// validate validates the database engine that's specified in the create query for
/// engine arguments, settings and table overrides.
void validate(const ASTCreateQuery & create_query)
void DatabaseFactory::validate(const ASTCreateQuery & create_query) const
{
auto * storage = create_query.storage;
/// Check engine may have arguments
static const std::unordered_set<std::string_view> engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL",
"Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3", "HDFS"};
const String & engine_name = storage->engine->name;
bool engine_may_have_arguments = engines_with_arguments.contains(engine_name);
const EngineFeatures & engine_features = database_engines.at(engine_name).features;
if (storage->engine->arguments && !engine_may_have_arguments)
/// Check engine may have arguments
if (storage->engine->arguments && !engine_features.supports_arguments)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have arguments", engine_name);
/// Check engine may have settings
bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated" || engine_name == "MaterializedPostgreSQL";
bool has_unexpected_element = storage->engine->parameters || storage->partition_by ||
storage->primary_key || storage->order_by ||
storage->sample_by;
if (has_unexpected_element || (!may_have_settings && storage->settings))
if (has_unexpected_element || (!engine_features.supports_settings && storage->settings))
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_AST,
"Database engine `{}` cannot have parameters, primary_key, order_by, sample_by, settings", engine_name);
/// Check engine with table overrides
static const std::unordered_set<std::string_view> engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"};
if (create_query.table_overrides && !engines_with_table_overrides.contains(engine_name))
if (create_query.table_overrides && !engine_features.supports_table_overrides)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have table overrides", engine_name);
}
@ -121,9 +113,9 @@ DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & m
return impl;
}
void DatabaseFactory::registerDatabase(const std::string & name, CreatorFn creator_fn)
void DatabaseFactory::registerDatabase(const std::string & name, CreatorFn creator_fn, EngineFeatures features)
{
if (!database_engines.emplace(name, std::move(creator_fn)).second)
if (!database_engines.emplace(name, Creator{std::move(creator_fn), features}).second)
throw Exception(ErrorCodes::LOGICAL_ERROR, "DatabaseFactory: the database engine name '{}' is not unique", name);
}
@ -154,7 +146,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
.context = context};
// creator_fn creates and returns a DatabasePtr with the supplied arguments
auto creator_fn = database_engines.at(engine_name);
auto creator_fn = database_engines.at(engine_name).creator_fn;
return creator_fn(arguments);
}

View File

@ -43,13 +43,30 @@ public:
ContextPtr & context;
};
DatabasePtr get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
struct EngineFeatures
{
bool supports_arguments = false;
bool supports_settings = false;
bool supports_table_overrides = false;
};
using CreatorFn = std::function<DatabasePtr(const Arguments & arguments)>;
using DatabaseEngines = std::unordered_map<std::string, CreatorFn>;
struct Creator
{
CreatorFn creator_fn;
EngineFeatures features;
};
void registerDatabase(const std::string & name, CreatorFn creator_fn);
DatabasePtr get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
using DatabaseEngines = std::unordered_map<std::string, Creator>;
void registerDatabase(const std::string & name, CreatorFn creator_fn, EngineFeatures features = EngineFeatures{
.supports_arguments = false,
.supports_settings = false,
.supports_table_overrides = false,
});
const DatabaseEngines & getDatabaseEngines() const { return database_engines; }
@ -65,6 +82,10 @@ private:
DatabaseEngines database_engines;
DatabasePtr getImpl(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
/// validate validates the database engine that's specified in the create query for
/// engine arguments, settings and table overrides.
void validate(const ASTCreateQuery & create_query) const;
};
}

View File

@ -257,6 +257,6 @@ void registerDatabaseFilesystem(DatabaseFactory & factory)
return std::make_shared<DatabaseFilesystem>(args.database_name, init_path, args.context);
};
factory.registerDatabase("Filesystem", create_fn);
factory.registerDatabase("Filesystem", create_fn, {.supports_arguments = true});
}
}

View File

@ -253,7 +253,7 @@ void registerDatabaseHDFS(DatabaseFactory & factory)
return std::make_shared<DatabaseHDFS>(args.database_name, source_url, args.context);
};
factory.registerDatabase("HDFS", create_fn);
factory.registerDatabase("HDFS", create_fn, {.supports_arguments = true});
}
} // DB

View File

@ -398,6 +398,6 @@ void registerDatabaseLazy(DatabaseFactory & factory)
cache_expiration_time_seconds,
args.context);
};
factory.registerDatabase("Lazy", create_fn);
factory.registerDatabase("Lazy", create_fn, {.supports_arguments = true});
}
}

View File

@ -2001,6 +2001,6 @@ void registerDatabaseReplicated(DatabaseFactory & factory)
replica_name,
std::move(database_replicated_settings), args.context);
};
factory.registerDatabase("Replicated", create_fn);
factory.registerDatabase("Replicated", create_fn, {.supports_arguments = true, .supports_settings = true});
}
}

View File

@ -326,7 +326,7 @@ void registerDatabaseS3(DatabaseFactory & factory)
return std::make_shared<DatabaseS3>(args.database_name, config, args.context);
};
factory.registerDatabase("S3", create_fn);
factory.registerDatabase("S3", create_fn, {.supports_arguments = true});
}
}
#endif

View File

@ -290,8 +290,14 @@ void registerDatabaseMaterializedMySQL(DatabaseFactory & factory)
binlog_client,
std::move(materialize_mode_settings));
};
factory.registerDatabase("MaterializeMySQL", create_fn);
factory.registerDatabase("MaterializedMySQL", create_fn);
DatabaseFactory::EngineFeatures features{
.supports_arguments = true,
.supports_settings = true,
.supports_table_overrides = true,
};
factory.registerDatabase("MaterializeMySQL", create_fn, features);
factory.registerDatabase("MaterializedMySQL", create_fn, features);
}
}

View File

@ -584,7 +584,7 @@ void registerDatabaseMySQL(DatabaseFactory & factory)
throw Exception(ErrorCodes::CANNOT_CREATE_DATABASE, "Cannot create MySQL database, because {}", exception_message);
}
};
factory.registerDatabase("MySQL", create_fn);
factory.registerDatabase("MySQL", create_fn, {.supports_arguments = true, .supports_settings = true});
}
}

View File

@ -546,7 +546,11 @@ void registerDatabaseMaterializedPostgreSQL(DatabaseFactory & factory)
args.database_name, configuration.database, connection_info,
std::move(postgresql_replica_settings));
};
factory.registerDatabase("MaterializedPostgreSQL", create_fn);
factory.registerDatabase("MaterializedPostgreSQL", create_fn, {
.supports_arguments = true,
.supports_settings = true,
.supports_table_overrides = true,
});
}
}

View File

@ -558,7 +558,7 @@ void registerDatabasePostgreSQL(DatabaseFactory & factory)
pool,
use_table_cache);
};
factory.registerDatabase("PostgreSQL", create_fn);
factory.registerDatabase("PostgreSQL", create_fn, {.supports_arguments = true});
}
}

View File

@ -13,6 +13,7 @@
#include <DataTypes/DataTypeDateTime64.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <boost/algorithm/string/join.hpp>
#include <Common/quoteString.h>
#include <Core/PostgreSQL/Utils.h>
#include <base/FnTraits.h>
@ -292,7 +293,7 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList(
template<typename T>
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
T & tx, const String & postgres_table, const String & postgres_schema, bool use_nulls, bool with_primary_key, bool with_replica_identity_index)
T & tx, const String & postgres_table, const String & postgres_schema, bool use_nulls, bool with_primary_key, bool with_replica_identity_index, const Strings & columns)
{
PostgreSQLTableStructure table;
@ -302,6 +303,10 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
? " AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'public')"
: fmt::format(" AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = {})", quoteString(postgres_schema));
std::string columns_part;
if (!columns.empty())
columns_part = fmt::format(" AND attname IN ('{}')", boost::algorithm::join(columns, "','"));
std::string query = fmt::format(
"SELECT attname AS name, " /// column name
"format_type(atttypid, atttypmod) AS type, " /// data type
@ -312,9 +317,9 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
"attnum as att_num, "
"attgenerated as generated " /// if column has GENERATED
"FROM pg_attribute "
"WHERE attrelid = (SELECT oid FROM pg_class WHERE {}) "
"WHERE attrelid = (SELECT oid FROM pg_class WHERE {}) {}"
"AND NOT attisdropped AND attnum > 0 "
"ORDER BY attnum ASC", where);
"ORDER BY attnum ASC", where, columns_part);
auto postgres_table_with_schema = postgres_schema.empty() ? postgres_table : doubleQuoteString(postgres_schema) + '.' + doubleQuoteString(postgres_table);
table.physical_columns = readNamesAndTypesList(tx, postgres_table_with_schema, query, use_nulls, false);
@ -415,7 +420,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
PostgreSQLTableStructure fetchPostgreSQLTableStructure(pqxx::connection & connection, const String & postgres_table, const String & postgres_schema, bool use_nulls)
{
pqxx::ReadTransaction tx(connection);
auto result = fetchPostgreSQLTableStructure(tx, postgres_table, postgres_schema, use_nulls, false, false);
auto result = fetchPostgreSQLTableStructure(tx, postgres_table, postgres_schema, use_nulls, false, false, {});
tx.commit();
return result;
}
@ -433,17 +438,17 @@ std::set<String> fetchPostgreSQLTablesList(pqxx::connection & connection, const
template
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
pqxx::ReadTransaction & tx, const String & postgres_table, const String & postgres_schema,
bool use_nulls, bool with_primary_key, bool with_replica_identity_index);
bool use_nulls, bool with_primary_key, bool with_replica_identity_index, const Strings & columns);
template
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
pqxx::ReplicationTransaction & tx, const String & postgres_table, const String & postgres_schema,
bool use_nulls, bool with_primary_key, bool with_replica_identity_index);
bool use_nulls, bool with_primary_key, bool with_replica_identity_index, const Strings & columns);
template
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
pqxx::nontransaction & tx, const String & postgres_table, const String & postrges_schema,
bool use_nulls, bool with_primary_key, bool with_replica_identity_index);
bool use_nulls, bool with_primary_key, bool with_replica_identity_index, const Strings & columns);
std::set<String> fetchPostgreSQLTablesList(pqxx::work & tx, const String & postgres_schema);

View File

@ -48,7 +48,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
template<typename T>
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
T & tx, const String & postgres_table, const String & postgres_schema, bool use_nulls = true,
bool with_primary_key = false, bool with_replica_identity_index = false);
bool with_primary_key = false, bool with_replica_identity_index = false, const Strings & columns = {});
template<typename T>
std::set<String> fetchPostgreSQLTablesList(T & tx, const String & postgres_schema);

View File

@ -220,7 +220,7 @@ void registerDatabaseSQLite(DatabaseFactory & factory)
return std::make_shared<DatabaseSQLite>(args.context, engine_define, args.create_query.attach, database_path);
};
factory.registerDatabase("SQLite", create_fn);
factory.registerDatabase("SQLite", create_fn, {.supports_arguments = true});
}
}

View File

@ -350,6 +350,12 @@ public:
return delegate;
}
UInt32 getRefCount(const String & path) const override
{
auto wrapped_path = wrappedPath(path);
return delegate->getRefCount(wrapped_path);
}
#if USE_AWS_S3
std::shared_ptr<const S3::Client> getS3StorageClient() const override
{

View File

@ -28,6 +28,7 @@ extern const Event CachedReadBufferReadFromCacheMicroseconds;
extern const Event CachedReadBufferCacheWriteMicroseconds;
extern const Event CachedReadBufferReadFromSourceBytes;
extern const Event CachedReadBufferReadFromCacheBytes;
extern const Event CachedReadBufferPredownloadedBytes;
extern const Event CachedReadBufferCacheWriteBytes;
extern const Event CachedReadBufferCreateBufferMicroseconds;
@ -644,6 +645,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
size_t current_predownload_size = std::min(current_impl_buffer_size, bytes_to_predownload);
ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceBytes, current_impl_buffer_size);
ProfileEvents::increment(ProfileEvents::CachedReadBufferPredownloadedBytes, current_impl_buffer_size);
std::string failure_reason;
bool continue_predownload = file_segment.reserve(

View File

@ -244,6 +244,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string;
format_settings.orc.output_compression_method = settings.output_format_orc_compression_method;
format_settings.orc.output_row_index_stride = settings.output_format_orc_row_index_stride;
format_settings.orc.output_dictionary_key_size_threshold = settings.output_format_orc_dictionary_key_size_threshold;
format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder;
format_settings.orc.filter_push_down = settings.input_format_orc_filter_push_down;
format_settings.orc.reader_time_zone_name = settings.input_format_orc_reader_time_zone_name;

View File

@ -415,6 +415,7 @@ struct FormatSettings
bool filter_push_down = true;
UInt64 output_row_index_stride = 10'000;
String reader_time_zone_name = "GMT";
double output_dictionary_key_size_threshold = 0.0;
} orc{};
/// For capnProto format we should determine how to

View File

@ -27,7 +27,8 @@ class FunctionStringOrArrayToT : public IFunction
{
public:
static constexpr auto name = Name::name;
static FunctionPtr create(ContextPtr)
static FunctionPtr create(ContextPtr) { return createImpl(); }
static FunctionPtr createImpl()
{
return std::make_shared<FunctionStringOrArrayToT>();
}

View File

@ -1,4 +1,4 @@
#include <Functions/IFunction.h>
#include <Functions/array/arrayResize.h>
#include <Functions/FunctionFactory.h>
#include <Functions/GatherUtils/GatherUtils.h>
#include <DataTypes/DataTypeArray.h>
@ -21,117 +21,99 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
class FunctionArrayResize : public IFunction
DataTypePtr FunctionArrayResize::getReturnTypeImpl(const DataTypes & arguments) const
{
public:
static constexpr auto name = "arrayResize";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayResize>(); }
const size_t number_of_arguments = arguments.size();
String getName() const override { return name; }
if (number_of_arguments < 2 || number_of_arguments > 3)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be 2 or 3",
getName(), number_of_arguments);
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
if (arguments[0]->onlyNull())
return arguments[0];
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
const auto * array_type = typeid_cast<const DataTypeArray *>(arguments[0].get());
if (!array_type)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument for function {} must be an array but it has type {}.",
getName(), arguments[0]->getName());
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
if (WhichDataType(array_type->getNestedType()).isNothing())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} cannot resize {}", getName(), array_type->getName());
if (!isInteger(removeNullable(arguments[1])) && !arguments[1]->onlyNull())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Argument {} for function {} must be integer but it has type {}.",
toString(1), getName(), arguments[1]->getName());
if (number_of_arguments == 2)
return arguments[0];
else /* if (number_of_arguments == 3) */
return std::make_shared<DataTypeArray>(getLeastSupertype(DataTypes{array_type->getNestedType(), arguments[2]}));
}
ColumnPtr FunctionArrayResize::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type, size_t input_rows_count) const
{
if (return_type->onlyNull())
return return_type->createColumnConstWithDefaultValue(input_rows_count);
auto result_column = return_type->createColumn();
auto array_column = arguments[0].column;
auto size_column = arguments[1].column;
if (!arguments[0].type->equals(*return_type))
array_column = castColumn(arguments[0], return_type);
const DataTypePtr & return_nested_type = typeid_cast<const DataTypeArray &>(*return_type).getNestedType();
size_t size = array_column->size();
ColumnPtr appended_column;
if (arguments.size() == 3)
{
const size_t number_of_arguments = arguments.size();
appended_column = arguments[2].column;
if (!arguments[2].type->equals(*return_nested_type))
appended_column = castColumn(arguments[2], return_nested_type);
}
else
appended_column = return_nested_type->createColumnConstWithDefaultValue(size);
if (number_of_arguments < 2 || number_of_arguments > 3)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be 2 or 3",
getName(), number_of_arguments);
std::unique_ptr<GatherUtils::IArraySource> array_source;
std::unique_ptr<GatherUtils::IValueSource> value_source;
if (arguments[0]->onlyNull())
return arguments[0];
bool is_const = false;
const auto * array_type = typeid_cast<const DataTypeArray *>(arguments[0].get());
if (!array_type)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument for function {} must be an array but it has type {}.",
getName(), arguments[0]->getName());
if (WhichDataType(array_type->getNestedType()).isNothing())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} cannot resize {}", getName(), array_type->getName());
if (!isInteger(removeNullable(arguments[1])) && !arguments[1]->onlyNull())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Argument {} for function {} must be integer but it has type {}.",
toString(1), getName(), arguments[1]->getName());
if (number_of_arguments == 2)
return arguments[0];
else /* if (number_of_arguments == 3) */
return std::make_shared<DataTypeArray>(getLeastSupertype(DataTypes{array_type->getNestedType(), arguments[2]}));
if (const auto * const_array_column = typeid_cast<const ColumnConst *>(array_column.get()))
{
is_const = true;
array_column = const_array_column->getDataColumnPtr();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type, size_t input_rows_count) const override
if (const auto * argument_column_array = typeid_cast<const ColumnArray *>(array_column.get()))
array_source = GatherUtils::createArraySource(*argument_column_array, is_const, size);
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "First arguments for function {} must be array.", getName());
bool is_appended_const = false;
if (const auto * const_appended_column = typeid_cast<const ColumnConst *>(appended_column.get()))
{
if (return_type->onlyNull())
return return_type->createColumnConstWithDefaultValue(input_rows_count);
auto result_column = return_type->createColumn();
auto array_column = arguments[0].column;
auto size_column = arguments[1].column;
if (!arguments[0].type->equals(*return_type))
array_column = castColumn(arguments[0], return_type);
const DataTypePtr & return_nested_type = typeid_cast<const DataTypeArray &>(*return_type).getNestedType();
size_t size = array_column->size();
ColumnPtr appended_column;
if (arguments.size() == 3)
{
appended_column = arguments[2].column;
if (!arguments[2].type->equals(*return_nested_type))
appended_column = castColumn(arguments[2], return_nested_type);
}
else
appended_column = return_nested_type->createColumnConstWithDefaultValue(size);
std::unique_ptr<GatherUtils::IArraySource> array_source;
std::unique_ptr<GatherUtils::IValueSource> value_source;
bool is_const = false;
if (const auto * const_array_column = typeid_cast<const ColumnConst *>(array_column.get()))
{
is_const = true;
array_column = const_array_column->getDataColumnPtr();
}
if (const auto * argument_column_array = typeid_cast<const ColumnArray *>(array_column.get()))
array_source = GatherUtils::createArraySource(*argument_column_array, is_const, size);
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "First arguments for function {} must be array.", getName());
bool is_appended_const = false;
if (const auto * const_appended_column = typeid_cast<const ColumnConst *>(appended_column.get()))
{
is_appended_const = true;
appended_column = const_appended_column->getDataColumnPtr();
}
value_source = GatherUtils::createValueSource(*appended_column, is_appended_const, size);
auto sink = GatherUtils::createArraySink(typeid_cast<ColumnArray &>(*result_column), size);
if (isColumnConst(*size_column))
GatherUtils::resizeConstantSize(*array_source, *value_source, *sink, size_column->getInt(0));
else
GatherUtils::resizeDynamicSize(*array_source, *value_source, *sink, *size_column);
return result_column;
is_appended_const = true;
appended_column = const_appended_column->getDataColumnPtr();
}
bool useDefaultImplementationForConstants() const override { return true; }
bool useDefaultImplementationForNulls() const override { return false; }
};
value_source = GatherUtils::createValueSource(*appended_column, is_appended_const, size);
auto sink = GatherUtils::createArraySink(typeid_cast<ColumnArray &>(*result_column), size);
if (isColumnConst(*size_column))
GatherUtils::resizeConstantSize(*array_source, *value_source, *sink, size_column->getInt(0));
else
GatherUtils::resizeDynamicSize(*array_source, *value_source, *sink, *size_column);
return result_column;
}
REGISTER_FUNCTION(ArrayResize)
{

View File

@ -0,0 +1,28 @@
#pragma once
#include <Functions/IFunction.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
class FunctionArrayResize : public IFunction
{
public:
static constexpr auto name = "arrayResize";
static FunctionPtr createImpl() { return std::make_shared<FunctionArrayResize>(); }
static FunctionPtr create(ContextPtr) { return createImpl(); }
String getName() const override { return name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type, size_t input_rows_count) const override;
bool useDefaultImplementationForConstants() const override { return true; }
bool useDefaultImplementationForNulls() const override { return false; }
};
}

View File

@ -15,7 +15,6 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
extern const int ILLEGAL_COLUMN;
}
@ -38,13 +37,6 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.empty())
throw Exception(
ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
"Function {} needs at least one argument; passed {}.",
getName(),
arguments.size());
DataTypes arguments_types;
for (size_t index = 0; index < arguments.size(); ++index)
{
@ -68,9 +60,16 @@ public:
}
ColumnPtr
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
size_t num_arguments = arguments.size();
if (num_arguments == 0)
{
auto res_col = result_type->createColumn();
res_col->insertDefault();
return ColumnConst::create(std::move(res_col), input_rows_count);
}
Columns holders(num_arguments);
Columns tuple_columns(num_arguments);

View File

@ -1,4 +1,4 @@
#include <Functions/IFunction.h>
#include <Functions/array/emptyArrayToSingle.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypeArray.h>
@ -20,35 +20,6 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
/** emptyArrayToSingle(arr) - replace empty arrays with arrays of one element with a default value.
*/
class FunctionEmptyArrayToSingle : public IFunction
{
public:
static constexpr auto name = "emptyArrayToSingle";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionEmptyArrayToSingle>(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForConstants() const override { return true; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
if (!array_type)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument for function {} must be array.", getName());
return arguments[0];
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override;
};
namespace
{
namespace FunctionEmptyArrayToSingleImpl
@ -366,6 +337,14 @@ namespace
}
}
DataTypePtr FunctionEmptyArrayToSingle::getReturnTypeImpl(const DataTypes & arguments) const
{
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
if (!array_type)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument for function {} must be array.", getName());
return arguments[0];
}
ColumnPtr FunctionEmptyArrayToSingle::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
{

View File

@ -0,0 +1,29 @@
#pragma once
#include <Functions/IFunction.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
/** emptyArrayToSingle(arr) - replace empty arrays with arrays of one element with a default value.
*/
class FunctionEmptyArrayToSingle : public IFunction
{
public:
static constexpr auto name = "emptyArrayToSingle";
static FunctionPtr createImpl() { return std::make_shared<FunctionEmptyArrayToSingle>(); }
static FunctionPtr create(ContextPtr) { return createImpl(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForConstants() const override { return true; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override;
};
}

View File

@ -1,65 +1,7 @@
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringOrArrayToT.h>
#include <Functions/array/length.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
/** Calculates the length of a string in bytes.
*/
struct LengthImpl
{
static constexpr auto is_fixed_to_constant = true;
static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray<UInt64> & res, size_t input_rows_count)
{
for (size_t i = 0; i < input_rows_count; ++i)
res[i] = offsets[i] - 1 - offsets[i - 1];
}
static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t n, UInt64 & res, size_t)
{
res = n;
}
static void vectorFixedToVector(const ColumnString::Chars & /*data*/, size_t /*n*/, PaddedPODArray<UInt64> & /*res*/, size_t)
{
}
static void array(const ColumnString::Offsets & offsets, PaddedPODArray<UInt64> & res, size_t input_rows_count)
{
for (size_t i = 0; i < input_rows_count; ++i)
res[i] = offsets[i] - offsets[i - 1];
}
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt64> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to UUID argument");
}
[[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray<UInt64> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to IPv6 argument");
}
[[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray<UInt64> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to IPv4 argument");
}
};
struct NameLength
{
static constexpr auto name = "length";
};
using FunctionLength = FunctionStringOrArrayToT<LengthImpl, NameLength, UInt64, false>;
REGISTER_FUNCTION(Length)
{

View File

@ -0,0 +1,66 @@
#pragma once
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringOrArrayToT.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
/** Calculates the length of a string in bytes.
*/
struct LengthImpl
{
static constexpr auto is_fixed_to_constant = true;
static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray<UInt64> & res, size_t input_rows_count)
{
for (size_t i = 0; i < input_rows_count; ++i)
res[i] = offsets[i] - 1 - offsets[i - 1];
}
static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t n, UInt64 & res, size_t)
{
res = n;
}
static void vectorFixedToVector(const ColumnString::Chars & /*data*/, size_t /*n*/, PaddedPODArray<UInt64> & /*res*/, size_t)
{
}
static void array(const ColumnString::Offsets & offsets, PaddedPODArray<UInt64> & res, size_t input_rows_count)
{
for (size_t i = 0; i < input_rows_count; ++i)
res[i] = offsets[i] - offsets[i - 1];
}
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt64> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to UUID argument");
}
[[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray<UInt64> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to IPv6 argument");
}
[[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray<UInt64> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to IPv4 argument");
}
};
struct NameLength
{
static constexpr auto name = "length";
};
using FunctionLength = FunctionStringOrArrayToT<LengthImpl, NameLength, UInt64, false>;
}

View File

@ -2028,8 +2028,9 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
return {std::move(first_actions), std::move(second_actions), std::move(split_nodes_mapping)};
}
ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const
ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const Names & array_joined_columns) const
{
std::unordered_set<std::string_view> array_joined_columns_set(array_joined_columns.begin(), array_joined_columns.end());
struct Frame
{
const Node * node = nullptr;
@ -2072,7 +2073,7 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet &
if (cur.next_child_to_visit == cur.node->children.size())
{
bool depend_on_array_join = false;
if (cur.node->type == ActionType::INPUT && array_joined_columns.contains(cur.node->result_name))
if (cur.node->type == ActionType::INPUT && array_joined_columns_set.contains(cur.node->result_name))
depend_on_array_join = true;
for (const auto * child : cur.node->children)

View File

@ -340,7 +340,7 @@ public:
SplitResult split(std::unordered_set<const Node *> split_nodes, bool create_split_nodes_mapping = false, bool avoid_duplicate_inputs = false) const;
/// Splits actions into two parts. Returned first half may be swapped with ARRAY JOIN.
SplitResult splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const;
SplitResult splitActionsBeforeArrayJoin(const Names & array_joined_columns) const;
/// Splits actions into two parts. First part has minimal size sufficient for calculation of column_name.
/// Outputs of initial actions must contain column_name.

View File

@ -0,0 +1,13 @@
#pragma once
#include <Core/Names.h>
namespace DB
{
struct ArrayJoin
{
Names columns;
bool is_left = false;
};
}

View File

@ -6,6 +6,9 @@
#include <Columns/ColumnMap.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/array/length.h>
#include <Functions/array/arrayResize.h>
#include <Functions/array/emptyArrayToSingle.h>
#include <Interpreters/Context.h>
#include <Interpreters/ArrayJoinAction.h>
@ -59,26 +62,31 @@ ColumnWithTypeAndName convertArrayJoinColumn(const ColumnWithTypeAndName & src_c
return array_col;
}
ArrayJoinAction::ArrayJoinAction(const NameSet & array_joined_columns_, bool array_join_is_left, ContextPtr context)
: columns(array_joined_columns_)
, is_left(array_join_is_left)
, is_unaligned(context->getSettingsRef().enable_unaligned_array_join)
, max_block_size(context->getSettingsRef().max_block_size)
ArrayJoinAction::ArrayJoinAction(const Names & columns_, bool is_left_, bool is_unaligned_, size_t max_block_size_)
: columns(columns_.begin(), columns_.end())
, is_left(is_left_)
, is_unaligned(is_unaligned_)
, max_block_size(max_block_size_)
{
if (columns.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "No arrays to join");
if (is_unaligned)
{
function_length = FunctionFactory::instance().get("length", context);
function_greatest = FunctionFactory::instance().get("greatest", context);
function_array_resize = FunctionFactory::instance().get("arrayResize", context);
function_length = std::make_unique<FunctionToOverloadResolverAdaptor>(FunctionLength::createImpl());
function_array_resize = std::make_unique<FunctionToOverloadResolverAdaptor>(FunctionArrayResize::createImpl());
}
else if (is_left)
function_builder = FunctionFactory::instance().get("emptyArrayToSingle", context);
function_builder = std::make_unique<FunctionToOverloadResolverAdaptor>(FunctionEmptyArrayToSingle::createImpl());
}
void ArrayJoinAction::prepare(ColumnsWithTypeAndName & sample) const
void ArrayJoinAction::prepare(const Names & columns, ColumnsWithTypeAndName & sample)
{
NameSet columns_set(columns.begin(), columns.end());
prepare(columns_set, sample);
}
void ArrayJoinAction::prepare(const NameSet & columns, ColumnsWithTypeAndName & sample)
{
for (auto & current : sample)
{
@ -103,6 +111,35 @@ ArrayJoinResultIteratorPtr ArrayJoinAction::execute(Block block)
return std::make_unique<ArrayJoinResultIterator>(this, std::move(block));
}
static void updateMaxLength(ColumnUInt64 & max_length, UInt64 length)
{
for (auto & value : max_length.getData())
value = std::max(value, length);
}
static void updateMaxLength(ColumnUInt64 & max_length, const IColumn & length)
{
if (const auto * length_const = typeid_cast<const ColumnConst *>(&length))
{
updateMaxLength(max_length, length_const->getUInt(0));
return;
}
const auto * length_uint64 = typeid_cast<const ColumnUInt64 *>(&length);
if (!length_uint64)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected UInt64 for array length, got {}", length.getName());
auto & max_lenght_data = max_length.getData();
const auto & length_data = length_uint64->getData();
size_t num_rows = max_lenght_data.size();
if (num_rows != length_data.size())
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Different columns sizes in ARRAY JOIN: {} and {}", num_rows, length_data.size());
for (size_t row = 0; row < num_rows; ++row)
max_lenght_data[row] = std::max(max_lenght_data[row], length_data[row]);
}
ArrayJoinResultIterator::ArrayJoinResultIterator(const ArrayJoinAction * array_join_, Block block_)
: array_join(array_join_), block(std::move(block_)), total_rows(block.rows()), current_row(0)
@ -111,7 +148,6 @@ ArrayJoinResultIterator::ArrayJoinResultIterator(const ArrayJoinAction * array_j
bool is_unaligned = array_join->is_unaligned;
bool is_left = array_join->is_left;
const auto & function_length = array_join->function_length;
const auto & function_greatest = array_join->function_greatest;
const auto & function_array_resize = array_join->function_array_resize;
const auto & function_builder = array_join->function_builder;
@ -125,11 +161,7 @@ ArrayJoinResultIterator::ArrayJoinResultIterator(const ArrayJoinAction * array_j
/// Resize all array joined columns to the longest one, (at least 1 if LEFT ARRAY JOIN), padded with default values.
auto rows = block.rows();
auto uint64 = std::make_shared<DataTypeUInt64>();
ColumnWithTypeAndName column_of_max_length{{}, uint64, {}};
if (is_left)
column_of_max_length = ColumnWithTypeAndName(uint64->createColumnConst(rows, 1u), uint64, {});
else
column_of_max_length = ColumnWithTypeAndName(uint64->createColumnConst(rows, 0u), uint64, {});
auto max_length = ColumnUInt64::create(rows, (is_left ? 1u : 0u));
for (const auto & name : columns)
{
@ -138,11 +170,10 @@ ArrayJoinResultIterator::ArrayJoinResultIterator(const ArrayJoinAction * array_j
ColumnWithTypeAndName array_col = convertArrayJoinColumn(src_col);
ColumnsWithTypeAndName tmp_block{array_col}; //, {{}, uint64, {}}};
auto len_col = function_length->build(tmp_block)->execute(tmp_block, uint64, rows);
ColumnsWithTypeAndName tmp_block2{column_of_max_length, {len_col, uint64, {}}};
column_of_max_length.column = function_greatest->build(tmp_block2)->execute(tmp_block2, uint64, rows);
updateMaxLength(*max_length, *len_col);
}
ColumnWithTypeAndName column_of_max_length{std::move(max_length), uint64, {}};
for (const auto & name : columns)
{
auto & src_col = block.getByName(name);

View File

@ -33,14 +33,14 @@ public:
/// For unaligned [LEFT] ARRAY JOIN
FunctionOverloadResolverPtr function_length;
FunctionOverloadResolverPtr function_greatest;
FunctionOverloadResolverPtr function_array_resize;
/// For LEFT ARRAY JOIN.
FunctionOverloadResolverPtr function_builder;
ArrayJoinAction(const NameSet & array_joined_columns_, bool array_join_is_left, ContextPtr context);
void prepare(ColumnsWithTypeAndName & sample) const;
ArrayJoinAction(const Names & columns_, bool is_left_, bool is_unaligned_, size_t max_block_size_);
static void prepare(const NameSet & columns, ColumnsWithTypeAndName & sample);
static void prepare(const Names & columns, ColumnsWithTypeAndName & sample);
ArrayJoinResultIteratorPtr execute(Block block);
};

View File

@ -718,7 +718,12 @@ FileCache::getOrSet(
}
}
chassert(file_segments_limit ? file_segments.back()->range().left <= result_range.right : file_segments.back()->range().contains(result_range.right));
chassert(file_segments_limit
? file_segments.back()->range().left <= result_range.right
: file_segments.back()->range().contains(result_range.right),
fmt::format("Unexpected state. Back: {}, result range: {}, limit: {}",
file_segments.back()->range().toString(), result_range.toString(), file_segments_limit));
chassert(!file_segments_limit || file_segments.size() <= file_segments_limit);
return std::make_unique<FileSegmentsHolder>(std::move(file_segments));

View File

@ -532,7 +532,7 @@ void executeQueryWithParallelReplicas(
max_replicas_to_use = shard.getAllNodeCount();
}
auto coordinator = std::make_shared<ParallelReplicasReadingCoordinator>(max_replicas_to_use, settings.parallel_replicas_mark_segment_size);
auto coordinator = std::make_shared<ParallelReplicasReadingCoordinator>(max_replicas_to_use);
auto external_tables = new_context->getExternalTables();

View File

@ -1059,16 +1059,16 @@ std::string ExpressionActionsChain::dumpChain() const
return ss.str();
}
ExpressionActionsChain::ArrayJoinStep::ArrayJoinStep(ArrayJoinActionPtr array_join_, ColumnsWithTypeAndName required_columns_)
ExpressionActionsChain::ArrayJoinStep::ArrayJoinStep(const Names & array_join_columns_, ColumnsWithTypeAndName required_columns_)
: Step({})
, array_join(std::move(array_join_))
, array_join_columns(array_join_columns_.begin(), array_join_columns_.end())
, result_columns(std::move(required_columns_))
{
for (auto & column : result_columns)
{
required_columns.emplace_back(NameAndTypePair(column.name, column.type));
if (array_join->columns.contains(column.name))
if (array_join_columns.contains(column.name))
{
const auto & array = getArrayJoinDataType(column.type);
column.type = array->getNestedType();
@ -1085,12 +1085,12 @@ void ExpressionActionsChain::ArrayJoinStep::finalize(const NameSet & required_ou
for (const auto & column : result_columns)
{
if (array_join->columns.contains(column.name) || required_output_.contains(column.name))
if (array_join_columns.contains(column.name) || required_output_.contains(column.name))
new_result_columns.emplace_back(column);
}
for (const auto & column : required_columns)
{
if (array_join->columns.contains(column.name) || required_output_.contains(column.name))
if (array_join_columns.contains(column.name) || required_output_.contains(column.name))
new_required_columns.emplace_back(column);
}

View File

@ -3,6 +3,7 @@
#include <Core/Block.h>
#include <Core/ColumnNumbers.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ArrayJoin.h>
#include <Interpreters/ExpressionActionsSettings.h>
#include <variant>
@ -22,9 +23,6 @@ class TableJoin;
class IJoin;
using JoinPtr = std::shared_ptr<IJoin>;
class ArrayJoinAction;
using ArrayJoinActionPtr = std::shared_ptr<ArrayJoinAction>;
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
@ -223,11 +221,11 @@ struct ExpressionActionsChain : WithContext
struct ArrayJoinStep : public Step
{
ArrayJoinActionPtr array_join;
const NameSet array_join_columns;
NamesAndTypesList required_columns;
ColumnsWithTypeAndName result_columns;
ArrayJoinStep(ArrayJoinActionPtr array_join_, ColumnsWithTypeAndName required_columns_);
ArrayJoinStep(const Names & array_join_columns_, ColumnsWithTypeAndName required_columns_);
NamesAndTypesList getRequiredColumns() const override { return required_columns; }
ColumnsWithTypeAndName getResultColumns() const override { return result_columns; }

View File

@ -215,7 +215,7 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAG & acti
auto array_join = addMultipleArrayJoinAction(actions, is_array_join_left);
auto sample_columns = actions.getResultColumns();
array_join->prepare(sample_columns);
ArrayJoinAction::prepare(array_join.columns, sample_columns);
actions = ActionsDAG(sample_columns);
NamesAndTypesList new_columns_after_array_join;
@ -889,9 +889,11 @@ const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() cons
}
/// "Big" ARRAY JOIN.
ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAG & actions, bool array_join_is_left) const
ArrayJoin ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAG & actions, bool array_join_is_left) const
{
NameSet result_columns;
Names result_columns;
result_columns.reserve(syntax->array_join_result_to_source.size());
for (const auto & result_source : syntax->array_join_result_to_source)
{
/// Assign new names to columns, if needed.
@ -902,19 +904,19 @@ ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAG & a
}
/// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names.
result_columns.insert(result_source.first);
result_columns.push_back(result_source.first);
}
return std::make_shared<ArrayJoinAction>(result_columns, array_join_is_left, getContext());
return {std::move(result_columns), array_join_is_left};
}
ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, ActionsAndProjectInputsFlagPtr & before_array_join, bool only_types)
std::optional<ArrayJoin> SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, ActionsAndProjectInputsFlagPtr & before_array_join, bool only_types)
{
const auto * select_query = getSelectQuery();
auto [array_join_expression_list, is_array_join_left] = select_query->arrayJoinExpressionList();
if (!array_join_expression_list)
return nullptr;
return {};
ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns());
@ -923,7 +925,7 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi
auto array_join = addMultipleArrayJoinAction(step.actions()->dag, is_array_join_left);
before_array_join = chain.getLastActions();
chain.steps.push_back(std::make_unique<ExpressionActionsChain::ArrayJoinStep>(array_join, step.getResultColumns()));
chain.steps.push_back(std::make_unique<ExpressionActionsChain::ArrayJoinStep>(array_join.columns, step.getResultColumns()));
chain.addStep();

View File

@ -174,7 +174,7 @@ protected:
/// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
void initGlobalSubqueriesAndExternalTables(bool do_global, bool is_explain);
ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAG & actions, bool is_left) const;
ArrayJoin addMultipleArrayJoinAction(ActionsDAG & actions, bool is_left) const;
void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions, bool only_consts = false);
@ -234,7 +234,7 @@ struct ExpressionAnalysisResult
bool use_grouping_set_key = false;
ActionsAndProjectInputsFlagPtr before_array_join;
ArrayJoinActionPtr array_join;
std::optional<ArrayJoin> array_join;
ActionsAndProjectInputsFlagPtr before_join;
ActionsAndProjectInputsFlagPtr converting_join_columns;
JoinPtr join;
@ -388,7 +388,7 @@ private:
*/
/// Before aggregation:
ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsAndProjectInputsFlagPtr & before_array_join, bool only_types);
std::optional<ArrayJoin> appendArrayJoin(ExpressionActionsChain & chain, ActionsAndProjectInputsFlagPtr & before_array_join, bool only_types);
bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types);
JoinPtr appendJoin(ExpressionActionsChain & chain, ActionsAndProjectInputsFlagPtr & converting_join_columns);

View File

@ -380,100 +380,99 @@ BlockIO InterpreterDropQuery::executeToDatabase(const ASTDropQuery & query)
BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, DatabasePtr & database, std::vector<UUID> & uuids_to_wait)
{
if (query.kind != ASTDropQuery::Kind::Detach && query.kind != ASTDropQuery::Kind::Drop && query.kind != ASTDropQuery::Kind::Truncate)
return {};
const auto & database_name = query.getDatabase();
auto ddl_guard = DatabaseCatalog::instance().getDDLGuard(database_name, "");
database = tryGetDatabase(database_name, query.if_exists);
if (database)
if (!database)
return {};
bool drop = query.kind == ASTDropQuery::Kind::Drop;
bool truncate = query.kind == ASTDropQuery::Kind::Truncate;
getContext()->checkAccess(AccessType::DROP_DATABASE, database_name);
if (query.kind == ASTDropQuery::Kind::Detach && query.permanently)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DETACH PERMANENTLY is not implemented for databases");
if (query.if_empty)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP IF EMPTY is not implemented for databases");
if (!truncate && database->hasReplicationThread())
database->stopReplication();
if (database->shouldBeEmptyOnDetach())
{
if (query.kind == ASTDropQuery::Kind::Detach || query.kind == ASTDropQuery::Kind::Drop
|| query.kind == ASTDropQuery::Kind::Truncate)
/// Cancel restarting replicas in that database, wait for remaining RESTART queries to finish.
/// So it will not startup tables concurrently with the flushAndPrepareForShutdown call below.
auto restart_replica_lock = DatabaseCatalog::instance().getLockForDropDatabase(database_name);
ASTDropQuery query_for_table;
query_for_table.kind = query.kind;
// For truncate operation on database, drop the tables
if (truncate)
query_for_table.kind = query.has_all_tables ? ASTDropQuery::Kind::Truncate : ASTDropQuery::Kind::Drop;
query_for_table.if_exists = true;
query_for_table.if_empty = false;
query_for_table.setDatabase(database_name);
query_for_table.sync = query.sync;
/// Flush should not be done if shouldBeEmptyOnDetach() == false,
/// since in this case getTablesIterator() may do some additional work,
/// see DatabaseMaterializedMySQL::getTablesIterator()
auto table_context = Context::createCopy(getContext());
table_context->setInternalQuery(true);
/// Do not hold extra shared pointers to tables
std::vector<std::pair<StorageID, bool>> tables_to_drop;
// NOTE: This means we wait for all tables to be loaded inside getTablesIterator() call in case of `async_load_databases = true`.
for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next())
{
bool drop = query.kind == ASTDropQuery::Kind::Drop;
bool truncate = query.kind == ASTDropQuery::Kind::Truncate;
auto table_ptr = iterator->table();
tables_to_drop.push_back({table_ptr->getStorageID(), table_ptr->isDictionary()});
}
getContext()->checkAccess(AccessType::DROP_DATABASE, database_name);
if (query.kind == ASTDropQuery::Kind::Detach && query.permanently)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DETACH PERMANENTLY is not implemented for databases");
if (query.if_empty)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP IF EMPTY is not implemented for databases");
if (!truncate && database->hasReplicationThread())
database->stopReplication();
if (database->shouldBeEmptyOnDetach())
/// Prepare tables for shutdown in parallel.
ThreadPoolCallbackRunnerLocal<void> runner(getDatabaseCatalogDropTablesThreadPool().get(), "DropTables");
for (const auto & [name, _] : tables_to_drop)
{
auto table_ptr = DatabaseCatalog::instance().getTable(name, table_context);
runner([my_table_ptr = std::move(table_ptr)]()
{
/// Cancel restarting replicas in that database, wait for remaining RESTART queries to finish.
/// So it will not startup tables concurrently with the flushAndPrepareForShutdown call below.
auto restart_replica_lock = DatabaseCatalog::instance().getLockForDropDatabase(database_name);
my_table_ptr->flushAndPrepareForShutdown();
});
}
runner.waitForAllToFinishAndRethrowFirstError();
ASTDropQuery query_for_table;
query_for_table.kind = query.kind;
// For truncate operation on database, drop the tables
if (truncate)
query_for_table.kind = query.has_all_tables ? ASTDropQuery::Kind::Truncate : ASTDropQuery::Kind::Drop;
query_for_table.if_exists = true;
query_for_table.if_empty = false;
query_for_table.setDatabase(database_name);
query_for_table.sync = query.sync;
/// Flush should not be done if shouldBeEmptyOnDetach() == false,
/// since in this case getTablesIterator() may do some additional work,
/// see DatabaseMaterializedMySQL::getTablesIterator()
auto table_context = Context::createCopy(getContext());
table_context->setInternalQuery(true);
/// Do not hold extra shared pointers to tables
std::vector<std::pair<StorageID, bool>> tables_to_drop;
// NOTE: This means we wait for all tables to be loaded inside getTablesIterator() call in case of `async_load_databases = true`.
for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next())
{
auto table_ptr = iterator->table();
tables_to_drop.push_back({table_ptr->getStorageID(), table_ptr->isDictionary()});
}
/// Prepare tables for shutdown in parallel.
ThreadPoolCallbackRunnerLocal<void> runner(getDatabaseCatalogDropTablesThreadPool().get(), "DropTables");
for (const auto & [name, _] : tables_to_drop)
{
auto table_ptr = DatabaseCatalog::instance().getTable(name, table_context);
runner([my_table_ptr = std::move(table_ptr)]()
{
my_table_ptr->flushAndPrepareForShutdown();
});
}
runner.waitForAllToFinishAndRethrowFirstError();
for (const auto & table : tables_to_drop)
{
query_for_table.setTable(table.first.getTableName());
query_for_table.is_dictionary = table.second;
DatabasePtr db;
UUID table_to_wait = UUIDHelpers::Nil;
executeToTableImpl(table_context, query_for_table, db, table_to_wait);
uuids_to_wait.push_back(table_to_wait);
}
}
// only if operation is DETACH
if ((!drop || !truncate) && query.sync)
{
/// Avoid "some tables are still in use" when sync mode is enabled
for (const auto & table_uuid : uuids_to_wait)
database->waitDetachedTableNotInUse(table_uuid);
}
/// Protects from concurrent CREATE TABLE queries
auto db_guard = DatabaseCatalog::instance().getExclusiveDDLGuardForDatabase(database_name);
// only if operation is DETACH
if (!drop || !truncate)
database->assertCanBeDetached(true);
/// DETACH or DROP database itself. If TRUNCATE skip dropping/erasing the database.
if (!truncate)
DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach());
for (const auto & table : tables_to_drop)
{
query_for_table.setTable(table.first.getTableName());
query_for_table.is_dictionary = table.second;
DatabasePtr db;
UUID table_to_wait = UUIDHelpers::Nil;
executeToTableImpl(table_context, query_for_table, db, table_to_wait);
uuids_to_wait.push_back(table_to_wait);
}
}
// only if operation is DETACH
if ((!drop || !truncate) && query.sync)
{
/// Avoid "some tables are still in use" when sync mode is enabled
for (const auto & table_uuid : uuids_to_wait)
database->waitDetachedTableNotInUse(table_uuid);
}
/// Protects from concurrent CREATE TABLE queries
auto db_guard = DatabaseCatalog::instance().getExclusiveDDLGuardForDatabase(database_name);
// only if operation is DETACH
if (!drop || !truncate)
database->assertCanBeDetached(true);
/// DETACH or DROP database itself. If TRUNCATE skip dropping/erasing the database.
if (!truncate)
DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach());
return {};
}

View File

@ -86,6 +86,7 @@
#include <Core/Settings.h>
#include <Core/ServerSettings.h>
#include <Interpreters/Aggregator.h>
#include <Interpreters/ArrayJoinAction.h>
#include <Interpreters/HashTablesStatistics.h>
#include <Interpreters/IJoin.h>
#include <QueryPipeline/SizeLimits.h>
@ -1676,7 +1677,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
if (expressions.array_join)
{
QueryPlanStepPtr array_join_step
= std::make_unique<ArrayJoinStep>(query_plan.getCurrentDataStream(), expressions.array_join);
= std::make_unique<ArrayJoinStep>(
query_plan.getCurrentDataStream(),
*expressions.array_join,
settings.enable_unaligned_array_join,
settings.max_block_size);
array_join_step->setStepDescription("ARRAY JOIN");
query_plan.addStep(std::move(array_join_step));

View File

@ -1,10 +1,42 @@
#pragma once
#include <vector>
#include <Common/KnownObjectNames.h>
#include <Core/QualifiedTableName.h>
#include <base/defines.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB
{
class AbstractFunction
{
friend class FunctionSecretArgumentsFinder;
public:
class Argument
{
public:
virtual ~Argument() = default;
virtual std::unique_ptr<AbstractFunction> getFunction() const = 0;
virtual bool isIdentifier() const = 0;
virtual bool tryGetString(String * res, bool allow_identifier) const = 0;
};
class Arguments
{
public:
virtual ~Arguments() = default;
virtual size_t size() const = 0;
virtual std::unique_ptr<Argument> at(size_t n) const = 0;
};
virtual ~AbstractFunction() = default;
virtual String name() const = 0;
bool hasArguments() const { return !!arguments; }
protected:
std::unique_ptr<Arguments> arguments;
};
class FunctionSecretArgumentsFinder
{
public:
@ -23,6 +55,485 @@ public:
return count != 0 || !nested_maps.empty();
}
};
explicit FunctionSecretArgumentsFinder(std::unique_ptr<AbstractFunction> && function_) : function(std::move(function_)) {}
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
protected:
const std::unique_ptr<AbstractFunction> function;
Result result;
void markSecretArgument(size_t index, bool argument_is_named = false)
{
if (index >= function->arguments->size())
return;
if (!result.count)
{
result.start = index;
result.are_named = argument_is_named;
}
chassert(index >= result.start); /// We always check arguments consecutively
result.count = index + 1 - result.start;
if (!argument_is_named)
result.are_named = false;
}
void findOrdinaryFunctionSecretArguments()
{
if ((function->name() == "mysql") || (function->name() == "postgresql") || (function->name() == "mongodb"))
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((function->name() == "s3") || (function->name() == "cosn") || (function->name() == "oss") ||
(function->name() == "deltaLake") || (function->name() == "hudi") || (function->name() == "iceberg") ||
(function->name() == "gcs"))
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function->name() == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if (function->name() == "azureBlobStorage")
{
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function->name() == "azureBlobStorageCluster")
{
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((function->name() == "remote") || (function->name() == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
findRemoteFunctionSecretArguments();
}
else if ((function->name() == "encrypt") || (function->name() == "decrypt") ||
(function->name() == "aes_encrypt_mysql") || (function->name() == "aes_decrypt_mysql") ||
(function->name() == "tryDecrypt"))
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
findEncryptionFunctionSecretArguments();
}
else if (function->name() == "url")
{
findURLSecretArguments();
}
}
void findMySQLFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// mysql(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
markSecretArgument(4);
}
}
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
/// always be at the end). Marks "headers" as secret, if found.
size_t excludeS3OrURLNestedMaps()
{
size_t count = function->arguments->size();
while (count > 0)
{
const auto f = function->arguments->at(count - 1)->getFunction();
if (!f)
break;
if (f->name() == "headers")
result.nested_maps.push_back(f->name());
else if (f->name() != "extra_credentials")
break;
count -= 1;
}
return count;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
{
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 1);
return;
}
else if (is_cluster_function && isNamedCollectionName(1))
{
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 2);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
size_t count = function->arguments->size();
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
if (url_arg_idx + 4 < count)
markSecretArgument(url_arg_idx + 4);
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
excludeS3OrURLNestedMaps();
}
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
if (arg_idx >= function->arguments->size())
return false;
return tryGetStringFromArgument(*function->arguments->at(arg_idx), res, allow_identifier);
}
static bool tryGetStringFromArgument(const AbstractFunction::Argument & argument, String * res, bool allow_identifier = true)
{
return argument.tryGetString(res, allow_identifier);
}
void findRemoteFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// remote(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
return;
}
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in case of
/// remote('addresses_expr', db.table)
if (function->arguments->size() < 3)
return;
size_t arg_num = 1;
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
auto table_function = function->arguments->at(arg_num)->getFunction();
if (table_function && KnownTableFunctionNames::instance().exists(table_function->name()))
{
++arg_num;
}
else
{
std::optional<String> database;
std::optional<QualifiedTableName> qualified_table_name;
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
{
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `user`.
markSecretArgument(arg_num + 2);
}
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `sharding_key`.
markSecretArgument(arg_num + 3);
}
return;
}
/// Skip the current argument (which is either a database name or a qualified table name).
++arg_num;
if (database)
{
/// Skip the 'table' argument if the previous argument was a database name.
++arg_num;
}
}
/// Skip username.
++arg_num;
/// Do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
if (can_be_password)
markSecretArgument(arg_num);
}
/// Tries to get either a database name or a qualified table name from an argument.
/// Empty string is also allowed (it means the default database).
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
bool tryGetDatabaseNameOrQualifiedTableName(
size_t arg_idx,
std::optional<String> & res_database,
std::optional<QualifiedTableName> & res_qualified_table_name) const
{
res_database.reset();
res_qualified_table_name.reset();
String str;
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
return false;
if (str.empty())
{
res_database = "";
return true;
}
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
if (!qualified_table_name)
return false;
if (qualified_table_name->database.empty())
res_database = std::move(qualified_table_name->table);
else
res_qualified_table_name = std::move(qualified_table_name);
return true;
}
void findEncryptionFunctionSecretArguments()
{
if (function->arguments->size() == 0)
return;
/// We replace all arguments after 'mode' with '[HIDDEN]':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
result.start = 1;
result.count = function->arguments->size() - 1;
}
void findTableEngineSecretArguments()
{
const String & engine_name = function->name();
if (engine_name == "ExternalDistributed")
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
findExternalDistributedTableEngineSecretArguments();
}
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
{
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
{
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
findS3TableEngineSecretArguments();
}
else if (engine_name == "URL")
{
findURLSecretArguments();
}
}
void findExternalDistributedTableEngineSecretArguments()
{
if (isNamedCollectionName(1))
{
/// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 2);
}
else
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
markSecretArgument(5);
}
}
void findS3TableEngineSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((3 <= count) && (count <= 4))
{
String second_arg;
if (tryGetStringFromArgument(1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (count == 3)
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
}
}
}
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (2 < count)
markSecretArgument(2);
}
void findDatabaseEngineSecretArguments()
{
const String & engine_name = function->name();
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL"))
{
/// MySQL('host:port', 'database', 'user', 'password')
/// PostgreSQL('host:port', 'database', 'user', 'password')
findMySQLDatabaseSecretArguments();
}
else if (engine_name == "S3")
{
/// S3('url', 'access_key_id', 'secret_access_key')
findS3DatabaseSecretArguments();
}
}
void findMySQLDatabaseSecretArguments()
{
if (isNamedCollectionName(0))
{
/// MySQL(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// MySQL('host:port', 'database', 'user', 'password')
markSecretArgument(3);
}
}
void findS3DatabaseSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'password', ...)
findSecretNamedArgument("secret_access_key", 1);
}
else
{
/// S3('url', 'access_key_id', 'secret_access_key')
markSecretArgument(2);
}
}
void findBackupNameSecretArguments()
{
const String & engine_name = function->name();
if (engine_name == "S3")
{
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
markSecretArgument(2);
}
}
/// Whether a specified argument can be the name of a named collection?
bool isNamedCollectionName(size_t arg_idx) const
{
if (function->arguments->size() <= arg_idx)
return false;
return function->arguments->at(arg_idx)->isIdentifier();
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < function->arguments->size(); ++i)
{
const auto & argument = function->arguments->at(i);
const auto equals_func = argument->getFunction();
if (!equals_func || (equals_func->name() != "equals"))
continue;
if (!equals_func->arguments || equals_func->arguments->size() != 2)
continue;
String found_key;
if (!tryGetStringFromArgument(*equals_func->arguments->at(0), &found_key))
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
}
}
};
}

View File

@ -1,35 +1,97 @@
#pragma once
#include <Parsers/FunctionSecretArgumentsFinder.h>
#include <Core/QualifiedTableName.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <Common/KnownObjectNames.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB
{
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderAST
class FunctionAST : public AbstractFunction
{
public:
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_) : function(function_)
class ArgumentAST : public Argument
{
if (!function.arguments)
public:
explicit ArgumentAST(const IAST * argument_) : argument(argument_) {}
std::unique_ptr<AbstractFunction> getFunction() const override
{
if (const auto * f = argument->as<ASTFunction>())
return std::make_unique<FunctionAST>(*f);
return nullptr;
}
bool isIdentifier() const override { return argument->as<ASTIdentifier>(); }
bool tryGetString(String * res, bool allow_identifier) const override
{
if (const auto * literal = argument->as<ASTLiteral>())
{
if (literal->value.getType() != Field::Types::String)
return false;
if (res)
*res = literal->value.safeGet<String>();
return true;
}
if (allow_identifier)
{
if (const auto * id = argument->as<ASTIdentifier>())
{
if (res)
*res = id->name();
return true;
}
}
return false;
}
private:
const IAST * argument = nullptr;
};
class ArgumentsAST : public Arguments
{
public:
explicit ArgumentsAST(const ASTs * arguments_) : arguments(arguments_) {}
size_t size() const override { return arguments ? arguments->size() : 0; }
std::unique_ptr<Argument> at(size_t n) const override
{
return std::make_unique<ArgumentAST>(arguments->at(n).get());
}
private:
const ASTs * arguments = nullptr;
};
explicit FunctionAST(const ASTFunction & function_) : function(&function_)
{
if (!function->arguments)
return;
const auto * expr_list = function.arguments->as<ASTExpressionList>();
const auto * expr_list = function->arguments->as<ASTExpressionList>();
if (!expr_list)
return;
arguments = &expr_list->children;
switch (function.kind)
arguments = std::make_unique<ArgumentsAST>(&expr_list->children);
}
String name() const override { return function->name; }
private:
const ASTFunction * function = nullptr;
};
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderAST : public FunctionSecretArgumentsFinder
{
public:
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_)
: FunctionSecretArgumentsFinder(std::make_unique<FunctionAST>(function_))
{
if (!function->hasArguments())
return;
switch (function_.kind)
{
case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break;
case ASTFunction::Kind::WINDOW_FUNCTION: break;
@ -43,507 +105,7 @@ public:
}
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
private:
const ASTFunction & function;
const ASTs * arguments = nullptr;
FunctionSecretArgumentsFinder::Result result;
void markSecretArgument(size_t index, bool argument_is_named = false)
{
if (index >= arguments->size())
return;
if (!result.count)
{
result.start = index;
result.are_named = argument_is_named;
}
chassert(index >= result.start); /// We always check arguments consecutively
result.count = index + 1 - result.start;
if (!argument_is_named)
result.are_named = false;
}
void findOrdinaryFunctionSecretArguments()
{
if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb"))
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") ||
(function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg") ||
(function.name == "gcs"))
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function.name == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if (function.name == "azureBlobStorage")
{
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function.name == "azureBlobStorageCluster")
{
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((function.name == "remote") || (function.name == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
findRemoteFunctionSecretArguments();
}
else if ((function.name == "encrypt") || (function.name == "decrypt") ||
(function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") ||
(function.name == "tryDecrypt"))
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
findEncryptionFunctionSecretArguments();
}
else if (function.name == "url")
{
findURLSecretArguments();
}
}
void findMySQLFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// mysql(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
markSecretArgument(4);
}
}
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
/// always be at the end). Marks "headers" as secret, if found.
size_t excludeS3OrURLNestedMaps()
{
size_t count = arguments->size();
while (count > 0)
{
const ASTFunction * f = arguments->at(count - 1)->as<ASTFunction>();
if (!f)
break;
if (f->name == "headers")
result.nested_maps.push_back(f->name);
else if (f->name != "extra_credentials")
break;
count -= 1;
}
return count;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
{
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 1);
return;
}
else if (is_cluster_function && isNamedCollectionName(1))
{
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 2);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
size_t count = arguments->size();
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
if (url_arg_idx + 4 < count)
markSecretArgument(url_arg_idx + 4);
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
excludeS3OrURLNestedMaps();
}
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
if (arg_idx >= arguments->size())
return false;
return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier);
}
static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true)
{
if (const auto * literal = argument.as<ASTLiteral>())
{
if (literal->value.getType() != Field::Types::String)
return false;
if (res)
*res = literal->value.safeGet<String>();
return true;
}
if (allow_identifier)
{
if (const auto * id = argument.as<ASTIdentifier>())
{
if (res)
*res = id->name();
return true;
}
}
return false;
}
void findRemoteFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// remote(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
return;
}
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in case of
/// remote('addresses_expr', db.table)
if (arguments->size() < 3)
return;
size_t arg_num = 1;
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
const auto * table_function = (*arguments)[arg_num]->as<ASTFunction>();
if (table_function && KnownTableFunctionNames::instance().exists(table_function->name))
{
++arg_num;
}
else
{
std::optional<String> database;
std::optional<QualifiedTableName> qualified_table_name;
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
{
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `user`.
markSecretArgument(arg_num + 2);
}
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `sharding_key`.
markSecretArgument(arg_num + 3);
}
return;
}
/// Skip the current argument (which is either a database name or a qualified table name).
++arg_num;
if (database)
{
/// Skip the 'table' argument if the previous argument was a database name.
++arg_num;
}
}
/// Skip username.
++arg_num;
/// Do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
if (can_be_password)
markSecretArgument(arg_num);
}
/// Tries to get either a database name or a qualified table name from an argument.
/// Empty string is also allowed (it means the default database).
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
bool tryGetDatabaseNameOrQualifiedTableName(
size_t arg_idx,
std::optional<String> & res_database,
std::optional<QualifiedTableName> & res_qualified_table_name) const
{
res_database.reset();
res_qualified_table_name.reset();
String str;
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
return false;
if (str.empty())
{
res_database = "";
return true;
}
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
if (!qualified_table_name)
return false;
if (qualified_table_name->database.empty())
res_database = std::move(qualified_table_name->table);
else
res_qualified_table_name = std::move(qualified_table_name);
return true;
}
void findEncryptionFunctionSecretArguments()
{
if (arguments->empty())
return;
/// We replace all arguments after 'mode' with '[HIDDEN]':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
result.start = 1;
result.count = arguments->size() - 1;
}
void findTableEngineSecretArguments()
{
const String & engine_name = function.name;
if (engine_name == "ExternalDistributed")
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
findExternalDistributedTableEngineSecretArguments();
}
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
{
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
{
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
findS3TableEngineSecretArguments();
}
else if (engine_name == "URL")
{
findURLSecretArguments();
}
}
void findExternalDistributedTableEngineSecretArguments()
{
if (isNamedCollectionName(1))
{
/// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 2);
}
else
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
markSecretArgument(5);
}
}
void findS3TableEngineSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((3 <= count) && (count <= 4))
{
String second_arg;
if (tryGetStringFromArgument(1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (count == 3)
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
}
}
}
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (2 < count)
markSecretArgument(2);
}
void findDatabaseEngineSecretArguments()
{
const String & engine_name = function.name;
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL"))
{
/// MySQL('host:port', 'database', 'user', 'password')
/// PostgreSQL('host:port', 'database', 'user', 'password')
findMySQLDatabaseSecretArguments();
}
else if (engine_name == "S3")
{
/// S3('url', 'access_key_id', 'secret_access_key')
findS3DatabaseSecretArguments();
}
}
void findMySQLDatabaseSecretArguments()
{
if (isNamedCollectionName(0))
{
/// MySQL(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// MySQL('host:port', 'database', 'user', 'password')
markSecretArgument(3);
}
}
void findS3DatabaseSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'password', ...)
findSecretNamedArgument("secret_access_key", 1);
}
else
{
/// S3('url', 'access_key_id', 'secret_access_key')
markSecretArgument(2);
}
}
void findBackupNameSecretArguments()
{
const String & engine_name = function.name;
if (engine_name == "S3")
{
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
markSecretArgument(2);
}
}
/// Whether a specified argument can be the name of a named collection?
bool isNamedCollectionName(size_t arg_idx) const
{
if (arguments->size() <= arg_idx)
return false;
const auto * identifier = (*arguments)[arg_idx]->as<ASTIdentifier>();
return identifier != nullptr;
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < arguments->size(); ++i)
{
const auto & argument = (*arguments)[i];
const auto * equals_func = argument->as<ASTFunction>();
if (!equals_func || (equals_func->name != "equals"))
continue;
const auto * expr_list = equals_func->arguments->as<ASTExpressionList>();
if (!expr_list)
continue;
const auto & equal_args = expr_list->children;
if (equal_args.size() != 2)
continue;
String found_key;
if (!tryGetStringFromArgument(*equal_args[0], &found_key))
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
}
}
};
}

View File

@ -1674,11 +1674,12 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_
PlannerActionsVisitor actions_visitor(planner_context);
std::unordered_set<std::string> array_join_expressions_output_nodes;
NameSet array_join_column_names;
Names array_join_column_names;
array_join_column_names.reserve(array_join_node.getJoinExpressions().getNodes().size());
for (auto & array_join_expression : array_join_node.getJoinExpressions().getNodes())
{
const auto & array_join_column_identifier = planner_context->getColumnNodeIdentifierOrThrow(array_join_expression);
array_join_column_names.insert(array_join_column_identifier);
array_join_column_names.push_back(array_join_column_identifier);
auto & array_join_expression_column = array_join_expression->as<ColumnNode &>();
auto expression_dag_index_nodes = actions_visitor.visit(array_join_action_dag, array_join_expression_column.getExpressionOrThrow());
@ -1727,8 +1728,13 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_
drop_unused_columns_before_array_join_transform_step->setStepDescription("DROP unused columns before ARRAY JOIN");
plan.addStep(std::move(drop_unused_columns_before_array_join_transform_step));
auto array_join_action = std::make_shared<ArrayJoinAction>(array_join_column_names, array_join_node.isLeft(), planner_context->getQueryContext());
auto array_join_step = std::make_unique<ArrayJoinStep>(plan.getCurrentDataStream(), std::move(array_join_action));
const auto & settings = planner_context->getQueryContext()->getSettingsRef();
auto array_join_step = std::make_unique<ArrayJoinStep>(
plan.getCurrentDataStream(),
ArrayJoin{std::move(array_join_column_names), array_join_node.isLeft()},
settings.enable_unaligned_array_join,
settings.max_block_size);
array_join_step->setStepDescription("ARRAY JOIN");
plan.addStep(std::move(array_join_step));

Some files were not shown because too many files have changed in this diff Show More